Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.9.4.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 *   Some of the source code in this file came from fs/cifs/cifs_unicode.c
  4 *
  5 *   Copyright (c) International Business Machines  Corp., 2000,2009
  6 *   Modified by Steve French (sfrench@us.ibm.com)
  7 *   Modified by Namjae Jeon (linkinjeon@kernel.org)
  8 */
  9#include <linux/fs.h>
 10#include <linux/slab.h>
 11#include <asm/unaligned.h>
 12#include "glob.h"
 13#include "unicode.h"
 14#include "uniupr.h"
 15#include "smb_common.h"
 16
 17/*
 18 * smb_utf16_bytes() - how long will a string be after conversion?
 19 * @from:	pointer to input string
 20 * @maxbytes:	don't go past this many bytes of input string
 21 * @codepage:	destination codepage
 22 *
 23 * Walk a utf16le string and return the number of bytes that the string will
 24 * be after being converted to the given charset, not including any null
 25 * termination required. Don't walk past maxbytes in the source buffer.
 26 *
 27 * Return:	string length after conversion
 28 */
 29static int smb_utf16_bytes(const __le16 *from, int maxbytes,
 30			   const struct nls_table *codepage)
 31{
 32	int i;
 33	int charlen, outlen = 0;
 34	int maxwords = maxbytes / 2;
 35	char tmp[NLS_MAX_CHARSET_SIZE];
 36	__u16 ftmp;
 37
 38	for (i = 0; i < maxwords; i++) {
 39		ftmp = get_unaligned_le16(&from[i]);
 40		if (ftmp == 0)
 41			break;
 42
 43		charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
 44		if (charlen > 0)
 45			outlen += charlen;
 46		else
 47			outlen++;
 48	}
 49
 50	return outlen;
 51}
 52
 53/*
 54 * cifs_mapchar() - convert a host-endian char to proper char in codepage
 55 * @target:	where converted character should be copied
 56 * @src_char:	2 byte host-endian source character
 57 * @cp:		codepage to which character should be converted
 58 * @mapchar:	should character be mapped according to mapchars mount option?
 59 *
 60 * This function handles the conversion of a single character. It is the
 61 * responsibility of the caller to ensure that the target buffer is large
 62 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 63 *
 64 * Return:	string length after conversion
 65 */
 66static int
 67cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 68	     bool mapchar)
 69{
 70	int len = 1;
 71
 72	if (!mapchar)
 73		goto cp_convert;
 74
 75	/*
 76	 * BB: Cannot handle remapping UNI_SLASH until all the calls to
 77	 *     build_path_from_dentry are modified, as they use slash as
 78	 *     separator.
 79	 */
 80	switch (src_char) {
 81	case UNI_COLON:
 82		*target = ':';
 83		break;
 84	case UNI_ASTERISK:
 85		*target = '*';
 86		break;
 87	case UNI_QUESTION:
 88		*target = '?';
 89		break;
 90	case UNI_PIPE:
 91		*target = '|';
 92		break;
 93	case UNI_GRTRTHAN:
 94		*target = '>';
 95		break;
 96	case UNI_LESSTHAN:
 97		*target = '<';
 98		break;
 99	default:
100		goto cp_convert;
101	}
102
103out:
104	return len;
105
106cp_convert:
107	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
108	if (len <= 0) {
109		*target = '?';
110		len = 1;
111	}
112
113	goto out;
114}
115
116/*
117 * is_char_allowed() - check for valid character
118 * @ch:		input character to be checked
119 *
120 * Return:	1 if char is allowed, otherwise 0
121 */
122static inline int is_char_allowed(char *ch)
123{
124	/* check for control chars, wildcards etc. */
125	if (!(*ch & 0x80) &&
126	    (*ch <= 0x1f ||
127	     *ch == '?' || *ch == '"' || *ch == '<' ||
128	     *ch == '>' || *ch == '|'))
129		return 0;
130
131	return 1;
132}
133
134/*
135 * smb_from_utf16() - convert utf16le string to local charset
136 * @to:		destination buffer
137 * @from:	source buffer
138 * @tolen:	destination buffer size (in bytes)
139 * @fromlen:	source buffer size (in bytes)
140 * @codepage:	codepage to which characters should be converted
141 * @mapchar:	should characters be remapped according to the mapchars option?
142 *
143 * Convert a little-endian utf16le string (as sent by the server) to a string
144 * in the provided codepage. The tolen and fromlen parameters are to ensure
145 * that the code doesn't walk off of the end of the buffer (which is always
146 * a danger if the alignment of the source buffer is off). The destination
147 * string is always properly null terminated and fits in the destination
148 * buffer. Returns the length of the destination string in bytes (including
149 * null terminator).
150 *
151 * Note that some windows versions actually send multiword UTF-16 characters
152 * instead of straight UTF16-2. The linux nls routines however aren't able to
153 * deal with those characters properly. In the event that we get some of
154 * those characters, they won't be translated properly.
155 *
156 * Return:	string length after conversion
157 */
158static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
159			  const struct nls_table *codepage, bool mapchar)
160{
161	int i, charlen, safelen;
162	int outlen = 0;
163	int nullsize = nls_nullsize(codepage);
164	int fromwords = fromlen / 2;
165	char tmp[NLS_MAX_CHARSET_SIZE];
166	__u16 ftmp;
167
168	/*
169	 * because the chars can be of varying widths, we need to take care
170	 * not to overflow the destination buffer when we get close to the
171	 * end of it. Until we get to this offset, we don't need to check
172	 * for overflow however.
173	 */
174	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
175
176	for (i = 0; i < fromwords; i++) {
177		ftmp = get_unaligned_le16(&from[i]);
178		if (ftmp == 0)
179			break;
180
181		/*
182		 * check to see if converting this character might make the
183		 * conversion bleed into the null terminator
184		 */
185		if (outlen >= safelen) {
186			charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
187			if ((outlen + charlen) > (tolen - nullsize))
188				break;
189		}
190
191		/* put converted char into 'to' buffer */
192		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
193		outlen += charlen;
194	}
195
196	/* properly null-terminate string */
197	for (i = 0; i < nullsize; i++)
198		to[outlen++] = 0;
199
200	return outlen;
201}
202
203/*
204 * smb_strtoUTF16() - Convert character string to unicode string
205 * @to:		destination buffer
206 * @from:	source buffer
207 * @len:	destination buffer size (in bytes)
208 * @codepage:	codepage to which characters should be converted
209 *
210 * Return:	string length after conversion
211 */
212int smb_strtoUTF16(__le16 *to, const char *from, int len,
213		   const struct nls_table *codepage)
214{
215	int charlen;
216	int i;
217	wchar_t wchar_to; /* needed to quiet sparse */
218
219	/* special case for utf8 to handle no plane0 chars */
220	if (!strcmp(codepage->charset, "utf8")) {
221		/*
222		 * convert utf8 -> utf16, we assume we have enough space
223		 * as caller should have assumed conversion does not overflow
224		 * in destination len is length in wchar_t units (16bits)
225		 */
226		i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
227				     (wchar_t *)to, len);
228
229		/* if success terminate and exit */
230		if (i >= 0)
231			goto success;
232		/*
233		 * if fails fall back to UCS encoding as this
234		 * function should not return negative values
235		 * currently can fail only if source contains
236		 * invalid encoded characters
237		 */
238	}
239
240	for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
241		charlen = codepage->char2uni(from, len, &wchar_to);
242		if (charlen < 1) {
243			/* A question mark */
244			wchar_to = 0x003f;
245			charlen = 1;
246		}
247		put_unaligned_le16(wchar_to, &to[i]);
248	}
249
250success:
251	put_unaligned_le16(0, &to[i]);
252	return i;
253}
254
255/*
256 * smb_strndup_from_utf16() - copy a string from wire format to the local
257 *		codepage
258 * @src:	source string
259 * @maxlen:	don't walk past this many bytes in the source string
260 * @is_unicode:	is this a unicode string?
261 * @codepage:	destination codepage
262 *
263 * Take a string given by the server, convert it to the local codepage and
264 * put it in a new buffer. Returns a pointer to the new string or NULL on
265 * error.
266 *
267 * Return:	destination string buffer or error ptr
268 */
269char *smb_strndup_from_utf16(const char *src, const int maxlen,
270			     const bool is_unicode,
271			     const struct nls_table *codepage)
272{
273	int len, ret;
274	char *dst;
275
276	if (is_unicode) {
277		len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
278		len += nls_nullsize(codepage);
279		dst = kmalloc(len, GFP_KERNEL);
280		if (!dst)
281			return ERR_PTR(-ENOMEM);
282		ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
283				     false);
284		if (ret < 0) {
285			kfree(dst);
286			return ERR_PTR(-EINVAL);
287		}
288	} else {
289		len = strnlen(src, maxlen);
290		len++;
291		dst = kmalloc(len, GFP_KERNEL);
292		if (!dst)
293			return ERR_PTR(-ENOMEM);
294		strscpy(dst, src, len);
295	}
296
297	return dst;
298}
299
300/*
301 * Convert 16 bit Unicode pathname to wire format from string in current code
302 * page. Conversion may involve remapping up the six characters that are
303 * only legal in POSIX-like OS (if they are present in the string). Path
304 * names are little endian 16 bit Unicode on the wire
305 */
306/*
307 * smbConvertToUTF16() - convert string from local charset to utf16
308 * @target:	destination buffer
309 * @source:	source buffer
310 * @srclen:	source buffer size (in bytes)
311 * @cp:		codepage to which characters should be converted
312 * @mapchar:	should characters be remapped according to the mapchars option?
313 *
314 * Convert 16 bit Unicode pathname to wire format from string in current code
315 * page. Conversion may involve remapping up the six characters that are
316 * only legal in POSIX-like OS (if they are present in the string). Path
317 * names are little endian 16 bit Unicode on the wire
318 *
319 * Return:	char length after conversion
320 */
321int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
322		      const struct nls_table *cp, int mapchars)
323{
324	int i, j, charlen;
325	char src_char;
326	__le16 dst_char;
327	wchar_t tmp;
328
329	if (!mapchars)
330		return smb_strtoUTF16(target, source, srclen, cp);
331
332	for (i = 0, j = 0; i < srclen; j++) {
333		src_char = source[i];
334		charlen = 1;
335		switch (src_char) {
336		case 0:
337			put_unaligned(0, &target[j]);
338			return j;
339		case ':':
340			dst_char = cpu_to_le16(UNI_COLON);
341			break;
342		case '*':
343			dst_char = cpu_to_le16(UNI_ASTERISK);
344			break;
345		case '?':
346			dst_char = cpu_to_le16(UNI_QUESTION);
347			break;
348		case '<':
349			dst_char = cpu_to_le16(UNI_LESSTHAN);
350			break;
351		case '>':
352			dst_char = cpu_to_le16(UNI_GRTRTHAN);
353			break;
354		case '|':
355			dst_char = cpu_to_le16(UNI_PIPE);
356			break;
357		/*
358		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
359		 * until all the calls to build_path_from_dentry are modified,
360		 * as they use backslash as separator.
361		 */
362		default:
363			charlen = cp->char2uni(source + i, srclen - i, &tmp);
364			dst_char = cpu_to_le16(tmp);
365
366			/*
367			 * if no match, use question mark, which at least in
368			 * some cases serves as wild card
369			 */
370			if (charlen < 1) {
371				dst_char = cpu_to_le16(0x003f);
372				charlen = 1;
373			}
374		}
375		/*
376		 * character may take more than one byte in the source string,
377		 * but will take exactly two bytes in the target string
378		 */
379		i += charlen;
380		put_unaligned(dst_char, &target[j]);
381	}
382
383	return j;
384}