Line | Source | Count |
1 | | - |
2 | | - |
3 | | - |
4 | | - |
5 | | - |
6 | | - |
7 | | - |
8 | | - |
9 | | - |
10 | | - |
11 | | - |
12 | | - |
13 | | - |
14 | | - |
15 | | - |
16 | | - |
17 | | - |
18 | #include <config.h> | - |
19 | | - |
20 | | - |
21 | #include <wchar.h> | - |
22 | | - |
23 | #if C_LOCALE_MAYBE_EILSEQ | - |
24 | # include "hard-locale.h" | - |
25 | # include <locale.h> | - |
26 | #endif | - |
27 | | - |
28 | #if GNULIB_defined_mbstate_t | - |
29 | | - |
30 | | - |
31 | # include <errno.h> | - |
32 | # include <stdlib.h> | - |
33 | | - |
34 | # include "localcharset.h" | - |
35 | # include "streq.h" | - |
36 | # include "verify.h" | - |
37 | | - |
/* FALLTHROUGH is written as a statement between two switch cases to mark
   an intentional fall-through.  Unless it is already defined, it expands
   to a no-op expression when __GNUC__ is below 7 (this also covers
   compilers that do not define __GNUC__, since an undefined identifier
   evaluates to 0 in #if), and to the fallthrough attribute otherwise.  */
38 | #ifndef FALLTHROUGH | - |
39 | # if __GNUC__ < 7 | - |
40 | # define FALLTHROUGH ((void) 0) | - |
41 | # else | - |
42 | # define FALLTHROUGH __attribute__ ((__fallthrough__)) | - |
43 | # endif | - |
44 | #endif | - |
45 | | - |
/* mbrtowc below accesses an mbstate_t object as an array of 4 chars, so
   the type must be at least that large.  (verify comes from "verify.h";
   presumably a compile-time assertion -- confirm against that header.)  */
46 | verify (sizeof (mbstate_t) >= 4); | - |
47 | | - |
/* Conversion state used by mbrtowc when the caller passes ps == NULL.
   Same layout as the caller-supplied state: byte 0 is the number of
   buffered bytes, bytes 1..3 are the buffered bytes.  Being static, it
   starts out all zero, i.e. with no buffered bytes.  */
48 | static char internal_state[4]; | - |
49 | | - |
/* Implementation of mbrtowc on top of mbtowc, used when
   GNULIB_defined_mbstate_t is set.

   Converts the multibyte character that starts with the bytes buffered
   in the conversion state (if any) followed by at most N bytes at S.
   The wide character is stored through PWC by mbtowc when PWC is not
   NULL.

   The conversion state *PS is accessed as an array of 4 chars:
     byte 0      number of buffered bytes (0..3),
     bytes 1..3  the buffered bytes of an incomplete character.
   When PS is NULL the file-static internal_state is used instead.

   Return value:
     - on success, the result of mbtowc minus the number of bytes that
       were already buffered, i.e. the number of bytes consumed from S
       (0 when mbtowc reports the null character); the state is reset;
     - (size_t)(-2) when N is 0 or when the available bytes form only
       an incomplete character; in the latter case the bytes taken from
       S are appended to the state;
     - (size_t)(-1) with errno == EILSEQ for an invalid sequence, or
       with errno == EINVAL when the count byte of the state is not in
       0..3.

   Calls abort () when the result of mbtowc is inconsistent with the
   stored wide character or with the number of buffered bytes.  */
50 | size_t | - |
51 | mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) | - |
52 | { | - |
53 | char *pstate = (char *)ps; | - |
54 | | - |
/* A null S is handled as the call mbrtowc (NULL, "", 1, ps).  */
55 | if (s == NULL) | - |
56 | { | - |
57 | pwc = NULL; | - |
58 | s = ""; | - |
59 | n = 1; | - |
60 | } | - |
61 | | - |
/* No input bytes at all: report an incomplete character.  */
62 | if (n == 0) | - |
63 | return (size_t)(-2); | - |
64 | | - |
/* From here on n > 0.  */
65 | | - |
66 | | - |
67 | if (pstate == NULL) | - |
68 | pstate = internal_state; | - |
69 | | - |
70 | { | - |
/* nstate: number of bytes buffered by earlier calls.
   buf:    scratch copy of the buffered bytes plus new bytes from s.
   p, m:   start and length of the byte sequence handed to mbtowc.  */
71 | size_t nstate = pstate[0]; | - |
72 | char buf[4]; | - |
73 | const char *p; | - |
74 | size_t m; | - |
75 | | - |
76 | switch (nstate) | - |
77 | { | - |
/* Nothing buffered: convert directly from the caller's bytes.  */
78 | case 0: | - |
79 | p = s; | - |
80 | m = n; | - |
81 | break; | - |
/* 1..3 bytes buffered: the cases fall through so that exactly nstate
   bytes are copied from the state into buf[0..nstate-1].  */
82 | case 3: | - |
83 | buf[2] = pstate[3]; | - |
84 | FALLTHROUGH; | - |
85 | case 2: | - |
86 | buf[1] = pstate[2]; | - |
87 | FALLTHROUGH; | - |
88 | case 1: | - |
89 | buf[0] = pstate[1]; | - |
90 | p = buf; | - |
91 | m = nstate; | - |
/* Append up to three bytes from s, limited both by n and by the
   4-byte capacity of buf.  */
92 | buf[m++] = s[0]; | - |
93 | if (n >= 2 && m < 4) | - |
94 | { | - |
95 | buf[m++] = s[1]; | - |
96 | if (n >= 3 && m < 4) | - |
97 | buf[m++] = s[2]; | - |
98 | } | - |
99 | break; | - |
/* The count byte is outside 0..3: not a state produced by this function.  */
100 | default: | - |
101 | errno = EINVAL; | - |
102 | return (size_t)(-1); | - |
103 | } | - |
104 | | - |
/* From here on m > 0.  */
105 | | - |
106 | | - |
107 | # if __GLIBC__ || defined __UCLIBC__ | - |
/* On glibc and uClibc, first call mbtowc with a null string; per ISO C
   that call resets the internal shift state of mbtowc.
   NOTE(review): presumably a workaround for a libc defect -- the reason
   is not visible in this listing.  */
108 | | - |
109 | mbtowc (NULL, NULL, 0); | - |
110 | # endif | - |
111 | { | - |
112 | int res = mbtowc (pwc, p, m); | - |
113 | | - |
114 | if (res >= 0) | - |
115 | { | - |
/* Sanity check: mbtowc must return 0 exactly when it stored the null
   wide character.  */
116 | if (pwc != NULL && ((*pwc == 0) != (res == 0))) | - |
117 | abort (); | - |
/* Sanity check: the character must extend beyond the buffered bytes,
   otherwise an earlier call would already have completed it.  */
118 | if (nstate >= (res > 0 ? res : 1)) | - |
119 | abort (); | - |
/* Report only the bytes consumed from s in this call and return to
   the state with no buffered bytes.  */
120 | res -= nstate; | - |
121 | pstate[0] = 0; | - |
122 | return res; | - |
123 | } | - |
124 | | - |
/* mbtowc failed.  Its return value does not tell an invalid sequence
   from an incomplete one, so the code below decides between the two
   from the bytes themselves, using per-encoding rules keyed on the
   name returned by locale_charset ().  */
125 | | - |
126 | | - |
127 | | - |
128 | | - |
129 | | - |
130 | | - |
131 | | - |
132 | | - |
133 | | - |
134 | | - |
135 | | - |
136 | | - |
/* With 4 or more bytes, or at least MB_CUR_MAX bytes, available the
   failure is never treated as "incomplete".  */
137 | if (m >= 4 || m >= MB_CUR_MAX) | - |
138 | goto invalid; | - |
139 | | - |
/* From here on MB_CUR_MAX > 1 and 0 < m < 4.  */
140 | { | - |
141 | const char *encoding = locale_charset (); | - |
142 | | - |
143 | if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) | - |
144 | { | - |
145 | | - |
146 | unsigned char c = (unsigned char) p[0]; | - |
147 | | - |
/* Lead bytes below 0xc2 or above 0xf4 never start a multi-byte
   sequence that is accepted here.  */
148 | if (c >= 0xc2) | - |
149 | { | - |
/* 0xc2..0xdf: lead byte of a 2-byte sequence; incomplete only when it
   is the sole byte.  */
150 | if (c < 0xe0) | - |
151 | { | - |
152 | if (m == 1) | - |
153 | goto incomplete; | - |
154 | } | - |
/* 0xe0..0xef: lead byte of a 3-byte sequence.  */
155 | else if (c < 0xf0) | - |
156 | { | - |
157 | if (m == 1) | - |
158 | goto incomplete; | - |
159 | if (m == 2) | - |
160 | { | - |
161 | unsigned char c2 = (unsigned char) p[1]; | - |
162 | | - |
/* c2 must be a continuation byte (0x80..0xbf); after 0xe0 it must be
   at least 0xa0, and after 0xed it must be below 0xa0.  (In UTF-8 terms
   these exclude overlong forms and surrogate code points.)  */
163 | if ((c2 ^ 0x80) < 0x40 | - |
164 | && (c >= 0xe1 || c2 >= 0xa0) | - |
165 | && (c != 0xed || c2 < 0xa0)) | - |
166 | goto incomplete; | - |
167 | } | - |
168 | } | - |
/* 0xf0..0xf4: lead byte of a 4-byte sequence.  */
169 | else if (c <= 0xf4) | - |
170 | { | - |
171 | if (m == 1) | - |
172 | goto incomplete; | - |
173 | else | - |
174 | { | - |
175 | unsigned char c2 = (unsigned char) p[1]; | - |
176 | | - |
/* c2 must be a continuation byte; after 0xf0 it must be at least 0x90,
   and after 0xf4 it must be below 0x90.  (In UTF-8 terms these exclude
   overlong forms and code points above U+10FFFF.)  */
177 | if ((c2 ^ 0x80) < 0x40 | - |
178 | && (c >= 0xf1 || c2 >= 0x90) | - |
179 | && (c < 0xf4 || (c == 0xf4 && c2 < 0x90))) | - |
180 | { | - |
181 | if (m == 2) | - |
182 | goto incomplete; | - |
183 | else | - |
184 | { | - |
/* m == 3 here: the third byte must be a continuation byte as well.  */
185 | unsigned char c3 = (unsigned char) p[2]; | - |
186 | | - |
187 | if ((c3 ^ 0x80) < 0x40) | - |
188 | goto incomplete; | - |
189 | } | - |
190 | } | - |
191 | } | - |
192 | } | - |
193 | } | - |
194 | goto invalid; | - |
195 | } | - |
196 | | - |
/* The remaining encodings are told apart by name; each branch accepts
   as "incomplete" only prefixes that match its byte ranges.  */
197 | | - |
198 | | - |
199 | | - |
200 | if (STREQ_OPT (encoding, | - |
201 | "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0)) | - |
202 | { | - |
/* One byte: 0xa1..0xfe, 0x8e or 0x8f may start a longer character.  */
203 | if (m == 1) | - |
204 | { | - |
205 | unsigned char c = (unsigned char) p[0]; | - |
206 | | - |
207 | if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f) | - |
208 | goto incomplete; | - |
209 | } | - |
/* Two bytes: only 0x8f followed by 0xa1..0xfe still awaits a third.  */
210 | if (m == 2) | - |
211 | { | - |
212 | unsigned char c = (unsigned char) p[0]; | - |
213 | | - |
214 | if (c == 0x8f) | - |
215 | { | - |
216 | unsigned char c2 = (unsigned char) p[1]; | - |
217 | | - |
218 | if (c2 >= 0xa1 && c2 < 0xff) | - |
219 | goto incomplete; | - |
220 | } | - |
221 | } | - |
222 | goto invalid; | - |
223 | } | - |
/* EUC-KR, GB2312 and BIG5 share one rule: a single byte in 0xa1..0xfe
   is incomplete, anything else is invalid.  */
224 | if (STREQ_OPT (encoding, | - |
225 | "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) | - |
226 | || STREQ_OPT (encoding, | - |
227 | "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) | - |
228 | || STREQ_OPT (encoding, | - |
229 | "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) | - |
230 | { | - |
231 | if (m == 1) | - |
232 | { | - |
233 | unsigned char c = (unsigned char) p[0]; | - |
234 | | - |
235 | if (c >= 0xa1 && c < 0xff) | - |
236 | goto incomplete; | - |
237 | } | - |
238 | goto invalid; | - |
239 | } | - |
240 | if (STREQ_OPT (encoding, | - |
241 | "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0)) | - |
242 | { | - |
/* One byte: 0xa1..0xfe or 0x8e may start a longer character.  With 2
   or 3 bytes, only a sequence starting with 0x8e is still incomplete.  */
243 | if (m == 1) | - |
244 | { | - |
245 | unsigned char c = (unsigned char) p[0]; | - |
246 | | - |
247 | if ((c >= 0xa1 && c < 0xff) || c == 0x8e) | - |
248 | goto incomplete; | - |
249 | } | - |
250 | else | - |
251 | { | - |
252 | unsigned char c = (unsigned char) p[0]; | - |
253 | | - |
254 | if (c == 0x8e) | - |
255 | goto incomplete; | - |
256 | } | - |
257 | goto invalid; | - |
258 | } | - |
259 | if (STREQ_OPT (encoding, | - |
260 | "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) | - |
261 | { | - |
/* One byte: 0x90..0xe3 or 0xf8..0xfe may start a longer character.  */
262 | if (m == 1) | - |
263 | { | - |
264 | unsigned char c = (unsigned char) p[0]; | - |
265 | | - |
266 | if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe)) | - |
267 | goto incomplete; | - |
268 | } | - |
/* Two or three bytes: only a lead byte in 0x90..0xe3 followed by a
   byte in 0x30..0x39 (and then, if present, a byte in 0x81..0xfe) is
   accepted as the prefix of a 4-byte character.  */
269 | else | - |
270 | { | - |
271 | unsigned char c = (unsigned char) p[0]; | - |
272 | | - |
273 | if (c >= 0x90 && c <= 0xe3) | - |
274 | { | - |
275 | unsigned char c2 = (unsigned char) p[1]; | - |
276 | | - |
277 | if (c2 >= 0x30 && c2 <= 0x39) | - |
278 | { | - |
279 | if (m == 2) | - |
280 | goto incomplete; | - |
281 | else | - |
282 | { | - |
283 | unsigned char c3 = (unsigned char) p[2]; | - |
284 | | - |
285 | if (c3 >= 0x81 && c3 <= 0xfe) | - |
286 | goto incomplete; | - |
287 | } | - |
288 | } | - |
289 | } | - |
290 | } | - |
291 | goto invalid; | - |
292 | } | - |
293 | if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0)) | - |
294 | { | - |
/* One byte: 0x81..0x9f, 0xe0..0xea or 0xf0..0xf9 may start a 2-byte
   character.  */
295 | if (m == 1) | - |
296 | { | - |
297 | unsigned char c = (unsigned char) p[0]; | - |
298 | | - |
299 | if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea) | - |
300 | || (c >= 0xf0 && c <= 0xf9)) | - |
301 | goto incomplete; | - |
302 | } | - |
303 | goto invalid; | - |
304 | } | - |
305 | | - |
/* Encoding not listed above: the sequence is reported as incomplete.  */
306 | | - |
307 | goto incomplete; | - |
308 | } | - |
309 | | - |
310 | incomplete: | - |
311 | { | - |
/* Append the bytes taken from s in this call after the nstate bytes
   already buffered, so that pstate[1..m] holds all m bytes seen so far.
   Here 0 <= k < m < 4, hence at most three bytes are copied.  */
312 | size_t k = nstate; | - |
313 | | - |
314 | pstate[++k] = s[0]; | - |
315 | if (k < m) | - |
316 | { | - |
317 | pstate[++k] = s[1]; | - |
318 | if (k < m) | - |
319 | pstate[++k] = s[2]; | - |
320 | } | - |
/* Sanity check: exactly m bytes must now be buffered.  */
321 | if (k != m) | - |
322 | abort (); | - |
323 | } | - |
/* Record the new number of buffered bytes.  */
324 | pstate[0] = m; | - |
325 | return (size_t)(-2); | - |
326 | | - |
327 | invalid: | - |
/* The state is left as it was; only errno is set.  */
328 | errno = EILSEQ; | - |
329 | | - |
330 | return (size_t)(-1); | - |
331 | } | - |
332 | } | - |
333 | } | - |
334 | | - |
335 | #else | - |
336 | | - |
337 | | - |
338 | # undef mbrtowc | - |
339 | | - |
/* Wrapper around the mbrtowc that is visible after the preceding
   "# undef mbrtowc" (presumably the system function -- the macro that
   maps mbrtowc to this replacement is not visible in this listing).

   Each of the following configuration macros enables one workaround:
     MBRTOWC_NULL_ARG2_BUG, MBRTOWC_RETVAL_BUG, MBRTOWC_EMPTY_INPUT_BUG
         a null S is handled here as the call (NULL, "", 1, ps);
     MBRTOWC_EMPTY_INPUT_BUG
         N == 0 returns (size_t) -2 without calling the wrapped function;
     MBRTOWC_RETVAL_BUG
         when the state is not initial, the input is fed one byte at a
         time and the bytes are counted here;
     MBRTOWC_NUL_RETVAL_BUG
         a successful conversion to the null wide character returns 0;
     C_LOCALE_MAYBE_EILSEQ
         a failure while hard_locale (LC_CTYPE) is false is turned into
         a successful one-byte conversion.

   Returns what the wrapped mbrtowc returns unless one of the
   workarounds above applies.  */
340 | size_t | - |
341 | rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) | - |
342 | { | - |
343 | size_t ret; | - |
344 | wchar_t wc; | - |
345 | | - |
346 | # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG | - |
347 | if (s == NULL) | - |
348 | { | - |
349 | pwc = NULL; | - |
350 | s = ""; | - |
351 | n = 1; | - |
352 | } | - |
353 | # endif | - |
354 | | - |
355 | # if MBRTOWC_EMPTY_INPUT_BUG | - |
356 | if (n == 0) | - |
357 | return (size_t) -2; | - |
358 | # endif | - |
359 | | - |
/* Point pwc at a local so that the code below can always read and
   write *pwc.  */
360 | if (! pwc)TRUE | never evaluated | FALSE | never evaluated |
| 0 |
361 | pwc = &wc; never executed: pwc = &wc; | 0 |
362 | | - |
363 | # if MBRTOWC_RETVAL_BUG | - |
364 | { | - |
/* State used for this workaround when the caller passes ps == NULL.  */
365 | static mbstate_t internal_state; | - |
366 | | - |
367 | | - |
368 | | - |
369 | if (ps == NULL) | - |
370 | ps = &internal_state; | - |
371 | | - |
372 | if (!mbsinit (ps)) | - |
373 | { | - |
374 | | - |
/* The state holds part of a character.  Feed one byte per call and
   count the bytes consumed here instead of using the return value of
   the wrapped function.  */
375 | size_t count = 0; | - |
376 | for (; n > 0; s++, n--) | - |
377 | { | - |
378 | ret = mbrtowc (&wc, s, 1, ps); | - |
379 | | - |
380 | if (ret == (size_t)(-1)) | - |
381 | return (size_t)(-1); | - |
382 | count++; | - |
383 | if (ret != (size_t)(-2)) | - |
384 | { | - |
385 | | - |
/* The character is complete: store it and return the number of bytes
   consumed, or 0 for the null wide character.  */
386 | *pwc = wc; | - |
387 | return (wc == 0 ? 0 : count); | - |
388 | } | - |
389 | } | - |
/* All n bytes were consumed without completing a character.  */
390 | return (size_t)(-2); | - |
391 | } | - |
392 | } | - |
393 | # endif | - |
394 | | - |
395 | ret = mbrtowc (pwc, s, n, ps); | - |
396 | | - |
397 | # if MBRTOWC_NUL_RETVAL_BUG | - |
/* ret below (size_t) -2 means success; report the null wide character
   with a return value of 0.  */
398 | if (ret < (size_t) -2 && !*pwc) | - |
399 | return 0; | - |
400 | # endif | - |
401 | | - |
402 | # if C_LOCALE_MAYBE_EILSEQ | - |
/* The wrapped call returned (size_t) -1 or (size_t) -2 although input
   was supplied.  When hard_locale (LC_CTYPE) is false, treat the first
   byte as a complete character whose value is the byte read as unsigned
   char.  NOTE(review): hard_locale comes from "hard-locale.h";
   presumably it is false for the C/POSIX locale -- confirm.  */
403 | if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))TRUE | never evaluated | FALSE | never evaluated |
TRUE | never evaluated | FALSE | never evaluated |
TRUE | never evaluated | FALSE | never evaluated |
| 0 |
404 | { | - |
405 | unsigned char uc = *s; | - |
406 | *pwc = uc; | - |
407 | return 1; never executed: return 1; | 0 |
408 | } | - |
409 | # endif | - |
410 | | - |
411 | return ret; never executed: return ret; | 0 |
412 | } | - |
413 | | - |
414 | #endif | - |
| | |