OpenCoverage

mbrtowc.c

Absolute File Name:/home/opencoverage/opencoverage/guest-scripts/coreutils/src/gnulib/lib/mbrtowc.c
Source codeSwitch to Preprocessed file
LineSourceCount
1/* Convert multibyte character to wide character.-
2 Copyright (C) 1999-2002, 2005-2018 Free Software Foundation, Inc.-
3 Written by Bruno Haible <bruno@clisp.org>, 2008.-
4-
5 This program is free software: you can redistribute it and/or modify-
6 it under the terms of the GNU General Public License as published by-
7 the Free Software Foundation; either version 3 of the License, or-
8 (at your option) any later version.-
9-
10 This program is distributed in the hope that it will be useful,-
11 but WITHOUT ANY WARRANTY; without even the implied warranty of-
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the-
13 GNU General Public License for more details.-
14-
15 You should have received a copy of the GNU General Public License-
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */-
17-
18#include <config.h>-
19-
20/* Specification. */-
21#include <wchar.h>-
22-
23#if C_LOCALE_MAYBE_EILSEQ-
24# include "hard-locale.h"-
25# include <locale.h>-
26#endif-
27-
28#if GNULIB_defined_mbstate_t-
29/* Implement mbrtowc() on top of mbtowc(). */-
30-
31# include <errno.h>-
32# include <stdlib.h>-
33-
34# include "localcharset.h"-
35# include "streq.h"-
36# include "verify.h"-
37-
38#ifndef FALLTHROUGH-
39# if __GNUC__ < 7-
40# define FALLTHROUGH ((void) 0)-
41# else-
42# define FALLTHROUGH __attribute__ ((__fallthrough__))-
43# endif-
44#endif-
45-
46verify (sizeof (mbstate_t) >= 4);-
47-
48static char internal_state[4];-
49-
50size_t-
51mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)-
52{-
53 char *pstate = (char *)ps;-
54-
55 if (s == NULL)-
56 {-
57 pwc = NULL;-
58 s = "";-
59 n = 1;-
60 }-
61-
62 if (n == 0)-
63 return (size_t)(-2);-
64-
65 /* Here n > 0. */-
66-
67 if (pstate == NULL)-
68 pstate = internal_state;-
69-
70 {-
71 size_t nstate = pstate[0];-
72 char buf[4];-
73 const char *p;-
74 size_t m;-
75-
76 switch (nstate)-
77 {-
78 case 0:-
79 p = s;-
80 m = n;-
81 break;-
82 case 3:-
83 buf[2] = pstate[3];-
84 FALLTHROUGH;-
85 case 2:-
86 buf[1] = pstate[2];-
87 FALLTHROUGH;-
88 case 1:-
89 buf[0] = pstate[1];-
90 p = buf;-
91 m = nstate;-
92 buf[m++] = s[0];-
93 if (n >= 2 && m < 4)-
94 {-
95 buf[m++] = s[1];-
96 if (n >= 3 && m < 4)-
97 buf[m++] = s[2];-
98 }-
99 break;-
100 default:-
101 errno = EINVAL;-
102 return (size_t)(-1);-
103 }-
104-
105 /* Here m > 0. */-
106-
107# if __GLIBC__ || defined __UCLIBC__-
108 /* Work around bug <https://sourceware.org/bugzilla/show_bug.cgi?id=9674> */-
109 mbtowc (NULL, NULL, 0);-
110# endif-
111 {-
112 int res = mbtowc (pwc, p, m);-
113-
114 if (res >= 0)-
115 {-
116 if (pwc != NULL && ((*pwc == 0) != (res == 0)))-
117 abort ();-
118 if (nstate >= (res > 0 ? res : 1))-
119 abort ();-
120 res -= nstate;-
121 pstate[0] = 0;-
122 return res;-
123 }-
124-
125 /* mbtowc does not distinguish between invalid and incomplete multibyte-
126 sequences. But mbrtowc needs to make this distinction.-
127 There are two possible approaches:-
128 - Use iconv() and its return value.-
129 - Use built-in knowledge about the possible encodings.-
130 Given the low quality of implementation of iconv() on the systems that-
131 lack mbrtowc(), we use the second approach.-
132 The possible encodings are:-
133 - 8-bit encodings,-
134 - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,-
135 - UTF-8.-
136 Use specialized code for each. */-
137 if (m >= 4 || m >= MB_CUR_MAX)-
138 goto invalid;-
139 /* Here MB_CUR_MAX > 1 and 0 < m < 4. */-
140 {-
141 const char *encoding = locale_charset ();-
142-
143 if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))-
144 {-
145 /* Cf. unistr/u8-mblen.c. */-
146 unsigned char c = (unsigned char) p[0];-
147-
148 if (c >= 0xc2)-
149 {-
150 if (c < 0xe0)-
151 {-
152 if (m == 1)-
153 goto incomplete;-
154 }-
155 else if (c < 0xf0)-
156 {-
157 if (m == 1)-
158 goto incomplete;-
159 if (m == 2)-
160 {-
161 unsigned char c2 = (unsigned char) p[1];-
162-
163 if ((c2 ^ 0x80) < 0x40-
164 && (c >= 0xe1 || c2 >= 0xa0)-
165 && (c != 0xed || c2 < 0xa0))-
166 goto incomplete;-
167 }-
168 }-
169 else if (c <= 0xf4)-
170 {-
171 if (m == 1)-
172 goto incomplete;-
173 else /* m == 2 || m == 3 */-
174 {-
175 unsigned char c2 = (unsigned char) p[1];-
176-
177 if ((c2 ^ 0x80) < 0x40-
178 && (c >= 0xf1 || c2 >= 0x90)-
179 && (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))-
180 {-
181 if (m == 2)-
182 goto incomplete;-
183 else /* m == 3 */-
184 {-
185 unsigned char c3 = (unsigned char) p[2];-
186-
187 if ((c3 ^ 0x80) < 0x40)-
188 goto incomplete;-
189 }-
190 }-
191 }-
192 }-
193 }-
194 goto invalid;-
195 }-
196-
197 /* As a reference for this code, you can use the GNU libiconv-
198 implementation. Look for uses of the RET_TOOFEW macro. */-
199-
200 if (STREQ_OPT (encoding,-
201 "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))-
202 {-
203 if (m == 1)-
204 {-
205 unsigned char c = (unsigned char) p[0];-
206-
207 if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)-
208 goto incomplete;-
209 }-
210 if (m == 2)-
211 {-
212 unsigned char c = (unsigned char) p[0];-
213-
214 if (c == 0x8f)-
215 {-
216 unsigned char c2 = (unsigned char) p[1];-
217-
218 if (c2 >= 0xa1 && c2 < 0xff)-
219 goto incomplete;-
220 }-
221 }-
222 goto invalid;-
223 }-
224 if (STREQ_OPT (encoding,-
225 "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)-
226 || STREQ_OPT (encoding,-
227 "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)-
228 || STREQ_OPT (encoding,-
229 "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))-
230 {-
231 if (m == 1)-
232 {-
233 unsigned char c = (unsigned char) p[0];-
234-
235 if (c >= 0xa1 && c < 0xff)-
236 goto incomplete;-
237 }-
238 goto invalid;-
239 }-
240 if (STREQ_OPT (encoding,-
241 "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))-
242 {-
243 if (m == 1)-
244 {-
245 unsigned char c = (unsigned char) p[0];-
246-
247 if ((c >= 0xa1 && c < 0xff) || c == 0x8e)-
248 goto incomplete;-
249 }-
250 else /* m == 2 || m == 3 */-
251 {-
252 unsigned char c = (unsigned char) p[0];-
253-
254 if (c == 0x8e)-
255 goto incomplete;-
256 }-
257 goto invalid;-
258 }-
259 if (STREQ_OPT (encoding,-
260 "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))-
261 {-
262 if (m == 1)-
263 {-
264 unsigned char c = (unsigned char) p[0];-
265-
266 if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))-
267 goto incomplete;-
268 }-
269 else /* m == 2 || m == 3 */-
270 {-
271 unsigned char c = (unsigned char) p[0];-
272-
273 if (c >= 0x90 && c <= 0xe3)-
274 {-
275 unsigned char c2 = (unsigned char) p[1];-
276-
277 if (c2 >= 0x30 && c2 <= 0x39)-
278 {-
279 if (m == 2)-
280 goto incomplete;-
281 else /* m == 3 */-
282 {-
283 unsigned char c3 = (unsigned char) p[2];-
284-
285 if (c3 >= 0x81 && c3 <= 0xfe)-
286 goto incomplete;-
287 }-
288 }-
289 }-
290 }-
291 goto invalid;-
292 }-
293 if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))-
294 {-
295 if (m == 1)-
296 {-
297 unsigned char c = (unsigned char) p[0];-
298-
299 if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)-
300 || (c >= 0xf0 && c <= 0xf9))-
301 goto incomplete;-
302 }-
303 goto invalid;-
304 }-
305-
306 /* An unknown multibyte encoding. */-
307 goto incomplete;-
308 }-
309-
310 incomplete:-
311 {-
312 size_t k = nstate;-
313 /* Here 0 <= k < m < 4. */-
314 pstate[++k] = s[0];-
315 if (k < m)-
316 {-
317 pstate[++k] = s[1];-
318 if (k < m)-
319 pstate[++k] = s[2];-
320 }-
321 if (k != m)-
322 abort ();-
323 }-
324 pstate[0] = m;-
325 return (size_t)(-2);-
326-
327 invalid:-
328 errno = EILSEQ;-
329 /* The conversion state is undefined, says POSIX. */-
330 return (size_t)(-1);-
331 }-
332 }-
333}-
334-
335#else-
336/* Override the system's mbrtowc() function. */-
337-
338# undef mbrtowc-
339-
/* Replacement for the system's mbrtowc() that works around the defects
   selected at configure time through the MBRTOWC_*_BUG and
   C_LOCALE_MAYBE_EILSEQ macros.

   PWC  receives the wide character; if NULL, a local variable is used so
        that the result can still be inspected below.
   S    is the input, at most N bytes; may be NULL.
   N    is the number of input bytes available.
   PS   is the conversion state, passed on to the system function.

   Returns what the system's mbrtowc() returns, except where one of the
   enabled workarounds overrides it (see the individual sections).  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  size_t ret;
  wchar_t wc;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  /* Spell out the null-S case instead of relying on the system function.  */
  if (s == NULL)
    {
      pwc = NULL;
      s = "";
      n = 1;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  if (n == 0)
    return (size_t) -2;
# endif

  /* Always have somewhere to store the wide character, so the code below
     can look at *pwc unconditionally.  */
  if (! pwc)
    pwc = &wc;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
       hidden internal state, but we can call it on our variable.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* Parse the rest of the multibyte character byte for byte.  */
        size_t count = 0;
        for (; n > 0; s++, n--)
          {
            ret = mbrtowc (&wc, s, 1, ps);

            if (ret == (size_t)(-1))
              return (size_t)(-1);
            count++;
            if (ret != (size_t)(-2))
              {
                /* The multibyte character has been completed.  */
                *pwc = wc;
                return (wc == 0 ? 0 : count);
              }
          }
        return (size_t)(-2);
      }
  }
# endif

  ret = mbrtowc (pwc, s, n, ps);

# if MBRTOWC_NUL_RETVAL_BUG
  /* A successfully converted null wide character must yield 0.  */
  if (ret < (size_t) -2 && !*pwc)
    return 0;
# endif

# if C_LOCALE_MAYBE_EILSEQ
  /* When the system function reports an error and the LC_CTYPE locale is
     not a hard locale, map the first input byte to a wide character of the
     same value.  S may still be NULL here when none of the MBRTOWC_*_BUG
     macros above is set; it must not be dereferenced in that case, so the
     system function's result is returned as is.  */
  if ((size_t) -2 <= ret && n != 0 && s != NULL && ! hard_locale (LC_CTYPE))
    {
      unsigned char uc = *s;
      *pwc = uc;
      return 1;
    }
# endif

  return ret;
}
413-
414#endif-
Source codeSwitch to Preprocessed file

Generated by Squish Coco 4.1.2