OpenCoverage

unicode.c

Absolute File Name: /home/opencoverage/opencoverage/guest-scripts/bash/src/lib/sh/unicode.c
Source code | Switch to Preprocessed file
Line | Source | Count
1/* unicode.c - functions to convert unicode characters */-
2-
3/* Copyright (C) 2010-2016 Free Software Foundation, Inc.-
4-
5 This file is part of GNU Bash, the Bourne Again SHell.-
6-
7 Bash is free software: you can redistribute it and/or modify-
8 it under the terms of the GNU General Public License as published by-
9 the Free Software Foundation, either version 3 of the License, or-
10 (at your option) any later version.-
11-
12 Bash is distributed in the hope that it will be useful,-
13 but WITHOUT ANY WARRANTY; without even the implied warranty of-
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the-
15 GNU General Public License for more details.-
16-
17 You should have received a copy of the GNU General Public License-
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.-
19*/-
20-
21#include <config.h>-
22-
23#if defined (HANDLE_MULTIBYTE)-
24-
25#include <stdc.h>-
26#include <wchar.h>-
27#include <bashansi.h>-
28#ifdef HAVE_UNISTD_H-
29#include <unistd.h>-
30#endif-
31#include <stdio.h>-
32#include <limits.h>-
33-
34#if HAVE_ICONV-
35# include <iconv.h>-
36#endif-
37-
38#include <xmalloc.h>-
39-
40#ifndef USHORT_MAX-
41# ifdef USHRT_MAX-
42# define USHORT_MAX USHRT_MAX-
43# else-
44# define USHORT_MAX ((unsigned short) ~(unsigned short)0)-
45# endif-
46#endif-
47-
48#if !defined (STREQ)-
49# define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0)-
50#endif /* !STREQ */-
51-
52#if defined (HAVE_LOCALE_CHARSET)-
53extern const char *locale_charset __P((void));-
54#else-
55extern char *get_locale_var __P((char *));-
56#endif-
57-
/* Defined outside this file; u32cconv () reads it to learn whether the
   current locale is a UTF-8 one. */
extern int locale_utf8locale;

/* Snapshot of locale_utf8locale taken by u32cconv () when it initializes its
   conversion state; cleared by u32reset (). */
static int utf8locale = 0;

/* Nonzero once u32cconv () has initialized utf8locale (and localconv);
   cleared by u32reset () so the next conversion re-initializes. */
static int u32init = 0;

#if defined (HAVE_ICONV)
/* iconv descriptor opened by u32cconv () to convert from UTF-8 to the locale
   charset; (iconv_t)-1 when no descriptor is open. */
static iconv_t localconv;
#endif
65-
66#ifndef HAVE_LOCALE_CHARSET-
/* Storage for the charset name returned by stub_charset ().  The contents are
   overwritten on every call. */
static char charsetbuf[40];

/* Fallback used when locale_charset () is not available: derive a charset
   name from the value get_locale_var () reports for LC_CTYPE.

   - unset or empty value           -> "ASCII"
   - value containing a '.'         -> the text after the last '.', with any
                                       "@modifier" suffix removed
   - anything else                  -> the value itself

   The name is truncated to fit charsetbuf.  Returns a pointer to that static
   buffer. */
static char *
stub_charset ()
{
  char *lcname, *dot, *modifier;
  const char *from;

  lcname = get_locale_var ("LC_CTYPE");
  if (lcname == 0 || *lcname == 0)
    {
      strcpy (charsetbuf, "ASCII");
      return charsetbuf;
    }

  /* Copy either the part following the last '.' or the whole locale name. */
  dot = strrchr (lcname, '.');
  from = dot ? dot + 1 : lcname;
  strncpy (charsetbuf, from, sizeof (charsetbuf) - 1);
  charsetbuf[sizeof (charsetbuf) - 1] = '\0';

  /* Only a name taken from after the '.' has its "@modifier" stripped. */
  if (dot)
    {
      modifier = strchr (charsetbuf, '@');
      if (modifier)
	*modifier = 0;
    }

  return charsetbuf;
}
94#endif-
95-
96void-
97u32reset ()-
98{-
99#if defined (HAVE_ICONV)-
100 if (u32init && localconv != (iconv_t)-1)
u32initDescription
TRUEnever evaluated
FALSEevaluated 18822 times by 1 test
Evaluated by:
  • Self test
localconv != (iconv_t)-1Description
TRUEnever evaluated
FALSEnever evaluated
0-18822
101 {-
102 iconv_close (localconv);-
103 localconv = (iconv_t)-1;-
104 }
never executed: end of block
0
105#endif-
106 u32init = 0;-
107 utf8locale = 0;-
108}
executed 18822 times by 1 test: end of block
Executed by:
  • Self test
18822
109-
110/* u32toascii ? */-
111int-
112u32tochar (x, s)-
113 unsigned long x;-
114 char *s;-
115{-
116 int l;-
117-
118 l = (x <= UCHAR_MAX) ? 1 : ((x <= USHORT_MAX) ? 2 : 4);
(x <= (0x7f * 2 + 1) )Description
TRUEnever evaluated
FALSEnever evaluated
(x <= (0x7fff * 2 + 1) )Description
TRUEnever evaluated
FALSEnever evaluated
0
119-
120 if (x <= UCHAR_MAX)
x <= (0x7f * 2 + 1)Description
TRUEnever evaluated
FALSEnever evaluated
0
121 s[0] = x & 0xFF;
never executed: s[0] = x & 0xFF;
0
122 else if (x <= USHORT_MAX) /* assume unsigned short = 16 bits */
x <= (0x7fff * 2 + 1)Description
TRUEnever evaluated
FALSEnever evaluated
0
123 {-
124 s[0] = (x >> 8) & 0xFF;-
125 s[1] = x & 0xFF;-
126 }
never executed: end of block
0
127 else-
128 {-
129 s[0] = (x >> 24) & 0xFF;-
130 s[1] = (x >> 16) & 0xFF;-
131 s[2] = (x >> 8) & 0xFF;-
132 s[3] = x & 0xFF;-
133 }
never executed: end of block
0
134 s[l] = '\0';-
135 return l;
never executed: return l;
0
136}-
137-
/* Write WC to S as an ISO C99 universal-character-name escape sequence and
   return the number of characters written (not counting the NUL that
   sprintf () appends).

   Values below 0x10000 use the four-digit form "\uXXXX"; anything larger
   uses the eight-digit form "\UXXXXXXXX".  The eight-digit form must be
   introduced by an upper-case 'U': "\u" is defined to take exactly four hex
   digits, so "\u" followed by eight digits would be read back as a different
   character followed by four literal digits.

   S must have room for at least 11 bytes. */
int
u32tocesc (wc, s)
     u_bits32_t wc;
     char *s;
{
  int l;

  if (wc < 0x10000)
    l = sprintf (s, "\\u%04X", wc);
  else
    l = sprintf (s, "\\U%08X", wc);
  return l;
}
151-
/* Convert unsigned 32-bit int to utf-8 character string.

   Encodes WC into S and NUL-terminates it.  Returns the number of bytes in
   the encoding (1-6), or 0 -- leaving S as the empty string -- when WC is
   0x80000000 or larger and therefore has no encoding.  S must have room for
   at least 7 bytes.

   Strictly speaking, UTF-8 doesn't have characters longer than 4 bytes; the
   5- and 6-byte forms of the original scheme are still produced for values
   of 0x200000 and above.  Technically, we could also return 0 if
   0xd800 <= wc <= 0xdfff (the surrogate range), but those values are encoded
   as ordinary 3-byte sequences. */
int
u32toutf8 (wc, s)
     u_bits32_t wc;
     char *s;
{
  int l, i;
  unsigned int lead;
  u_bits32_t rest;

  /* Choose the sequence length and the marker bits for the first byte.  The
     marker has one leading 1 bit per byte in the sequence: 110xxxxx for two
     bytes up through 1111110x for six. */
  if (wc < 0x0080)
    {
      l = 1;
      lead = 0x00;
    }
  else if (wc < 0x0800)
    {
      l = 2;
      lead = 0xc0;
    }
  else if (wc < 0x10000)
    {
      l = 3;
      lead = 0xe0;
    }
  else if (wc < 0x200000)
    {
      l = 4;
      lead = 0xf0;
    }
  else if (wc < 0x04000000)
    {
      l = 5;
      lead = 0xf8;
    }
  else if (wc < 0x080000000)
    {
      l = 6;
      lead = 0xfc;	/* not 0xf8, which marks a 5-byte sequence */
    }
  else
    {
      s[0] = '\0';
      return 0;
    }

  /* Fill the continuation bytes (10xxxxxx) from the last one backwards,
     consuming six bits of the value each time. */
  rest = wc;
  for (i = l - 1; i > 0; i--)
    {
      s[i] = (rest & 0x3f) | 0x80;
      rest >>= 6;
    }
  /* Whatever bits remain fit beside the marker in the first byte. */
  s[0] = rest | lead;

  s[l] = '\0';
  return l;
}
213-
214/* Convert a 32-bit unsigned int (unicode) to a UTF-16 string. Rarely used,-
215 only if sizeof(wchar_t) == 2. */-
216int-
217u32toutf16 (c, s)-
218 u_bits32_t c;-
219 unsigned short *s;-
220{-
221 int l;-
222-
223 l = 0;-
224 if (c < 0x0d800 || (c >= 0x0e000 && c <= 0x0ffff))
c < 0x0d800Description
TRUEnever evaluated
FALSEnever evaluated
c >= 0x0e000Description
TRUEnever evaluated
FALSEnever evaluated
c <= 0x0ffffDescription
TRUEnever evaluated
FALSEnever evaluated
0
225 {-
226 s[0] = (unsigned short) (c & 0xFFFF);-
227 l = 1;-
228 }
never executed: end of block
0
229 else if (c >= 0x10000 && c <= 0x010ffff)
c >= 0x10000Description
TRUEnever evaluated
FALSEnever evaluated
c <= 0x010ffffDescription
TRUEnever evaluated
FALSEnever evaluated
0
230 {-
231 c -= 0x010000;-
232 s[0] = (unsigned short)((c >> 10) + 0xd800);-
233 s[1] = (unsigned short)((c & 0x3ff) + 0xdc00);-
234 l = 2;-
235 }
never executed: end of block
0
236 s[l] = 0;-
237 return l;
never executed: return l;
0
238}-
239-
240/* convert a single unicode-32 character into a multibyte string and put the-
241 result in S, which must be large enough (at least max(10,MB_LEN_MAX) bytes) */-
242int-
243u32cconv (c, s)-
244 unsigned long c;-
245 char *s;-
246{-
247 wchar_t wc;-
248 wchar_t ws[3];-
249 int n;-
250#if HAVE_ICONV-
251 const char *charset;-
252 char obuf[25], *optr;-
253 size_t obytesleft;-
254 const char *iptr;-
255 size_t sn;-
256#endif-
257-
258#if __STDC_ISO_10646__-
259 wc = c;-
260 if (sizeof (wchar_t) == 4 && c <= 0x7fffffff)
sizeof (wchar_t) == 4Description
TRUEevaluated 1112 times by 1 test
Evaluated by:
  • Self test
FALSEnever evaluated
c <= 0x7fffffffDescription
TRUEevaluated 1084 times by 1 test
Evaluated by:
  • Self test
FALSEevaluated 28 times by 1 test
Evaluated by:
  • Self test
0-1112
261 n = wctomb (s, wc);
executed 1084 times by 1 test: n = wctomb (s, wc);
Executed by:
  • Self test
1084
262 else if (sizeof (wchar_t) == 2 && c <= 0x10ffff && u32toutf16 (c, ws))
sizeof (wchar_t) == 2Description
TRUEnever evaluated
FALSEevaluated 28 times by 1 test
Evaluated by:
  • Self test
c <= 0x10ffffDescription
TRUEnever evaluated
FALSEnever evaluated
u32toutf16 (c, ws)Description
TRUEnever evaluated
FALSEnever evaluated
0-28
263 n = wcstombs (s, ws, MB_LEN_MAX);
never executed: n = wcstombs (s, ws, 16 );
0
264 else-
265 n = -1;
executed 28 times by 1 test: n = -1;
Executed by:
  • Self test
28
266 if (n != -1)
n != -1Description
TRUEevaluated 1084 times by 1 test
Evaluated by:
  • Self test
FALSEevaluated 28 times by 1 test
Evaluated by:
  • Self test
28-1084
267 return n;
executed 1084 times by 1 test: return n;
Executed by:
  • Self test
1084
268#endif-
269-
270#if HAVE_ICONV-
271 /* this is mostly from coreutils-8.5/lib/unicodeio.c */-
272 if (u32init == 0)
u32init == 0Description
TRUEevaluated 16 times by 1 test
Evaluated by:
  • Self test
FALSEevaluated 12 times by 1 test
Evaluated by:
  • Self test
12-16
273 {-
274 utf8locale = locale_utf8locale;-
275 localconv = (iconv_t)-1;-
276 if (utf8locale == 0)
utf8locale == 0Description
TRUEnever evaluated
FALSEevaluated 16 times by 1 test
Evaluated by:
  • Self test
0-16
277 {-
278#if HAVE_LOCALE_CHARSET-
279 charset = locale_charset ();-
280#elif HAVE_NL_LANGINFO-
281 charset = nl_langinfo (CODESET);-
282#else-
283 charset = stub_charset ();-
284#endif-
285 localconv = iconv_open (charset, "UTF-8");-
286 if (localconv == (iconv_t)-1)
localconv == (iconv_t)-1Description
TRUEnever evaluated
FALSEnever evaluated
0
287 /* We assume ASCII when presented with an unknown encoding. */-
288 localconv = iconv_open ("ASCII", "UTF-8");
never executed: localconv = iconv_open ("ASCII", "UTF-8");
0
289 }
never executed: end of block
0
290 u32init = 1;-
291 }
executed 16 times by 1 test: end of block
Executed by:
  • Self test
16
292-
293 /* NL_LANGINFO and locale_charset used when setting locale_utf8locale */-
294 -
295 /* If we have a UTF-8 locale, convert to UTF-8 and return converted value. */-
296 n = u32toutf8 (c, s);-
297 if (utf8locale)
utf8localeDescription
TRUEevaluated 28 times by 1 test
Evaluated by:
  • Self test
FALSEnever evaluated
0-28
298 return n;
executed 28 times by 1 test: return n;
Executed by:
  • Self test
28
299-
300 /* If the conversion is not supported, even the ASCII requested above, we-
301 bail now. Currently we return the UTF-8 conversion. We could return-
302 u32tocesc(). */-
303 if (localconv == (iconv_t)-1)
localconv == (iconv_t)-1Description
TRUEnever evaluated
FALSEnever evaluated
0
304 return n;
never executed: return n;
0
305 -
306 optr = obuf;-
307 obytesleft = sizeof (obuf);-
308 iptr = s;-
309 sn = n;-
310-
311 iconv (localconv, NULL, NULL, NULL, NULL);-
312-
313 if (iconv (localconv, (ICONV_CONST char **)&iptr, &sn, &optr, &obytesleft) == (size_t)-1)
iconv (localco... == (size_t)-1Description
TRUEnever evaluated
FALSEnever evaluated
0
314 {-
315 /* You get ISO C99 escape sequences if iconv fails */ -
316 n = u32tocesc (c, s);-
317 return n;
never executed: return n;
0
318 }-
319-
320 *optr = '\0';-
321-
322 /* number of chars to be copied is optr - obuf if we want to do bounds-
323 checking */-
324 strcpy (s, obuf);-
325 return (optr - obuf);
never executed: return (optr - obuf);
0
326#endif /* HAVE_ICONV */-
327-
328 if (locale_utf8locale)
dead code: if (locale_utf8locale) n = u32toutf8 (c, s); else n = u32tocesc (c, s);
-
329 n = u32toutf8 (c, s);
dead code: if (locale_utf8locale) n = u32toutf8 (c, s); else n = u32tocesc (c, s);
-
330 else
dead code: if (locale_utf8locale) n = u32toutf8 (c, s); else n = u32tocesc (c, s);
-
331 n = u32tocesc (c, s); /* fallback is ISO C99 escape sequences */
dead code: if (locale_utf8locale) n = u32toutf8 (c, s); else n = u32tocesc (c, s);
-
332 return n;
dead code: return n;
-
333}-
334#else-
/* Variant compiled when HANDLE_MULTIBYTE is not defined: none of the
   conversion state exists in that configuration, so there is nothing to
   reset.  Presumably kept so callers need no conditional of their own --
   confirm against the call sites. */
void
u32reset ()
{
  return;
}
339#endif /* HANDLE_MULTIBYTE */-
Source code | Switch to Preprocessed file

Generated by Squish Coco 4.1.2