OpenCoverage

mbsstr.c

Absolute File Name:/home/opencoverage/opencoverage/guest-scripts/coreutils/src/gnulib/lib/mbsstr.c
Source codeSwitch to Preprocessed file
LineSourceCount
1/* Searching in a string. -*- coding: utf-8 -*--
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.-
3 Written by Bruno Haible <bruno@clisp.org>, 2005.-
4-
5 This program is free software: you can redistribute it and/or modify-
6 it under the terms of the GNU General Public License as published by-
7 the Free Software Foundation; either version 3 of the License, or-
8 (at your option) any later version.-
9-
10 This program is distributed in the hope that it will be useful,-
11 but WITHOUT ANY WARRANTY; without even the implied warranty of-
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the-
13 GNU General Public License for more details.-
14-
15 You should have received a copy of the GNU General Public License-
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */-
17-
18#include <config.h>-
19-
20/* Specification. */-
21#include <string.h>-
22-
23#include <stdbool.h>-
24#include <stddef.h> /* for NULL, in case a nonstandard string.h lacks it */-
25-
26#include "malloca.h"-
27#include "mbuiter.h"-
28-
29/* Knuth-Morris-Pratt algorithm. */-
30#define UNIT unsigned char-
31#define CANON_ELEMENT(c) c-
32#include "str-kmp.h"-
33-
34/* Knuth-Morris-Pratt algorithm.-
35 See https://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm-
36 Return a boolean indicating success:-
37 Return true and set *RESULTP if the search was completed.-
38 Return false if it was aborted because not enough memory was available. */-
39static bool-
40knuth_morris_pratt_multibyte (const char *haystack, const char *needle,-
41 const char **resultp)-
42{-
43 size_t m = mbslen (needle);-
44 mbchar_t *needle_mbchars;-
45 size_t *table;-
46-
47 /* Allocate room for needle_mbchars and the table. */-
48 void *memory = nmalloca (m, sizeof (mbchar_t) + sizeof (size_t));
(__builtin_con...oc_count); }))Description
TRUEnever evaluated
FALSEnever evaluated
((m) * (sizeof...nment_max - 1)Description
TRUEnever evaluated
FALSEnever evaluated
__builtin_constant_p (m)Description
TRUEnever evaluated
FALSEnever evaluated
__builtin_cons...zeof (size_t))Description
TRUEnever evaluated
FALSEnever evaluated
0
49 void *table_memory;-
50 if (memory == NULL)
memory == ((void *)0)Description
TRUEnever evaluated
FALSEnever evaluated
0
51 return false;
never executed: return 0 ;
0
52 needle_mbchars = memory;-
53 table_memory = needle_mbchars + m;-
54 table = table_memory;-
55-
56 /* Fill needle_mbchars. */-
57 {-
58 mbui_iterator_t iter;-
59 size_t j;-
60-
61 j = 0;-
62 for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
((iter).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
63 mb_copy (&needle_mbchars[j], &mbui_cur (iter));
never executed: mb_copy (&needle_mbchars[j], &(iter).cur);
0
64 }-
65-
66 /* Fill the table.-
67 For 0 < i < m:-
68 0 < table[i] <= i is defined such that-
69 forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],-
70 and table[i] is as large as possible with this property.-
71 This implies:-
72 1) For 0 < i < m:-
73 If table[i] < i,-
74 needle[table[i]..i-1] = needle[0..i-1-table[i]].-
75 2) For 0 < i < m:-
76 rhaystack[0..i-1] == needle[0..i-1]-
77 and exists h, i <= h < m: rhaystack[h] != needle[h]-
78 implies-
79 forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].-
80 table[0] remains uninitialized. */-
81 {-
82 size_t i, j;-
83-
84 /* i = 1: Nothing to verify for x = 0. */-
85 table[1] = 1;-
86 j = 0;-
87-
88 for (i = 2; i < m; i++)
i < mDescription
TRUEnever evaluated
FALSEnever evaluated
0
89 {-
90 /* Here: j = i-1 - table[i-1].-
91 The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold-
92 for x < table[i-1], by induction.-
93 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */-
94 mbchar_t *b = &needle_mbchars[i - 1];-
95-
96 for (;;)-
97 {-
98 /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]-
99 is known to hold for x < i-1-j.-
100 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */-
101 if (mb_equal (*b, needle_mbchars[j]))
((*b).wc_valid...).bytes) == 0)Description
TRUEnever evaluated
FALSEnever evaluated
(*b).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
(needle_mbchars[j]).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
(*b).bytes == ...hars[j]).bytesDescription
TRUEnever evaluated
FALSEnever evaluated
memcmp ((*b).p...b).bytes) == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
102 {-
103 /* Set table[i] := i-1-j. */-
104 table[i] = i - ++j;-
105 break;
never executed: break;
0
106 }-
107 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds-
108 for x = i-1-j, because-
109 needle[i-1] != needle[j] = needle[i-1-x]. */-
110 if (j == 0)
j == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
111 {-
112 /* The inequality holds for all possible x. */-
113 table[i] = i;-
114 break;
never executed: break;
0
115 }-
116 /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds-
117 for i-1-j < x < i-1-j+table[j], because for these x:-
118 needle[x..i-2]-
119 = needle[x-(i-1-j)..j-1]-
120 != needle[0..j-1-(x-(i-1-j))] (by definition of table[j])-
121 = needle[0..i-2-x],-
122 hence needle[x..i-1] != needle[0..i-1-x].-
123 Furthermore-
124 needle[i-1-j+table[j]..i-2]-
125 = needle[table[j]..j-1]-
126 = needle[0..j-1-table[j]] (by definition of table[j]). */-
127 j = j - table[j];-
128 }
never executed: end of block
0
129 /* Here: j = i - table[i]. */-
130 }
never executed: end of block
0
131 }-
132-
133 /* Search, using the table to accelerate the processing. */-
134 {-
135 size_t j;-
136 mbui_iterator_t rhaystack;-
137 mbui_iterator_t phaystack;-
138-
139 *resultp = NULL;-
140 j = 0;-
141 mbui_init (rhaystack, haystack);-
142 mbui_init (phaystack, haystack);-
143 /* Invariant: phaystack = rhaystack + j. */-
144 while (mbui_avail (phaystack))
((phaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((phaystack).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
145 if (mb_equal (needle_mbchars[j], mbui_cur (phaystack)))
((needle_mbcha...).bytes) == 0)Description
TRUEnever evaluated
FALSEnever evaluated
(needle_mbchars[j]).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((phaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
(needle_mbchar...ck).cur).bytesDescription
TRUEnever evaluated
FALSEnever evaluated
memcmp ((needl...]).bytes) == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
146 {-
147 j++;-
148 mbui_advance (phaystack);-
149 if (j == m)
j == mDescription
TRUEnever evaluated
FALSEnever evaluated
0
150 {-
151 /* The entire needle has been found. */-
152 *resultp = mbui_cur_ptr (rhaystack);-
153 break;
never executed: break;
0
154 }-
155 }
never executed: end of block
0
156 else if (j > 0)
j > 0Description
TRUEnever evaluated
FALSEnever evaluated
0
157 {-
158 /* Found a match of needle[0..j-1], mismatch at needle[j]. */-
159 size_t count = table[j];-
160 j -= count;-
161 for (; count > 0; count--)
count > 0Description
TRUEnever evaluated
FALSEnever evaluated
0
162 {-
163 if (!mbui_avail (rhaystack))
((rhaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rhaystack).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
164 abort ();
never executed: abort ();
0
165 mbui_advance (rhaystack);-
166 }
never executed: end of block
0
167 }
never executed: end of block
0
168 else-
169 {-
170 /* Found a mismatch at needle[0] already. */-
171 if (!mbui_avail (rhaystack))
((rhaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rhaystack).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
172 abort ();
never executed: abort ();
0
173 mbui_advance (rhaystack);-
174 mbui_advance (phaystack);-
175 }
never executed: end of block
0
176 }-
177-
178 freea (memory);-
179 return true;
never executed: return 1 ;
0
180}-
181-
182/* Find the first occurrence of the character string NEEDLE in the character-
183 string HAYSTACK. Return NULL if NEEDLE is not found in HAYSTACK. */-
184char *-
185mbsstr (const char *haystack, const char *needle)-
186{-
187 /* Be careful not to look at the entire extent of haystack or needle-
188 until needed. This is useful because of these two cases:-
189 - haystack may be very long, and a match of needle found early,-
190 - needle may be very long, and not even a short initial segment of-
191 needle may be found in haystack. */-
192 if (MB_CUR_MAX > 1)
(__ctype_get_m...ur_max ()) > 1Description
TRUEnever evaluated
FALSEnever evaluated
0
193 {-
194 mbui_iterator_t iter_needle;-
195-
196 mbui_init (iter_needle, needle);-
197 if (mbui_avail (iter_needle))
((iter_needle).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_needle).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
198 {-
199 /* Minimizing the worst-case complexity:-
200 Let n = mbslen(haystack), m = mbslen(needle).-
201 The naïve algorithm is O(n*m) worst-case.-
202 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a-
203 memory allocation.-
204 To achieve linear complexity and yet amortize the cost of the-
205 memory allocation, we activate the Knuth-Morris-Pratt algorithm-
206 only once the naïve algorithm has already run for some time; more-
207 precisely, when-
208 - the outer loop count is >= 10,-
209 - the average number of comparisons per outer loop is >= 5,-
210 - the total number of comparisons is >= m.-
211 But we try it only once. If the memory allocation attempt failed,-
212 we don't retry it. */-
213 bool try_kmp = true;-
214 size_t outer_loop_count = 0;-
215 size_t comparison_count = 0;-
216 size_t last_ccount = 0; /* last comparison count */-
217 mbui_iterator_t iter_needle_last_ccount; /* = needle + last_ccount */-
218-
219 mbui_iterator_t iter_haystack;-
220-
221 mbui_init (iter_needle_last_ccount, needle);-
222 mbui_init (iter_haystack, haystack);-
223 for (;; mbui_advance (iter_haystack))-
224 {-
225 if (!mbui_avail (iter_haystack))
((iter_haystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_haystack).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
226 /* No match. */-
227 return NULL;
never executed: return ((void *)0) ;
0
228-
229 /* See whether it's advisable to use an asymptotically faster-
230 algorithm. */-
231 if (try_kmp
try_kmpDescription
TRUEnever evaluated
FALSEnever evaluated
0
232 && outer_loop_count >= 10
outer_loop_count >= 10Description
TRUEnever evaluated
FALSEnever evaluated
0
233 && comparison_count >= 5 * outer_loop_count)
comparison_cou...ter_loop_countDescription
TRUEnever evaluated
FALSEnever evaluated
0
234 {-
235 /* See if needle + comparison_count now reaches the end of-
236 needle. */-
237 size_t count = comparison_count - last_ccount;-
238 for (;-
239 count > 0 && mbui_avail (iter_needle_last_ccount);
count > 0Description
TRUEnever evaluated
FALSEnever evaluated
((iter_needle_....cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_needle_...).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
240 count--)-
241 mbui_advance (iter_needle_last_ccount);
never executed: ((iter_needle_last_ccount).cur.ptr += (iter_needle_last_ccount).cur.bytes, (iter_needle_last_ccount).next_done = 0 );
0
242 last_ccount = comparison_count;-
243 if (!mbui_avail (iter_needle_last_ccount))
((iter_needle_....cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_needle_...).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
244 {-
245 /* Try the Knuth-Morris-Pratt algorithm. */-
246 const char *result;-
247 bool success =-
248 knuth_morris_pratt_multibyte (haystack, needle,-
249 &result);-
250 if (success)
successDescription
TRUEnever evaluated
FALSEnever evaluated
0
251 return (char *) result;
never executed: return (char *) result;
0
252 try_kmp = false;-
253 }
never executed: end of block
0
254 }
never executed: end of block
0
255-
256 outer_loop_count++;-
257 comparison_count++;-
258 if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle)))
(((iter_haysta...).bytes) == 0)Description
TRUEnever evaluated
FALSEnever evaluated
((iter_haystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_needle).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((iter_haystac...le).cur).bytesDescription
TRUEnever evaluated
FALSEnever evaluated
memcmp (((iter...r).bytes) == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
259 /* The first character matches. */-
260 {-
261 mbui_iterator_t rhaystack;-
262 mbui_iterator_t rneedle;-
263-
264 memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));-
265 mbui_advance (rhaystack);-
266-
267 mbui_init (rneedle, needle);-
268 if (!mbui_avail (rneedle))
((rneedle).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rneedle).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
269 abort ();
never executed: abort ();
0
270 mbui_advance (rneedle);-
271-
272 for (;; mbui_advance (rhaystack), mbui_advance (rneedle))-
273 {-
274 if (!mbui_avail (rneedle))
((rneedle).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rneedle).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
275 /* Found a match. */-
276 return (char *) mbui_cur_ptr (iter_haystack);
never executed: return (char *) (iter_haystack).cur.ptr;
0
277 if (!mbui_avail (rhaystack))
((rhaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rhaystack).cur).wc == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
278 /* No match. */-
279 return NULL;
never executed: return ((void *)0) ;
0
280 comparison_count++;-
281 if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle)))
!(((rhaystack)...).bytes) == 0)Description
TRUEnever evaluated
FALSEnever evaluated
((rhaystack).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rneedle).cur).wc_validDescription
TRUEnever evaluated
FALSEnever evaluated
((rhaystack).c...le).cur).bytesDescription
TRUEnever evaluated
FALSEnever evaluated
memcmp (((rhay...r).bytes) == 0Description
TRUEnever evaluated
FALSEnever evaluated
0
282 /* Nothing in this round. */-
283 break;
never executed: break;
0
284 }
never executed: end of block
0
285 }
never executed: end of block
0
286 }
never executed: end of block
0
287 }
never executed: end of block
0
288 else-
289 return (char *) haystack;
never executed: return (char *) haystack;
0
290 }-
291 else-
292 {-
293 if (*needle != '\0')
*needle != '\0'Description
TRUEnever evaluated
FALSEnever evaluated
0
294 {-
295 /* Minimizing the worst-case complexity:-
296 Let n = strlen(haystack), m = strlen(needle).-
297 The naïve algorithm is O(n*m) worst-case.-
298 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a-
299 memory allocation.-
300 To achieve linear complexity and yet amortize the cost of the-
301 memory allocation, we activate the Knuth-Morris-Pratt algorithm-
302 only once the naïve algorithm has already run for some time; more-
303 precisely, when-
304 - the outer loop count is >= 10,-
305 - the average number of comparisons per outer loop is >= 5,-
306 - the total number of comparisons is >= m.-
307 But we try it only once. If the memory allocation attempt failed,-
308 we don't retry it. */-
309 bool try_kmp = true;-
310 size_t outer_loop_count = 0;-
311 size_t comparison_count = 0;-
312 size_t last_ccount = 0; /* last comparison count */-
313 const char *needle_last_ccount = needle; /* = needle + last_ccount */-
314-
315 /* Speed up the following searches of needle by caching its first-
316 character. */-
317 char b = *needle++;-
318-
319 for (;; haystack++)-
320 {-
321 if (*haystack == '\0')
*haystack == '\0'Description
TRUEnever evaluated
FALSEnever evaluated
0
322 /* No match. */-
323 return NULL;
never executed: return ((void *)0) ;
0
324-
325 /* See whether it's advisable to use an asymptotically faster-
326 algorithm. */-
327 if (try_kmp
try_kmpDescription
TRUEnever evaluated
FALSEnever evaluated
0
328 && outer_loop_count >= 10
outer_loop_count >= 10Description
TRUEnever evaluated
FALSEnever evaluated
0
329 && comparison_count >= 5 * outer_loop_count)
comparison_cou...ter_loop_countDescription
TRUEnever evaluated
FALSEnever evaluated
0
330 {-
331 /* See if needle + comparison_count now reaches the end of-
332 needle. */-
333 if (needle_last_ccount != NULL)
needle_last_cc...!= ((void *)0)Description
TRUEnever evaluated
FALSEnever evaluated
0
334 {-
335 needle_last_ccount +=-
336 strnlen (needle_last_ccount,-
337 comparison_count - last_ccount);-
338 if (*needle_last_ccount == '\0')
*needle_last_ccount == '\0'Description
TRUEnever evaluated
FALSEnever evaluated
0
339 needle_last_ccount = NULL;
never executed: needle_last_ccount = ((void *)0) ;
0
340 last_ccount = comparison_count;-
341 }
never executed: end of block
0
342 if (needle_last_ccount == NULL)
needle_last_cc...== ((void *)0)Description
TRUEnever evaluated
FALSEnever evaluated
0
343 {-
344 /* Try the Knuth-Morris-Pratt algorithm. */-
345 const unsigned char *result;-
346 bool success =-
347 knuth_morris_pratt ((const unsigned char *) haystack,-
348 (const unsigned char *) (needle - 1),-
349 strlen (needle - 1),-
350 &result);-
351 if (success)
successDescription
TRUEnever evaluated
FALSEnever evaluated
0
352 return (char *) result;
never executed: return (char *) result;
0
353 try_kmp = false;-
354 }
never executed: end of block
0
355 }
never executed: end of block
0
356-
357 outer_loop_count++;-
358 comparison_count++;-
359 if (*haystack == b)
*haystack == bDescription
TRUEnever evaluated
FALSEnever evaluated
0
360 /* The first character matches. */-
361 {-
362 const char *rhaystack = haystack + 1;-
363 const char *rneedle = needle;-
364-
365 for (;; rhaystack++, rneedle++)-
366 {-
367 if (*rneedle == '\0')
*rneedle == '\0'Description
TRUEnever evaluated
FALSEnever evaluated
0
368 /* Found a match. */-
369 return (char *) haystack;
never executed: return (char *) haystack;
0
370 if (*rhaystack == '\0')
*rhaystack == '\0'Description
TRUEnever evaluated
FALSEnever evaluated
0
371 /* No match. */-
372 return NULL;
never executed: return ((void *)0) ;
0
373 comparison_count++;-
374 if (*rhaystack != *rneedle)
*rhaystack != *rneedleDescription
TRUEnever evaluated
FALSEnever evaluated
0
375 /* Nothing in this round. */-
376 break;
never executed: break;
0
377 }
never executed: end of block
0
378 }
never executed: end of block
0
379 }
never executed: end of block
0
380 }
never executed: end of block
0
381 else-
382 return (char *) haystack;
never executed: return (char *) haystack;
0
383 }-
384}-
Source codeSwitch to Preprocessed file

Generated by Squish Coco 4.1.2