Absolute File Name: | /home/opencoverage/opencoverage/guest-scripts/qtdeclarative/src/qtdeclarative/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.h |
Source code | Switch to Preprocessed file |
Line | Source | Count | ||||||
---|---|---|---|---|---|---|---|---|
1 | /* | - | ||||||
2 | * Copyright (C) 2012 Apple Inc. All rights reserved. | - | ||||||
3 | * | - | ||||||
4 | * Redistribution and use in source and binary forms, with or without | - | ||||||
5 | * modification, are permitted provided that the following conditions | - | ||||||
6 | * are met: | - | ||||||
7 | * 1. Redistributions of source code must retain the above copyright | - | ||||||
8 | * notice, this list of conditions and the following disclaimer. | - | ||||||
9 | * 2. Redistributions in binary form must reproduce the above copyright | - | ||||||
10 | * notice, this list of conditions and the following disclaimer in the | - | ||||||
11 | * documentation and/or other materials provided with the distribution. | - | ||||||
12 | * | - | ||||||
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | - | ||||||
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | - | ||||||
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | - | ||||||
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | - | ||||||
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | - | ||||||
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | - | ||||||
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | - | ||||||
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | - | ||||||
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | - | ||||||
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | - | ||||||
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | - | ||||||
24 | */ | - | ||||||
25 | - | |||||||
26 | #ifndef YarrCanonicalizeUCS2_H | - | ||||||
27 | #define YarrCanonicalizeUCS2_H | - | ||||||
28 | - | |||||||
29 | #include <stdint.h> | - | ||||||
30 | #include <wtf/unicode/Unicode.h> | - | ||||||
31 | - | |||||||
32 | namespace JSC { namespace Yarr { | - | ||||||
33 | - | |||||||
34 | // This set of data (autogenerated using YarrCanonicalizeUCS2.js into YarrCanonicalizeUCS2.cpp) | - | ||||||
35 | // provides information for each UCS2 code point as to the set of code points that it should | - | ||||||
36 | // match under the ES5.1 case insensitive RegExp matching rules, specified in 15.10.2.8. | - | ||||||
37 | enum UCS2CanonicalizationType { | - | ||||||
38 | CanonicalizeUnique, // No canonically equal values, e.g. 0x0. | - | ||||||
39 | CanonicalizeSet, // Value indicates a set in characterSetInfo. | - | ||||||
40 | CanonicalizeRangeLo, // Value is a positive delta added to reach the pair, e.g. 0x41 has value 0x20, -> 0x61. | - | ||||||
41 | CanonicalizeRangeHi, // Value is a positive delta subtracted to reach the pair, e.g. 0x61 has value 0x20, -> 0x41. | - | ||||||
42 | CanonicalizeAlternatingAligned, // Aligned consecutive pair, e.g. 0x1f4,0x1f5. | - | ||||||
43 | CanonicalizeAlternatingUnaligned, // Unaligned consecutive pair, e.g. 0x241,0x242. | - | ||||||
44 | }; | - | ||||||
45 | struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; }; | - | ||||||
46 | extern const size_t UCS2_CANONICALIZATION_RANGES; | - | ||||||
47 | extern uint16_t* characterSetInfo[]; | - | ||||||
48 | extern UCS2CanonicalizationRange rangeInfo[]; | - | ||||||
49 | - | |||||||
50 | // This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to | - | ||||||
51 | // the set of Latin1 codepoints that could match. | - | ||||||
52 | enum LatinCanonicalizationType { | - | ||||||
53 | CanonicalizeLatinSelf, // This character is in the Latin1 range, but has no canonical equivalent in the range. | - | ||||||
54 | CanonicalizeLatinMask0x20, // One of a pair of characters, under the mask 0x20. | - | ||||||
55 | CanonicalizeLatinOther, // This character is not in the Latin1 range, but canonicalizes to another that is. | - | ||||||
56 | CanonicalizeLatinInvalid, // Cannot match against Latin1 input. | - | ||||||
57 | }; | - | ||||||
58 | struct LatinCanonicalizationRange { uint16_t begin, end, value, type; }; | - | ||||||
59 | extern const size_t LATIN_CANONICALIZATION_RANGES; | - | ||||||
60 | extern LatinCanonicalizationRange latinRangeInfo[]; | - | ||||||
61 | - | |||||||
62 | // This searches in log2 time over ~364 entries, so should typically result in 8 compares. | - | ||||||
63 | inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch) | - | ||||||
64 | { | - | ||||||
65 | UCS2CanonicalizationRange* info = rangeInfo; | - | ||||||
66 | size_t entries = UCS2_CANONICALIZATION_RANGES; | - | ||||||
67 | - | |||||||
68 | while (true) { | - | ||||||
69 | size_t candidate = entries >> 1; | - | ||||||
70 | UCS2CanonicalizationRange* candidateInfo = info + candidate; | - | ||||||
71 | if (ch < candidateInfo->begin)
| 118-660 | ||||||
72 | entries = candidate; executed 660 times by 2 tests: entries = candidate; Executed by:
| 660 | ||||||
73 | else if (ch <= candidateInfo->end)
| 28-90 | ||||||
74 | return candidateInfo; executed 90 times by 2 tests: return candidateInfo; Executed by:
| 90 | ||||||
75 | else { | - | ||||||
76 | info = candidateInfo + 1; | - | ||||||
77 | entries -= (candidate + 1); | - | ||||||
78 | } executed 28 times by 1 test: end of block Executed by:
| 28 | ||||||
79 | } | - | ||||||
80 | } never executed: end of block | 0 | ||||||
81 | - | |||||||
82 | // Should only be called for characters that have one canonically matching value. | - | ||||||
83 | inline UChar getCanonicalPair(UCS2CanonicalizationRange* info, UChar ch) | - | ||||||
84 | { | - | ||||||
85 | ASSERT(ch >= info->begin && ch <= info->end); | - | ||||||
86 | switch (info->type) { | - | ||||||
87 | case CanonicalizeRangeLo: never executed: case CanonicalizeRangeLo: | 0 | ||||||
88 | return ch + info->value; never executed: return ch + info->value; | 0 | ||||||
89 | case CanonicalizeRangeHi: never executed: case CanonicalizeRangeHi: | 0 | ||||||
90 | return ch - info->value; never executed: return ch - info->value; | 0 | ||||||
91 | case CanonicalizeAlternatingAligned: never executed: case CanonicalizeAlternatingAligned: | 0 | ||||||
92 | return ch ^ 1; never executed: return ch ^ 1; | 0 | ||||||
93 | case CanonicalizeAlternatingUnaligned: never executed: case CanonicalizeAlternatingUnaligned: | 0 | ||||||
94 | return ((ch - 1) ^ 1) + 1; never executed: return ((ch - 1) ^ 1) + 1; | 0 | ||||||
95 | default: never executed: default: | 0 | ||||||
96 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
97 | } never executed: end of block | 0 | ||||||
98 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
99 | return 0; never executed: return 0; | 0 | ||||||
100 | } | - | ||||||
101 | - | |||||||
102 | // Returns true if no other UCS2 codepoint can match this value. | - | ||||||
103 | inline bool isCanonicallyUnique(UChar ch) | - | ||||||
104 | { | - | ||||||
105 | return rangeInfoFor(ch)->type == CanonicalizeUnique; executed 86 times by 2 tests: return rangeInfoFor(ch)->type == CanonicalizeUnique; Executed by:
| 86 | ||||||
106 | } | - | ||||||
107 | - | |||||||
108 | // Returns true if values are equal, under the canonicalization rules. | - | ||||||
109 | inline bool areCanonicallyEquivalent(UChar a, UChar b) | - | ||||||
110 | { | - | ||||||
111 | UCS2CanonicalizationRange* info = rangeInfoFor(a); | - | ||||||
112 | switch (info->type) { | - | ||||||
113 | case CanonicalizeUnique: never executed: case CanonicalizeUnique: | 0 | ||||||
114 | return a == b; never executed: return a == b; | 0 | ||||||
115 | case CanonicalizeSet: { never executed: case CanonicalizeSet: | 0 | ||||||
116 | for (uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {
| 0 | ||||||
117 | if (a == b)
| 0 | ||||||
118 | return true; never executed: return true; | 0 | ||||||
119 | } never executed: end of block | 0 | ||||||
120 | return false; never executed: return false; | 0 | ||||||
121 | } | - | ||||||
122 | case CanonicalizeRangeLo: never executed: case CanonicalizeRangeLo: | 0 | ||||||
123 | return (a == b) || (a + info->value == b); never executed: return (a == b) || (a + info->value == b); | 0 | ||||||
124 | case CanonicalizeRangeHi: never executed: case CanonicalizeRangeHi: | 0 | ||||||
125 | return (a == b) || (a - info->value == b); never executed: return (a == b) || (a - info->value == b); | 0 | ||||||
126 | case CanonicalizeAlternatingAligned: never executed: case CanonicalizeAlternatingAligned: | 0 | ||||||
127 | return (a | 1) == (b | 1); never executed: return (a | 1) == (b | 1); | 0 | ||||||
128 | case CanonicalizeAlternatingUnaligned: never executed: case CanonicalizeAlternatingUnaligned: | 0 | ||||||
129 | return ((a - 1) | 1) == ((b - 1) | 1); never executed: return ((a - 1) | 1) == ((b - 1) | 1); | 0 | ||||||
130 | } | - | ||||||
131 | - | |||||||
132 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
133 | return false; never executed: return false; | 0 | ||||||
134 | } | - | ||||||
135 | - | |||||||
136 | } } // JSC::Yarr | - | ||||||
137 | - | |||||||
138 | #endif | - | ||||||
Source code | Switch to Preprocessed file |