| Absolute File Name: | /home/opencoverage/opencoverage/guest-scripts/qtdeclarative/src/qtdeclarative/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.h |
| Source code | Switch to Preprocessed file |
| Line | Source | Count | ||||||
|---|---|---|---|---|---|---|---|---|
| 1 | /* | - | ||||||
| 2 | * Copyright (C) 2012 Apple Inc. All rights reserved. | - | ||||||
| 3 | * | - | ||||||
| 4 | * Redistribution and use in source and binary forms, with or without | - | ||||||
| 5 | * modification, are permitted provided that the following conditions | - | ||||||
| 6 | * are met: | - | ||||||
| 7 | * 1. Redistributions of source code must retain the above copyright | - | ||||||
| 8 | * notice, this list of conditions and the following disclaimer. | - | ||||||
| 9 | * 2. Redistributions in binary form must reproduce the above copyright | - | ||||||
| 10 | * notice, this list of conditions and the following disclaimer in the | - | ||||||
| 11 | * documentation and/or other materials provided with the distribution. | - | ||||||
| 12 | * | - | ||||||
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | - | ||||||
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | - | ||||||
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | - | ||||||
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | - | ||||||
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | - | ||||||
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | - | ||||||
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | - | ||||||
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | - | ||||||
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | - | ||||||
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | - | ||||||
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | - | ||||||
| 24 | */ | - | ||||||
| 25 | - | |||||||
| 26 | #ifndef YarrCanonicalizeUCS2_H | - | ||||||
| 27 | #define YarrCanonicalizeUCS2_H | - | ||||||
| 28 | - | |||||||
| 29 | #include <stdint.h> | - | ||||||
| 30 | #include <wtf/unicode/Unicode.h> | - | ||||||
| 31 | - | |||||||
| 32 | namespace JSC { namespace Yarr { | - | ||||||
| 33 | - | |||||||
| 34 | // This set of data (autogenerated using YarrCanonicalizeUCS2.js into YarrCanonicalizeUCS2.cpp) | - | ||||||
| 35 | // provides information for each UCS2 code point as to the set of code points that it should | - | ||||||
| 36 | // match under the ES5.1 case insensitive RegExp matching rules, specified in 15.10.2.8. | - | ||||||
| 37 | enum UCS2CanonicalizationType { | - | ||||||
| 38 | CanonicalizeUnique, // No canonically equal values, e.g. 0x0. | - | ||||||
| 39 | CanonicalizeSet, // Value indicates a set in characterSetInfo. | - | ||||||
| 40 | CanonicalizeRangeLo, // Value is positive delta to pair, E.g. 0x41 has value 0x20, -> 0x61. | - | ||||||
| 41 | CanonicalizeRangeHi, // Value is positive delta subtracted to reach the pair, e.g. 0x61 has value 0x20, -> 0x41. | - | ||||||
| 42 | CanonicalizeAlternatingAligned, // Aligned consecutive pair, e.g. 0x1f4,0x1f5. | - | ||||||
| 43 | CanonicalizeAlternatingUnaligned, // Unaligned consecutive pair, e.g. 0x241,0x242. | - | ||||||
| 44 | }; | - | ||||||
| 45 | struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; }; | - | ||||||
| 46 | extern const size_t UCS2_CANONICALIZATION_RANGES; | - | ||||||
| 47 | extern uint16_t* characterSetInfo[]; | - | ||||||
| 48 | extern UCS2CanonicalizationRange rangeInfo[]; | - | ||||||
| 49 | - | |||||||
| 50 | // This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to | - | ||||||
| 51 | // the set of Latin1 codepoints that could match. | - | ||||||
| 52 | enum LatinCanonicalizationType { | - | ||||||
| 53 | CanonicalizeLatinSelf, // This character is in the Latin1 range, but has no canonical equivalent in the range. | - | ||||||
| 54 | CanonicalizeLatinMask0x20, // One of a pair of characters, under the mask 0x20. | - | ||||||
| 55 | CanonicalizeLatinOther, // This character is not in the Latin1 range, but canonicalizes to another that is. | - | ||||||
| 56 | CanonicalizeLatinInvalid, // Cannot match against Latin1 input. | - | ||||||
| 57 | }; | - | ||||||
| 58 | struct LatinCanonicalizationRange { uint16_t begin, end, value, type; }; | - | ||||||
| 59 | extern const size_t LATIN_CANONICALIZATION_RANGES; | - | ||||||
| 60 | extern LatinCanonicalizationRange latinRangeInfo[]; | - | ||||||
| 61 | - | |||||||
| 62 | // This searches in log2 time over ~364 entries, so should typically result in 8 compares. | - | ||||||
| 63 | inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch) | - | ||||||
| 64 | { | - | ||||||
| 65 | UCS2CanonicalizationRange* info = rangeInfo; | - | ||||||
| 66 | size_t entries = UCS2_CANONICALIZATION_RANGES; | - | ||||||
| 67 | - | |||||||
| 68 | while (true) { | - | ||||||
| 69 | size_t candidate = entries >> 1; | - | ||||||
| 70 | UCS2CanonicalizationRange* candidateInfo = info + candidate; | - | ||||||
| 71 | if (ch < candidateInfo->begin)
| 118-660 | ||||||
| 72 | entries = candidate; executed 660 times by 2 tests: entries = candidate;Executed by:
| 660 | ||||||
| 73 | else if (ch <= candidateInfo->end)
| 28-90 | ||||||
| 74 | return candidateInfo; executed 90 times by 2 tests: return candidateInfo;Executed by:
| 90 | ||||||
| 75 | else { | - | ||||||
| 76 | info = candidateInfo + 1; | - | ||||||
| 77 | entries -= (candidate + 1); | - | ||||||
| 78 | } executed 28 times by 1 test: end of blockExecuted by:
| 28 | ||||||
| 79 | } | - | ||||||
| 80 | } never executed: end of block | 0 | ||||||
| 81 | - | |||||||
| 82 | // Should only be called for characters that have one canonically matching value. | - | ||||||
| 83 | inline UChar getCanonicalPair(UCS2CanonicalizationRange* info, UChar ch) | - | ||||||
| 84 | { | - | ||||||
| 85 | ASSERT(ch >= info->begin && ch <= info->end); | - | ||||||
| 86 | switch (info->type) { | - | ||||||
| 87 | case CanonicalizeRangeLo: never executed: case CanonicalizeRangeLo: | 0 | ||||||
| 88 | return ch + info->value; never executed: return ch + info->value; | 0 | ||||||
| 89 | case CanonicalizeRangeHi: never executed: case CanonicalizeRangeHi: | 0 | ||||||
| 90 | return ch - info->value; never executed: return ch - info->value; | 0 | ||||||
| 91 | case CanonicalizeAlternatingAligned: never executed: case CanonicalizeAlternatingAligned: | 0 | ||||||
| 92 | return ch ^ 1; never executed: return ch ^ 1; | 0 | ||||||
| 93 | case CanonicalizeAlternatingUnaligned: never executed: case CanonicalizeAlternatingUnaligned: | 0 | ||||||
| 94 | return ((ch - 1) ^ 1) + 1; never executed: return ((ch - 1) ^ 1) + 1; | 0 | ||||||
| 95 | default: never executed: default: | 0 | ||||||
| 96 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
| 97 | } never executed: end of block | 0 | ||||||
| 98 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
| 99 | return 0; never executed: return 0; | 0 | ||||||
| 100 | } | - | ||||||
| 101 | - | |||||||
| 102 | // Returns true if no other UCS2 codepoint can match this value. | - | ||||||
| 103 | inline bool isCanonicallyUnique(UChar ch) | - | ||||||
| 104 | { | - | ||||||
| 105 | return rangeInfoFor(ch)->type == CanonicalizeUnique; executed 86 times by 2 tests: return rangeInfoFor(ch)->type == CanonicalizeUnique;Executed by:
| 86 | ||||||
| 106 | } | - | ||||||
| 107 | - | |||||||
| 108 | // Returns true if values are equal, under the canonicalization rules. | - | ||||||
| 109 | inline bool areCanonicallyEquivalent(UChar a, UChar b) | - | ||||||
| 110 | { | - | ||||||
| 111 | UCS2CanonicalizationRange* info = rangeInfoFor(a); | - | ||||||
| 112 | switch (info->type) { | - | ||||||
| 113 | case CanonicalizeUnique: never executed: case CanonicalizeUnique: | 0 | ||||||
| 114 | return a == b; never executed: return a == b; | 0 | ||||||
| 115 | case CanonicalizeSet: { never executed: case CanonicalizeSet: | 0 | ||||||
| 116 | for (uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {
| 0 | ||||||
| 117 | if (a == b)
| 0 | ||||||
| 118 | return true; never executed: return true; | 0 | ||||||
| 119 | } never executed: end of block | 0 | ||||||
| 120 | return false; never executed: return false; | 0 | ||||||
| 121 | } | - | ||||||
| 122 | case CanonicalizeRangeLo: never executed: case CanonicalizeRangeLo: | 0 | ||||||
| 123 | return (a == b) || (a + info->value == b); never executed: return (a == b) || (a + info->value == b); | 0 | ||||||
| 124 | case CanonicalizeRangeHi: never executed: case CanonicalizeRangeHi: | 0 | ||||||
| 125 | return (a == b) || (a - info->value == b); never executed: return (a == b) || (a - info->value == b); | 0 | ||||||
| 126 | case CanonicalizeAlternatingAligned: never executed: case CanonicalizeAlternatingAligned: | 0 | ||||||
| 127 | return (a | 1) == (b | 1); never executed: return (a | 1) == (b | 1); | 0 | ||||||
| 128 | case CanonicalizeAlternatingUnaligned: never executed: case CanonicalizeAlternatingUnaligned: | 0 | ||||||
| 129 | return ((a - 1) | 1) == ((b - 1) | 1); never executed: return ((a - 1) | 1) == ((b - 1) | 1); | 0 | ||||||
| 130 | } | - | ||||||
| 131 | - | |||||||
| 132 | RELEASE_ASSERT_NOT_REACHED(); | - | ||||||
| 133 | return false; never executed: return false; | 0 | ||||||
| 134 | } | - | ||||||
| 135 | - | |||||||
| 136 | } } // JSC::Yarr | - | ||||||
| 137 | - | |||||||
| 138 | #endif | - | ||||||
| Source code | Switch to Preprocessed file |