root/eval/eval-unicode.cpp
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following definitions.
- testNonASCIICharacter
- isNonASCIIIdentifierStart
- isNonASCIIIdentifierSubsequent
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine.].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2008
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "avmplus.h"
#ifdef VMCFG_EVAL
#include "eval.h"
/* Rudimentary Unicode support - enough to handle identifier lexing. These tables
* are slightly optimized for space, but more could be done. In particular, delta
* coding can be useful because most deltas - both in character ranges and between
* ranges - fit in a single byte. Delta coding might shrink the tables by slightly
* less than a factor of 2. As it is, the total size of these tables is about 2KB.
*
* Speed is not a big issue here because almost no programs have identifiers that
* contain characters outside the ASCII range, and ASCII is handled specially
* outside this file using faster code paths (see code in eval-lex.cpp).
*
* The tables are generated from the Unicode data file by generate-unicode-tables.c
* in this directory.
*/
namespace avmplus
{
namespace RTC
{
/* One contiguous run of 16-bit character codes.  Both end points are part of
 * the run: the lookup code treats a character c as covered when
 * lo <= c && c <= hi.
 */
struct Range {
    uint16_t lo, hi;    // first and last character of the run (inclusive)
};
/* A character class described by two arrays: a list of inclusive ranges and a
 * list of individual characters.  Each array is paired with its element
 * count.  The lookup code bisects both arrays, so their entries have to be
 * in ascending order.
 */
struct UnicodeTable {
    uint32_t        nranges;        // number of elements in 'ranges'
    const Range*    ranges;         // inclusive character ranges
    uint32_t        nsingletons;    // number of elements in 'singletons'
    const uint16_t* singletons;     // individual characters
};
/* Inclusive character ranges belonging to the identifier_start table.
 *
 * The entries are consumed by the bisection in testNonASCIICharacter and are
 * therefore listed in ascending order without overlaps; keep it that way.
 * Per the comment at the top of this file the table is generated output
 * (generate-unicode-tables.c), so prefer regenerating it to editing by hand.
 */
static const Range identifier_start_ranges[] = {
{0x0041, 0x005A},
{0x0061, 0x007A},
{0x00C0, 0x00D6},
{0x00D8, 0x00F6},
{0x00F8, 0x02C1},
{0x02C6, 0x02D1},
{0x02E0, 0x02E4},
{0x0370, 0x0374},
{0x0376, 0x0377},
{0x037A, 0x037D},
{0x0388, 0x038A},
{0x038E, 0x03A1},
{0x03A3, 0x03F5},
{0x03F7, 0x0481},
{0x048A, 0x0523},
{0x0531, 0x0556},
{0x0561, 0x0587},
{0x05D0, 0x05EA},
{0x05F0, 0x05F2},
{0x0621, 0x064A},
{0x066E, 0x066F},
{0x0671, 0x06D3},
{0x06E5, 0x06E6},
{0x06EE, 0x06EF},
{0x06FA, 0x06FC},
{0x0712, 0x072F},
{0x074D, 0x07A5},
{0x07CA, 0x07EA},
{0x07F4, 0x07F5},
{0x0904, 0x0939},
{0x0958, 0x0961},
{0x0971, 0x0972},
{0x097B, 0x097F},
{0x0985, 0x098C},
{0x098F, 0x0990},
{0x0993, 0x09A8},
{0x09AA, 0x09B0},
{0x09B6, 0x09B9},
{0x09DC, 0x09DD},
{0x09DF, 0x09E1},
{0x09F0, 0x09F1},
{0x0A05, 0x0A0A},
{0x0A0F, 0x0A10},
{0x0A13, 0x0A28},
{0x0A2A, 0x0A30},
{0x0A32, 0x0A33},
{0x0A35, 0x0A36},
{0x0A38, 0x0A39},
{0x0A59, 0x0A5C},
{0x0A72, 0x0A74},
{0x0A85, 0x0A8D},
{0x0A8F, 0x0A91},
{0x0A93, 0x0AA8},
{0x0AAA, 0x0AB0},
{0x0AB2, 0x0AB3},
{0x0AB5, 0x0AB9},
{0x0AE0, 0x0AE1},
{0x0B05, 0x0B0C},
{0x0B0F, 0x0B10},
{0x0B13, 0x0B28},
{0x0B2A, 0x0B30},
{0x0B32, 0x0B33},
{0x0B35, 0x0B39},
{0x0B5C, 0x0B5D},
{0x0B5F, 0x0B61},
{0x0B85, 0x0B8A},
{0x0B8E, 0x0B90},
{0x0B92, 0x0B95},
{0x0B99, 0x0B9A},
{0x0B9E, 0x0B9F},
{0x0BA3, 0x0BA4},
{0x0BA8, 0x0BAA},
{0x0BAE, 0x0BB9},
{0x0C05, 0x0C0C},
{0x0C0E, 0x0C10},
{0x0C12, 0x0C28},
{0x0C2A, 0x0C33},
{0x0C35, 0x0C39},
{0x0C58, 0x0C59},
{0x0C60, 0x0C61},
{0x0C85, 0x0C8C},
{0x0C8E, 0x0C90},
{0x0C92, 0x0CA8},
{0x0CAA, 0x0CB3},
{0x0CB5, 0x0CB9},
{0x0CE0, 0x0CE1},
{0x0D05, 0x0D0C},
{0x0D0E, 0x0D10},
{0x0D12, 0x0D28},
{0x0D2A, 0x0D39},
{0x0D60, 0x0D61},
{0x0D7A, 0x0D7F},
{0x0D85, 0x0D96},
{0x0D9A, 0x0DB1},
{0x0DB3, 0x0DBB},
{0x0DC0, 0x0DC6},
{0x0E01, 0x0E30},
{0x0E32, 0x0E33},
{0x0E40, 0x0E46},
{0x0E81, 0x0E82},
{0x0E87, 0x0E88},
{0x0E94, 0x0E97},
{0x0E99, 0x0E9F},
{0x0EA1, 0x0EA3},
{0x0EAA, 0x0EAB},
{0x0EAD, 0x0EB0},
{0x0EB2, 0x0EB3},
{0x0EC0, 0x0EC4},
{0x0EDC, 0x0EDD},
{0x0F40, 0x0F47},
{0x0F49, 0x0F6C},
{0x0F88, 0x0F8B},
{0x1000, 0x102A},
{0x1050, 0x1055},
{0x105A, 0x105D},
{0x1065, 0x1066},
{0x106E, 0x1070},
{0x1075, 0x1081},
{0x10A0, 0x10C5},
{0x10D0, 0x10FA},
{0x1100, 0x1159},
{0x115F, 0x11A2},
{0x11A8, 0x11F9},
{0x1200, 0x1248},
{0x124A, 0x124D},
{0x1250, 0x1256},
{0x125A, 0x125D},
{0x1260, 0x1288},
{0x128A, 0x128D},
{0x1290, 0x12B0},
{0x12B2, 0x12B5},
{0x12B8, 0x12BE},
{0x12C2, 0x12C5},
{0x12C8, 0x12D6},
{0x12D8, 0x1310},
{0x1312, 0x1315},
{0x1318, 0x135A},
{0x1380, 0x138F},
{0x13A0, 0x13F4},
{0x1401, 0x166C},
{0x166F, 0x1676},
{0x1681, 0x169A},
{0x16A0, 0x16EA},
{0x16EE, 0x16F0},
{0x1700, 0x170C},
{0x170E, 0x1711},
{0x1720, 0x1731},
{0x1740, 0x1751},
{0x1760, 0x176C},
{0x176E, 0x1770},
{0x1780, 0x17B3},
{0x1820, 0x1877},
{0x1880, 0x18A8},
{0x1900, 0x191C},
{0x1950, 0x196D},
{0x1970, 0x1974},
{0x1980, 0x19A9},
{0x19C1, 0x19C7},
{0x1A00, 0x1A16},
{0x1B05, 0x1B33},
{0x1B45, 0x1B4B},
{0x1B83, 0x1BA0},
{0x1BAE, 0x1BAF},
{0x1C00, 0x1C23},
{0x1C4D, 0x1C4F},
{0x1C5A, 0x1C7D},
{0x1D00, 0x1DBF},
{0x1E00, 0x1F15},
{0x1F18, 0x1F1D},
{0x1F20, 0x1F45},
{0x1F48, 0x1F4D},
{0x1F50, 0x1F57},
{0x1F5F, 0x1F7D},
{0x1F80, 0x1FB4},
{0x1FB6, 0x1FBC},
{0x1FC2, 0x1FC4},
{0x1FC6, 0x1FCC},
{0x1FD0, 0x1FD3},
{0x1FD6, 0x1FDB},
{0x1FE0, 0x1FEC},
{0x1FF2, 0x1FF4},
{0x1FF6, 0x1FFC},
{0x2090, 0x2094},
{0x210A, 0x2113},
{0x2119, 0x211D},
{0x212A, 0x212D},
{0x212F, 0x2139},
{0x213C, 0x213F},
{0x2145, 0x2149},
{0x2160, 0x2188},
{0x2C00, 0x2C2E},
{0x2C30, 0x2C5E},
{0x2C60, 0x2C6F},
{0x2C71, 0x2C7D},
{0x2C80, 0x2CE4},
{0x2D00, 0x2D25},
{0x2D30, 0x2D65},
{0x2D80, 0x2D96},
{0x2DA0, 0x2DA6},
{0x2DA8, 0x2DAE},
{0x2DB0, 0x2DB6},
{0x2DB8, 0x2DBE},
{0x2DC0, 0x2DC6},
{0x2DC8, 0x2DCE},
{0x2DD0, 0x2DD6},
{0x2DD8, 0x2DDE},
{0x3005, 0x3007},
{0x3021, 0x3029},
{0x3031, 0x3035},
{0x3038, 0x303C},
{0x3041, 0x3096},
{0x309D, 0x309F},
{0x30A1, 0x30FA},
{0x30FC, 0x30FF},
{0x3105, 0x312D},
{0x3131, 0x318E},
{0x31A0, 0x31B7},
{0x31F0, 0x31FF},
{0xA000, 0xA48C},
{0xA500, 0xA60C},
{0xA610, 0xA61F},
{0xA62A, 0xA62B},
{0xA640, 0xA65F},
{0xA662, 0xA66E},
{0xA67F, 0xA697},
{0xA717, 0xA71F},
{0xA722, 0xA788},
{0xA78B, 0xA78C},
{0xA7FB, 0xA801},
{0xA803, 0xA805},
{0xA807, 0xA80A},
{0xA80C, 0xA822},
{0xA840, 0xA873},
{0xA882, 0xA8B3},
{0xA90A, 0xA925},
{0xA930, 0xA946},
{0xAA00, 0xAA28},
{0xAA40, 0xAA42},
{0xAA44, 0xAA4B},
{0xF900, 0xFA2D},
{0xFA30, 0xFA6A},
{0xFA70, 0xFAD9},
{0xFB00, 0xFB06},
{0xFB13, 0xFB17},
{0xFB1F, 0xFB28},
{0xFB2A, 0xFB36},
{0xFB38, 0xFB3C},
{0xFB40, 0xFB41},
{0xFB43, 0xFB44},
{0xFB46, 0xFBB1},
{0xFBD3, 0xFD3D},
{0xFD50, 0xFD8F},
{0xFD92, 0xFDC7},
{0xFDF0, 0xFDFB},
{0xFE70, 0xFE74},
{0xFE76, 0xFEFC},
{0xFF21, 0xFF3A},
{0xFF41, 0xFF5A},
{0xFF66, 0xFFBE},
{0xFFC2, 0xFFC7},
{0xFFCA, 0xFFCF},
{0xFFD2, 0xFFD7},
{0xFFDA, 0xFFDC},
};
/* Individual characters belonging to the identifier_start table, i.e. those
 * not covered by an entry in identifier_start_ranges.
 *
 * Searched by bisection in testNonASCIICharacter, so the values are listed in
 * ascending order; keep it that way.
 *
 * NOTE(review): 0x3400/0x4DB5, 0x4E00/0x9FC3 and 0xAC00/0xD7A3 appear here as
 * isolated code points.  They look like the "First"/"Last" end points that
 * UnicodeData.txt uses for the CJK ideograph and Hangul syllable blocks; if
 * so, the characters in between are not matched by this table.  Confirm
 * against generate-unicode-tables.c and fix it there rather than by hand,
 * since this table is generated output.
 */
static const uint16_t identifier_start_singletons[] = {
0x00AA,
0x00B5,
0x00BA,
0x02EC,
0x02EE,
0x0386,
0x038C,
0x0559,
0x06D5,
0x06FF,
0x0710,
0x07B1,
0x07FA,
0x093D,
0x0950,
0x09B2,
0x09BD,
0x09CE,
0x0A5E,
0x0ABD,
0x0AD0,
0x0B3D,
0x0B71,
0x0B83,
0x0B9C,
0x0BD0,
0x0C3D,
0x0CBD,
0x0CDE,
0x0D3D,
0x0DBD,
0x0E84,
0x0E8A,
0x0E8D,
0x0EA5,
0x0EA7,
0x0EBD,
0x0EC6,
0x0F00,
0x103F,
0x1061,
0x108E,
0x10FC,
0x1258,
0x12C0,
0x17D7,
0x17DC,
0x18AA,
0x1F59,
0x1F5B,
0x1F5D,
0x1FBE,
0x2071,
0x207F,
0x2102,
0x2107,
0x2115,
0x2124,
0x2126,
0x2128,
0x214E,
0x2D6F,
0x2E2F,
0x3400,
0x4DB5,
0x4E00,
0x9FC3,
0xAC00,
0xD7A3,
0xFB1D,
0xFB3E,
};
/* Table consulted by isNonASCIIIdentifierStart (and, as the first of two
 * tables, by isNonASCIIIdentifierSubsequent).
 *
 * The element counts are computed from the arrays themselves instead of being
 * spelled out as literals, so they cannot drift from the table contents when
 * the arrays are regenerated or edited.
 */
static const UnicodeTable identifier_start = {
    (uint32_t)(sizeof(identifier_start_ranges) / sizeof(identifier_start_ranges[0])),
    identifier_start_ranges,
    (uint32_t)(sizeof(identifier_start_singletons) / sizeof(identifier_start_singletons[0])),
    identifier_start_singletons
};
/* Inclusive character ranges belonging to the identifier_subsequent table.
 * isNonASCIIIdentifierSubsequent consults this table in addition to
 * identifier_start, so characters already present there are not repeated.
 *
 * The entries are consumed by the bisection in testNonASCIICharacter and are
 * therefore listed in ascending order without overlaps; keep it that way.
 * Per the comment at the top of this file the table is generated output
 * (generate-unicode-tables.c), so prefer regenerating it to editing by hand.
 */
static const Range identifier_subsequent_ranges[] = {
{0x0030, 0x0039},
{0x0300, 0x036F},
{0x0483, 0x0487},
{0x0591, 0x05BD},
{0x05C1, 0x05C2},
{0x05C4, 0x05C5},
{0x0610, 0x061A},
{0x064B, 0x065E},
{0x0660, 0x0669},
{0x06D6, 0x06DC},
{0x06DF, 0x06E4},
{0x06E7, 0x06E8},
{0x06EA, 0x06ED},
{0x06F0, 0x06F9},
{0x0730, 0x074A},
{0x07A6, 0x07B0},
{0x07C0, 0x07C9},
{0x07EB, 0x07F3},
{0x0901, 0x0903},
{0x093E, 0x094D},
{0x0951, 0x0954},
{0x0962, 0x0963},
{0x0966, 0x096F},
{0x0981, 0x0983},
{0x09BE, 0x09C4},
{0x09C7, 0x09C8},
{0x09CB, 0x09CD},
{0x09E2, 0x09E3},
{0x09E6, 0x09EF},
{0x0A01, 0x0A03},
{0x0A3E, 0x0A42},
{0x0A47, 0x0A48},
{0x0A4B, 0x0A4D},
{0x0A66, 0x0A71},
{0x0A81, 0x0A83},
{0x0ABE, 0x0AC5},
{0x0AC7, 0x0AC9},
{0x0ACB, 0x0ACD},
{0x0AE2, 0x0AE3},
{0x0AE6, 0x0AEF},
{0x0B01, 0x0B03},
{0x0B3E, 0x0B44},
{0x0B47, 0x0B48},
{0x0B4B, 0x0B4D},
{0x0B56, 0x0B57},
{0x0B62, 0x0B63},
{0x0B66, 0x0B6F},
{0x0BBE, 0x0BC2},
{0x0BC6, 0x0BC8},
{0x0BCA, 0x0BCD},
{0x0BE6, 0x0BEF},
{0x0C01, 0x0C03},
{0x0C3E, 0x0C44},
{0x0C46, 0x0C48},
{0x0C4A, 0x0C4D},
{0x0C55, 0x0C56},
{0x0C62, 0x0C63},
{0x0C66, 0x0C6F},
{0x0C82, 0x0C83},
{0x0CBE, 0x0CC4},
{0x0CC6, 0x0CC8},
{0x0CCA, 0x0CCD},
{0x0CD5, 0x0CD6},
{0x0CE2, 0x0CE3},
{0x0CE6, 0x0CEF},
{0x0D02, 0x0D03},
{0x0D3E, 0x0D44},
{0x0D46, 0x0D48},
{0x0D4A, 0x0D4D},
{0x0D62, 0x0D63},
{0x0D66, 0x0D6F},
{0x0D82, 0x0D83},
{0x0DCF, 0x0DD4},
{0x0DD8, 0x0DDF},
{0x0DF2, 0x0DF3},
{0x0E34, 0x0E3A},
{0x0E47, 0x0E4E},
{0x0E50, 0x0E59},
{0x0EB4, 0x0EB9},
{0x0EBB, 0x0EBC},
{0x0EC8, 0x0ECD},
{0x0ED0, 0x0ED9},
{0x0F18, 0x0F19},
{0x0F20, 0x0F29},
{0x0F3E, 0x0F3F},
{0x0F71, 0x0F84},
{0x0F86, 0x0F87},
{0x0F90, 0x0F97},
{0x0F99, 0x0FBC},
{0x102B, 0x103E},
{0x1040, 0x1049},
{0x1056, 0x1059},
{0x105E, 0x1060},
{0x1062, 0x1064},
{0x1067, 0x106D},
{0x1071, 0x1074},
{0x1082, 0x108D},
{0x108F, 0x1099},
{0x1712, 0x1714},
{0x1732, 0x1734},
{0x1752, 0x1753},
{0x1772, 0x1773},
{0x17B6, 0x17D3},
{0x17E0, 0x17E9},
{0x180B, 0x180D},
{0x1810, 0x1819},
{0x1920, 0x192B},
{0x1930, 0x193B},
{0x1946, 0x194F},
{0x19B0, 0x19C0},
{0x19C8, 0x19C9},
{0x19D0, 0x19D9},
{0x1A17, 0x1A1B},
{0x1B00, 0x1B04},
{0x1B34, 0x1B44},
{0x1B50, 0x1B59},
{0x1B6B, 0x1B73},
{0x1B80, 0x1B82},
{0x1BA1, 0x1BAA},
{0x1BB0, 0x1BB9},
{0x1C24, 0x1C37},
{0x1C40, 0x1C49},
{0x1C50, 0x1C59},
{0x1DC0, 0x1DE6},
{0x1DFE, 0x1DFF},
{0x203F, 0x2040},
{0x20D0, 0x20DC},
{0x20E5, 0x20F0},
{0x2DE0, 0x2DFF},
{0x302A, 0x302F},
{0x3099, 0x309A},
{0xA620, 0xA629},
{0xA67C, 0xA67D},
{0xA823, 0xA827},
{0xA880, 0xA881},
{0xA8B4, 0xA8C4},
{0xA8D0, 0xA8D9},
{0xA900, 0xA909},
{0xA926, 0xA92D},
{0xA947, 0xA953},
{0xAA29, 0xAA36},
{0xAA4C, 0xAA4D},
{0xAA50, 0xAA59},
{0xFE00, 0xFE0F},
{0xFE20, 0xFE26},
{0xFE33, 0xFE34},
{0xFE4D, 0xFE4F},
{0xFF10, 0xFF19},
};
/* Individual characters belonging to the identifier_subsequent table, i.e.
 * those not covered by an entry in identifier_subsequent_ranges.
 *
 * Searched by bisection in testNonASCIICharacter, so the values are listed in
 * ascending order; keep it that way.  Generated output per the comment at the
 * top of this file - prefer regenerating to editing by hand.
 */
static const uint16_t identifier_subsequent_singletons[] = {
0x005F,
0x05BF,
0x05C7,
0x0670,
0x0711,
0x093C,
0x09BC,
0x09D7,
0x0A3C,
0x0A51,
0x0A75,
0x0ABC,
0x0B3C,
0x0B82,
0x0BD7,
0x0CBC,
0x0D57,
0x0DCA,
0x0DD6,
0x0E31,
0x0EB1,
0x0F35,
0x0F37,
0x0F39,
0x0FC6,
0x135F,
0x17DD,
0x18A9,
0x2054,
0x20E1,
0xA66F,
0xA802,
0xA806,
0xA80B,
0xAA43,
0xFB1E,
0xFF3F,
};
/* Second table consulted by isNonASCIIIdentifierSubsequent, after
 * identifier_start.
 *
 * The element counts are computed from the arrays themselves instead of being
 * spelled out as literals, so they cannot drift from the table contents when
 * the arrays are regenerated or edited.
 */
static const UnicodeTable identifier_subsequent = {
    (uint32_t)(sizeof(identifier_subsequent_ranges) / sizeof(identifier_subsequent_ranges[0])),
    identifier_subsequent_ranges,
    (uint32_t)(sizeof(identifier_subsequent_singletons) / sizeof(identifier_subsequent_singletons[0])),
    identifier_subsequent_singletons
};
/* Test whether the character c is a member of the table tbl.
 *
 * Both arrays of the table are searched by bisection - first the inclusive
 * ranges, then the singletons - so both must be sorted in ascending order.
 *
 * tbl  the table to search
 * c    the character to look up
 *
 * Returns true if c lies within one of the ranges or equals one of the
 * singletons, false otherwise.
 */
static bool testNonASCIICharacter(const UnicodeTable* tbl, wchar c)
{
    // The bounds are signed so that 'hi' can become -1 (empty array, or c
    // smaller than the first entry) and end the loop.
    int32_t lo = 0;
    int32_t hi = (int32_t)tbl->nranges - 1;
    while (lo <= hi) {
        int32_t mid = lo + (hi - lo) / 2;
        if (tbl->ranges[mid].lo <= c && c <= tbl->ranges[mid].hi)
            return true;
        // The probed range has been ruled out, so step past it.  Keeping
        // 'mid' inside the interval would never shrink it once lo == hi (or
        // hi == lo + 1), and a miss would loop forever.
        if (c < tbl->ranges[mid].lo)
            hi = mid - 1;
        else
            lo = mid + 1;
    }

    lo = 0;
    hi = (int32_t)tbl->nsingletons - 1;
    while (lo <= hi) {
        int32_t mid = lo + (hi - lo) / 2;
        if (tbl->singletons[mid] == c)
            return true;
        if (c < tbl->singletons[mid])
            hi = mid - 1;
        else
            lo = mid + 1;
    }
    return false;
}
/* Report whether c is a member of the identifier_start table. */
bool isNonASCIIIdentifierStart(wchar c)
{
    const UnicodeTable* const table = &identifier_start;
    return testNonASCIICharacter(table, c);
}
/* Report whether c is a member of either the identifier_start table or the
 * identifier_subsequent table.  The start table is tried first; the second
 * table is only searched when the first lookup fails.
 */
bool isNonASCIIIdentifierSubsequent(wchar c)
{
    if (testNonASCIICharacter(&identifier_start, c))
        return true;
    return testNonASCIICharacter(&identifier_subsequent, c);
}
}
}
#endif // VMCFG_EVAL