root/eval/eval-lex.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. traceflag
  2. regexpImpl
  3. divideOperatorImpl
  4. rightAngleImpl
  5. rightShiftOrRelationalOperatorImpl
  6. lexImpl
  7. numberLiteral
  8. integerLiteral
  9. floatingLiteral
  10. checkNextCharForNumber
  11. numberLiteralPrime
  12. numberFraction
  13. numberExponent
  14. digits
  15. lineComment
  16. blockComment
  17. identifier
  18. stringLiteral
  19. escapeSequence
  20. octalOrNulEscape
  21. octalEscape
  22. hexEscape
  23. unicodeEscape
  24. digitValue
  25. parseInt
  26. parseFloat
  27. isUnicodeIdentifierStart
  28. isUnicodeIdentifierPart
  29. print

/* -*- tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine.].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2008
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "avmplus.h"

#ifdef VMCFG_EVAL

#include "eval.h"

namespace avmplus
{
        namespace RTC
        {
#define OCT CHAR_ATTR_OCTAL | CHAR_ATTR_DECIMAL | CHAR_ATTR_HEX
#define DEC CHAR_ATTR_DECIMAL | CHAR_ATTR_HEX
#define HEX CHAR_ATTR_HEX | CHAR_ATTR_LETTER
#define LTR CHAR_ATTR_LETTER
#define DLR CHAR_ATTR_DOLLAR
#define UBR CHAR_ATTR_UNDERBAR
                
                const uint8_t Lexer::char_attrs[128] = {
                /*   0 -  15 */   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
                /*  16 -  31 */   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
                /*  32 -  47 */   0,   0,   0,   0, DLR,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
                /*  48 -  63 */ OCT, OCT, OCT, OCT, OCT, OCT, OCT, OCT, DEC, DEC,   0,   0,   0,   0,   0,   0,
                /*  64 -  79 */   0, HEX, HEX, HEX, HEX, HEX, HEX, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR,
                /*  80 -  95 */ LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR,   0,   0,   0,   0, UBR,
                /*  96 - 111 */   0, HEX, HEX, HEX, HEX, HEX, HEX, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR,
                /* 112 - 127 */ LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR,   0,   0,   0,   0,   0
                };
                
#undef OCT
#undef DEC
#undef HEX
#undef LTR
#undef DLR
#undef UBR

                // Scanner
                //
                // src must be NUL-terminated!

                Lexer::Lexer(Compiler* compiler, const wchar* src, uint32_t srclen, bool keyword_or_ident)
                        : compiler(compiler)
                        , src(src)
                        , limit(src + srclen - 1)
                        , idx(src)
                        , mark(NULL)
                        , lineno(1)
                        , keyword_or_ident(keyword_or_ident)
        #ifdef DEBUG
                        , last_token(T_LAST)
                        , traceflag(false)
        #endif
                {
                        AvmAssert(*limit == 0);
                }
                
                /* Read slash-delimited string plus following compiler-> return as one string
                 * Initial '/' has been consumed
                 * Must handle unescaped / inside character sets
                 * Must strip \<newline>
                 * Must strip blanks from /x expressions
                 * Character sets are always flat
                 *
                 * The regex engine should evolve to handle \<newline> and blank
                 * stripping as that provides better debuggability.
                 */
                Token Lexer::regexpImpl() 
                {
                        // Performance is not critical here - regular expression
                        // literals are not hugely common - so use the AVM+ string
                        // type for a buffer in which to accumulate the literal.
                        //
                        // MSIE compatible, an unescaped '/' can appear in a character
                        // set.
                        
                        AvmAssert(last_token == T_BreakSlash);

                        StringBuilder s(compiler);
                        bool in_charset = false;
                        int c;

                        s.append('/');
                        for (;;) {
                                c = *idx++;
                                switch (c) {
                                        case 0:
                                                if (idx == limit)
                                                        compiler->syntaxError(lineno, "Unexpected end of program in regexp literal");
                                                break;
                                        case '/':
                                                if (!in_charset)
                                                        goto end_loop;
                                                break;
                                        case '[':
                                                in_charset = true;
                                                break;
                                        case ']':
                                                in_charset = false;
                                                break;
                                        case '\\':
                                                c = *idx++;
                                                switch (c) {
                                                        case 0:
                                                                if (idx == limit)
                                                                        compiler->syntaxError(lineno, "Unexpected end of program in regexp literal");
                                                                break;
                                                        case '\r':
                                                                if (*idx == '\n')
                                                                        idx++;
                                                                continue;
                                                        case '\n':
                                                        case UNICHAR_LS:
                                                        case UNICHAR_PS:
                                                                continue;
                                                        case 'u':
                                                                // PCRE does not understand \u sequences, so expand legal ones here.
                                                                mark = idx;
                                                                if (hexDigits(4)) {
                                                                        s.append((wchar)parseInt(16));
                                                                        continue;
                                                                }
                                                                idx = mark;
                                                                s.append('u');
                                                                continue;
                                                }
                                                s.append('\\');
                                                break;
                                        case '\n':
                                        case '\r':
                                        case UNICHAR_LS:
                                        case UNICHAR_PS:
                                                compiler->syntaxError(lineno, "Illegal newline in regexp literal");
                                }
                                s.append(c);
                        }
                end_loop:
                        s.append('/');
                        
                        // Flags
                        while (isUnicodeIdentifierPart(c = *idx)) {
                                idx++;
                                s.append(c);
                        }
                        
                        val.s = s.str();
                        DEBUG_ONLY(last_token = T_RegexpLiteral);
                        return T_RegexpLiteral;
                }
                
                Token Lexer::divideOperatorImpl() 
                {
                        AvmAssert(last_token == T_BreakSlash);
                        switch (*idx) {
                                case '=':
                                        idx++;
                                        return T_DivideAssign;
                                        
                                default :
                                        return T_Divide;
                        }
                }
                
                Token Lexer::rightAngleImpl() 
                {
                        AvmAssert(last_token == T_BreakRightAngle);
                        return T_GreaterThan;
                }
                
                Token Lexer::rightShiftOrRelationalOperatorImpl() 
                {
                        AvmAssert(last_token == T_BreakRightAngle);
                        switch (*idx) {
                                case '=':
                                        idx++;
                                        return T_GreaterThanOrEqual;
                                        
                                case '>':
                                        idx++;
                                        switch (*idx) {
                                                case '=':
                                                        idx++;
                                                        return T_RightShiftAssign;
                                                        
                                                case '>':
                                                        idx++;
                                                        if (*idx == '=') {
                                                                idx++;
                                                                return T_UnsignedRightShiftAssign;
                                                        }
                                                        return T_UnsignedRightShift;
                                                        
                                                default:
                                                        return T_RightShift;
                                        }
                                        
                                default:
                                        return T_GreaterThan;
                        }
                }
                
                /* Get the next token.
                 *
                 * How end-of-input is detected without checking explicitly: We
                 * check for end-of-input every time we see 0, which is allowed in
                 * the input (string literals, regexp literals, comments) but
                 * tends to be very rare in practice.
                 *
                 * Note that subscanners that consume input until something
                 * happens (like strings and comments) will need to check for 0 as
                 * well.
                 */
                Token Lexer::lexImpl()
                {
                        DEBUG_ONLY(last_token = T_LAST);
                        for (;;) {
                                switch (*idx++) {
                                        case 0:
                                                if (idx >= limit) {
                                                        idx = limit;
                                                        return compiler->parser.onEOS(&lineno, &val);
                                                }
                                                compiler->syntaxError(lineno, "Illegal character in input: NUL");
                                                
                                        case '\n' :
                                                lineno++;
                                                continue;
                                                
                                        case '\r' :
                                                if (*idx == '\n')
                                                        idx++;
                                                lineno++;
                                                continue;
                                                
                                        case ' ' :
                                        case '\t' :
                                        case '\v' :
                                        case '\f' :
                                                continue;
                                                
                                        case '(':
                                                return T_LeftParen;
                                                
                                        case ')':
                                                return T_RightParen;
                                                
                                        case ',':
                                                return T_Comma;
                                                
                                        case ';':
                                                return T_Semicolon;
                                                
                                        case '?':
                                                return T_Question;
                                                
                                        case '[':
                                                return T_LeftBracket;
                                                
                                        case ']':
                                                return T_RightBracket;
                                                
                                        case '{':
                                                return T_LeftBrace;
                                                
                                        case '}':
                                                return T_RightBrace;
                                                
                                        case '~':
                                                return T_BitwiseNot;
                                                
                                        case '/':
                                                switch (*idx) {
                                                        case '/':
                                                                idx++;
                                                                lineComment();
                                                                continue;
                                                                
                                                        case '*':
                                                                idx++;
                                                                blockComment();
                                                                continue;
                                                                
                                                        default:
                                                                mark = idx-1;
                                                                DEBUG_ONLY(last_token = T_BreakSlash);
                                                                return T_BreakSlash;
                                                }
                                                
                                        case '\'':
                                        case '"':
                                                return stringLiteral(idx[-1]);
                                                
                                        case '.':
                                                switch (*idx) {
                                                        case '.':
                                                                if (idx[1] == '.') {
                                                                        idx += 2;
                                                                        return T_TripleDot;
                                                                }
                                                                idx++;
                                                                return T_DoubleDot;
                                                                
                                                        case '<':
                                                                idx++;
                                                                return T_LeftDotAngle;
                                                                
                                                        case '0':
                                                        case '1':
                                                        case '2':
                                                        case '3':
                                                        case '4':
                                                        case '5':
                                                        case '6':
                                                        case '7':
                                                        case '8':
                                                        case '9':
                                                                mark = --idx;
                                                                return numberLiteral();
                                                                
                                                        default:
                                                                return T_Dot;
                                                }
                                                
                                        case '-':
                                                switch (*idx) {
                                                        case '-':
                                                                idx++;
                                                                return T_MinusMinus;
                                                                
                                                        case '=':
                                                                idx++;
                                                                return T_MinusAssign;
                                                                
                                                        default:
                                                                return T_Minus;
                                                }
                                                
                                        case '!':
                                                if (*idx == '=') {
                                                        idx++;
                                                        if (*idx == '=') {
                                                                idx++;
                                                                return T_StrictNotEqual;
                                                        }
                                                        return T_NotEqual;
                                                }
                                                return T_Not;
                                                
                                        case '%':
                                                if (*idx == '=') {
                                                        idx++;
                                                        return T_RemainderAssign;
                                                }
                                                return T_Remainder;
                                                
                                        case '&':
                                                switch (*idx) {
                                                        case '=':
                                                                idx++;
                                                                return T_BitwiseAndAssign;
                                                                
                                                        case '&':
                                                                idx++;
                                                                return T_LogicalAnd;
                                                                
                                                        default:
                                                                return T_BitwiseAnd;
                                                }
                                                
                                        case '*':
                                                if (*idx == '=') {
                                                        idx++;
                                                        return T_MultiplyAssign;
                                                }
                                                return T_Multiply;
                                                
                                        case ':':
                                                if (*idx == ':') {
                                                        idx++;
                                                        return T_DoubleColon;
                                                }
                                                return T_Colon;
                                                
                                        case '^':
                                                if (*idx == '=') {
                                                        idx++;
                                                        return T_BitwiseXorAssign;
                                                }
                                                return T_BitwiseXor;
                                                
                                        case '|':
                                                switch (*idx) {
                                                        case '=':
                                                                idx++;
                                                                return T_BitwiseOrAssign;
                                                                
                                                        case '|':
                                                                idx++;
                                                                return T_LogicalOr;
                                                                
                                                        default:
                                                                return T_BitwiseOr;
                                                }
                                                
                                        case '+':
                                                switch (*idx) {
                                                        case '+':
                                                                idx++;
                                                                return T_PlusPlus;
                                                                
                                                        case '=':
                                                                idx++;
                                                                return T_PlusAssign;
                                                                
                                                        default:
                                                                return T_Plus;
                                                }
                                                
                                        case '<':
                                                switch (*idx) {
                                                        case '=':
                                                                idx++;
                                                                return T_LessThanOrEqual;
                                                                
                                                        case '<':
                                                                idx++;
                                                                if (*idx == '=') {
                                                                        idx++;
                                                                        return T_LeftShiftAssign;
                                                                }
                                                                return T_LeftShift;
                                                                
                                                        case '?':
                                                                idx--;  // [sic]
                                                                return T_BreakXml;
                                                                
                                                        case '!':
                                                                if (idx[1] == '-' && idx[2] == '-') {
                                                                        idx--;  // [sic]
                                                                        return T_BreakXml;
                                                                }
                                                                return T_LessThan;
                                                                
                                                        default:
                                                                return T_LessThan;
                                                }
                                                
                                        case '=':
                                                if (*idx == '=') {
                                                        idx++;
                                                        if (*idx == '=') {
                                                                idx++;
                                                                return T_StrictEqual;
                                                        }
                                                        return T_Equal;
                                                }
                                                return T_Assign;
                                                
                                        case '>':
                                                DEBUG_ONLY(last_token = T_BreakRightAngle);
                                                return T_BreakRightAngle;
                                                
                                        case '@':
                                                return T_AtSign;

                                                // Begin generated code
                                        case 'a': 
                                                if (idx[0] == 's' &&
                                                        !compiler->es3_keywords &&
                                                        notPartOfIdent(idx[1])) {
                                                        idx += 1;
                                                        return T_As;
                                                }
                                                goto bigswitch_end;
                                        case 'b': 
                                                if (idx[0] == 'r' &&
                                                        idx[1] == 'e' &&
                                                        idx[2] == 'a' &&
                                                        idx[3] == 'k' &&
                                                        notPartOfIdent(idx[4])) {
                                                        idx += 4;
                                                        return T_Break;
                                                }
                                                goto bigswitch_end;
                                        case 'c': 
                                                switch(idx[0]) {
                                                        case 'a': 
                                                                switch(idx[1]) {
                                                                        case 's': 
                                                                                if (idx[2] == 'e' &&
                                                                                        notPartOfIdent(idx[3])) {
                                                                                        idx += 3;
                                                                                        return T_Case;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 't': 
                                                                                if (idx[2] == 'c' &&
                                                                                        idx[3] == 'h' &&
                                                                                        notPartOfIdent(idx[4])) {
                                                                                        idx += 4;
                                                                                        return T_Catch;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'l': 
                                                                if (idx[1] == 'a' &&
                                                                        idx[2] == 's' &&
                                                                        idx[3] == 's' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[4])) {
                                                                        idx += 4;
                                                                        return T_Class;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'o': 
                                                                switch(idx[1]) {
                                                                        case 'n': 
                                                                                switch(idx[2]) {
                                                                                        case 's': 
                                                                                                if (idx[3] == 't' &&
                                                                                                        !compiler->es3_keywords &&
                                                                                                        notPartOfIdent(idx[4])) {
                                                                                                        idx += 4;
                                                                                                        return T_Const;
                                                                                                }
                                                                                                goto bigswitch_end;
                                                                                        case 't': 
                                                                                                if (idx[3] == 'i' &&
                                                                                                        idx[4] == 'n' &&
                                                                                                        idx[5] == 'u' &&
                                                                                                        idx[6] == 'e' &&
                                                                                                        notPartOfIdent(idx[7])) {
                                                                                                        idx += 7;
                                                                                                        return T_Continue;
                                                                                                }
                                                                                                goto bigswitch_end;
                                                                                        default:
                                                                                                goto bigswitch_end;
                                                                                }
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'd': 
                                                switch(idx[0]) {
                                                        case 'e': 
                                                                switch(idx[1]) {
                                                                        case 'f': 
                                                                                if (idx[2] == 'a' &&
                                                                                        idx[3] == 'u' &&
                                                                                        idx[4] == 'l' &&
                                                                                        idx[5] == 't' &&
                                                                                        notPartOfIdent(idx[6])) {
                                                                                        idx += 6;
                                                                                        return T_Default;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 'l': 
                                                                                if (idx[2] == 'e' &&
                                                                                        idx[3] == 't' &&
                                                                                        idx[4] == 'e' &&
                                                                                        notPartOfIdent(idx[5])) {
                                                                                        idx += 5;
                                                                                        return T_Delete;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'o': 
                                                                if (!notPartOfIdent(idx[1])) 
                                                                        goto bigswitch_end;
                                                                idx += 1;
                                                                return T_Do;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'e': 
                                                switch(idx[0]) {
                                                        case 'l': 
                                                                if (idx[1] == 's' &&
                                                                        idx[2] == 'e' &&
                                                                        notPartOfIdent(idx[3])) {
                                                                        idx += 3;
                                                                        return T_Else;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'x': 
                                                                if (idx[1] == 't' &&
                                                                        idx[2] == 'e' &&
                                                                        idx[3] == 'n' &&
                                                                        idx[4] == 'd' &&
                                                                        idx[5] == 's' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[6])) {
                                                                        idx += 6;
                                                                        return T_Extends;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'f': 
                                                switch(idx[0]) {
                                                        case 'a': 
                                                                if (idx[1] == 'l' &&
                                                                        idx[2] == 's' &&
                                                                        idx[3] == 'e' &&
                                                                        notPartOfIdent(idx[4])) {
                                                                        idx += 4;
                                                                        return T_False;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'i': 
                                                                if (idx[1] == 'n' &&
                                                                        idx[2] == 'a' &&
                                                                        idx[3] == 'l' &&
                                                                        idx[4] == 'l' &&
                                                                        idx[5] == 'y' &&
                                                                        notPartOfIdent(idx[6])) {
                                                                        idx += 6;
                                                                        return T_Finally;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'o': 
                                                                if (idx[1] == 'r' &&
                                                                        notPartOfIdent(idx[2])) {
                                                                        idx += 2;
                                                                        return T_For;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'u': 
                                                                if (idx[1] == 'n' &&
                                                                        idx[2] == 'c' &&
                                                                        idx[3] == 't' &&
                                                                        idx[4] == 'i' &&
                                                                        idx[5] == 'o' &&
                                                                        idx[6] == 'n' &&
                                                                        notPartOfIdent(idx[7])) {
                                                                        idx += 7;
                                                                        return T_Function;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'i': 
                                                switch(idx[0]) {
                                                        case 'f': 
                                                                if (!notPartOfIdent(idx[1])) 
                                                                        goto bigswitch_end;
                                                                idx += 1;
                                                                return T_If;
                                                        case 'm': 
                                                                switch(idx[1]) {
                                                                        case 'p': 
                                                                                switch(idx[2]) {
                                                                                        case 'l': 
                                                                                                if (idx[3] == 'e' &&
                                                                                                        idx[4] == 'm' &&
                                                                                                        idx[5] == 'e' &&
                                                                                                        idx[6] == 'n' &&
                                                                                                        idx[7] == 't' &&
                                                                                                        idx[8] == 's' &&
                                                                                                        !compiler->es3_keywords &&
                                                                                                        notPartOfIdent(idx[9])) {
                                                                                                        idx += 9;
                                                                                                        return T_Implements;
                                                                                                }
                                                                                                goto bigswitch_end;
                                                                                        case 'o': 
                                                                                                if (idx[3] == 'r' &&
                                                                                                        idx[4] == 't' &&
                                                                                                        !compiler->es3_keywords &&
                                                                                                        notPartOfIdent(idx[5])) {
                                                                                                        idx += 5;
                                                                                                        return T_Import;
                                                                                                }
                                                                                                goto bigswitch_end;
                                                                                        default:
                                                                                                goto bigswitch_end;
                                                                                }
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'n': 
                                                                switch(idx[1]) {
                                                                        case 's': 
                                                                                if (idx[2] == 't' &&
                                                                                        idx[3] == 'a' &&
                                                                                        idx[4] == 'n' &&
                                                                                        idx[5] == 'c' &&
                                                                                        idx[6] == 'e' &&
                                                                                        idx[7] == 'o' &&
                                                                                        idx[8] == 'f' &&
                                                                                        notPartOfIdent(idx[9])) {
                                                                                        idx += 9;
                                                                                        return T_InstanceOf;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 't': 
                                                                                switch(idx[2]) {
                                                                                        case 'e': 
                                                                                                switch(idx[3]) {
                                                                                                        case 'r': 
                                                                                                                switch(idx[4]) {
                                                                                                                        case 'f': 
                                                                                                                                if (idx[5] == 'a' &&
                                                                                                                                        idx[6] == 'c' &&
                                                                                                                                        idx[7] == 'e' &&
                                                                                                                                        !compiler->es3_keywords &&
                                                                                                                                        notPartOfIdent(idx[8])) {
                                                                                                                                        idx += 8;
                                                                                                                                        return T_Interface;
                                                                                                                                }
                                                                                                                                goto bigswitch_end;
                                                                                                                        case 'n': 
                                                                                                                                if (idx[5] == 'a' &&
                                                                                                                                        idx[6] == 'l' &&
                                                                                                                                        !compiler->es3_keywords &&
                                                                                                                                        notPartOfIdent(idx[7])) {
                                                                                                                                        idx += 7;
                                                                                                                                        return T_Internal;
                                                                                                                                }
                                                                                                                                goto bigswitch_end;
                                                                                                                        default:
                                                                                                                                goto bigswitch_end;
                                                                                                                }
                                                                                                        default:
                                                                                                                goto bigswitch_end;
                                                                                                }
                                                                                        default:
                                                                                                goto bigswitch_end;
                                                                                }
                                                                        default:
                                                                                if (!notPartOfIdent(idx[1])) 
                                                                                        goto bigswitch_end;
                                                                                idx += 1;
                                                                                return T_In;
                                                                }
                                                        case 's': 
                                                                if (!compiler->es3_keywords && !notPartOfIdent(idx[1])) 
                                                                        goto bigswitch_end;
                                                                idx += 1;
                                                                return T_Is;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'n': 
                                                switch(idx[0]) {
                                                        case 'a': 
                                                                if (idx[1] == 't' &&
                                                                        idx[2] == 'i' &&
                                                                        idx[3] == 'v' &&
                                                                        idx[4] == 'e' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[5])) {
                                                                        idx += 5;
                                                                        return T_Native;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'e': 
                                                                if (idx[1] == 'w' &&
                                                                        notPartOfIdent(idx[2])) {
                                                                        idx += 2;
                                                                        return T_New;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'u': 
                                                                if (idx[1] == 'l' &&
                                                                        idx[2] == 'l' &&
                                                                        notPartOfIdent(idx[3])) {
                                                                        idx += 3;
                                                                        return T_Null;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'p': 
                                                switch(idx[0]) {
                                                        case 'a': 
                                                                if (idx[1] == 'c' &&
                                                                        idx[2] == 'k' &&
                                                                        idx[3] == 'a' &&
                                                                        idx[4] == 'g' &&
                                                                        idx[5] == 'e' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[6])) {
                                                                        idx += 6;
                                                                        return T_Package;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'r': 
                                                                switch(idx[1]) {
                                                                        case 'i': 
                                                                                if (idx[2] == 'v' &&
                                                                                        idx[3] == 'a' &&
                                                                                        idx[4] == 't' &&
                                                                                        idx[5] == 'e' &&
                                                                                        !compiler->es3_keywords &&
                                                                                        notPartOfIdent(idx[6])) {
                                                                                        idx += 6;
                                                                                        return T_Private;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 'o': 
                                                                                if (idx[2] == 't' &&
                                                                                        idx[3] == 'e' &&
                                                                                        idx[4] == 'c' &&
                                                                                        idx[5] == 't' &&
                                                                                        idx[6] == 'e' &&
                                                                                        idx[7] == 'd' &&
                                                                                        !compiler->es3_keywords &&
                                                                                        notPartOfIdent(idx[8])) {
                                                                                        idx += 8;
                                                                                        return T_Protected;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'u': 
                                                                if (idx[1] == 'b' &&
                                                                        idx[2] == 'l' &&
                                                                        idx[3] == 'i' &&
                                                                        idx[4] == 'c' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[5])) {
                                                                        idx += 5;
                                                                        return T_Public;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'r': 
                                                if (idx[0] == 'e' &&
                                                        idx[1] == 't' &&
                                                        idx[2] == 'u' &&
                                                        idx[3] == 'r' &&
                                                        idx[4] == 'n' &&
                                                        notPartOfIdent(idx[5])) {
                                                        idx += 5;
                                                        return T_Return;
                                                }
                                                goto bigswitch_end;
                                        case 's': 
                                                switch(idx[0]) {
                                                        case 'u': 
                                                                if (idx[1] == 'p' &&
                                                                        idx[2] == 'e' &&
                                                                        idx[3] == 'r' &&
                                                                        !compiler->es3_keywords &&
                                                                        notPartOfIdent(idx[4])) {
                                                                        idx += 4;
                                                                        return T_Super;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'w': 
                                                                if (idx[1] == 'i' &&
                                                                        idx[2] == 't' &&
                                                                        idx[3] == 'c' &&
                                                                        idx[4] == 'h' &&
                                                                        notPartOfIdent(idx[5])) {
                                                                        idx += 5;
                                                                        return T_Switch;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 't': 
                                                switch(idx[0]) {
                                                        case 'h': 
                                                                switch(idx[1]) {
                                                                        case 'i': 
                                                                                if (idx[2] == 's' &&
                                                                                        notPartOfIdent(idx[3])) {
                                                                                        idx += 3;
                                                                                        return T_This;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 'r': 
                                                                                if (idx[2] == 'o' &&
                                                                                        idx[3] == 'w' &&
                                                                                        notPartOfIdent(idx[4])) {
                                                                                        idx += 4;
                                                                                        return T_Throw;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'r': 
                                                                switch(idx[1]) {
                                                                        case 'u': 
                                                                                if (idx[2] == 'e' &&
                                                                                        notPartOfIdent(idx[3])) {
                                                                                        idx += 3;
                                                                                        return T_True;
                                                                                }
                                                                                goto bigswitch_end;
                                                                        case 'y': 
                                                                                if (!notPartOfIdent(idx[2])) 
                                                                                        goto bigswitch_end;
                                                                                idx += 2;
                                                                                return T_Try;
                                                                        default:
                                                                                goto bigswitch_end;
                                                                }
                                                        case 'y': 
                                                                if (idx[1] == 'p' &&
                                                                        idx[2] == 'e' &&
                                                                        idx[3] == 'o' &&
                                                                        idx[4] == 'f' &&
                                                                        notPartOfIdent(idx[5])) {
                                                                        idx += 5;
                                                                        return T_TypeOf;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'u': 
                                                if (idx[0] == 's' &&
                                                        idx[1] == 'e' &&
                                                        !compiler->es3_keywords &&
                                                        notPartOfIdent(idx[2])) {
                                                        idx += 2;
                                                        return T_Use;
                                                }
                                                goto bigswitch_end;
                                        case 'v': 
                                                switch(idx[0]) {
                                                        case 'a': 
                                                                if (idx[1] == 'r' &&
                                                                        notPartOfIdent(idx[2])) {
                                                                        idx += 2;
                                                                        return T_Var;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'o': 
                                                                if (idx[1] == 'i' &&
                                                                        idx[2] == 'd' &&
                                                                        notPartOfIdent(idx[3])) {
                                                                        idx += 3;
                                                                        return T_Void;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                        case 'w': 
                                                switch(idx[0]) {
                                                        case 'h': 
                                                                if (idx[1] == 'i' &&
                                                                        idx[2] == 'l' &&
                                                                        idx[3] == 'e' &&
                                                                        notPartOfIdent(idx[4])) {
                                                                        idx += 4;
                                                                        return T_While;
                                                                }
                                                                goto bigswitch_end;
                                                        case 'i': 
                                                                if (idx[1] == 't' &&
                                                                        idx[2] == 'h' &&
                                                                        notPartOfIdent(idx[3])) {
                                                                        idx += 3;
                                                                        return T_With;
                                                                }
                                                                goto bigswitch_end;
                                                        default:
                                                                goto bigswitch_end;
                                                }
                                                
                                                // End generated code
                                                
                                        case '\\':
                                                // In ES3, the only way backslash can appear in the input in
                                                // this position is as the first character of an identifier,
                                                // represented as a character escape.  So break out.
                                                goto bigswitch_end;
                                                
                                        case '0':
                                        case '1':
                                        case '2':
                                        case '3':
                                        case '4':
                                        case '5':
                                        case '6':
                                        case '7':
                                        case '8':
                                        case '9':
                                                mark = --idx;
                                                return numberLiteral();
                                                
                                        default: {
                                                
                                                // Check for Unicode space characters and line
                                                // terminators here in order to avoid inhibiting
                                                // switch optimization on less-capable compilers.
                                                
                                                int c = idx[-1];
                                                
                                                // No-break space.

                                                if (c == 0x00A0)
                                                        continue;
                                                
                                                // Quick check that saves us from the tedious
                                                // testing most of the time, also another hack to
                                                // get around poor switch code generation.
                                                
                                                if (c >= UNICHAR_LOWEST_ODDSPACE) {
                                                        switch (c) {
                                                                case UNICHAR_Zs1:
                                                                case UNICHAR_Zs2:
                                                                case UNICHAR_Zs3:
                                                                case UNICHAR_Zs4:
                                                                case UNICHAR_Zs5:
                                                                case UNICHAR_Zs6:
                                                                case UNICHAR_Zs7:
                                                                case UNICHAR_Zs8:
                                                                case UNICHAR_Zs9:
                                                                case UNICHAR_Zs10:
                                                                case UNICHAR_Zs11:
                                                                case UNICHAR_Zs12:
                                                                case UNICHAR_Zs13:
                                                                case UNICHAR_Zs14:
                                                                case UNICHAR_Zs15:
                                                                case UNICHAR_Zs16:
                                                                case UNICHAR_BOM1:
                                                                case UNICHAR_BOM2:
                                                                        continue;
                                                                case UNICHAR_LS:
                                                                case UNICHAR_PS:
                                                                        lineno++;
                                                                        continue;
                                                        }
                                                }
                                                
                                                goto bigswitch_end;
                                        }
                                }
                                
                        bigswitch_end:
                                // end of bigswitch.
                                
                                // Identifiers are handled here (and only here).
                                //
                                // It is never necessary to check whether an
                                // identifier not containing an escape squeuence is a
                                // keyword.  In cases where the input is "breakX"
                                // where X is a non-identifier character then break
                                // will in fact be returned first because of how the
                                // notPartOfIdentifier logic works, but since the X is
                                // not an identifier constituent there will be an
                                // immediate syntax error.  That's good enough for me.
                                
                                // If the scanner is used to re-check whether an identifier
                                // containing a backslash sequence looks like a keyword then
                                // we can stop here.

                                if (keyword_or_ident) {
                                        DEBUG_ONLY(last_token = T_Identifier);
                                        return T_Identifier;
                                }
                                
                                --idx;
                                return identifier();
                                
                        }
                }
                
                // All number literals start here without having consumed any
                // input.
                
                Token Lexer::numberLiteral()
                {
                        switch (*idx) {
                                case '0':
                                        // Octal / hex / a single 0 / 0.<something> /0e<something>
                                        switch (idx[1]) {
                                                case 'x':
                                                case 'X':
                                                        idx += 2;
                                                        mark = idx;
                                                        if (!hexDigits(-1))
                                                                compiler->syntaxError(lineno, "Illegal hexadecimal literal: no digits");
                                                        return integerLiteral(16);
                                                        
                                                case '.':
                                                        idx += 2;
                                                        numberFraction(true);
                                                        return floatingLiteral();
                                                        
                                                case 'E':
                                                case 'e':
                                                        idx += 2;
                                                        numberExponent();
                                                        return floatingLiteral();
                                                        
                                                default: {
                                                        if (!compiler->octal_literals)
                                                                break;

                                                        // Octal or single '0'
                                                        const wchar* startIndex = idx;
                                                        octalDigits(-1);
                                                        switch (*idx) {
                                                                case '8':
                                                                case '9':
                                                                        // Ignore the leading 0 and parse it as decimal.
                                                                        // Firefox extension; clear error in E262-3.
                                                                        idx = startIndex;
                                                                        break;
                                                                default:
                                                                        return integerLiteral(8);
                                                        }
                                                }
                                        }
                                        break;
                                        
                                case '.':
                                        idx++;
                                        numberFraction(false);
                                        return floatingLiteral();
                        }
                        
                        if (numberLiteralPrime())
                                return floatingLiteral();
                        else
                                return integerLiteral(10);
                }
                
                Token Lexer::integerLiteral(int base) 
                {
                        checkNextCharForNumber();
                        double n = parseInt(base);
                        if (n >= (-0x7fffffff - 1) && n <= 0x7FFFFFFF) {
                                val.i = (int32_t)n;
                                DEBUG_ONLY(last_token = T_IntLiteral);
                                return T_IntLiteral;
                        }
                        if (n >= 0x80000000U && n <= 0xFFFFFFFFU) {
                                val.u = (uint32_t)n;
                                DEBUG_ONLY(last_token = T_UIntLiteral);
                                return T_UIntLiteral;
                        }
                        val.d = n;
                        DEBUG_ONLY(last_token = T_DoubleLiteral);
                        return T_DoubleLiteral;
                }
                
                Token Lexer::floatingLiteral() 
                {
                        checkNextCharForNumber();
                        val.d = parseFloat();
                        DEBUG_ONLY(last_token = T_DoubleLiteral);
                        return T_DoubleLiteral;
                }
                
                void Lexer::checkNextCharForNumber() 
                {
                        int c = *idx;
                        if ((c >= '0' && c <= '9') || isUnicodeIdentifierStart(c))
                                compiler->syntaxError(lineno, "Illegal character following numeric literal: %c", c);
                }
                
                // Returns true iff the literal contains a decimal point or an
                // exponent marker, otherwise false.
                
                bool Lexer::numberLiteralPrime() 
                {
                        if (!decimalDigits(-1))
                                compiler->syntaxError(lineno, "Illegal number: no digits");
                        
                        switch (*idx) {
                                case '.':
                                        idx++;
                                        numberFraction (true);
                                        return true;
                                        
                                case 'e':
                                case 'E':
                                        idx++;
                                        numberExponent ();
                                        return true;
                                        
                                default:
                                        return false;
                        }
                }
                
                // The '.' has been consumed.
                //
                // has_leading_digits should be true if digits have been seen
                // before the '.'.
                
                void Lexer::numberFraction(bool has_leading_digits) 
                {
                        if (!decimalDigits (-1) && !has_leading_digits)
                                compiler->syntaxError(lineno, "Illegal number: must have digits before or after decimal point");
                        
                        switch (*idx) {
                                case 'e':
                                case 'E':
                                        idx++;
                                        numberExponent ();
                                        break;
                        }
                }
                
                // The 'e' has been consumed...
                
                void Lexer::numberExponent() 
                {
                        switch (*idx) {
                                case '+':
                                case '-':
                                        idx++;
                                        break;
                        }
                        if (!decimalDigits(-1))
                                compiler->syntaxError(lineno, "Illegal number: missing digits in exponent");
                }
                
                bool Lexer::digits(int k, int attrmask)
                {
                        const wchar* startIndex = idx;
                        int c;
                        while (k != 0 && (c = *idx) < 128 && (char_attrs[c] & attrmask) != 0) {
                                k--;
                                idx++;
                        }
                        return idx > startIndex && k <= 0;
                }
                
                void Lexer::lineComment() 
                {
                        for (;;) {
                                switch (*idx++) {
                                        case 0:
                                        case '\n':
                                        case '\r':
                                        case UNICHAR_LS:
                                        case UNICHAR_PS:
                                                idx--;
                                                return;
                                }
                        }
                }
                
                void Lexer::blockComment() 
                {
                        for (;;) {
                                int c;
                                while ((c = *idx++) != '*' &&
                                           c != 0  &&
                                           c != '\n' &&
                                           c != '\r'  &&
                                           c != UNICHAR_LS &&
                                           c != UNICHAR_PS)
                                        ;
                                if (c == '*') {
                                        if (*idx == '/') {
                                                idx++;
                                                return;
                                        }
                                        continue;
                                }
                                if (c == 0) {
                                        if (idx >= limit) {
                                                idx = limit;
                                                compiler->syntaxError(lineno, "End of input in block comment");
                                        }
                                        continue;
                                }
                                if (c == '\r') {
                                        if (*idx == '\n')
                                                idx++;
                                        c = '\n';
                                }
                                AvmAssert(c == '\n' || c == UNICHAR_LS || c == UNICHAR_PS);
                                lineno++;
                        }
                }
                
                Token Lexer::identifier() 
                {
                        // The common case here is that an identifier is a sequence of simple ASCII 
                        // characters, followed by a non-identifier-constituent ASCII character.  We 
                        // optimize for this.
                        
                        int c;
                        const wchar* start = idx;
                        
                        if ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_INITIAL) != 0) {
                                idx++;
                                while ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_SUBSEQUENT) != 0) 
                                        idx++;
                        }
                        
                        if (notPartOfIdent(c) && c != '\\') {
                                if (idx == start)
                                        compiler->syntaxError(lineno, "Invalid character in input: %c", *idx);
                                val.s = compiler->intern(start, uint32_t(idx-start));
                                DEBUG_ONLY(last_token = T_Identifier);
                                return T_Identifier;
                        }
                        
                        // Slow case.
                        //
                        // If at first we fail then try and try again...
                        
                        StringBuilder s(compiler);
                        bool has_backslashes = false;
                        for (;;) {
                                c = *idx;
                                if (c != '\\') {
                                        if (!(idx == start ? isUnicodeIdentifierStart(c) : isUnicodeIdentifierPart(c)))
                                                break;
                                        idx++;
                                }
                                else {
                                        has_backslashes = true;
                                        idx++;
                                        c = *idx;
                                        if (c != 'u')
                                                compiler->internalError(lineno, "Only unicode escapes allowed here");
                                        idx++;
                                        c = unicodeEscape();
                                        if (!(compiler->liberal_idents || (idx == start ? isUnicodeIdentifierStart(c) : isUnicodeIdentifierPart(c))))
                                                compiler->internalError(lineno, "Illegal identifier: unicode character is not allowed in identifier");
                                }
                                s.append(c);
                        }
                        
                        if (has_backslashes && !compiler->liberal_idents) {
                                // The ES3 spec requires that identifiers constructed
                                // with escape sequences must be checked after the
                                // fact to see if they are keywords.  Here we create a
                                // new lexer to do that.

                                StringBuilder s2(compiler);
                                s2.append(&s);
                                s2.append(0);
                                Str* text = s2.str();
                                uint32_t textlen = s2.length();
                                Lexer subscan(compiler, text->s, textlen, true);
                                uint32_t l;
                                TokenValue v;
                                if (subscan.lex(&l, &v) != T_Identifier)
                                        compiler->syntaxError(lineno, "Illegal identifier: escape sequence makes it look like a keyword");
                                AvmAssert(subscan.lex(&l, &v) == T_EOS);
                        }
                        
                        if (s.length() == 0)
                                compiler->syntaxError(lineno, "Invalid character in input: %c", *idx);

                        val.s = s.str();
                        DEBUG_ONLY(last_token = T_Identifier);
                        return T_Identifier;
                }
                
                Token Lexer::stringLiteral(int delimiter) 
                {
                        StringBuilder s(compiler);
                        int c;
                        
                        // The common case here is that the string contains unproblematic ASCII characters.
                        //
                        // OPTIMIZEME: for the common case we should not need to accumulate data in a StringBuilder,
                        // we should be able to go straight to a Str, as for the identifier case.  (Probably
                        // less critical here though.)
                        
                        for (;;) {
                                const wchar* start = idx;
                                
                                // OPTIMIZEME: too many conditions in this test now.  Should bias for ASCII
                                // and use table lookup to test for delimiters, NUL, and line endings.
                                
                                while ((c = *idx) != delimiter && 
                                           c != '\\' && 
                                           c != 0 &&
                                           c != '\n' &&
                                           c != '\r' &&
                                           c != UNICHAR_LS &&
                                           c != UNICHAR_PS &&
                                           c != UNICHAR_BOM1 &&
                                           c != UNICHAR_BOM2)
                                        idx++;
                                s.append(start, idx);
                                
                                switch (*idx) {
                                        case '\'':
                                        case '"':
                                                if (*idx == delimiter) {
                                                        idx++;
                                                        val.s = s.str();
                                                        DEBUG_ONLY(last_token = T_StringLiteral);
                                                        return T_StringLiteral;
                                                }
                                                break;  // syntax error

                                        case '\\':
                                                idx++;
                                                
                                                switch (*idx) {
                                                        case '\r':
                                                                idx++;
                                                                if (*idx == '\n')
                                                                        idx++;
                                                                lineno++;
                                                                continue;
                                                                
                                                        case UNICHAR_LS:
                                                        case UNICHAR_PS:
                                                        case '\n':
                                                                idx++;
                                                                lineno++;
                                                                continue;
                                                                
                                                        default:
                                                                s.append(escapeSequence());
                                                                continue;
                                                }
                                                
                                        case UNICHAR_BOM1:
                                        case UNICHAR_BOM2:
                                                s.append(' ');
                                                idx++;
                                                continue;
                                                
                                        case 0:
                                                if (idx < limit) {
                                                        s.append(0);
                                                        idx++;
                                                        continue;
                                                }
                                                break;  // syntax error
                                }
                                
                                compiler->syntaxError(lineno, "Unterminated string literal");
                        }
                }
                
                int Lexer::escapeSequence() 
                {
                        switch (*idx) {
                                case  '0':
                                case  '1':
                                case  '2':
                                case  '3':
                                case  '4':
                                case  '5':
                                case  '6':
                                case  '7':
                                        return octalOrNulEscape ();
                                        
                                case 'x':
                                        idx++;
                                        // Compatibility fallback, handle \x<whatever> as "x" followed by <whatever>
                                        mark = idx;
                                        if (hexDigits(2)) {
                                                idx = mark;
                                                return hexEscape(2);
                                        }
                                        idx = mark;
                                        return 'x';
                                        
                                case 'u':
                                        idx++;
                                        // Compatibility fallback, handle \u<whatever> as "u" followed by <whatever>
                                        mark = idx;
                                        if (hexDigits(4)) {
                                                idx = mark;
                                                return unicodeEscape ();
                                        }
                                        idx = mark;
                                        return 'u';

                                case 'b':
                                        idx++;
                                        return '\b';
                                        
                                case 'f':
                                        idx++;
                                        return '\f';
                                        
                                case 'n':
                                        idx++;
                                        return '\n';
                                        
                                case 'r':
                                        idx++;
                                        return '\r';
                                        
                                case 't':
                                        idx++;
                                        return '\t';
                                        
                                case 'v':
                                        idx++;
                                        return '\v';
                                        
                                case  '\'':
                                case  '"':
                                case  '\\':
                                        return *idx++;
                                        
                                case 0:
                                        if (idx+1 >= limit)
                                                compiler->syntaxError(lineno, "End of input in escape sequence");
                                        idx++;
                                        return 0;
                                        
                                case     '\n':
                                case     '\r':
                                case UNICHAR_LS:
                                case UNICHAR_PS:
                                        compiler->syntaxError(lineno, "Illegal line terminator in escape sequence");
                                        
                                default:
                                        return *idx++;
                        }
                }
                
                int Lexer::octalOrNulEscape() 
                {
                        int c;
                        if ((c = *idx) >= 128 || (char_attrs[c] & CHAR_ATTR_OCTAL) == 0)
                                compiler->syntaxError(lineno, "Expecting octal character, got %c", c);
                        
                        if (c == '0') {
                                idx++;
                                if ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_OCTAL) != 0)
                                        return octalEscape(3);
                                else
                                        return 0;
                        }
                        
                        if (c <= '3')
                                return octalEscape(3);
                        
                        return octalEscape(2);
                }
                
                int Lexer::octalEscape(int n) 
                {
                        mark = idx;
                        octalDigits(n);                 // Ignore result
                        return (int)parseInt(8);
                }
                
                // Any leading x or u has been consumed.  n is the number of
                // digits to consume and require.
                
                int Lexer::hexEscape(int n) 
                {
                        mark = idx;
                        if (!hexDigits(n))
                                compiler->syntaxError(lineno, "Wrong number of hexadecimal digits; expected %d", n);
                        return (int)parseInt(16);
                }
                
                // Any leading u has been consumed.
                
                int Lexer::unicodeEscape() 
                {
                        if (*idx == '{') {
                                idx++;
                                mark = idx;
                                if (!hexDigits(-1) || *idx != '}')
                                        compiler->syntaxError(lineno, "Invalid variable-length unicode escape");
                                int n = (int)parseInt(16);
                                idx++;
                                return n;
                        }
                        return hexEscape(4);
                }
                
                // Parses string of digits in the index in the range [mark,idx) in the given base.
                // Reliably returns Infinity on overflow.

                static uint32_t digitValue(wchar c)
                {
                        if (c <= '9')
                                return c - '0';
                        if (c <= 'F')
                                return c - ('A' - 10);
                        return c - ('a' - 10);
                }

                // For large values the algorithms for handling hex and octal have better
                // behavior than the straightforward loop that accumulates a result in a double.

                double Lexer::parseInt(int base)
                {
                        uint64_t bits = 0;
                        uint32_t scale = 0;
                        uint32_t k = 0;

                        while (mark < idx && *mark == '0')
                                mark++;

                        if (mark==idx)
                                return 0.0;

                        switch (base) {
                                case 8:
                                        for ( const wchar* i=mark ; i < idx ; i++ ) {
                                                if (k < 22) {
                                                        bits = bits << 3 | digitValue(*i);
                                                        k++;
                                                }
                                                scale += 3;
                                        }
                                        goto bits_and_scale;
                                        
                                case 16:
                                        for ( const wchar* i=mark ; i < idx ; i++ ) {
                                                if (k < 16) {
                                                        bits = bits << 4 | digitValue(*i);
                                                        k++;
                                                }
                                                scale += 4;
                                        }
                                        goto bits_and_scale;
                                        
                                case 10:
                                        return parseFloat();

                                default:
                                        compiler->internalError(lineno, "Unknown base in parseInt");
                        }

                bits_and_scale:
                        // Left-adjust
                        uint32_t n = scale;
                        if (n < 33) { bits <<= 32; n += 32; }
                        if (n < 49) { bits <<= 16; n += 16; }
                        if (n < 57) { bits <<= 8; n += 8; }
                        if (n < 61) { bits <<= 4; n += 4; }
                        if (n < 63) { bits <<= 2; n += 2; }
                        if (n < 64) { bits <<= 1; n += 1; }

                        // Normalize
                        if ((int64_t)bits > 0) { bits <<= 1; scale--; }
                        if ((int64_t)bits > 0) { bits <<= 1; scale--; }
                        if ((int64_t)bits > 0) { bits <<= 1; scale--; }

                        // Get rid of implicit leading bit, shift into position
                        bits <<= 1;
                        uint64_t lost = bits & 0xFFF;
                        bits >>= 12;
                        scale--;
                        
                        // Round to even
                        // FIME: it would seem necessary to re-normalize following rounding.
                        if (lost > 0x800)
                                bits += 1;
                        else if (lost == 0x800) {
                                if (bits & 1)
                                        bits += 1;
                        }
                        bits &= ~(uint64_t)0 >> 12;

                        // Compute and insert exponent
                        // FIXME: overflow integer constants properly to Infinity.
                        bits |= (uint64_t)(1023 + scale) << 52;
                        union {
                                uint64_t bits;
                                double d;
                        } u;
                        u.bits = bits;
                        return u.d;
                }
                
                double Lexer::parseFloat()
                {
                        // NOTE: Though convertStringToDouble takes a 'len' parameter, it does *not* check this
                        // parameter to determine whether it should stop parsing!  We are saved here only because
                        // we've stopped scanning at the point where that function will stop parsing, and because
                        // our input is NUL-terminated.
                        //
                        // NOTE: The 'strict' flag must be false or we'll get an error because the function will
                        // be confused by trailing non-NUL characters.
                        //
                        // NOTE: String API misfeature.  Silly to have to create a new string here.

                        double n;
                        StringBuilder s(compiler);
                        s.append(mark, idx);
                        DEBUG_ONLY(bool flag =) compiler->context->stringToDouble(s.chardata(), &n);
                        AvmAssert(flag);
                        return n;
                }
                
                bool Lexer::isUnicodeIdentifierStart(int c)
                {
                        if (c < 128)
                                return (char_attrs[c] & CHAR_ATTR_INITIAL) != 0;
                        else
                                return isNonASCIIIdentifierStart((wchar)c);
                }
                
                bool Lexer::isUnicodeIdentifierPart(int c)
                {
                        if (c < 128)
                                return (char_attrs[c] & CHAR_ATTR_SUBSEQUENT) != 0;
                        else
                                return isNonASCIIIdentifierSubsequent((wchar)c);
                }
                
#ifdef DEBUG
        
                void Lexer::print(Token t, uint32_t/* l*/, TokenValue v)
                {
                        char buf[200];
                        *buf = 0;
                        switch (t) {
                                case T_Identifier:
                                        strcpy(buf, "I ");
                                        getn(buf+2, v.s, sizeof(buf)-2);
                                        break;
                                case T_StringLiteral:
                                        strcpy(buf, "S ");
                                        getn(buf+2, v.s, sizeof(buf)-2);
                                        break;
                                case T_RegexpLiteral:
                                        strcpy(buf, "R ");
                                        getn(buf+2, v.s, sizeof(buf)-2);
                                        break;
                                case T_IntLiteral:
                                        sprintf(buf, "i %d", v.i);
                                        break;
                                case T_UIntLiteral:
                                        sprintf(buf, "u %u", v.u);
                                        break;
                                case T_DoubleLiteral:
                                        sprintf(buf, "d %g", v.d);
                                        break;
                                default:
                                        break;
                        }
                        printf("%d %s\n", (int)t, buf);
                }
        
#endif // DEBUG
        
        }
}

#endif // VMCFG_EVAL

/* [<][>][^][v][top][bottom][index][help] */