Source/core/css/CSSTokenizer-in.cpp

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
isCSSLetter
isCSSEscape
isURILetter
isIdentifierStartAfterDash
isEqualToCSSIdentifier
isEqualToCSSCaseSensitiveIdentifier
checkAndSkipEscape
skipWhiteSpace
allocateStringBuffer16
tokenLocation
currentLocation
isIdentifierStart
checkAndSkipString
parseEscape
peekMaxIdentifierLen
parseIdentifierInternal
parseIdentifier
peekMaxStringLen
parseStringInternal
parseString
findURI
peekMaxURILen
parseURIInternal
parseURI
parseUnicodeRange
parseNthChild
parseNthChildExtra
detectFunctionTypeToken
detectMediaQueryToken
detectNumberToken
detectDashToken
detectAtToken
detectSupportsToken
realLex
setupTokenizer
/*
 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>
 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
 * Copyright (C) 2012 Intel Corporation. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include "core/css/CSSTokenizer.h"

#include "core/css/CSSKeyframeRule.h"
#include "core/css/parser/BisonCSSParser.h"
#include "core/css/CSSParserValues.h"
#include "core/css/MediaQuery.h"
#include "core/css/StyleRule.h"
#include "core/html/parser/HTMLParserIdioms.h"
#include "core/svg/SVGParserUtilities.h"

namespace WebCore {

#include "CSSGrammar.h"

enum CharacterType {
    // Types for the main switch.

    // The first 4 types must be grouped together, as they
    // represent the allowed chars in an identifier.
    CharacterCaselessU,
    CharacterIdentifierStart,
    CharacterNumber,
    CharacterDash,

    CharacterOther,
    CharacterNull,
    CharacterWhiteSpace,
    CharacterEndMediaQueryOrSupports,
    CharacterEndNthChild,
    CharacterQuote,
    CharacterExclamationMark,
    CharacterHashmark,
    CharacterDollar,
    CharacterAsterisk,
    CharacterPlus,
    CharacterDot,
    CharacterSlash,
    CharacterLess,
    CharacterAt,
    CharacterBackSlash,
    CharacterXor,
    CharacterVerticalBar,
    CharacterTilde,
};

// 128 ASCII codes
static const CharacterType typesOfASCIICharacters[128] = {
/*   0 - Null               */ CharacterNull,
/*   1 - Start of Heading   */ CharacterOther,
/*   2 - Start of Text      */ CharacterOther,
/*   3 - End of Text        */ CharacterOther,
/*   4 - End of Transm.     */ CharacterOther,
/*   5 - Enquiry            */ CharacterOther,
/*   6 - Acknowledgment     */ CharacterOther,
/*   7 - Bell               */ CharacterOther,
/*   8 - Back Space         */ CharacterOther,
/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
/*  10 - Line Feed          */ CharacterWhiteSpace,
/*  11 - Vertical Tab       */ CharacterOther,
/*  12 - Form Feed          */ CharacterWhiteSpace,
/*  13 - Carriage Return    */ CharacterWhiteSpace,
/*  14 - Shift Out          */ CharacterOther,
/*  15 - Shift In           */ CharacterOther,
/*  16 - Data Line Escape   */ CharacterOther,
/*  17 - Device Control 1   */ CharacterOther,
/*  18 - Device Control 2   */ CharacterOther,
/*  19 - Device Control 3   */ CharacterOther,
/*  20 - Device Control 4   */ CharacterOther,
/*  21 - Negative Ack.      */ CharacterOther,
/*  22 - Synchronous Idle   */ CharacterOther,
/*  23 - End of Transmit    */ CharacterOther,
/*  24 - Cancel             */ CharacterOther,
/*  25 - End of Medium      */ CharacterOther,
/*  26 - Substitute         */ CharacterOther,
/*  27 - Escape             */ CharacterOther,
/*  28 - File Separator     */ CharacterOther,
/*  29 - Group Separator    */ CharacterOther,
/*  30 - Record Separator   */ CharacterOther,
/*  31 - Unit Separator     */ CharacterOther,
/*  32 - Space              */ CharacterWhiteSpace,
/*  33 - !                  */ CharacterExclamationMark,
/*  34 - "                  */ CharacterQuote,
/*  35 - #                  */ CharacterHashmark,
/*  36 - $                  */ CharacterDollar,
/*  37 - %                  */ CharacterOther,
/*  38 - &                  */ CharacterOther,
/*  39 - '                  */ CharacterQuote,
/*  40 - (                  */ CharacterOther,
/*  41 - )                  */ CharacterEndNthChild,
/*  42 - *                  */ CharacterAsterisk,
/*  43 - +                  */ CharacterPlus,
/*  44 - ,                  */ CharacterOther,
/*  45 - -                  */ CharacterDash,
/*  46 - .                  */ CharacterDot,
/*  47 - /                  */ CharacterSlash,
/*  48 - 0                  */ CharacterNumber,
/*  49 - 1                  */ CharacterNumber,
/*  50 - 2                  */ CharacterNumber,
/*  51 - 3                  */ CharacterNumber,
/*  52 - 4                  */ CharacterNumber,
/*  53 - 5                  */ CharacterNumber,
/*  54 - 6                  */ CharacterNumber,
/*  55 - 7                  */ CharacterNumber,
/*  56 - 8                  */ CharacterNumber,
/*  57 - 9                  */ CharacterNumber,
/*  58 - :                  */ CharacterOther,
/*  59 - ;                  */ CharacterEndMediaQueryOrSupports,
/*  60 - <                  */ CharacterLess,
/*  61 - =                  */ CharacterOther,
/*  62 - >                  */ CharacterOther,
/*  63 - ?                  */ CharacterOther,
/*  64 - @                  */ CharacterAt,
/*  65 - A                  */ CharacterIdentifierStart,
/*  66 - B                  */ CharacterIdentifierStart,
/*  67 - C                  */ CharacterIdentifierStart,
/*  68 - D                  */ CharacterIdentifierStart,
/*  69 - E                  */ CharacterIdentifierStart,
/*  70 - F                  */ CharacterIdentifierStart,
/*  71 - G                  */ CharacterIdentifierStart,
/*  72 - H                  */ CharacterIdentifierStart,
/*  73 - I                  */ CharacterIdentifierStart,
/*  74 - J                  */ CharacterIdentifierStart,
/*  75 - K                  */ CharacterIdentifierStart,
/*  76 - L                  */ CharacterIdentifierStart,
/*  77 - M                  */ CharacterIdentifierStart,
/*  78 - N                  */ CharacterIdentifierStart,
/*  79 - O                  */ CharacterIdentifierStart,
/*  80 - P                  */ CharacterIdentifierStart,
/*  81 - Q                  */ CharacterIdentifierStart,
/*  82 - R                  */ CharacterIdentifierStart,
/*  83 - S                  */ CharacterIdentifierStart,
/*  84 - T                  */ CharacterIdentifierStart,
/*  85 - U                  */ CharacterCaselessU,
/*  86 - V                  */ CharacterIdentifierStart,
/*  87 - W                  */ CharacterIdentifierStart,
/*  88 - X                  */ CharacterIdentifierStart,
/*  89 - Y                  */ CharacterIdentifierStart,
/*  90 - Z                  */ CharacterIdentifierStart,
/*  91 - [                  */ CharacterOther,
/*  92 - \                  */ CharacterBackSlash,
/*  93 - ]                  */ CharacterOther,
/*  94 - ^                  */ CharacterXor,
/*  95 - _                  */ CharacterIdentifierStart,
/*  96 - `                  */ CharacterOther,
/*  97 - a                  */ CharacterIdentifierStart,
/*  98 - b                  */ CharacterIdentifierStart,
/*  99 - c                  */ CharacterIdentifierStart,
/* 100 - d                  */ CharacterIdentifierStart,
/* 101 - e                  */ CharacterIdentifierStart,
/* 102 - f                  */ CharacterIdentifierStart,
/* 103 - g                  */ CharacterIdentifierStart,
/* 104 - h                  */ CharacterIdentifierStart,
/* 105 - i                  */ CharacterIdentifierStart,
/* 106 - j                  */ CharacterIdentifierStart,
/* 107 - k                  */ CharacterIdentifierStart,
/* 108 - l                  */ CharacterIdentifierStart,
/* 109 - m                  */ CharacterIdentifierStart,
/* 110 - n                  */ CharacterIdentifierStart,
/* 111 - o                  */ CharacterIdentifierStart,
/* 112 - p                  */ CharacterIdentifierStart,
/* 113 - q                  */ CharacterIdentifierStart,
/* 114 - r                  */ CharacterIdentifierStart,
/* 115 - s                  */ CharacterIdentifierStart,
/* 116 - t                  */ CharacterIdentifierStart,
/* 117 - u                  */ CharacterCaselessU,
/* 118 - v                  */ CharacterIdentifierStart,
/* 119 - w                  */ CharacterIdentifierStart,
/* 120 - x                  */ CharacterIdentifierStart,
/* 121 - y                  */ CharacterIdentifierStart,
/* 122 - z                  */ CharacterIdentifierStart,
/* 123 - {                  */ CharacterEndMediaQueryOrSupports,
/* 124 - |                  */ CharacterVerticalBar,
/* 125 - }                  */ CharacterOther,
/* 126 - ~                  */ CharacterTilde,
/* 127 - Delete             */ CharacterOther,
};

// Utility functions for the CSS tokenizer.

template <typename CharacterType>
static inline bool isCSSLetter(CharacterType character)
{
    return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
}

template <typename CharacterType>
static inline bool isCSSEscape(CharacterType character)
{
    return character >= ' ' && character != 127;
}

template <typename CharacterType>
static inline bool isURILetter(CharacterType character)
{
    return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
}

template <typename CharacterType>
static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
{
    return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
        || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
}

template <typename CharacterType>
static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
{
    // Compare an character memory data with a zero terminated string.
    do {
        // The input must be part of an identifier if constantChar or constString
        // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
        ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
        ASSERT(*constantString != '-' || isCSSLetter(*cssString));
        if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
            return false;
    } while (*constantString);
    return true;
}

template <typename CharacterType>
static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
{
    ASSERT(*constantString);

    do {
        if (*string++ != *constantString++)
            return false;
    } while (*constantString);
    return true;
}

template <typename CharacterType>
static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
{
    // Returns with 0, if escape check is failed. Otherwise
    // it returns with the following character.
    ASSERT(*currentCharacter == '\\');

    ++currentCharacter;
    if (!isCSSEscape(*currentCharacter))
        return 0;

    if (isASCIIHexDigit(*currentCharacter)) {
        int length = 6;

        do {
            ++currentCharacter;
        } while (isASCIIHexDigit(*currentCharacter) && --length);

        // Optional space after the escape sequence.
        if (isHTMLSpace<CharacterType>(*currentCharacter))
            ++currentCharacter;
        return currentCharacter;
    }
    return currentCharacter + 1;
}

template <typename CharacterType>
static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
{
    while (isHTMLSpace<CharacterType>(*currentCharacter))
        ++currentCharacter;
    return currentCharacter;
}

// Main CSS tokenizer functions.

template <>
inline LChar*& CSSTokenizer::currentCharacter<LChar>()
{
    return m_currentCharacter8;
}

template <>
inline UChar*& CSSTokenizer::currentCharacter<UChar>()
{
    return m_currentCharacter16;
}

UChar* CSSTokenizer::allocateStringBuffer16(size_t len)
{
    // Allocates and returns a CSSTokenizer owned buffer for storing
    // UTF-16 data. Used to get a suitable life span for UTF-16
    // strings, identifiers and URIs created by the tokenizer.
    OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);

    UChar* bufferPtr = buffer.get();

    m_cssStrings16.append(buffer.release());
    return bufferPtr;
}

template <>
inline LChar* CSSTokenizer::dataStart<LChar>()
{
    return m_dataStart8.get();
}

template <>
inline UChar* CSSTokenizer::dataStart<UChar>()
{
    return m_dataStart16.get();
}

template <typename CharacterType>
inline CSSParserLocation CSSTokenizer::tokenLocation()
{
    CSSParserLocation location;
    location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>());
    location.lineNumber = m_tokenStartLineNumber;
    location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();
    return location;
}

CSSParserLocation CSSTokenizer::currentLocation()
{
    if (is8BitSource())
        return tokenLocation<LChar>();
    return tokenLocation<UChar>();
}

template <typename CharacterType>
inline bool CSSTokenizer::isIdentifierStart()
{
    // Check whether an identifier is started.
    return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);
}

template <typename CharacterType>
static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote)
{
    // Returns with 0, if string check is failed. Otherwise
    // it returns with the following character. This is necessary
    // since we cannot revert escape sequences, thus strings
    // must be validated before parsing.
    while (true) {
        if (UNLIKELY(*currentCharacter == quote)) {
            // String parsing is successful.
            return currentCharacter + 1;
        }
        if (UNLIKELY(!*currentCharacter)) {
            // String parsing is successful up to end of input.
            return currentCharacter;
        }
        if (UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
            // String parsing is failed for character '\n', '\f' or '\r'.
            return 0;
        }

        if (LIKELY(currentCharacter[0] != '\\')) {
            ++currentCharacter;
        } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
            currentCharacter += 2;
        } else if (currentCharacter[1] == '\r') {
            currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
        } else {
            currentCharacter = checkAndSkipEscape(currentCharacter);
            if (!currentCharacter)
                return 0;
        }
    }
}

template <typename CharacterType>
unsigned CSSTokenizer::parseEscape(CharacterType*& src)
{
    ASSERT(*src == '\\' && isCSSEscape(src[1]));

    unsigned unicode = 0;

    ++src;
    if (isASCIIHexDigit(*src)) {

        int length = 6;

        do {
            unicode = (unicode << 4) + toASCIIHexValue(*src++);
        } while (--length && isASCIIHexDigit(*src));

        // Characters above 0x10ffff are not handled.
        if (unicode > 0x10ffff)
            unicode = 0xfffd;

        // Optional space after the escape sequence.
        if (isHTMLSpace<CharacterType>(*src))
            ++src;

        return unicode;
    }

    return *src++;
}

template <>
inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
{
    ASSERT(unicode <= 0xff);
    *result = unicode;

    ++result;
}

template <>
inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
{
    // Replace unicode with a surrogate pairs when it is bigger than 0xffff
    if (U16_LENGTH(unicode) == 2) {
        *result++ = U16_LEAD(unicode);
        *result = U16_TRAIL(unicode);
    } else {
        *result = unicode;
    }

    ++result;
}

template <typename SrcCharacterType>
size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
{
    // The decoded form of an identifier (after resolving escape
    // sequences) will not contain more characters (ASCII or UTF-16
    // codepoints) than the input. This code can therefore ignore
    // escape sequences completely.
    SrcCharacterType* start = src;
    do {
        if (LIKELY(*src != '\\'))
            src++;
        else
            parseEscape<SrcCharacterType>(src);
    } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));

    return src - start;
}

template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
{
    hasEscape = false;
    do {
        if (LIKELY(*src != '\\')) {
            *result++ = *src++;
        } else {
            hasEscape = true;
            SrcCharacterType* savedEscapeStart = src;
            unsigned unicode = parseEscape<SrcCharacterType>(src);
            if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
                src = savedEscapeStart;
                return false;
            }
            UnicodeToChars(result, unicode);
        }
    } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));

    return true;
}

template <typename CharacterType>
inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
{
    // If a valid identifier start is found, we can safely
    // parse the identifier until the next invalid character.
    ASSERT(isIdentifierStart<CharacterType>());

    CharacterType* start = currentCharacter<CharacterType>();
    if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
        // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
        ASSERT(is8BitSource());
        UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdentifierLen(currentCharacter<CharacterType>()));
        UChar* start16 = result16;
        int i = 0;
        for (; i < result - start; i++)
            result16[i] = start[i];

        result16 += i;

        parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);

        resultString.init(start16, result16 - start16);

        return;
    }

    resultString.init(start, result - start);
}

template <typename SrcCharacterType>
size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
{
    // The decoded form of a CSS string (after resolving escape
    // sequences) will not contain more characters (ASCII or UTF-16
    // codepoints) than the input. This code can therefore ignore
    // escape sequences completely and just return the length of the
    // input string (possibly including terminating quote if any).
    SrcCharacterType* end = checkAndSkipString(src, quote);
    return end ? end - src : 0;
}

template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
{
    while (true) {
        if (UNLIKELY(*src == quote)) {
            // String parsing is done.
            ++src;
            return true;
        }
        if (UNLIKELY(!*src)) {
            // String parsing is done, but don't advance pointer if at the end of input.
            return true;
        }
        ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');

        if (LIKELY(src[0] != '\\')) {
            *result++ = *src++;
        } else if (src[1] == '\n' || src[1] == '\f') {
            src += 2;
        } else if (src[1] == '\r') {
            src += src[2] == '\n' ? 3 : 2;
        } else {
            SrcCharacterType* savedEscapeStart = src;
            unsigned unicode = parseEscape<SrcCharacterType>(src);
            if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
                src = savedEscapeStart;
                return false;
            }
            UnicodeToChars(result, unicode);
        }
    }

    return true;
}

template <typename CharacterType>
inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
{
    CharacterType* start = currentCharacter<CharacterType>();

    if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
        // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
        ASSERT(is8BitSource());
        UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStringLen(currentCharacter<CharacterType>(), quote));
        UChar* start16 = result16;
        int i = 0;
        for (; i < result - start; i++)
            result16[i] = start[i];

        result16 += i;

        parseStringInternal(currentCharacter<CharacterType>(), result16, quote);

        resultString.init(start16, result16 - start16);
        return;
    }

    resultString.init(start, result - start);
}

template <typename CharacterType>
inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
{
    start = skipWhiteSpace(currentCharacter<CharacterType>());

    if (*start == '"' || *start == '\'') {
        quote = *start++;
        end = checkAndSkipString(start, quote);
        if (!end)
            return false;
    } else {
        quote = 0;
        end = start;
        while (isURILetter(*end)) {
            if (LIKELY(*end != '\\')) {
                ++end;
            } else {
                end = checkAndSkipEscape(end);
                if (!end)
                    return false;
            }
        }
    }

    end = skipWhiteSpace(end);
    if (*end != ')')
        return false;

    return true;
}

template <typename SrcCharacterType>
inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
{
    // The decoded form of a URI (after resolving escape sequences)
    // will not contain more characters (ASCII or UTF-16 codepoints)
    // than the input. This code can therefore ignore escape sequences
    // completely.
    SrcCharacterType* start = src;
    if (quote) {
        ASSERT(quote == '"' || quote == '\'');
        return peekMaxStringLen(src, quote);
    }

    while (isURILetter(*src)) {
        if (LIKELY(*src != '\\'))
            src++;
        else
            parseEscape<SrcCharacterType>(src);
    }

    return src - start;
}

template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
{
    if (quote) {
        ASSERT(quote == '"' || quote == '\'');
        return parseStringInternal(src, dest, quote);
    }

    while (isURILetter(*src)) {
        if (LIKELY(*src != '\\')) {
            *dest++ = *src++;
        } else {
            unsigned unicode = parseEscape<SrcCharacterType>(src);
            if (unicode > 0xff && sizeof(DestCharacterType) == 1)
                return false;
            UnicodeToChars(dest, unicode);
        }
    }

    return true;
}

template <typename CharacterType>
inline void CSSTokenizer::parseURI(CSSParserString& string)
{
    CharacterType* uriStart;
    CharacterType* uriEnd;
    UChar quote;
    if (!findURI(uriStart, uriEnd, quote))
        return;

    CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
    if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
        string.init(uriStart, dest - uriStart);
    } else {
        // An escape sequence was encountered that can't be stored in 8 bits.
        // Reset the current character to the start of the URI and re-parse with
        // a 16-bit destination.
        ASSERT(is8BitSource());
        currentCharacter<CharacterType>() = uriStart;
        UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<CharacterType>(), quote));
        UChar* uriStart16 = result16;
        bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);
        ASSERT_UNUSED(result, result);
        string.init(uriStart16, result16 - uriStart16);
    }

    currentCharacter<CharacterType>() = uriEnd + 1;
    m_token = URI;
}

template <typename CharacterType>
inline bool CSSTokenizer::parseUnicodeRange()
{
    CharacterType* character = currentCharacter<CharacterType>() + 1;
    int length = 6;
    ASSERT(*currentCharacter<CharacterType>() == '+');

    while (isASCIIHexDigit(*character) && length) {
        ++character;
        --length;
    }

    if (length && *character == '?') {
        // At most 5 hex digit followed by a question mark.
        do {
            ++character;
            --length;
        } while (*character == '?' && length);
        currentCharacter<CharacterType>() = character;
        return true;
    }

    if (length < 6) {
        // At least one hex digit.
        if (character[0] == '-' && isASCIIHexDigit(character[1])) {
            // Followed by a dash and a hex digit.
            ++character;
            length = 6;
            do {
                ++character;
            } while (--length && isASCIIHexDigit(*character));
        }
        currentCharacter<CharacterType>() = character;
        return true;
    }
    return false;
}

template <typename CharacterType>
bool CSSTokenizer::parseNthChild()
{
    CharacterType* character = currentCharacter<CharacterType>();

    while (isASCIIDigit(*character))
        ++character;
    if (isASCIIAlphaCaselessEqual(*character, 'n')) {
        currentCharacter<CharacterType>() = character + 1;
        return true;
    }
    return false;
}

template <typename CharacterType>
bool CSSTokenizer::parseNthChildExtra()
{
    CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>());
    if (*character != '+' && *character != '-')
        return false;

    character = skipWhiteSpace(character + 1);
    if (!isASCIIDigit(*character))
        return false;

    do {
        ++character;
    } while (isASCIIDigit(*character));

    currentCharacter<CharacterType>() = character;
    return true;
}

template <typename CharacterType>
inline bool CSSTokenizer::detectFunctionTypeToken(int length)
{
    ASSERT(length > 0);
    CharacterType* name = tokenStart<CharacterType>();
    SWITCH(name, length) {
        CASE("not") {
            m_token = NOTFUNCTION;
            return true;
        }
        CASE("url") {
            m_token = URI;
            return true;
        }
        CASE("cue") {
            m_token = CUEFUNCTION;
            return true;
        }
        CASE("calc") {
            m_token = CALCFUNCTION;
            return true;
        }
        CASE("host") {
            m_token = HOSTFUNCTION;
            return true;
        }
        CASE("host-context") {
            m_token = HOSTCONTEXTFUNCTION;
            return true;
        }
        CASE("nth-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
    }
    return false;
}

template <typename CharacterType>
inline void CSSTokenizer::detectMediaQueryToken(int length)
{
    ASSERT(m_parsingMode == MediaQueryMode);
    CharacterType* name = tokenStart<CharacterType>();

    SWITCH(name, length) {
        CASE("and") {
            m_token = MEDIA_AND;
        }
        CASE("not") {
            m_token = MEDIA_NOT;
        }
        CASE("only") {
            m_token = MEDIA_ONLY;
        }
        CASE("or") {
            m_token = MEDIA_OR;
        }
    }
}

template <typename CharacterType>
inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
{
    ASSERT(length > 0);

    SWITCH(type, length) {
        CASE("cm") {
            m_token = CMS;
        }
        CASE("ch") {
            m_token = CHS;
        }
        CASE("deg") {
            m_token = DEGS;
        }
        CASE("dppx") {
            // There is a discussion about the name of this unit on www-style.
            // Keep this compile time guard in place until that is resolved.
            // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
            m_token = DPPX;
        }
        CASE("dpcm") {
            m_token = DPCM;
        }
        CASE("dpi") {
            m_token = DPI;
        }
        CASE("em") {
            m_token = EMS;
        }
        CASE("ex") {
            m_token = EXS;
        }
        CASE("fr") {
            m_token = FR;
        }
        CASE("grad") {
            m_token = GRADS;
        }
        CASE("hz") {
            m_token = HERTZ;
        }
        CASE("in") {
            m_token = INS;
        }
        CASE("khz") {
            m_token = KHERTZ;
        }
        CASE("mm") {
            m_token = MMS;
        }
        CASE("ms") {
            m_token = MSECS;
        }
        CASE("px") {
            m_token = PXS;
        }
        CASE("pt") {
            m_token = PTS;
        }
        CASE("pc") {
            m_token = PCS;
        }
        CASE("rad") {
            m_token = RADS;
        }
        CASE("rem") {
            m_token = REMS;
        }
        CASE("s") {
            m_token = SECS;
        }
        CASE("turn") {
            m_token = TURNS;
        }
        CASE("vw") {
            m_token = VW;
        }
        CASE("vh") {
            m_token = VH;
        }
        CASE("vmin") {
            m_token = VMIN;
        }
        CASE("vmax") {
            m_token = VMAX;
        }
        CASE("__qem") {
            m_token = QEMS;
        }
    }
}

template <typename CharacterType>
inline void CSSTokenizer::detectDashToken(int length)
{
    CharacterType* name = tokenStart<CharacterType>();

    // Ignore leading dash.
    ++name;
    --length;

    SWITCH(name, length) {
        CASE("webkit-any") {
            m_token = ANYFUNCTION;
        }
        CASE("webkit-min") {
            m_token = MINFUNCTION;
        }
        CASE("webkit-max") {
            m_token = MAXFUNCTION;
        }
        CASE("webkit-calc") {
            m_token = CALCFUNCTION;
        }
    }
}

template <typename CharacterType>
inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
{
    CharacterType* name = tokenStart<CharacterType>();
    ASSERT(name[0] == '@' && length >= 2);

    // Ignore leading @.
    ++name;
    --length;

    // charset, font-face, import, media, namespace, page, supports,
    // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by hasEscape.
    SWITCH(name, length) {
        CASE("bottom-left") {
            if (LIKELY(!hasEscape))
                m_token = BOTTOMLEFT_SYM;
        }
        CASE("bottom-right") {
            if (LIKELY(!hasEscape))
                m_token = BOTTOMRIGHT_SYM;
        }
        CASE("bottom-center") {
            if (LIKELY(!hasEscape))
                m_token = BOTTOMCENTER_SYM;
        }
        CASE("bottom-left-corner") {
            if (LIKELY(!hasEscape))
                m_token = BOTTOMLEFTCORNER_SYM;
        }
        CASE("bottom-right-corner") {
            if (LIKELY(!hasEscape))
                m_token = BOTTOMRIGHTCORNER_SYM;
        }
        CASE("charset") {
            if (name - 1 == dataStart<CharacterType>())
                m_token = CHARSET_SYM;
        }
        CASE("font-face") {
            m_token = FONT_FACE_SYM;
        }
        CASE("import") {
            m_parsingMode = MediaQueryMode;
            m_token = IMPORT_SYM;
        }
        CASE("keyframes") {
            if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
                m_token = KEYFRAMES_SYM;
        }
        CASE("left-top") {
            if (LIKELY(!hasEscape))
                m_token = LEFTTOP_SYM;
        }
        CASE("left-middle") {
            if (LIKELY(!hasEscape))
                m_token = LEFTMIDDLE_SYM;
        }
        CASE("left-bottom") {
            if (LIKELY(!hasEscape))
                m_token = LEFTBOTTOM_SYM;
        }
        CASE("media") {
            m_parsingMode = MediaQueryMode;
            m_token = MEDIA_SYM;
        }
        CASE("namespace") {
            m_token = NAMESPACE_SYM;
        }
        CASE("page") {
            m_token = PAGE_SYM;
        }
        CASE("right-top") {
            if (LIKELY(!hasEscape))
                m_token = RIGHTTOP_SYM;
        }
        CASE("right-middle") {
            if (LIKELY(!hasEscape))
                m_token = RIGHTMIDDLE_SYM;
        }
        CASE("right-bottom") {
            if (LIKELY(!hasEscape))
                m_token = RIGHTBOTTOM_SYM;
        }
        CASE("supports") {
            m_parsingMode = SupportsMode;
            m_token = SUPPORTS_SYM;
        }
        CASE("top-left") {
            if (LIKELY(!hasEscape))
                m_token = TOPLEFT_SYM;
        }
        CASE("top-right") {
            if (LIKELY(!hasEscape))
                m_token = TOPRIGHT_SYM;
        }
        CASE("top-center") {
            if (LIKELY(!hasEscape))
                m_token = TOPCENTER_SYM;
        }
        CASE("top-left-corner") {
            if (LIKELY(!hasEscape))
                m_token = TOPLEFTCORNER_SYM;
        }
        CASE("top-right-corner") {
            if (LIKELY(!hasEscape))
                m_token = TOPRIGHTCORNER_SYM;
        }
        CASE("viewport") {
            m_token = VIEWPORT_RULE_SYM;
        }
        CASE("-internal-rule") {
            if (LIKELY(!hasEscape && m_internal))
                m_token = INTERNAL_RULE_SYM;
        }
        CASE("-internal-decls") {
            if (LIKELY(!hasEscape && m_internal))
                m_token = INTERNAL_DECLS_SYM;
        }
        CASE("-internal-value") {
            if (LIKELY(!hasEscape && m_internal))
                m_token = INTERNAL_VALUE_SYM;
        }
        CASE("-webkit-keyframes") {
            m_token = WEBKIT_KEYFRAMES_SYM;
        }
        CASE("-internal-selector") {
            if (LIKELY(!hasEscape && m_internal))
                m_token = INTERNAL_SELECTOR_SYM;
        }
        CASE("-internal-medialist") {
            if (!m_internal)
                return;
            m_parsingMode = MediaQueryMode;
            m_token = INTERNAL_MEDIALIST_SYM;
        }
        CASE("-internal-keyframe-rule") {
            if (LIKELY(!hasEscape && m_internal))
                m_token = INTERNAL_KEYFRAME_RULE_SYM;
        }
        CASE("-internal-keyframe-key-list") {
            if (!m_internal)
                return;
            m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
        }
        CASE("-internal-supports-condition") {
            if (!m_internal)
                return;
            m_parsingMode = SupportsMode;
            m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
        }
    }
}

template <typename CharacterType>
inline void CSSTokenizer::detectSupportsToken(int length)
{
    ASSERT(m_parsingMode == SupportsMode);
    CharacterType* name = tokenStart<CharacterType>();

    SWITCH(name, length) {
        CASE("or") {
            m_token = SUPPORTS_OR;
        }
        CASE("and") {
            m_token = SUPPORTS_AND;
        }
        CASE("not") {
            m_token = SUPPORTS_NOT;
        }
    }
}

template <typename SrcCharacterType>
int CSSTokenizer::realLex(void* yylvalWithoutType)
{
    YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
    // Write pointer for the next character.
    SrcCharacterType* result;
    CSSParserString resultString;
    bool hasEscape;

    // The input buffer is terminated by a \0 character, so
    // it is safe to read one character ahead of a known non-null.
#ifndef NDEBUG
    // In debug we check with an ASSERT that the length is > 0 for string types.
    yylval->string.clear();
#endif

restartAfterComment:
    result = currentCharacter<SrcCharacterType>();
    setTokenStart(result);
    m_tokenStartLineNumber = m_lineNumber;
    m_token = *currentCharacter<SrcCharacterType>();
    ++currentCharacter<SrcCharacterType>();

    switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
    case CharacterCaselessU:
        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
            if (parseUnicodeRange<SrcCharacterType>()) {
                m_token = UNICODERANGE;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
                break;
            }
        }
        // Fall through to CharacterIdentifierStart.

    case CharacterIdentifierStart:
        --currentCharacter<SrcCharacterType>();
        parseIdentifier(result, yylval->string, hasEscape);
        m_token = IDENT;

        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
            if (m_parsingMode == SupportsMode && !hasEscape) {
                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                if (m_token != IDENT)
                    break;
            }

            m_token = FUNCTION;
            if (!hasEscape)
                detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());

            // Skip parenthesis
            ++currentCharacter<SrcCharacterType>();
            ++result;
            ++yylval->string.m_length;

            if (m_token == URI) {
                m_token = FUNCTION;
                // Check whether it is really an URI.
                if (yylval->string.is8Bit())
                    parseURI<LChar>(yylval->string);
                else
                    parseURI<UChar>(yylval->string);
            }
        } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
            if (m_parsingMode == MediaQueryMode) {
                detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
            } else if (m_parsingMode == SupportsMode) {
                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
            } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
                if (result - tokenStart<SrcCharacterType>() == 1) {
                    // String "n" is IDENT but "n+1" is NTH.
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
                    }
                } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
                    // String "n-" is IDENT but "n-1" is NTH.
                    // Set currentCharacter to '-' to continue parsing.
                    SrcCharacterType* nextCharacter = result;
                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
                    } else {
                        // Revert the change to currentCharacter if unsuccessful.
                        currentCharacter<SrcCharacterType>() = nextCharacter;
                    }
                }
            }
        }
        break;

    case CharacterDot:
        if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
            break;
        // Fall through to CharacterNumber.

    case CharacterNumber: {
        bool dotSeen = (m_token == '.');

        while (true) {
            if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
                // Only one dot is allowed for a number,
                // and it must be followed by a digit.
                if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
                    break;
                dotSeen = true;
            }
            ++currentCharacter<SrcCharacterType>();
        }

        if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
            // "[0-9]+n" is always an NthChild.
            ++currentCharacter<SrcCharacterType>();
            parseNthChildExtra<SrcCharacterType>();
            m_token = NTH;
            yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            break;
        }

        // Use SVG parser for numbers on SVG presentation attributes.
        if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
            // We need to take care of units like 'em' or 'ex'.
            SrcCharacterType* character = currentCharacter<SrcCharacterType>();
            if (isASCIIAlphaCaselessEqual(*character, 'e')) {
                ASSERT(character - tokenStart<SrcCharacterType>() > 0);
                ++character;
                if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
                    ++character;
                    while (isASCIIDigit(*character))
                        ++character;
                    // Use FLOATTOKEN if the string contains exponents.
                    dotSeen = true;
                    currentCharacter<SrcCharacterType>() = character;
                }
            }
            if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
                break;
        } else {
            yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
        }

        // Type of the function.
        if (isIdentifierStart<SrcCharacterType>()) {
            SrcCharacterType* type = currentCharacter<SrcCharacterType>();
            result = currentCharacter<SrcCharacterType>();

            parseIdentifier(result, resultString, hasEscape);

            m_token = DIMEN;
            if (!hasEscape)
                detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);

            if (m_token == DIMEN) {
                // The decoded number is overwritten, but this is intentional.
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        } else if (*currentCharacter<SrcCharacterType>() == '%') {
            // Although the CSS grammar says {num}% we follow
            // webkit at the moment which uses {num}%+.
            do {
                ++currentCharacter<SrcCharacterType>();
            } while (*currentCharacter<SrcCharacterType>() == '%');
            m_token = PERCENTAGE;
        } else {
            m_token = dotSeen ? FLOATTOKEN : INTEGER;
        }
        break;
    }

    case CharacterDash:
        if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
            --currentCharacter<SrcCharacterType>();
            parseIdentifier(result, resultString, hasEscape);
            m_token = IDENT;

            if (*currentCharacter<SrcCharacterType>() == '(') {
                m_token = FUNCTION;
                if (!hasEscape)
                    detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                ++currentCharacter<SrcCharacterType>();
                ++result;
            } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
                if (result - tokenStart<SrcCharacterType>() == 2) {
                    // String "-n" is IDENT but "-n+1" is NTH.
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        result = currentCharacter<SrcCharacterType>();
                    }
                } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
                    // String "-n-" is IDENT but "-n-1" is NTH.
                    // Set currentCharacter to second '-' of '-n-' to continue parsing.
                    SrcCharacterType* nextCharacter = result;
                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        result = currentCharacter<SrcCharacterType>();
                    } else {
                        // Revert the change to currentCharacter if unsuccessful.
                        currentCharacter<SrcCharacterType>() = nextCharacter;
                    }
                }
            }
            resultString.setLength(result - tokenStart<SrcCharacterType>());
            yylval->string = resultString;
        } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
            currentCharacter<SrcCharacterType>() += 2;
            m_token = SGML_CD;
        } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
            // "-[0-9]+n" is always an NthChild.
            if (parseNthChild<SrcCharacterType>()) {
                parseNthChildExtra<SrcCharacterType>();
                m_token = NTH;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        }
        break;

    case CharacterOther:
        // m_token is simply the current character.
        break;

    case CharacterNull:
        // Do not advance pointer at the end of input.
        --currentCharacter<SrcCharacterType>();
        break;

    case CharacterWhiteSpace:
        m_token = WHITESPACE;
        // Might start with a '\n'.
        --currentCharacter<SrcCharacterType>();
        do {
            if (*currentCharacter<SrcCharacterType>() == '\n')
                ++m_lineNumber;
            ++currentCharacter<SrcCharacterType>();
        } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
        break;

    case CharacterEndMediaQueryOrSupports:
        if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
            m_parsingMode = NormalMode;
        break;

    case CharacterEndNthChild:
        if (m_parsingMode == NthChildMode)
            m_parsingMode = NormalMode;
        break;

    case CharacterQuote:
        if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token)) {
            ++result;
            parseString<SrcCharacterType>(result, yylval->string, m_token);
            m_token = STRING;
        }
        break;

    case CharacterExclamationMark: {
        SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
        if (isEqualToCSSIdentifier(start, "important")) {
            m_token = IMPORTANT_SYM;
            currentCharacter<SrcCharacterType>() = start + 9;
        }
        break;
    }

    case CharacterHashmark: {
        SrcCharacterType* start = currentCharacter<SrcCharacterType>();
        result = currentCharacter<SrcCharacterType>();

        if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
            // This must be a valid hex number token.
            do {
                ++currentCharacter<SrcCharacterType>();
            } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
            m_token = HEX;
            yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
        } else if (isIdentifierStart<SrcCharacterType>()) {
            m_token = IDSEL;
            parseIdentifier(result, yylval->string, hasEscape);
            if (!hasEscape) {
                // Check whether the identifier is also a valid hex number.
                SrcCharacterType* current = start;
                m_token = HEX;
                do {
                    if (!isASCIIHexDigit(*current)) {
                        m_token = IDSEL;
                        break;
                    }
                    ++current;
                } while (current < result);
            }
        }
        break;
    }

    case CharacterSlash:
        // Ignore comments. They are not even considered as white spaces.
        if (*currentCharacter<SrcCharacterType>() == '*') {
            const CSSParserLocation startLocation = currentLocation();
            if (m_parser.m_observer) {
                unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
                m_parser.m_observer->startComment(startOffset - m_parsedTextPrefixLength);
            }
            ++currentCharacter<SrcCharacterType>();
            while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
                if (*currentCharacter<SrcCharacterType>() == '\n')
                    ++m_lineNumber;
                if (*currentCharacter<SrcCharacterType>() == '\0') {
                    // Unterminated comments are simply ignored.
                    currentCharacter<SrcCharacterType>() -= 2;
                    m_parser.reportError(startLocation, UnterminatedCommentCSSError);
                    break;
                }
                ++currentCharacter<SrcCharacterType>();
            }
            currentCharacter<SrcCharacterType>() += 2;
            if (m_parser.m_observer) {
                unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
                unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
                m_parser.m_observer->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
            }
            goto restartAfterComment;
        }
        break;

    case CharacterDollar:
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = ENDSWITH;
        }
        break;

    case CharacterAsterisk:
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = CONTAINS;
        }
        break;

    case CharacterPlus:
        if (UNLIKELY(m_parsingMode == NthChildMode)) {
            // Simplest case. "+[0-9]*n" is always NthChild.
            if (parseNthChild<SrcCharacterType>()) {
                parseNthChildExtra<SrcCharacterType>();
                m_token = NTH;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        }
        break;

    case CharacterLess:
        if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
            currentCharacter<SrcCharacterType>() += 3;
            m_token = SGML_CD;
        }
        break;

    case CharacterAt:
        if (isIdentifierStart<SrcCharacterType>()) {
            m_token = ATKEYWORD;
            ++result;
            parseIdentifier(result, resultString, hasEscape);
            detectAtToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>(), hasEscape);
        }
        break;

    case CharacterBackSlash:
        if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
            --currentCharacter<SrcCharacterType>();
            parseIdentifier(result, yylval->string, hasEscape);
            m_token = IDENT;
        }
        break;

    case CharacterXor:
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = BEGINSWITH;
        }
        break;

    case CharacterVerticalBar:
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = DASHMATCH;
        }
        break;

    case CharacterTilde:
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = INCLUDES;
        }
        break;

    default:
        ASSERT_NOT_REACHED();
        break;
    }

    return m_token;
}

template <>
inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
{
    m_tokenStart.ptr8 = tokenStart;
}

template <>
inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
{
    m_tokenStart.ptr16 = tokenStart;
}

void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
{
    m_parsedTextPrefixLength = prefixLength;
    m_parsedTextSuffixLength = suffixLength;
    unsigned stringLength = string.length();
    unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
    m_length = length;

    if (!stringLength || string.is8Bit()) {
        m_dataStart8 = adoptArrayPtr(new LChar[length]);
        for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
            m_dataStart8[i] = prefix[i];

        if (stringLength)
            memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));

        unsigned start = m_parsedTextPrefixLength + stringLength;
        unsigned end = start + suffixLength;
        for (unsigned i = start; i < end; i++)
            m_dataStart8[i] = suffix[i - start];

        m_dataStart8[length - 1] = 0;

        m_is8BitSource = true;
        m_currentCharacter8 = m_dataStart8.get();
        m_currentCharacter16 = 0;
        setTokenStart<LChar>(m_currentCharacter8);
        m_lexFunc = &CSSTokenizer::realLex<LChar>;
        return;
    }

    m_dataStart16 = adoptArrayPtr(new UChar[length]);
    for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
        m_dataStart16[i] = prefix[i];

    ASSERT(stringLength);
    memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));

    unsigned start = m_parsedTextPrefixLength + stringLength;
    unsigned end = start + suffixLength;
    for (unsigned i = start; i < end; i++)
        m_dataStart16[i] = suffix[i - start];

    m_dataStart16[length - 1] = 0;

    m_is8BitSource = false;
    m_currentCharacter8 = 0;
    m_currentCharacter16 = m_dataStart16.get();
    setTokenStart<UChar>(m_currentCharacter16);
    m_lexFunc = &CSSTokenizer::realLex<UChar>;
}

} // namespace WebCore
/* [<][>][^][v][top][bottom][index][help] */
root/Source/core/css/CSSTokenizer-in.cpp

DEFINITIONS