This source file includes the following definitions.
- isCSSLetter
- isCSSEscape
- isURILetter
- isIdentifierStartAfterDash
- isEqualToCSSIdentifier
- isEqualToCSSCaseSensitiveIdentifier
- checkAndSkipEscape
- skipWhiteSpace
- allocateStringBuffer16
- tokenLocation
- currentLocation
- isIdentifierStart
- checkAndSkipString
- parseEscape
- peekMaxIdentifierLen
- parseIdentifierInternal
- parseIdentifier
- peekMaxStringLen
- parseStringInternal
- parseString
- findURI
- peekMaxURILen
- parseURIInternal
- parseURI
- parseUnicodeRange
- parseNthChild
- parseNthChildExtra
- detectFunctionTypeToken
- detectMediaQueryToken
- detectNumberToken
- detectDashToken
- detectAtToken
- detectSupportsToken
- realLex
- setupTokenizer
#include "config.h"
#include "core/css/CSSTokenizer.h"
#include "core/css/CSSKeyframeRule.h"
#include "core/css/parser/BisonCSSParser.h"
#include "core/css/CSSParserValues.h"
#include "core/css/MediaQuery.h"
#include "core/css/StyleRule.h"
#include "core/html/parser/HTMLParserIdioms.h"
#include "core/svg/SVGParserUtilities.h"
namespace WebCore {
#include "CSSGrammar.h"
// Lexical class the tokenizer assigns to an input character.
// The numeric order matters: isCSSLetter() accepts every class whose value is
// <= CharacterDash, so the first four enumerators must remain first.
enum CharacterType {
    // Classes that count as "CSS letters" (see isCSSLetter()).
    CharacterCaselessU = 0,
    CharacterIdentifierStart = 1,
    CharacterNumber = 2,
    CharacterDash = 3,

    // Everything else.
    CharacterOther = 4,
    CharacterNull = 5,
    CharacterWhiteSpace = 6,
    CharacterEndMediaQueryOrSupports = 7,
    CharacterEndNthChild = 8,
    CharacterQuote = 9,
    CharacterExclamationMark = 10,
    CharacterHashmark = 11,
    CharacterDollar = 12,
    CharacterAsterisk = 13,
    CharacterPlus = 14,
    CharacterDot = 15,
    CharacterSlash = 16,
    CharacterLess = 17,
    CharacterAt = 18,
    CharacterBackSlash = 19,
    CharacterXor = 20,
    CharacterVerticalBar = 21,
    CharacterTilde = 22,
};
// Maps each ASCII code (0-127) to its CharacterType. The entry position is the
// character code, so entries must never be inserted, removed or reordered.
// realLex() treats every code above 127 as CharacterIdentifierStart without
// consulting this table.
static const CharacterType typesOfASCIICharacters[128] = {
CharacterNull, // 0 - NUL
CharacterOther, // 1 - SOH
CharacterOther, // 2 - STX
CharacterOther, // 3 - ETX
CharacterOther, // 4 - EOT
CharacterOther, // 5 - ENQ
CharacterOther, // 6 - ACK
CharacterOther, // 7 - BEL
CharacterOther, // 8 - BS
CharacterWhiteSpace, // 9 - horizontal tab
CharacterWhiteSpace, // 10 - line feed
CharacterOther, // 11 - vertical tab
CharacterWhiteSpace, // 12 - form feed
CharacterWhiteSpace, // 13 - carriage return
CharacterOther, // 14 - SO
CharacterOther, // 15 - SI
CharacterOther, // 16 - DLE
CharacterOther, // 17 - DC1
CharacterOther, // 18 - DC2
CharacterOther, // 19 - DC3
CharacterOther, // 20 - DC4
CharacterOther, // 21 - NAK
CharacterOther, // 22 - SYN
CharacterOther, // 23 - ETB
CharacterOther, // 24 - CAN
CharacterOther, // 25 - EM
CharacterOther, // 26 - SUB
CharacterOther, // 27 - ESC
CharacterOther, // 28 - FS
CharacterOther, // 29 - GS
CharacterOther, // 30 - RS
CharacterOther, // 31 - US
CharacterWhiteSpace, // 32 - space
CharacterExclamationMark, // 33 - !
CharacterQuote, // 34 - "
CharacterHashmark, // 35 - #
CharacterDollar, // 36 - $
CharacterOther, // 37 - %
CharacterOther, // 38 - &
CharacterQuote, // 39 - '
CharacterOther, // 40 - (
CharacterEndNthChild, // 41 - )
CharacterAsterisk, // 42 - *
CharacterPlus, // 43 - +
CharacterOther, // 44 - ,
CharacterDash, // 45 - -
CharacterDot, // 46 - .
CharacterSlash, // 47 - /
CharacterNumber, // 48 - 0
CharacterNumber, // 49 - 1
CharacterNumber, // 50 - 2
CharacterNumber, // 51 - 3
CharacterNumber, // 52 - 4
CharacterNumber, // 53 - 5
CharacterNumber, // 54 - 6
CharacterNumber, // 55 - 7
CharacterNumber, // 56 - 8
CharacterNumber, // 57 - 9
CharacterOther, // 58 - :
CharacterEndMediaQueryOrSupports, // 59 - ;
CharacterLess, // 60 - <
CharacterOther, // 61 - =
CharacterOther, // 62 - >
CharacterOther, // 63 - ?
CharacterAt, // 64 - @
CharacterIdentifierStart, // 65 - A
CharacterIdentifierStart, // 66 - B
CharacterIdentifierStart, // 67 - C
CharacterIdentifierStart, // 68 - D
CharacterIdentifierStart, // 69 - E
CharacterIdentifierStart, // 70 - F
CharacterIdentifierStart, // 71 - G
CharacterIdentifierStart, // 72 - H
CharacterIdentifierStart, // 73 - I
CharacterIdentifierStart, // 74 - J
CharacterIdentifierStart, // 75 - K
CharacterIdentifierStart, // 76 - L
CharacterIdentifierStart, // 77 - M
CharacterIdentifierStart, // 78 - N
CharacterIdentifierStart, // 79 - O
CharacterIdentifierStart, // 80 - P
CharacterIdentifierStart, // 81 - Q
CharacterIdentifierStart, // 82 - R
CharacterIdentifierStart, // 83 - S
CharacterIdentifierStart, // 84 - T
CharacterCaselessU, // 85 - U
CharacterIdentifierStart, // 86 - V
CharacterIdentifierStart, // 87 - W
CharacterIdentifierStart, // 88 - X
CharacterIdentifierStart, // 89 - Y
CharacterIdentifierStart, // 90 - Z
CharacterOther, // 91 - [
CharacterBackSlash, // 92 - backslash
CharacterOther, // 93 - ]
CharacterXor, // 94 - ^
CharacterIdentifierStart, // 95 - _
CharacterOther, // 96 - `
CharacterIdentifierStart, // 97 - a
CharacterIdentifierStart, // 98 - b
CharacterIdentifierStart, // 99 - c
CharacterIdentifierStart, // 100 - d
CharacterIdentifierStart, // 101 - e
CharacterIdentifierStart, // 102 - f
CharacterIdentifierStart, // 103 - g
CharacterIdentifierStart, // 104 - h
CharacterIdentifierStart, // 105 - i
CharacterIdentifierStart, // 106 - j
CharacterIdentifierStart, // 107 - k
CharacterIdentifierStart, // 108 - l
CharacterIdentifierStart, // 109 - m
CharacterIdentifierStart, // 110 - n
CharacterIdentifierStart, // 111 - o
CharacterIdentifierStart, // 112 - p
CharacterIdentifierStart, // 113 - q
CharacterIdentifierStart, // 114 - r
CharacterIdentifierStart, // 115 - s
CharacterIdentifierStart, // 116 - t
CharacterCaselessU, // 117 - u
CharacterIdentifierStart, // 118 - v
CharacterIdentifierStart, // 119 - w
CharacterIdentifierStart, // 120 - x
CharacterIdentifierStart, // 121 - y
CharacterIdentifierStart, // 122 - z
CharacterEndMediaQueryOrSupports, // 123 - {
CharacterVerticalBar, // 124 - |
CharacterOther, // 125 - }
CharacterTilde, // 126 - ~
CharacterOther, // 127 - DEL
};
// Returns true when |character| may appear inside a CSS identifier: any
// non-ASCII code, or an ASCII code classified as caseless-U, identifier start,
// number or dash (the enumerators ordered at or before CharacterDash).
template <typename CharacterType>
static inline bool isCSSLetter(CharacterType character)
{
    if (character >= 128)
        return true;
    return typesOfASCIICharacters[character] <= CharacterDash;
}
// Returns true when |character| may follow a backslash in an escape sequence:
// anything from space upwards except DEL (127).
template <typename CharacterType>
static inline bool isCSSEscape(CharacterType character)
{
    if (character < ' ')
        return false;
    return character != 127;
}
// Returns true when |character| may appear in an unquoted url(...) body:
// '!', the range '#'..'&', or anything from '*' upwards except DEL (127).
// Quotes, parentheses, space and control characters are therefore rejected.
template <typename CharacterType>
static inline bool isURILetter(CharacterType character)
{
    if (character == '!')
        return true;
    if (character >= '#' && character <= '&')
        return true;
    return character >= '*' && character != 127;
}
// Returns true when the text at |currentCharacter| can begin an identifier
// once an optional leading '-' has already been skipped: an ASCII letter, '_',
// a non-ASCII code, or a backslash followed by a valid escape character.
// The second character is only read when the first one is a backslash.
template <typename CharacterType>
static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
{
    const CharacterType first = currentCharacter[0];
    if (isASCIIAlpha(first) || first == '_' || first >= 128)
        return true;
    return first == '\\' && isCSSEscape(currentCharacter[1]);
}
// Compares |cssString| against |constantString| ignoring ASCII case.
// |constantString| must be non-empty and consist only of 'a'..'z' and '-'
// (asserted); only as many characters as |constantString| holds are compared.
template <typename CharacterType>
static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
{
    for (;;) {
        ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
        ASSERT(*constantString != '-' || isCSSLetter(*cssString));
        if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
            return false;
        // Stop once the whole constant has been matched.
        if (!*constantString)
            return true;
    }
}
// Case-sensitive prefix comparison: returns true when |string| starts with
// every character of the non-empty |constantString|.
template <typename CharacterType>
static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
{
    ASSERT(*constantString);
    for (;;) {
        if (*string++ != *constantString++)
            return false;
        if (!*constantString)
            return true;
    }
}
// Validates the escape sequence starting at the backslash |currentCharacter|
// points to, without decoding it. Returns a pointer just past the escape, or 0
// when the character after the backslash cannot be escaped.
// A hexadecimal escape spans up to six hex digits plus one optional trailing
// whitespace character; any other escape spans exactly one character.
template <typename CharacterType>
static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
{
    ASSERT(*currentCharacter == '\\');
    CharacterType* cursor = currentCharacter + 1;
    if (!isCSSEscape(*cursor))
        return 0;

    // Single-character escape.
    if (!isASCIIHexDigit(*cursor))
        return cursor + 1;

    // Hexadecimal escape: the first digit is already known to be valid.
    int remainingDigits = 6;
    do {
        ++cursor;
    } while (isASCIIHexDigit(*cursor) && --remainingDigits);

    if (isHTMLSpace<CharacterType>(*cursor))
        ++cursor;
    return cursor;
}
// Returns the first position at or after |currentCharacter| that does not
// hold an HTML whitespace character.
template <typename CharacterType>
static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
{
    for (; isHTMLSpace<CharacterType>(*currentCharacter); ++currentCharacter) { }
    return currentCharacter;
}
// 8-bit specialization: returns a mutable reference to the 8-bit read cursor,
// so callers can both read and advance it.
template <>
inline LChar*& CSSTokenizer::currentCharacter<LChar>()
{
return m_currentCharacter8;
}
// 16-bit specialization: returns a mutable reference to the 16-bit read
// cursor, so callers can both read and advance it.
template <>
inline UChar*& CSSTokenizer::currentCharacter<UChar>()
{
return m_currentCharacter16;
}
// Allocates an uninitialized buffer of |len| UChars and hands its ownership to
// m_cssStrings16. The returned raw pointer is non-owning.
UChar* CSSTokenizer::allocateStringBuffer16(size_t len)
{
    OwnPtr<UChar[]> storage = adoptArrayPtr(new UChar[len]);
    // Grab the raw pointer before ownership is released into the list.
    UChar* rawStorage = storage.get();
    m_cssStrings16.append(storage.release());
    return rawStorage;
}
// 8-bit specialization: returns the start of the 8-bit input buffer.
template <>
inline LChar* CSSTokenizer::dataStart<LChar>()
{
return m_dataStart8.get();
}
// 16-bit specialization: returns the start of the 16-bit input buffer.
template <>
inline UChar* CSSTokenizer::dataStart<UChar>()
{
return m_dataStart16.get();
}
// Builds a CSSParserLocation for the token being scanned: its text (from the
// token start up to the current character), the line number recorded when the
// token started, and the token's offset from the start of the input buffer.
template <typename CharacterType>
inline CSSParserLocation CSSTokenizer::tokenLocation()
{
    CharacterType* start = tokenStart<CharacterType>();

    CSSParserLocation location;
    location.token.init(start, currentCharacter<CharacterType>() - start);
    location.lineNumber = m_tokenStartLineNumber;
    location.offset = start - dataStart<CharacterType>();
    return location;
}
// Returns the location of the current token, dispatching on the width of the
// source buffer.
CSSParserLocation CSSTokenizer::currentLocation()
{
    return is8BitSource() ? tokenLocation<LChar>() : tokenLocation<UChar>();
}
// Returns true when an identifier can start at the current character. A single
// leading '-' is allowed and skipped before the check. The read cursor itself
// is not moved.
template <typename CharacterType>
inline bool CSSTokenizer::isIdentifierStart()
{
    CharacterType* candidate = currentCharacter<CharacterType>();
    if (*candidate == '-')
        ++candidate;
    return isIdentifierStartAfterDash(candidate);
}
// Validates a quoted string whose body starts at |currentCharacter| (just
// after the opening quote) without decoding it.
// Returns:
//   - a pointer just past the closing |quote| when the string is terminated;
//   - a pointer to the terminating NUL when the input ends inside the string;
//   - 0 when the string contains an unescaped newline ('\n', '\f' or '\r') or
//     an invalid escape.
template <typename CharacterType>
static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote)
{
    for (;;) {
        if (UNLIKELY(*currentCharacter == quote))
            return currentCharacter + 1;

        if (UNLIKELY(!*currentCharacter))
            return currentCharacter;

        // '\f' (0x0c) and '\r' (0x0d) both become '\r' once bit 0 is set.
        if (UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r')))
            return 0;

        if (LIKELY(currentCharacter[0] != '\\')) {
            ++currentCharacter;
            continue;
        }

        // Backslash followed by a newline is a line continuation.
        if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
            currentCharacter += 2;
            continue;
        }
        if (currentCharacter[1] == '\r') {
            // "\r\n" counts as a single newline.
            currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
            continue;
        }

        currentCharacter = checkAndSkipEscape(currentCharacter);
        if (!currentCharacter)
            return 0;
    }
}
// Decodes the escape sequence at |src| (which must point at a backslash
// followed by a valid escape character) and advances |src| past it.
// Returns the decoded code point. Hexadecimal escapes read up to six digits,
// swallow one optional trailing whitespace character, and map values above
// 0x10ffff to U+FFFD. Any other escape yields the escaped character itself.
template <typename CharacterType>
unsigned CSSTokenizer::parseEscape(CharacterType*& src)
{
    ASSERT(*src == '\\' && isCSSEscape(src[1]));
    ++src;

    // Single-character escape.
    if (!isASCIIHexDigit(*src))
        return *src++;

    unsigned codePoint = 0;
    int remainingDigits = 6;
    do {
        codePoint = (codePoint << 4) + toASCIIHexValue(*src++);
    } while (--remainingDigits && isASCIIHexDigit(*src));

    if (codePoint > 0x10ffff)
        codePoint = 0xfffd;

    if (isHTMLSpace<CharacterType>(*src))
        ++src;
    return codePoint;
}
// 8-bit specialization: stores |unicode| (which must fit in one byte) at
// |result| and advances |result| by one.
template <>
inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
{
    ASSERT(unicode <= 0xff);
    *result++ = unicode;
}
// 16-bit specialization: stores |unicode| at |result| as UTF-16 and advances
// |result| past what was written (two units for a surrogate pair, otherwise
// one).
template <>
inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
{
    if (U16_LENGTH(unicode) == 2) {
        result[0] = U16_LEAD(unicode);
        result[1] = U16_TRAIL(unicode);
        result += 2;
        return;
    }
    *result++ = unicode;
}
// Returns the number of source characters the identifier at |src| occupies.
// Because an escape never decodes to more units than it occupies in the
// source, this is an upper bound on the decoded length. |src| is taken by
// value, so the caller's cursor is not moved. At least one character (or
// escape) is always consumed.
template <typename SrcCharacterType>
size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
{
    SrcCharacterType* const begin = src;
    do {
        if (UNLIKELY(*src == '\\'))
            parseEscape<SrcCharacterType>(src);
        else
            src++;
    } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
    return src - begin;
}
// Copies the identifier at |src| to |result|, decoding escapes, and advances
// both pointers. |hasEscape| is set when at least one escape was decoded.
// Returns false when an escape decodes to a value above 0xff but the
// destination is 8-bit; in that case |src| is rewound to the offending
// backslash so the caller can resume with a 16-bit destination.
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
{
    hasEscape = false;
    do {
        if (UNLIKELY(*src == '\\')) {
            hasEscape = true;
            SrcCharacterType* escapeBegin = src;
            unsigned codePoint = parseEscape<SrcCharacterType>(src);
            if (codePoint > 0xff && sizeof(DestCharacterType) == 1) {
                src = escapeBegin;
                return false;
            }
            UnicodeToChars(result, codePoint);
        } else {
            *result++ = *src++;
        }
    } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
    return true;
}
// Parses the identifier at the current character, writing the decoded text to
// |result| and describing it in |resultString|.
// If an escape needs more than 8 bits while the destination is 8-bit, the
// characters decoded so far are copied into a freshly allocated 16-bit buffer
// and parsing resumes there from the offending escape.
template <typename CharacterType>
inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
{
    ASSERT(isIdentifierStart<CharacterType>());

    CharacterType* start = currentCharacter<CharacterType>();
    if (LIKELY(parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
        resultString.init(start, result - start);
        return;
    }

    // Only an 8-bit destination can overflow.
    ASSERT(is8BitSource());
    UChar* start16 = allocateStringBuffer16((result - start) + peekMaxIdentifierLen(currentCharacter<CharacterType>()));
    UChar* result16 = start16;

    // Carry over what was already decoded.
    for (CharacterType* decoded = start; decoded < result; ++decoded)
        *result16++ = *decoded;

    parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);
    resultString.init(start16, result16 - start16);
}
// Returns how many source characters checkAndSkipString() would skip for the
// string body at |src|, or 0 when it rejects the string.
template <typename SrcCharacterType>
size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
{
    SrcCharacterType* stringEnd = checkAndSkipString(src, quote);
    if (!stringEnd)
        return 0;
    return stringEnd - src;
}
// Copies the string body at |src| to |result|, decoding escapes and dropping
// escaped newlines, until the closing |quote| (which is consumed) or the
// terminating NUL (which is not).
// Returns false when an escape decodes to a value above 0xff but the
// destination is 8-bit; |src| is then rewound to the offending backslash.
// The string must already have passed checkAndSkipString(), so unescaped
// newlines are not expected here (asserted).
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
{
    for (;;) {
        if (UNLIKELY(*src == quote)) {
            ++src;
            return true;
        }
        if (UNLIKELY(!*src))
            return true;

        ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');

        if (LIKELY(src[0] != '\\')) {
            *result++ = *src++;
            continue;
        }

        // Backslash-newline is a line continuation and produces no output.
        if (src[1] == '\n' || src[1] == '\f') {
            src += 2;
            continue;
        }
        if (src[1] == '\r') {
            src += src[2] == '\n' ? 3 : 2;
            continue;
        }

        SrcCharacterType* escapeBegin = src;
        unsigned codePoint = parseEscape<SrcCharacterType>(src);
        if (codePoint > 0xff && sizeof(DestCharacterType) == 1) {
            src = escapeBegin;
            return false;
        }
        UnicodeToChars(result, codePoint);
    }
    return true;
}
// Parses the string body at the current character, writing the decoded text
// to |result| and describing it in |resultString|.
// If an escape needs more than 8 bits while the destination is 8-bit, the
// characters decoded so far are copied into a freshly allocated 16-bit buffer
// and parsing resumes there from the offending escape.
template <typename CharacterType>
inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
{
    CharacterType* start = currentCharacter<CharacterType>();
    if (LIKELY(parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
        resultString.init(start, result - start);
        return;
    }

    // Only an 8-bit destination can overflow.
    ASSERT(is8BitSource());
    UChar* start16 = allocateStringBuffer16((result - start) + peekMaxStringLen(currentCharacter<CharacterType>(), quote));
    UChar* result16 = start16;

    // Carry over what was already decoded.
    for (CharacterType* decoded = start; decoded < result; ++decoded)
        *result16++ = *decoded;

    parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
    resultString.init(start16, result16 - start16);
}
// Locates the body of a url(...) token without decoding it. The current
// character must be just past the opening parenthesis.
// On success returns true with:
//   |start| - first character of the body (after the opening quote, if any);
//   |end|   - the closing ')';
//   |quote| - the quote character used, or 0 for an unquoted URL.
// Returns false when the body is malformed or not followed by ')'. The read
// cursor is not moved.
template <typename CharacterType>
inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
{
    start = skipWhiteSpace(currentCharacter<CharacterType>());

    const bool isQuoted = *start == '"' || *start == '\'';
    if (isQuoted) {
        quote = *start++;
        end = checkAndSkipString(start, quote);
        if (!end)
            return false;
    } else {
        quote = 0;
        for (end = start; isURILetter(*end);) {
            if (UNLIKELY(*end == '\\')) {
                end = checkAndSkipEscape(end);
                if (!end)
                    return false;
            } else {
                ++end;
            }
        }
    }

    end = skipWhiteSpace(end);
    return *end == ')';
}
// Returns the number of source characters the URL body at |src| occupies,
// which bounds its decoded length. Quoted bodies are measured with
// peekMaxStringLen(); unquoted ones run until the first character that is not
// a URI letter. |src| is taken by value, so the caller's cursor is not moved.
template <typename SrcCharacterType>
inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
{
    if (quote) {
        ASSERT(quote == '"' || quote == '\'');
        return peekMaxStringLen(src, quote);
    }

    SrcCharacterType* cursor = src;
    while (isURILetter(*cursor)) {
        if (UNLIKELY(*cursor == '\\'))
            parseEscape<SrcCharacterType>(cursor);
        else
            cursor++;
    }
    return cursor - src;
}
// Copies the URL body at |src| to |dest|, decoding escapes, and advances both
// pointers. Quoted bodies are delegated to parseStringInternal().
// Returns false when an escape decodes to a value above 0xff but the
// destination is 8-bit. In that case |src| is rewound to the offending
// backslash (matching parseStringInternal() and parseIdentifierInternal()) so
// the caller can resume with a 16-bit destination.
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
{
    if (quote) {
        ASSERT(quote == '"' || quote == '\'');
        return parseStringInternal(src, dest, quote);
    }
    while (isURILetter(*src)) {
        if (LIKELY(*src != '\\')) {
            *dest++ = *src++;
        } else {
            SrcCharacterType* savedEscapeStart = src;
            unsigned unicode = parseEscape<SrcCharacterType>(src);
            if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
                src = savedEscapeStart;
                return false;
            }
            UnicodeToChars(dest, unicode);
        }
    }
    return true;
}

// Parses the body of a url(...) token into |string| and sets m_token to URI.
// Does nothing (leaving m_token and the read cursor untouched) when findURI()
// rejects the input.
//
// The first pass decodes in place, overwriting the source buffer from
// |uriStart|. If that pass hits an escape that does not fit in 8 bits, the
// source text before the escape has therefore already been replaced by its
// decoded form and must not be parsed again: doing so would re-interpret
// decoded characters (for example a decoded backslash, or the leftover digits
// of an earlier escape). Instead, the decoded prefix is copied into a 16-bit
// buffer and parsing resumes from the offending escape, the same way
// parseIdentifier() and parseString() recover.
template <typename CharacterType>
inline void CSSTokenizer::parseURI(CSSParserString& string)
{
    CharacterType* uriStart;
    CharacterType* uriEnd;
    UChar quote;
    if (!findURI(uriStart, uriEnd, quote))
        return;

    CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
    if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
        string.init(uriStart, dest - uriStart);
    } else {
        // Only an 8-bit destination can overflow.
        ASSERT(is8BitSource());

        // [uriStart, dest) holds the decoded prefix; the read cursor now
        // points at the escape that did not fit, whose text is still intact
        // because in-place output never runs ahead of the input.
        UChar* uriStart16 = allocateStringBuffer16((dest - uriStart) + peekMaxURILen(currentCharacter<CharacterType>(), quote));
        UChar* result16 = uriStart16;
        for (CharacterType* decoded = uriStart; decoded < dest; ++decoded)
            *result16++ = *decoded;

        bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);
        ASSERT_UNUSED(result, result);
        string.init(uriStart16, result16 - uriStart16);
    }

    // Continue scanning just past the closing ')'.
    currentCharacter<CharacterType>() = uriEnd + 1;
    m_token = URI;
}
// Tries to scan the remainder of a unicode-range token. The current character
// must be the '+' that follows 'u'/'U'. Accepted forms after the '+':
//   - up to six hex digits followed by '?' wildcards, six characters in total;
//   - one to six hex digits, optionally followed by '-' and one to six hex
//     digits.
// On success the read cursor is moved past the range and true is returned;
// otherwise the cursor is left untouched and false is returned.
template <typename CharacterType>
inline bool CSSTokenizer::parseUnicodeRange()
{
    CharacterType* cursor = currentCharacter<CharacterType>() + 1;
    int remaining = 6;
    ASSERT(*currentCharacter<CharacterType>() == '+');

    for (; isASCIIHexDigit(*cursor) && remaining; --remaining)
        ++cursor;

    // Wildcard form: the '?' characters use up the remaining positions.
    if (remaining && *cursor == '?') {
        do {
            ++cursor;
            --remaining;
        } while (*cursor == '?' && remaining);
        currentCharacter<CharacterType>() = cursor;
        return true;
    }

    // No hex digit was consumed at all.
    if (remaining >= 6)
        return false;

    // Optional "-XXXXXX" upper bound.
    if (cursor[0] == '-' && isASCIIHexDigit(cursor[1])) {
        ++cursor;
        remaining = 6;
        do {
            ++cursor;
        } while (--remaining && isASCIIHexDigit(*cursor));
    }
    currentCharacter<CharacterType>() = cursor;
    return true;
}
// Scans optional decimal digits followed by 'n' or 'N' at the current
// character. On a match the read cursor is moved past the 'n' and true is
// returned; otherwise the cursor is left untouched.
template <typename CharacterType>
bool CSSTokenizer::parseNthChild()
{
    CharacterType* cursor = currentCharacter<CharacterType>();
    for (; isASCIIDigit(*cursor); ++cursor) { }

    if (!isASCIIAlphaCaselessEqual(*cursor, 'n'))
        return false;

    currentCharacter<CharacterType>() = cursor + 1;
    return true;
}
// Scans the optional offset part of an an+b expression at the current
// character: whitespace, a '+' or '-' sign, whitespace, then one or more
// decimal digits. On a match the read cursor is moved past the digits and
// true is returned; otherwise the cursor is left untouched.
template <typename CharacterType>
bool CSSTokenizer::parseNthChildExtra()
{
    CharacterType* cursor = skipWhiteSpace(currentCharacter<CharacterType>());

    const bool hasSign = *cursor == '+' || *cursor == '-';
    if (!hasSign)
        return false;

    cursor = skipWhiteSpace(cursor + 1);
    if (!isASCIIDigit(*cursor))
        return false;

    for (++cursor; isASCIIDigit(*cursor); ++cursor) { }

    currentCharacter<CharacterType>() = cursor;
    return true;
}
// Classifies the function name that starts at the token start and spans
// |length| characters (must be > 0).
// Returns true when the name is one of the names listed below, false
// otherwise. For not/url/cue/calc/host/host-context m_token is replaced by the
// matching token; for the nth-* names m_token is left alone and the tokenizer
// is switched to NthChildMode instead.
// NOTE(review): SWITCH/CASE are not defined in this file; presumably they
// compare |name| over |length| characters against each literal - confirm
// against their definition before reformatting this body.
template <typename CharacterType>
inline bool CSSTokenizer::detectFunctionTypeToken(int length)
{
ASSERT(length > 0);
CharacterType* name = tokenStart<CharacterType>();
SWITCH(name, length) {
CASE("not") {
m_token = NOTFUNCTION;
return true;
}
CASE("url") {
m_token = URI;
return true;
}
CASE("cue") {
m_token = CUEFUNCTION;
return true;
}
CASE("calc") {
m_token = CALCFUNCTION;
return true;
}
CASE("host") {
m_token = HOSTFUNCTION;
return true;
}
CASE("host-context") {
m_token = HOSTCONTEXTFUNCTION;
return true;
}
CASE("nth-child") {
m_parsingMode = NthChildMode;
return true;
}
CASE("nth-of-type") {
m_parsingMode = NthChildMode;
return true;
}
CASE("nth-last-child") {
m_parsingMode = NthChildMode;
return true;
}
CASE("nth-last-of-type") {
m_parsingMode = NthChildMode;
return true;
}
}
return false;
}
// Media-query mode only (asserted): replaces m_token with MEDIA_AND,
// MEDIA_NOT, MEDIA_ONLY or MEDIA_OR when the identifier that starts at the
// token start and spans |length| characters is "and", "not", "only" or "or".
// m_token is left unchanged for any other identifier.
// NOTE(review): SWITCH/CASE are not defined in this file - confirm their
// matching semantics against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectMediaQueryToken(int length)
{
ASSERT(m_parsingMode == MediaQueryMode);
CharacterType* name = tokenStart<CharacterType>();
SWITCH(name, length) {
CASE("and") {
m_token = MEDIA_AND;
}
CASE("not") {
m_token = MEDIA_NOT;
}
CASE("only") {
m_token = MEDIA_ONLY;
}
CASE("or") {
m_token = MEDIA_OR;
}
}
}
// Replaces m_token with the unit-specific token when the unit suffix at |type|
// (spanning |length| characters, which must be > 0) is one of the units listed
// below. m_token is left unchanged for any other suffix; realLex() sets it to
// DIMEN before calling this function.
// NOTE(review): SWITCH/CASE are not defined in this file - confirm their
// matching semantics against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
{
ASSERT(length > 0);
SWITCH(type, length) {
CASE("cm") {
m_token = CMS;
}
CASE("ch") {
m_token = CHS;
}
CASE("deg") {
m_token = DEGS;
}
CASE("dppx") {
m_token = DPPX;
}
CASE("dpcm") {
m_token = DPCM;
}
CASE("dpi") {
m_token = DPI;
}
CASE("em") {
m_token = EMS;
}
CASE("ex") {
m_token = EXS;
}
CASE("fr") {
m_token = FR;
}
CASE("grad") {
m_token = GRADS;
}
CASE("hz") {
m_token = HERTZ;
}
CASE("in") {
m_token = INS;
}
CASE("khz") {
m_token = KHERTZ;
}
CASE("mm") {
m_token = MMS;
}
CASE("ms") {
m_token = MSECS;
}
CASE("px") {
m_token = PXS;
}
CASE("pt") {
m_token = PTS;
}
CASE("pc") {
m_token = PCS;
}
CASE("rad") {
m_token = RADS;
}
CASE("rem") {
m_token = REMS;
}
CASE("s") {
m_token = SECS;
}
CASE("turn") {
m_token = TURNS;
}
CASE("vw") {
m_token = VW;
}
CASE("vh") {
m_token = VH;
}
CASE("vmin") {
m_token = VMIN;
}
CASE("vmax") {
m_token = VMAX;
}
CASE("__qem") {
m_token = QEMS;
}
}
}
// Classifies a function name that begins with '-'. The first character of the
// token is skipped before matching, so the literals below omit the leading
// dash. Replaces m_token with ANYFUNCTION, MINFUNCTION, MAXFUNCTION or
// CALCFUNCTION on a match and leaves it unchanged otherwise.
// |length| is the length of the name including the leading dash.
// NOTE(review): SWITCH/CASE are not defined in this file - confirm their
// matching semantics against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectDashToken(int length)
{
CharacterType* name = tokenStart<CharacterType>();
++name;
--length;
SWITCH(name, length) {
CASE("webkit-any") {
m_token = ANYFUNCTION;
}
CASE("webkit-min") {
m_token = MINFUNCTION;
}
CASE("webkit-max") {
m_token = MAXFUNCTION;
}
CASE("webkit-calc") {
m_token = CALCFUNCTION;
}
}
}
// Classifies an at-keyword. The token must start with '@' and |length| (which
// includes the '@') must be at least 2; the '@' is skipped before matching.
// On a match m_token is replaced by the rule-specific token; otherwise it is
// left unchanged (realLex() sets it to ATKEYWORD before calling).
// Conditions visible in the cases below:
//   - the page-margin names (top-left, bottom-center, left-middle, ...) are
//     only recognized when the keyword contained no escape (|hasEscape|);
//   - "charset" is only recognized when the '@' is the very first character
//     of the input buffer;
//   - "keyframes" is only recognized when
//     RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled() is true;
//   - the "-internal-*" names are only recognized when m_internal is set
//     (most of them additionally require no escape);
//   - "import", "media" and "-internal-medialist" switch the tokenizer to
//     MediaQueryMode; "supports" and "-internal-supports-condition" switch it
//     to SupportsMode.
// NOTE(review): SWITCH/CASE are not defined in this file - confirm their
// matching semantics against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
{
CharacterType* name = tokenStart<CharacterType>();
ASSERT(name[0] == '@' && length >= 2);
++name;
--length;
SWITCH(name, length) {
CASE("bottom-left") {
if (LIKELY(!hasEscape))
m_token = BOTTOMLEFT_SYM;
}
CASE("bottom-right") {
if (LIKELY(!hasEscape))
m_token = BOTTOMRIGHT_SYM;
}
CASE("bottom-center") {
if (LIKELY(!hasEscape))
m_token = BOTTOMCENTER_SYM;
}
CASE("bottom-left-corner") {
if (LIKELY(!hasEscape))
m_token = BOTTOMLEFTCORNER_SYM;
}
CASE("bottom-right-corner") {
if (LIKELY(!hasEscape))
m_token = BOTTOMRIGHTCORNER_SYM;
}
CASE("charset") {
if (name - 1 == dataStart<CharacterType>())
m_token = CHARSET_SYM;
}
CASE("font-face") {
m_token = FONT_FACE_SYM;
}
CASE("import") {
m_parsingMode = MediaQueryMode;
m_token = IMPORT_SYM;
}
CASE("keyframes") {
if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
m_token = KEYFRAMES_SYM;
}
CASE("left-top") {
if (LIKELY(!hasEscape))
m_token = LEFTTOP_SYM;
}
CASE("left-middle") {
if (LIKELY(!hasEscape))
m_token = LEFTMIDDLE_SYM;
}
CASE("left-bottom") {
if (LIKELY(!hasEscape))
m_token = LEFTBOTTOM_SYM;
}
CASE("media") {
m_parsingMode = MediaQueryMode;
m_token = MEDIA_SYM;
}
CASE("namespace") {
m_token = NAMESPACE_SYM;
}
CASE("page") {
m_token = PAGE_SYM;
}
CASE("right-top") {
if (LIKELY(!hasEscape))
m_token = RIGHTTOP_SYM;
}
CASE("right-middle") {
if (LIKELY(!hasEscape))
m_token = RIGHTMIDDLE_SYM;
}
CASE("right-bottom") {
if (LIKELY(!hasEscape))
m_token = RIGHTBOTTOM_SYM;
}
CASE("supports") {
m_parsingMode = SupportsMode;
m_token = SUPPORTS_SYM;
}
CASE("top-left") {
if (LIKELY(!hasEscape))
m_token = TOPLEFT_SYM;
}
CASE("top-right") {
if (LIKELY(!hasEscape))
m_token = TOPRIGHT_SYM;
}
CASE("top-center") {
if (LIKELY(!hasEscape))
m_token = TOPCENTER_SYM;
}
CASE("top-left-corner") {
if (LIKELY(!hasEscape))
m_token = TOPLEFTCORNER_SYM;
}
CASE("top-right-corner") {
if (LIKELY(!hasEscape))
m_token = TOPRIGHTCORNER_SYM;
}
CASE("viewport") {
m_token = VIEWPORT_RULE_SYM;
}
CASE("-internal-rule") {
if (LIKELY(!hasEscape && m_internal))
m_token = INTERNAL_RULE_SYM;
}
CASE("-internal-decls") {
if (LIKELY(!hasEscape && m_internal))
m_token = INTERNAL_DECLS_SYM;
}
CASE("-internal-value") {
if (LIKELY(!hasEscape && m_internal))
m_token = INTERNAL_VALUE_SYM;
}
CASE("-webkit-keyframes") {
m_token = WEBKIT_KEYFRAMES_SYM;
}
CASE("-internal-selector") {
if (LIKELY(!hasEscape && m_internal))
m_token = INTERNAL_SELECTOR_SYM;
}
CASE("-internal-medialist") {
if (!m_internal)
return;
m_parsingMode = MediaQueryMode;
m_token = INTERNAL_MEDIALIST_SYM;
}
CASE("-internal-keyframe-rule") {
if (LIKELY(!hasEscape && m_internal))
m_token = INTERNAL_KEYFRAME_RULE_SYM;
}
CASE("-internal-keyframe-key-list") {
if (!m_internal)
return;
m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
}
CASE("-internal-supports-condition") {
if (!m_internal)
return;
m_parsingMode = SupportsMode;
m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
}
}
}
// Supports mode only (asserted): replaces m_token with SUPPORTS_OR,
// SUPPORTS_AND or SUPPORTS_NOT when the identifier that starts at the token
// start and spans |length| characters is "or", "and" or "not". m_token is left
// unchanged for any other identifier.
// NOTE(review): SWITCH/CASE are not defined in this file - confirm their
// matching semantics against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectSupportsToken(int length)
{
ASSERT(m_parsingMode == SupportsMode);
CharacterType* name = tokenStart<CharacterType>();
SWITCH(name, length) {
CASE("or") {
m_token = SUPPORTS_OR;
}
CASE("and") {
m_token = SUPPORTS_AND;
}
CASE("not") {
m_token = SUPPORTS_NOT;
}
}
}
// Scans one token starting at the current character and returns its token
// code (also left in m_token). The token's value, if any, is stored in the
// YYSTYPE object |yylvalWithoutType| points to. Block comments are skipped and
// scanning restarts behind them.
// m_token is first set to the raw character code, so a character that does
// not start a longer token is returned as itself.
// Several cases of the switch fall through on purpose; they are marked below.
template <typename SrcCharacterType>
int CSSTokenizer::realLex(void* yylvalWithoutType)
{
YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
// Write position for decoded output; decoding happens in place over the
// source buffer unless a 16-bit buffer has to be allocated.
SrcCharacterType* result;
CSSParserString resultString;
bool hasEscape;
#ifndef NDEBUG
yylval->string.clear();
#endif
restartAfterComment:
result = currentCharacter<SrcCharacterType>();
setTokenStart(result);
m_tokenStartLineNumber = m_lineNumber;
m_token = *currentCharacter<SrcCharacterType>();
++currentCharacter<SrcCharacterType>();
// Codes above 127 are not in the table and always start an identifier.
switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
case CharacterCaselessU:
// 'u'/'U' followed by '+' may start a unicode-range.
if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
if (parseUnicodeRange<SrcCharacterType>()) {
m_token = UNICODERANGE;
yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
break;
}
}
// Intentional fall-through: otherwise 'u' is an ordinary identifier start.
case CharacterIdentifierStart:
// Step back onto the first character so the whole identifier is parsed.
--currentCharacter<SrcCharacterType>();
parseIdentifier(result, yylval->string, hasEscape);
m_token = IDENT;
if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
// Identifier directly followed by '(' is a function, unless supports mode
// recognizes it as one of its keywords.
if (m_parsingMode == SupportsMode && !hasEscape) {
detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
if (m_token != IDENT)
break;
}
m_token = FUNCTION;
if (!hasEscape)
detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
// Include the '(' in the token text.
++currentCharacter<SrcCharacterType>();
++result;
++yylval->string.m_length;
if (m_token == URI) {
// "url(" stays a FUNCTION unless parseURI() accepts the body, in which case
// parseURI() sets m_token back to URI.
m_token = FUNCTION;
if (yylval->string.is8Bit())
parseURI<LChar>(yylval->string);
else
parseURI<UChar>(yylval->string);
}
} else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
if (m_parsingMode == MediaQueryMode) {
detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
} else if (m_parsingMode == SupportsMode) {
detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
} else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
if (result - tokenStart<SrcCharacterType>() == 1) {
// A lone "n", possibly followed by a signed offset.
if (parseNthChildExtra<SrcCharacterType>()) {
m_token = NTH;
yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
}
} else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
// "n-..." was scanned as one identifier; retry from just after the "n" and
// restore the cursor when that does not form an offset.
SrcCharacterType* nextCharacter = result;
currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
if (parseNthChildExtra<SrcCharacterType>()) {
m_token = NTH;
yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
} else {
currentCharacter<SrcCharacterType>() = nextCharacter;
}
}
}
}
break;
case CharacterDot:
// A '.' not followed by a digit is returned as a plain '.' token.
if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
break;
// Intentional fall-through: ".5" is a number.
case CharacterNumber: {
bool dotSeen = (m_token == '.');
// Consume digits and at most one '.' that is followed by a digit.
while (true) {
if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
break;
dotSeen = true;
}
++currentCharacter<SrcCharacterType>();
}
// In nth-child mode an integer directly followed by 'n' is an an+b
// expression.
if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
++currentCharacter<SrcCharacterType>();
parseNthChildExtra<SrcCharacterType>();
m_token = NTH;
yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
break;
}
if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
SrcCharacterType* character = currentCharacter<SrcCharacterType>();
// An 'e' followed by a sign or a digit is taken as an exponent; otherwise
// the cursor is not moved, so the 'e' can still start a unit.
if (isASCIIAlphaCaselessEqual(*character, 'e')) {
ASSERT(character - tokenStart<SrcCharacterType>() > 0);
++character;
if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
++character;
while (isASCIIDigit(*character))
++character;
// Numbers with an exponent are reported as FLOATTOKEN.
dotSeen = true;
currentCharacter<SrcCharacterType>() = character;
}
}
if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
break;
} else {
yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
}
if (isIdentifierStart<SrcCharacterType>()) {
// Number followed by an identifier: a known unit or a generic DIMEN.
SrcCharacterType* type = currentCharacter<SrcCharacterType>();
result = currentCharacter<SrcCharacterType>();
parseIdentifier(result, resultString, hasEscape);
m_token = DIMEN;
if (!hasEscape)
detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);
if (m_token == DIMEN) {
// Unknown unit: hand the whole token text (number and unit) to the parser.
yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
}
} else if (*currentCharacter<SrcCharacterType>() == '%') {
// All consecutive '%' characters are consumed.
do {
++currentCharacter<SrcCharacterType>();
} while (*currentCharacter<SrcCharacterType>() == '%');
m_token = PERCENTAGE;
} else {
m_token = dotSeen ? FLOATTOKEN : INTEGER;
}
break;
}
case CharacterDash:
if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
// "-ident": step back onto the dash and parse the whole identifier.
--currentCharacter<SrcCharacterType>();
parseIdentifier(result, resultString, hasEscape);
m_token = IDENT;
if (*currentCharacter<SrcCharacterType>() == '(') {
m_token = FUNCTION;
if (!hasEscape)
detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
// Include the '(' in the token text.
++currentCharacter<SrcCharacterType>();
++result;
} else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
if (result - tokenStart<SrcCharacterType>() == 2) {
// Exactly "-n", possibly followed by a signed offset.
if (parseNthChildExtra<SrcCharacterType>()) {
m_token = NTH;
result = currentCharacter<SrcCharacterType>();
}
} else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
// "-n-..." was scanned as one identifier; retry from just after the "-n"
// and restore the cursor when that does not form an offset.
SrcCharacterType* nextCharacter = result;
currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
if (parseNthChildExtra<SrcCharacterType>()) {
m_token = NTH;
result = currentCharacter<SrcCharacterType>();
} else {
currentCharacter<SrcCharacterType>() = nextCharacter;
}
}
}
resultString.setLength(result - tokenStart<SrcCharacterType>());
yylval->string = resultString;
} else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
// "-->"
currentCharacter<SrcCharacterType>() += 2;
m_token = SGML_CD;
} else if (UNLIKELY(m_parsingMode == NthChildMode)) {
// "-" followed by optional digits and 'n'.
if (parseNthChild<SrcCharacterType>()) {
parseNthChildExtra<SrcCharacterType>();
m_token = NTH;
yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
}
}
break;
case CharacterOther:
// Returned as the character itself.
break;
case CharacterNull:
// Do not move past the terminating NUL.
--currentCharacter<SrcCharacterType>();
break;
case CharacterWhiteSpace:
m_token = WHITESPACE;
// Step back so a leading '\n' is counted as well, then consume the whole run.
--currentCharacter<SrcCharacterType>();
do {
if (*currentCharacter<SrcCharacterType>() == '\n')
++m_lineNumber;
++currentCharacter<SrcCharacterType>();
} while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
break;
case CharacterEndMediaQueryOrSupports:
// ';' or '{' ends media-query and supports mode.
if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
m_parsingMode = NormalMode;
break;
case CharacterEndNthChild:
// ')' ends nth-child mode.
if (m_parsingMode == NthChildMode)
m_parsingMode = NormalMode;
break;
case CharacterQuote:
// Validate first; an invalid string leaves the quote as a plain character
// token.
if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token)) {
// Skip the opening quote in the output position.
++result;
parseString<SrcCharacterType>(result, yylval->string, m_token);
m_token = STRING;
}
break;
case CharacterExclamationMark: {
// '!' followed by optional whitespace and "important" (ASCII
// case-insensitive).
SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
if (isEqualToCSSIdentifier(start, "important")) {
m_token = IMPORTANT_SYM;
// 9 is the length of "important".
currentCharacter<SrcCharacterType>() = start + 9;
}
break;
}
case CharacterHashmark: {
SrcCharacterType* start = currentCharacter<SrcCharacterType>();
result = currentCharacter<SrcCharacterType>();
if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
// '#' followed by a digit: consume the run of hex digits as HEX.
do {
++currentCharacter<SrcCharacterType>();
} while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
m_token = HEX;
yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
} else if (isIdentifierStart<SrcCharacterType>()) {
m_token = IDSEL;
parseIdentifier(result, yylval->string, hasEscape);
if (!hasEscape) {
// An unescaped name made up only of hex digits is reported as HEX instead.
SrcCharacterType* current = start;
m_token = HEX;
do {
if (!isASCIIHexDigit(*current)) {
m_token = IDSEL;
break;
}
++current;
} while (current < result);
}
}
break;
}
case CharacterSlash:
if (*currentCharacter<SrcCharacterType>() == '*') {
// Block comment: skip it and restart scanning behind it.
const CSSParserLocation startLocation = currentLocation();
if (m_parser.m_observer) {
unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1;
m_parser.m_observer->startComment(startOffset - m_parsedTextPrefixLength);
}
++currentCharacter<SrcCharacterType>();
while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
if (*currentCharacter<SrcCharacterType>() == '\n')
++m_lineNumber;
if (*currentCharacter<SrcCharacterType>() == '\0') {
// Unterminated comment: back up by two so that the "+= 2" below leaves the
// cursor on the terminating NUL.
currentCharacter<SrcCharacterType>() -= 2;
m_parser.reportError(startLocation, UnterminatedCommentCSSError);
break;
}
++currentCharacter<SrcCharacterType>();
}
// Skip the closing "*/".
currentCharacter<SrcCharacterType>() += 2;
if (m_parser.m_observer) {
unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
m_parser.m_observer->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
}
goto restartAfterComment;
}
break;
case CharacterDollar:
// "$="
if (*currentCharacter<SrcCharacterType>() == '=') {
++currentCharacter<SrcCharacterType>();
m_token = ENDSWITH;
}
break;
case CharacterAsterisk:
// "*="
if (*currentCharacter<SrcCharacterType>() == '=') {
++currentCharacter<SrcCharacterType>();
m_token = CONTAINS;
}
break;
case CharacterPlus:
// In nth-child mode '+' may start an an+b expression.
if (UNLIKELY(m_parsingMode == NthChildMode)) {
if (parseNthChild<SrcCharacterType>()) {
parseNthChildExtra<SrcCharacterType>();
m_token = NTH;
yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
}
}
break;
case CharacterLess:
// "<!--" is reported with the same token as "-->".
if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
currentCharacter<SrcCharacterType>() += 3;
m_token = SGML_CD;
}
break;
case CharacterAt:
if (isIdentifierStart<SrcCharacterType>()) {
m_token = ATKEYWORD;
// Skip the '@' in the output position.
++result;
parseIdentifier(result, resultString, hasEscape);
detectAtToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>(), hasEscape);
}
break;
case CharacterBackSlash:
// A valid escape starts an identifier.
if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
--currentCharacter<SrcCharacterType>();
parseIdentifier(result, yylval->string, hasEscape);
m_token = IDENT;
}
break;
case CharacterXor:
// "^="
if (*currentCharacter<SrcCharacterType>() == '=') {
++currentCharacter<SrcCharacterType>();
m_token = BEGINSWITH;
}
break;
case CharacterVerticalBar:
// "|="
if (*currentCharacter<SrcCharacterType>() == '=') {
++currentCharacter<SrcCharacterType>();
m_token = DASHMATCH;
}
break;
case CharacterTilde:
// "~="
if (*currentCharacter<SrcCharacterType>() == '=') {
++currentCharacter<SrcCharacterType>();
m_token = INCLUDES;
}
break;
default:
ASSERT_NOT_REACHED();
break;
}
return m_token;
}
// 8-bit specialization: records |tokenStart| as the start of the current
// token in the 8-bit member of m_tokenStart.
template <>
inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
{
m_tokenStart.ptr8 = tokenStart;
}
// 16-bit specialization: records |tokenStart| as the start of the current
// token in the 16-bit member of m_tokenStart.
template <>
inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
{
m_tokenStart.ptr16 = tokenStart;
}
// Builds the tokenizer input as |prefix| + |string| + |suffix| followed by a
// terminating NUL, and points the read cursor, the token start and the lexer
// function at it.
// An 8-bit buffer is used when |string| is empty or 8-bit; otherwise a 16-bit
// buffer is used and the prefix/suffix characters are widened one by one.
// The cursor of the unused width is reset to 0.
void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
{
    m_parsedTextPrefixLength = prefixLength;
    m_parsedTextSuffixLength = suffixLength;

    unsigned stringLength = string.length();
    // One extra slot for the terminating NUL.
    unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
    m_length = length;

    // Offset at which the suffix is placed.
    unsigned suffixBegin = m_parsedTextPrefixLength + stringLength;

    if (!stringLength || string.is8Bit()) {
        m_dataStart8 = adoptArrayPtr(new LChar[length]);

        for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
            m_dataStart8[i] = prefix[i];

        if (stringLength)
            memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));

        for (unsigned i = 0; i < suffixLength; i++)
            m_dataStart8[suffixBegin + i] = suffix[i];

        m_dataStart8[length - 1] = 0;

        m_is8BitSource = true;
        m_currentCharacter8 = m_dataStart8.get();
        m_currentCharacter16 = 0;
        setTokenStart<LChar>(m_currentCharacter8);
        m_lexFunc = &CSSTokenizer::realLex<LChar>;
    } else {
        m_dataStart16 = adoptArrayPtr(new UChar[length]);

        for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
            m_dataStart16[i] = prefix[i];

        ASSERT(stringLength);
        memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));

        for (unsigned i = 0; i < suffixLength; i++)
            m_dataStart16[suffixBegin + i] = suffix[i];

        m_dataStart16[length - 1] = 0;

        m_is8BitSource = false;
        m_currentCharacter8 = 0;
        m_currentCharacter16 = m_dataStart16.get();
        setTokenStart<UChar>(m_currentCharacter16);
        m_lexFunc = &CSSTokenizer::realLex<UChar>;
    }
}
}