This source file includes the following definitions.
- charCat
- setUpAxisNamesMap
- isAxisName
- isNodeTypeName
- isBinaryOperatorContext
- skipWS
- makeTokenAndAdvance
- makeTokenAndAdvance
- makeTokenAndAdvance
- peekAheadHelper
- peekCurHelper
- lexString
- lexNumber
- lexNCName
- lexQName
- nextTokenInternal
- nextToken
- reset
- lex
- expandQName
- parseStatement
- registerParseNode
- unregisterParseNode
- registerPredicateVector
- deletePredicateVector
- registerExpressionVector
- deleteExpressionVector
- registerString
- deleteString
- registerNodeTest
- deleteNodeTest
#include "config.h"
#include "core/xml/XPathParser.h"
#include "bindings/v8/ExceptionState.h"
#include "core/dom/ExceptionCode.h"
#include "core/xml/XPathEvaluator.h"
#include "core/xml/XPathNSResolver.h"
#include "core/xml/XPathPath.h"
#include "wtf/StdLibExtras.h"
#include "wtf/text/StringHash.h"
using namespace WebCore;
using namespace WTF;
using namespace Unicode;
using namespace XPath;
#include "XPathGrammar.h"
// The parser instance that is currently running xpathyyparse(). parseStatement()
// installs |this| here for the duration of the parse and restores the previous
// value afterwards.
Parser* Parser::currentParser = 0;
// Classification of a single character with respect to XML names, as computed
// by charCat().
enum XMLCat { NameStart, NameCont, NotPartOfName };
// Lookup table from an axis name string (e.g. "child") to its Step::Axis value.
typedef HashMap<String, Step::Axis> AxisNamesMap;
// Classifies |aChar| for name lexing: NameStart if it may begin a name,
// NameCont if it may only appear after the first character, and
// NotPartOfName otherwise.
static XMLCat charCat(UChar aChar)
{
    // A few ASCII punctuation characters are classified directly, without
    // consulting the Unicode tables.
    switch (aChar) {
    case '_':
        return NameStart;
    case '.':
    case '-':
        return NameCont;
    default:
        break;
    }

    // Unicode general categories that may start a name.
    const unsigned nameStartCategories = Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter;
    // Unicode general categories that may only continue a name.
    const unsigned nameContCategories = Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit;

    const CharCategory category = Unicode::category(aChar);
    if (category & nameStartCategories)
        return NameStart;
    if (category & nameContCategories)
        return NameCont;
    return NotPartOfName;
}
// Fills |axisNames| with one entry for each of the thirteen axis names listed
// below, mapping the name to the matching Step::Axis value.
static void setUpAxisNamesMap(AxisNamesMap& axisNames)
{
    struct AxisName {
        const char* name;
        Step::Axis axis;
    };
    const AxisName knownAxes[] = {
        { "ancestor", Step::AncestorAxis },
        { "ancestor-or-self", Step::AncestorOrSelfAxis },
        { "attribute", Step::AttributeAxis },
        { "child", Step::ChildAxis },
        { "descendant", Step::DescendantAxis },
        { "descendant-or-self", Step::DescendantOrSelfAxis },
        { "following", Step::FollowingAxis },
        { "following-sibling", Step::FollowingSiblingAxis },
        { "namespace", Step::NamespaceAxis },
        { "parent", Step::ParentAxis },
        { "preceding", Step::PrecedingAxis },
        { "preceding-sibling", Step::PrecedingSiblingAxis },
        { "self", Step::SelfAxis }
    };

    const AxisName* const tableEnd = knownAxes + sizeof(knownAxes) / sizeof(knownAxes[0]);
    for (const AxisName* entry = knownAxes; entry != tableEnd; ++entry)
        axisNames.set(entry->name, entry->axis);
}
static bool isAxisName(const String& name, Step::Axis& type)
{
DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
if (axisNames.isEmpty())
setUpAxisNamesMap(axisNames);
AxisNamesMap::iterator it = axisNames.find(name);
if (it == axisNames.end())
return false;
type = it->value;
return true;
}
// Returns true if |name| is one of the node type names "comment", "text",
// "processing-instruction" or "node".
static bool isNodeTypeName(const String& name)
{
    // The set is populated on first use.
    DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    if (nodeTypeNames.isEmpty()) {
        const char* const knownNames[] = {
            "comment",
            "text",
            "processing-instruction",
            "node"
        };
        for (unsigned i = 0; i < sizeof(knownNames) / sizeof(knownNames[0]); ++i)
            nodeTypeNames.add(knownNames[i]);
    }
    return nodeTypeNames.contains(name);
}
// Decides, from the type of the previously returned token, whether the lexer
// is at a position where a binary operator may appear. The lexer uses this to
// tell the '*' operator from the '*' name test, and the and/or/mod/div
// operators from ordinary names.
bool Parser::isBinaryOperatorContext() const
{
    switch (m_lastTokenType) {
    // No token has been produced yet.
    case 0:
    // Tokens that must be followed by an operand rather than an operator.
    case '@':
    case AXISNAME:
    case '(':
    case '[':
    case ',':
    // Another operator was just seen.
    case AND:
    case OR:
    case MULOP:
    case '/':
    case SLASHSLASH:
    case '|':
    case PLUS:
    case MINUS:
    case EQOP:
    case RELOP:
        return false;
    }
    return true;
}
// Advances m_nextPos past any run of whitespace (as defined by
// isSpaceOrNewline()) starting at the current position.
void Parser::skipWS()
{
    const unsigned length = m_data.length();
    for (; m_nextPos < length; ++m_nextPos) {
        if (!isSpaceOrNewline(m_data[m_nextPos]))
            break;
    }
}
// Builds a token of type |code| and moves the read position forward by
// |advance| characters.
Token Parser::makeTokenAndAdvance(int code, int advance)
{
    Token token(code);
    m_nextPos += advance;
    return token;
}
// Builds a token of type |code| carrying the numeric operator |val| and moves
// the read position forward by |advance| characters.
Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
{
    Token token(code, val);
    m_nextPos += advance;
    return token;
}
// Builds a token of type |code| carrying the comparison operator |val| and
// moves the read position forward by |advance| characters.
Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
{
    Token token(code, val);
    m_nextPos += advance;
    return token;
}
char Parser::peekAheadHelper()
{
if (m_nextPos + 1 >= m_data.length())
return 0;
UChar next = m_data[m_nextPos + 1];
if (next >= 0xff)
return 0;
return next;
}
char Parser::peekCurHelper()
{
if (m_nextPos >= m_data.length())
return 0;
UChar next = m_data[m_nextPos];
if (next >= 0xff)
return 0;
return next;
}
// Lexes a quoted literal. m_nextPos must be on the opening quote; the same
// character terminates the literal. On success the position ends up just past
// the closing quote and a LITERAL token holding the text between the quotes is
// returned. An unterminated literal yields XPATH_ERROR.
Token Parser::lexString()
{
    const UChar quote = m_data[m_nextPos];
    const unsigned contentStart = m_nextPos + 1;

    m_nextPos = contentStart;
    while (m_nextPos < m_data.length()) {
        if (m_data[m_nextPos] != quote) {
            ++m_nextPos;
            continue;
        }

        String literal = m_data.substring(contentStart, m_nextPos - contentStart);
        // Make sure the token carries an empty string rather than a null one.
        if (literal.isNull())
            literal = "";
        ++m_nextPos;
        return Token(LITERAL, literal);
    }

    // Ran off the end of the input without finding the closing quote.
    return Token(XPATH_ERROR);
}
// Lexes a run of ASCII digits containing at most one '.', starting at
// m_nextPos, and returns it as a NUMBER token holding the matched text.
Token Parser::lexNumber()
{
    const unsigned begin = m_nextPos;
    bool haveDecimalPoint = false;

    while (m_nextPos < m_data.length()) {
        const UChar current = m_data[m_nextPos];
        if (current >= 0xff)
            break;

        const bool isDigit = current >= '0' && current <= '9';
        if (!isDigit) {
            // Only the first '.' belongs to the number; anything else ends it.
            if (current != '.' || haveDecimalPoint)
                break;
            haveDecimalPoint = true;
        }
        ++m_nextPos;
    }

    return Token(NUMBER, m_data.substring(begin, m_nextPos - begin));
}
// Lexes a name without a colon starting at m_nextPos. The first character must
// be classified as NameStart by charCat(); the name then extends up to the
// first character classified as NotPartOfName. On success stores the name in
// |name|, leaves m_nextPos just past it and returns true. On failure returns
// false without moving m_nextPos or touching |name|.
bool Parser::lexNCName(String& name)
{
    const unsigned begin = m_nextPos;
    if (begin >= m_data.length() || charCat(m_data[begin]) != NameStart)
        return false;

    while (m_nextPos < m_data.length() && charCat(m_data[m_nextPos]) != NotPartOfName)
        ++m_nextPos;

    name = m_data.substring(begin, m_nextPos - begin);
    return true;
}
// Lexes a qualified name at m_nextPos: an NCName, optionally followed by ':'
// and a second NCName. On success stores either "local" or "prefix:local" in
// |name| and returns true; returns false if no name can be lexed or if a ':'
// is not followed by a name.
bool Parser::lexQName(String& name)
{
    String prefixOrLocal;
    if (!lexNCName(prefixOrLocal))
        return false;

    skipWS();

    // After lexNCName() and skipWS(), m_nextPos is already on the character
    // that follows the name, so that is where a ':' separator would be.
    // Looking one character further (peekAheadHelper()) would miss it and
    // could instead pick up a ':' belonging to a later token.
    if (peekCurHelper() != ':') {
        name = prefixOrLocal;
        return true;
    }

    // Step over the ':' so that lexNCName() starts on the local part.
    ++m_nextPos;

    String localPart;
    if (!lexNCName(localPart))
        return false;

    name = prefixOrLocal + ":" + localPart;
    return true;
}
// Lexes the next token from m_data starting at m_nextPos. Returns Token(0) at
// the end of the input and Token(XPATH_ERROR) when the input cannot be
// tokenized. This function does not update m_lastTokenType; nextToken() does.
Token Parser::nextTokenInternal()
{
skipWS();
if (m_nextPos >= m_data.length())
return Token(0);
// peekCurHelper() yields 0 for characters at or above 0xff; those match no
// case below and therefore fall through to the name-lexing path.
char code = peekCurHelper();
switch (code) {
// Single-character tokens whose token type is the character itself.
case '(': case ')': case '[': case ']':
case '@': case ',': case '|':
return makeTokenAndAdvance(code);
case '\'':
case '\"':
return lexString();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return lexNumber();
case '.': {
// "..", a number with a leading '.', or a lone '.'.
char next = peekAheadHelper();
if (next == '.')
return makeTokenAndAdvance(DOTDOT, 2);
if (next >= '0' && next <= '9')
return lexNumber();
return makeTokenAndAdvance('.');
}
case '/':
if (peekAheadHelper() == '/')
return makeTokenAndAdvance(SLASHSLASH, 2);
return makeTokenAndAdvance('/');
case '+':
return makeTokenAndAdvance(PLUS);
case '-':
return makeTokenAndAdvance(MINUS);
case '=':
return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
case '!':
// '!' is only valid as the first half of "!=".
if (peekAheadHelper() == '=')
return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
return Token(XPATH_ERROR);
case '<':
if (peekAheadHelper() == '=')
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
case '>':
if (peekAheadHelper() == '=')
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
case '*':
// '*' is the multiplication operator only where a binary operator may
// appear; everywhere else it is the wildcard name test.
if (isBinaryOperatorContext())
return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
++m_nextPos;
return Token(NAMETEST, "*");
case '$': {
// Variable reference: '$' followed by a qualified name.
m_nextPos++;
String name;
if (!lexQName(name))
return Token(XPATH_ERROR);
return Token(VARIABLEREFERENCE, name);
}
}
// Everything else has to start with a name.
String name;
if (!lexNCName(name))
return Token(XPATH_ERROR);
skipWS();
// and/or/mod/div are operators only where a binary operator may appear;
// elsewhere they are treated like any other name.
if (isBinaryOperatorContext()) {
if (name == "and")
return Token(AND);
if (name == "or")
return Token(OR);
if (name == "mod")
return Token(MULOP, NumericOp::OP_Mod);
if (name == "div")
return Token(MULOP, NumericOp::OP_Div);
}
if (peekCurHelper() == ':') {
m_nextPos++;
// "name::" must be an axis specifier.
if (peekCurHelper() == ':') {
m_nextPos++;
Step::Axis axis;
if (isAxisName(name, axis))
return Token(AXISNAME, axis);
return Token(XPATH_ERROR);
}
skipWS();
// "prefix:*" is a name test matching any local name with that prefix.
if (peekCurHelper() == '*') {
m_nextPos++;
return Token(NAMETEST, name + ":*");
}
// Otherwise the ':' must be followed by the local part of a prefixed name.
String n2;
if (!lexNCName(n2))
return Token(XPATH_ERROR);
name = name + ":" + n2;
}
skipWS();
// A name directly followed by '(' is a node type test, the
// processing-instruction test, or a function name. The '(' itself is not
// consumed here.
if (peekCurHelper() == '(') {
if (isNodeTypeName(name)) {
if (name == "processing-instruction")
return Token(PI, name);
return Token(NODETYPE, name);
}
return Token(FUNCTIONNAME, name);
}
return Token(NAMETEST, name);
}
// Lexes the next token and records its type in m_lastTokenType, which
// isBinaryOperatorContext() consults when lexing the following token.
Token Parser::nextToken()
{
    Token token = nextTokenInternal();
    m_lastTokenType = token.type;
    return token;
}
// Constructs a parser with no input; reset() puts the lexer position, input,
// last token type, result expression and namespace-error flag in their
// initial state.
Parser::Parser()
{
    const String noInput;
    reset(noInput);
}
// The destructor body is empty; nothing is released explicitly here.
Parser::~Parser()
{
}
// Prepares the parser for a new run over |data|: rewinds the read position,
// forgets the previous token type and result expression, and clears the
// namespace-error flag.
void Parser::reset(const String& data)
{
    // Lexer state.
    m_data = data;
    m_nextPos = 0;
    m_lastTokenType = 0;

    // Parse result state.
    m_topExpr = 0;
    m_gotNamespaceError = false;
}
int Parser::lex(void* data)
{
YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
Token tok = nextToken();
switch (tok.type) {
case AXISNAME:
yylval->axis = tok.axis;
break;
case MULOP:
yylval->numop = tok.numop;
break;
case RELOP:
case EQOP:
yylval->eqop = tok.eqop;
break;
case NODETYPE:
case PI:
case FUNCTIONNAME:
case LITERAL:
case VARIABLEREFERENCE:
case NUMBER:
case NAMETEST:
yylval->str = new String(tok.str);
registerString(yylval->str);
break;
}
return tok.type;
}
// Splits |qName| into |localName| and |namespaceURI|. A name without ':' sets
// only |localName|. For a prefixed name the prefix is resolved through
// m_resolver; returns false if there is no resolver or the lookup yields a
// null URI, in which case |localName| is not modified.
bool Parser::expandQName(const String& qName, AtomicString& localName, AtomicString& namespaceURI)
{
    const size_t colon = qName.find(':');

    // No prefix: the whole string is the local name.
    if (colon == kNotFound) {
        localName = AtomicString(qName);
        return true;
    }

    if (!m_resolver)
        return false;

    namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    if (namespaceURI.isNull())
        return false;

    localName = AtomicString(qName.substring(colon + 1));
    return true;
}
// Parses |statement| and returns the resulting expression, using |resolver|
// for namespace prefixes. On failure every object still registered with the
// parser is freed, a NamespaceError or SyntaxError DOM exception is raised on
// |exceptionState|, and 0 is returned.
Expression* Parser::parseStatement(const String& statement, PassRefPtrWillBeRawPtr<XPathNSResolver> resolver, ExceptionState& exceptionState)
{
    typedef HashSet<Vector<OwnPtr<Predicate> >*> PredicateVectorSet;
    typedef HashSet<Vector<OwnPtr<Expression> >*> ExpressionVectorSet;

    reset(statement);
    m_resolver = resolver;

    // Make this parser the current one only while the grammar runs.
    Parser* const previousParser = currentParser;
    currentParser = this;
    const int parseError = xpathyyparse(this);
    currentParser = previousParser;

    if (!parseError) {
        // After a successful parse only the top-level expression should
        // still be registered.
        ASSERT(m_parseNodes.size() == 1);
        ASSERT(*m_parseNodes.begin() == m_topExpr);
        ASSERT(m_expressionVectors.size() == 0);
        ASSERT(m_predicateVectors.size() == 0);
        ASSERT(m_strings.size() == 0);
        ASSERT(m_nodeTests.size() == 0);
        m_parseNodes.clear();

        Expression* parsed = m_topExpr;
        m_topExpr = 0;
        return parsed;
    }

    // The parse failed: free everything that is still registered.
    deleteAllValues(m_parseNodes);
    m_parseNodes.clear();

    const PredicateVectorSet::iterator predicatesEnd = m_predicateVectors.end();
    for (PredicateVectorSet::iterator it = m_predicateVectors.begin(); it != predicatesEnd; ++it)
        delete *it;
    m_predicateVectors.clear();

    const ExpressionVectorSet::iterator expressionsEnd = m_expressionVectors.end();
    for (ExpressionVectorSet::iterator it = m_expressionVectors.begin(); it != expressionsEnd; ++it)
        delete *it;
    m_expressionVectors.clear();

    deleteAllValues(m_strings);
    m_strings.clear();

    deleteAllValues(m_nodeTests);
    m_nodeTests.clear();

    m_topExpr = 0;

    if (m_gotNamespaceError)
        exceptionState.throwDOMException(NamespaceError, "The string '" + statement + "' contains unresolvable namespaces.");
    else
        exceptionState.throwDOMException(SyntaxError, "The string '" + statement + "' is not a valid XPath expression.");
    return 0;
}
// Starts tracking |node| in m_parseNodes so that parseStatement() can free it
// if the parse fails. Null pointers are ignored.
void Parser::registerParseNode(ParseNode* node)
{
    if (node) {
        ASSERT(!m_parseNodes.contains(node));
        m_parseNodes.add(node);
    }
}
// Stops tracking |node| in m_parseNodes without deleting it. Null pointers
// are ignored.
void Parser::unregisterParseNode(ParseNode* node)
{
    if (node) {
        ASSERT(m_parseNodes.contains(node));
        m_parseNodes.remove(node);
    }
}
// Starts tracking |vector| in m_predicateVectors so that parseStatement() can
// free it if the parse fails. Null pointers are ignored.
void Parser::registerPredicateVector(Vector<OwnPtr<Predicate> >* vector)
{
    if (vector) {
        ASSERT(!m_predicateVectors.contains(vector));
        m_predicateVectors.add(vector);
    }
}
// Stops tracking |vector| in m_predicateVectors and deletes it. Null pointers
// are ignored.
void Parser::deletePredicateVector(Vector<OwnPtr<Predicate> >* vector)
{
    if (vector) {
        ASSERT(m_predicateVectors.contains(vector));
        m_predicateVectors.remove(vector);
        delete vector;
    }
}
// Starts tracking |vector| in m_expressionVectors so that parseStatement()
// can free it if the parse fails. Null pointers are ignored.
void Parser::registerExpressionVector(Vector<OwnPtr<Expression> >* vector)
{
    if (vector) {
        ASSERT(!m_expressionVectors.contains(vector));
        m_expressionVectors.add(vector);
    }
}
// Stops tracking |vector| in m_expressionVectors and deletes it. Null
// pointers are ignored.
void Parser::deleteExpressionVector(Vector<OwnPtr<Expression> >* vector)
{
    if (vector) {
        ASSERT(m_expressionVectors.contains(vector));
        m_expressionVectors.remove(vector);
        delete vector;
    }
}
// Starts tracking |s| in m_strings so that parseStatement() can free it if
// the parse fails. Null pointers are ignored.
void Parser::registerString(String* s)
{
    if (s) {
        ASSERT(!m_strings.contains(s));
        m_strings.add(s);
    }
}
// Stops tracking |s| in m_strings and deletes it. Null pointers are ignored.
void Parser::deleteString(String* s)
{
    if (s) {
        ASSERT(m_strings.contains(s));
        m_strings.remove(s);
        delete s;
    }
}
// Starts tracking |t| in m_nodeTests so that parseStatement() can free it if
// the parse fails. Null pointers are ignored.
void Parser::registerNodeTest(Step::NodeTest* t)
{
    if (t) {
        ASSERT(!m_nodeTests.contains(t));
        m_nodeTests.add(t);
    }
}
// Stops tracking |t| in m_nodeTests and deletes it. Null pointers are ignored.
void Parser::deleteNodeTest(Step::NodeTest* t)
{
    if (t) {
        ASSERT(m_nodeTests.contains(t));
        m_nodeTests.remove(t);
        delete t;
    }
}