This source file includes the following definitions:
- ShortVisit
- ToString
- PreVisit
- AppendLiteral
- PostVisit
- AppendCCChar
- AppendCCRange
#include "util/util.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
namespace re2 {
// Precedence levels passed down the tree while printing.
// Each node receives one of these values from its parent (the root receives
// PrecToplevel) and wraps itself in "(?:" ... ")" when the value it received
// is numerically smaller than the level of its own operator.  The enumerators
// therefore ascend from the most restrictive context to the most permissive.
enum {
// Context handed to the operand of *, +, ? and {n,m}.
PrecAtom,
// Level of the repetition operators themselves.
PrecUnary,
// Level of concatenations and literal strings.
PrecConcat,
// Level of alternations; children printed in this context are followed by "|".
PrecAlternate,
// Below this level an empty match is written out explicitly as "(?:)".
PrecEmpty,
// Context handed to the child of a capturing group.
PrecParen,
// Context used for the root of the expression.
PrecToplevel,
};
// Forward declaration: AppendLiteral calls AppendCCRange before its
// definition, which appears at the end of this file.
static void AppendCCRange(string* t, Rune lo, Rune hi);
// Walker that writes the textual form of a Regexp into a caller-supplied
// string.  The int carried through the walk is one of the Prec* values
// declared above.
class ToStringWalker : public Regexp::Walker<int> {
public:
// Stores the pointer t; everything the walker emits is appended to *t.
// The string is not copied and this class never deletes it.
explicit ToStringWalker(string* t) : t_(t) {}
// Emits any opening text for re and returns the precedence context for its
// children.  Defined below.
virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
// Emits the text that follows re's children (or the whole text of a leaf).
// Defined below.
virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
int* child_args, int nchild_args);
// Appends nothing and returns 0.
// NOTE(review): presumably invoked by the base Walker for nodes it skips once
// the visit budget is exhausted -- confirm against walker-inl.h.
virtual int ShortVisit(Regexp* re, int parent_arg) {
return 0;
}
private:
string* t_;  // Output buffer supplied by the caller.
// NOTE(review): presumably disables copy construction/assignment -- confirm
// against the macro definition in util/util.h.
DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker);
};
// Returns the textual form of this regexp.
// The tree is printed by a ToStringWalker started at top-level precedence.
// If the walker reports that it stopped early, the result is what was
// produced so far followed by " [truncated]".
string Regexp::ToString() {
  // Third argument to WalkExponential.
  // NOTE(review): presumably the maximum number of node visits before the
  // walk is cut short -- confirm against walker-inl.h.
  const int kVisitLimit = 100000;

  string rendered;
  ToStringWalker walker(&rendered);
  walker.WalkExponential(this, PrecToplevel, kVisitLimit);
  if (walker.stopped_early()) {
    rendered += " [truncated]";
  }
  return rendered;
}
// From this point on the preprocessor rewrites every use of the identifier
// ToString to DontCallToString, which is not defined anywhere in this file.
// NOTE(review): presumably intended to make an accidental call to ToString
// from the walker code below fail to build -- confirm intent.
#define ToString DontCallToString
// Called before re's children are printed.
//
// parent_arg is the precedence context supplied by the parent.  For operators
// that may need grouping, a non-capturing "(?:" is opened when that context
// is tighter (numerically smaller) than the operator's own level; PostVisit
// emits the matching ")" under the same condition.  Capturing groups always
// open "(" and, when named, "?P<name>".
//
// Returns the precedence context to hand to re's children.  stop is not used.
int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
  const int outer = parent_arg;

  // Level of this node's own operator.  Starting it at `outer` makes the
  // grouping test below false unless one of the cases raises it.
  int own_level = outer;
  // Context passed on to the children.
  int child_level = PrecAtom;

  switch (re->op()) {
    // Leaves: nothing to open, nothing to group.
    case kRegexpNoMatch:
    case kRegexpEmptyMatch:
    case kRegexpLiteral:
    case kRegexpAnyChar:
    case kRegexpAnyByte:
    case kRegexpBeginLine:
    case kRegexpEndLine:
    case kRegexpBeginText:
    case kRegexpEndText:
    case kRegexpWordBoundary:
    case kRegexpNoWordBoundary:
    case kRegexpCharClass:
    case kRegexpHaveMatch:
      break;

    case kRegexpCapture:
      // The capture's own parenthesis does the grouping.
      t_->append("(");
      if (re->name()) {
        t_->append("?P<");
        t_->append(*re->name());
        t_->append(">");
      }
      child_level = PrecParen;
      break;

    case kRegexpConcat:
    case kRegexpLiteralString:
      own_level = PrecConcat;
      child_level = PrecConcat;
      break;

    case kRegexpAlternate:
      own_level = PrecAlternate;
      child_level = PrecAlternate;
      break;

    case kRegexpStar:
    case kRegexpPlus:
    case kRegexpQuest:
    case kRegexpRepeat:
      // The operand is printed in PrecAtom context (child_level's initial
      // value), so a repetition nested directly inside another one gets
      // wrapped in its own group.
      own_level = PrecUnary;
      break;
  }

  if (outer < own_level) {
    t_->append("(?:");
  }
  return child_level;
}
// Appends the pattern text for the single literal rune r to *t.
//  - ASCII regexp metacharacters are written with a leading backslash.
//  - With foldcase set, a lowercase ASCII letter is written as a two-element
//    class holding its uppercase and lowercase forms, e.g. "[Aa]".
//  - Everything else is delegated to AppendCCRange(t, r, r), which applies
//    character-class escaping.
static void AppendLiteral(string *t, Rune r, bool foldcase) {
  const bool is_meta = r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r);
  if (is_meta) {
    t->append(1, '\\');
    t->append(1, r);
    return;
  }

  const bool is_ascii_lower = 'a' <= r && r <= 'z';
  if (foldcase && is_ascii_lower) {
    const Rune upper = r + ('A' - 'a');
    t->append(1, '[');
    t->append(1, upper);
    t->append(1, r);
    t->append(1, ']');
    return;
  }

  AppendCCRange(t, r, r);
}
// Called after re's children have been printed.  Appends whatever follows
// the children: the closing delimiter of a group, a postfix repetition
// operator, or -- for leaf nodes -- the complete text of the node.
//
// parent_arg is the precedence context supplied by the parent.  It decides
// whether a ")" must close a "(?:" opened by PreVisit, and whether a "|"
// separator is appended after this node (when the parent is an alternation).
// pre_arg, child_args and nchild_args are not used.  Always returns 0.
int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
                              int* child_args, int nchild_args) {
  int prec = parent_arg;
  switch (re->op()) {
    case kRegexpNoMatch:
      // A class that excludes every code point.
      t_->append("[^\\x00-\\x{10ffff}]");
      break;

    case kRegexpEmptyMatch:
      // Written explicitly only in contexts tighter than PrecEmpty;
      // otherwise the empty match is represented by emitting nothing.
      if (prec < PrecEmpty)
        t_->append("(?:)");
      break;

    case kRegexpLiteral:
      AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
      break;

    case kRegexpLiteralString:
      for (int i = 0; i < re->nrunes(); i++)
        AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
      if (prec < PrecConcat)
        t_->append(")");
      break;

    case kRegexpConcat:
      if (prec < PrecConcat)
        t_->append(")");
      break;

    case kRegexpAlternate:
      // Every child appended a trailing "|" (see the end of this function),
      // so the last one has to be removed.  The empty() check keeps the
      // index in range if, unexpectedly, nothing has been written.
      if (!t_->empty() && (*t_)[t_->size()-1] == '|')
        t_->erase(t_->size()-1);
      else
        LOG(DFATAL) << "Bad final char: " << *t_;  // log the text, not the pointer
      if (prec < PrecAlternate)
        t_->append(")");
      break;

    case kRegexpStar:
      t_->append("*");
      if (re->parse_flags() & Regexp::NonGreedy)
        t_->append("?");
      if (prec < PrecUnary)
        t_->append(")");
      break;

    case kRegexpPlus:
      t_->append("+");
      if (re->parse_flags() & Regexp::NonGreedy)
        t_->append("?");
      if (prec < PrecUnary)
        t_->append(")");
      break;

    case kRegexpQuest:
      t_->append("?");
      if (re->parse_flags() & Regexp::NonGreedy)
        t_->append("?");
      if (prec < PrecUnary)
        t_->append(")");
      break;

    case kRegexpRepeat:
      // max() == -1 is printed as an open-ended repetition "{min,}".
      if (re->max() == -1)
        t_->append(StringPrintf("{%d,}", re->min()));
      else if (re->min() == re->max())
        t_->append(StringPrintf("{%d}", re->min()));
      else
        t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
      if (re->parse_flags() & Regexp::NonGreedy)
        t_->append("?");
      if (prec < PrecUnary)
        t_->append(")");
      break;

    case kRegexpAnyChar:
      t_->append(".");
      break;

    case kRegexpAnyByte:
      t_->append("\\C");
      break;

    case kRegexpBeginLine:
      t_->append("^");
      break;

    case kRegexpEndLine:
      t_->append("$");
      break;

    case kRegexpBeginText:
      t_->append("(?-m:^)");
      break;

    case kRegexpEndText:
      // The WasDollar flag selects the "$" spelling over "\z".
      if (re->parse_flags() & Regexp::WasDollar)
        t_->append("(?-m:$)");
      else
        t_->append("\\z");
      break;

    case kRegexpWordBoundary:
      t_->append("\\b");
      break;

    case kRegexpNoWordBoundary:
      t_->append("\\B");
      break;

    case kRegexpCharClass: {
      if (re->cc()->size() == 0) {
        t_->append("[^\\x00-\\x{10ffff}]");
        break;
      }
      t_->append("[");
      // A class containing 0xFFFE is printed in negated form: "^" followed
      // by the ranges of its complement.
      CharClass* cc = re->cc();
      if (cc->Contains(0xFFFE)) {
        cc = cc->Negate();
        t_->append("^");
      }
      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
        AppendCCRange(t_, i->lo, i->hi);
      // Release the temporary produced by Negate(); re's own class is left
      // untouched.
      if (cc != re->cc())
        cc->Delete();
      t_->append("]");
      break;
    }

    case kRegexpCapture:
      t_->append(")");
      break;

    case kRegexpHaveMatch:
      // Format the id first.  Passing it as a second argument to
      // string::append would select the (const char*, count) overload and
      // copy that many bytes of the format literal instead.
      t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id()));
      break;
  }

  // Children of an alternation are each followed by a separator; the
  // kRegexpAlternate case above strips the final one.
  if (prec == PrecAlternate)
    t_->append("|");

  return 0;
}
// Appends rune r to *t in the form used inside a character class.
//  - Printable ASCII (0x20..0x7E) is written as-is, with a backslash in
//    front of the characters [ ] ^ - and backslash.
//  - CR, TAB, LF and FF use their C-style escapes.
//  - Other values below 0x100 are written as \xNN, the rest as \x{N...}.
static void AppendCCChar(string* t, Rune r) {
  const bool printable_ascii = 0x20 <= r && r <= 0x7E;
  if (printable_ascii) {
    if (strchr("[]^-\\", r))
      t->append("\\");
    t->append(1, r);
    return;
  }

  if (r == '\r') {
    t->append("\\r");
    return;
  }
  if (r == '\t') {
    t->append("\\t");
    return;
  }
  if (r == '\n') {
    t->append("\\n");
    return;
  }
  if (r == '\f') {
    t->append("\\f");
    return;
  }

  if (r < 0x100)
    StringAppendF(t, "\\x%02x", static_cast<int>(r));
  else
    StringAppendF(t, "\\x{%x}", static_cast<int>(r));
}
// Appends the range lo-hi to *t in character-class syntax.
// An inverted range (lo > hi) appends nothing; a single-element range
// appends just that character; otherwise the output is "lo-hi" with both
// endpoints escaped by AppendCCChar.
static void AppendCCRange(string* t, Rune lo, Rune hi) {
  if (lo > hi)
    return;

  AppendCCChar(t, lo);
  if (lo == hi)
    return;

  t->append("-");
  AppendCCChar(t, hi);
}
}