This source file includes following definitions.
- SimplifyRegexp
- ComputeSimple
- Simplify
- Copy
- ShortVisit
- PreVisit
- PostVisit
- Concat2
- SimplifyRepeat
- SimplifyCharClass
#include "util/util.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
namespace re2 {
bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
string* dst,
RegexpStatus* status) {
Regexp* re = Parse(src, flags, status);
if (re == NULL)
return false;
Regexp* sre = re->Simplify();
re->Decref();
if (sre == NULL) {
LOG(ERROR) << "Simplify failed on " << src;
if (status) {
status->set_code(kRegexpInternalError);
status->set_error_arg(src);
}
return false;
}
*dst = sre->ToString();
sre->Decref();
return true;
}
bool Regexp::ComputeSimple() {
Regexp** subs;
switch (op_) {
case kRegexpNoMatch:
case kRegexpEmptyMatch:
case kRegexpLiteral:
case kRegexpLiteralString:
case kRegexpBeginLine:
case kRegexpEndLine:
case kRegexpBeginText:
case kRegexpWordBoundary:
case kRegexpNoWordBoundary:
case kRegexpEndText:
case kRegexpAnyChar:
case kRegexpAnyByte:
case kRegexpHaveMatch:
return true;
case kRegexpConcat:
case kRegexpAlternate:
subs = sub();
for (int i = 0; i < nsub_; i++)
if (!subs[i]->simple_)
return false;
return true;
case kRegexpCharClass:
if (ccb_ != NULL)
return !ccb_->empty() && !ccb_->full();
return !cc_->empty() && !cc_->full();
case kRegexpCapture:
subs = sub();
return subs[0]->simple_;
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
subs = sub();
if (!subs[0]->simple_)
return false;
switch (subs[0]->op_) {
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
case kRegexpEmptyMatch:
case kRegexpNoMatch:
return false;
default:
break;
}
return true;
case kRegexpRepeat:
return false;
}
LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
return false;
}
class SimplifyWalker : public Regexp::Walker<Regexp*> {
public:
SimplifyWalker() {}
virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
virtual Regexp* PostVisit(Regexp* re,
Regexp* parent_arg,
Regexp* pre_arg,
Regexp** child_args, int nchild_args);
virtual Regexp* Copy(Regexp* re);
virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
private:
static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
Regexp::ParseFlags parse_flags);
static Regexp* SimplifyCharClass(Regexp* re);
DISALLOW_EVIL_CONSTRUCTORS(SimplifyWalker);
};
Regexp* Regexp::Simplify() {
if (simple_)
return Incref();
SimplifyWalker w;
return w.Walk(this, NULL);
}
#define Simplify DontCallSimplify
Regexp* SimplifyWalker::Copy(Regexp* re) {
return re->Incref();
}
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
return re->Incref();
}
Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
if (re->simple_) {
*stop = true;
return re->Incref();
}
return NULL;
}
Regexp* SimplifyWalker::PostVisit(Regexp* re,
Regexp* parent_arg,
Regexp* pre_arg,
Regexp** child_args,
int nchild_args) {
switch (re->op()) {
case kRegexpNoMatch:
case kRegexpEmptyMatch:
case kRegexpLiteral:
case kRegexpLiteralString:
case kRegexpBeginLine:
case kRegexpEndLine:
case kRegexpBeginText:
case kRegexpWordBoundary:
case kRegexpNoWordBoundary:
case kRegexpEndText:
case kRegexpAnyChar:
case kRegexpAnyByte:
case kRegexpHaveMatch:
re->simple_ = true;
return re->Incref();
case kRegexpConcat:
case kRegexpAlternate: {
bool changed = false;
Regexp** subs = re->sub();
for (int i = 0; i < re->nsub_; i++) {
Regexp* sub = subs[i];
Regexp* newsub = child_args[i];
if (newsub != sub) {
changed = true;
break;
}
}
if (!changed) {
for (int i = 0; i < re->nsub_; i++) {
Regexp* newsub = child_args[i];
newsub->Decref();
}
re->simple_ = true;
return re->Incref();
}
Regexp* nre = new Regexp(re->op(), re->parse_flags());
nre->AllocSub(re->nsub_);
Regexp** nre_subs = nre->sub();
for (int i = 0; i <re->nsub_; i++)
nre_subs[i] = child_args[i];
nre->simple_ = true;
return nre;
}
case kRegexpCapture: {
Regexp* newsub = child_args[0];
if (newsub == re->sub()[0]) {
newsub->Decref();
re->simple_ = true;
return re->Incref();
}
Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
nre->AllocSub(1);
nre->sub()[0] = newsub;
nre->cap_ = re->cap_;
nre->simple_ = true;
return nre;
}
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest: {
Regexp* newsub = child_args[0];
if (newsub->op() == kRegexpEmptyMatch)
return newsub;
if (newsub == re->sub()[0]) {
newsub->Decref();
re->simple_ = true;
return re->Incref();
}
if (re->op() == newsub->op() &&
re->parse_flags() == newsub->parse_flags())
return newsub;
Regexp* nre = new Regexp(re->op(), re->parse_flags());
nre->AllocSub(1);
nre->sub()[0] = newsub;
nre->simple_ = true;
return nre;
}
case kRegexpRepeat: {
Regexp* newsub = child_args[0];
if (newsub->op() == kRegexpEmptyMatch)
return newsub;
Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
re->parse_flags());
newsub->Decref();
nre->simple_ = true;
return nre;
}
case kRegexpCharClass: {
Regexp* nre = SimplifyCharClass(re);
nre->simple_ = true;
return nre;
}
}
LOG(ERROR) << "Simplify case not handled: " << re->op();
return re->Incref();
}
Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
Regexp::ParseFlags parse_flags) {
Regexp* re = new Regexp(kRegexpConcat, parse_flags);
re->AllocSub(2);
Regexp** subs = re->sub();
subs[0] = re1;
subs[1] = re2;
return re;
}
Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
Regexp::ParseFlags f) {
if (max == -1) {
if (min == 0)
return Regexp::Star(re->Incref(), f);
if (min == 1)
return Regexp::Plus(re->Incref(), f);
Regexp* nre = new Regexp(kRegexpConcat, f);
nre->AllocSub(min);
VLOG(1) << "Simplify " << min;
Regexp** nre_subs = nre->sub();
for (int i = 0; i < min-1; i++)
nre_subs[i] = re->Incref();
nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
return nre;
}
if (min == 0 && max == 0)
return new Regexp(kRegexpEmptyMatch, f);
if (min == 1 && max == 1)
return re->Incref();
Regexp* nre = NULL;
if (min > 0) {
nre = new Regexp(kRegexpConcat, f);
nre->AllocSub(min);
Regexp** nre_subs = nre->sub();
for (int i = 0; i < min; i++)
nre_subs[i] = re->Incref();
}
if (max > min) {
Regexp* suf = Regexp::Quest(re->Incref(), f);
for (int i = min+1; i < max; i++)
suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
if (nre == NULL)
nre = suf;
else
nre = Concat2(nre, suf, f);
}
if (nre == NULL) {
LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
return new Regexp(kRegexpNoMatch, f);
}
return nre;
}
Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
CharClass* cc = re->cc();
if (cc->empty())
return new Regexp(kRegexpNoMatch, re->parse_flags());
if (cc->full())
return new Regexp(kRegexpAnyChar, re->parse_flags());
return re->Incref();
}
}