This source file includes following definitions.
- nvisited_
- Search
- Visit
- UnsafeSearchBacktrack
#include "util/util.h"
#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
class Backtracker {
public:
explicit Backtracker(Prog* prog);
~Backtracker();
bool Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool longest,
StringPiece* submatch, int nsubmatch);
private:
bool Visit(int id, const char* p);
Prog* prog_;
StringPiece text_;
StringPiece context_;
bool anchored_;
bool longest_;
bool endmatch_;
StringPiece *submatch_;
int nsubmatch_;
const char* cap_[64];
uint32 *visited_;
int nvisited_;
};
Backtracker::Backtracker(Prog* prog)
: prog_(prog),
anchored_(false),
longest_(false),
endmatch_(false),
submatch_(NULL),
nsubmatch_(0),
visited_(NULL),
nvisited_(0) {
}
Backtracker::~Backtracker() {
delete[] visited_;
}
bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool longest,
StringPiece* submatch, int nsubmatch) {
text_ = text;
context_ = context;
if (context_.begin() == NULL)
context_ = text;
if (prog_->anchor_start() && text.begin() > context_.begin())
return false;
if (prog_->anchor_end() && text.end() < context_.end())
return false;
anchored_ = anchored | prog_->anchor_start();
longest_ = longest | prog_->anchor_end();
endmatch_ = prog_->anchor_end();
submatch_ = submatch;
nsubmatch_ = nsubmatch;
CHECK(2*nsubmatch_ < arraysize(cap_));
memset(cap_, 0, sizeof cap_);
StringPiece sp0;
if (nsubmatch < 1) {
submatch_ = &sp0;
nsubmatch_ = 1;
}
submatch_[0] = NULL;
delete[] visited_;
nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
visited_ = new uint32[nvisited_];
memset(visited_, 0, nvisited_*sizeof visited_[0]);
if (anchored_) {
cap_[0] = text.begin();
return Visit(prog_->start(), text.begin());
}
for (const char* p = text.begin(); p <= text.end(); p++) {
cap_[0] = p;
if (Visit(prog_->start(), p))
return true;
}
return false;
}
bool Backtracker::Visit(int id, const char* p) {
CHECK(p <= text_.end());
int n = id*(text_.size()+1) + (p - text_.begin());
CHECK_LT(n/32, nvisited_);
if (visited_[n/32] & (1 << (n&31)))
return false;
visited_[n/32] |= 1 << (n&31);
int c = -1;
if (p < text_.end())
c = *p & 0xFF;
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
return false;
case kInstAlt:
case kInstAltMatch:
if (Visit(ip->out(), p)) {
if (longest_)
Visit(ip->out1(), p);
return true;
}
return Visit(ip->out1(), p);
case kInstByteRange:
if (ip->Matches(c))
return Visit(ip->out(), p+1);
return false;
case kInstCapture:
if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
const char* q = cap_[ip->cap()];
cap_[ip->cap()] = p;
bool ret = Visit(ip->out(), p);
cap_[ip->cap()] = q;
return ret;
}
return Visit(ip->out(), p);
case kInstEmptyWidth:
if (ip->empty() & ~Prog::EmptyFlags(context_, p))
return false;
return Visit(ip->out(), p);
case kInstNop:
return Visit(ip->out(), p);
case kInstMatch:
if (endmatch_ && p != context_.end())
return false;
cap_[1] = p;
if (submatch_[0].data() == NULL ||
(longest_ && p > submatch_[0].end())) {
for (int i = 0; i < nsubmatch_; i++)
submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
}
return true;
case kInstFail:
return false;
}
}
bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
const StringPiece& context,
Anchor anchor,
MatchKind kind,
StringPiece* match,
int nmatch) {
StringPiece sp0;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch < 1) {
match = &sp0;
nmatch = 1;
}
}
Backtracker b(this);
bool anchored = anchor == kAnchored;
bool longest = kind != kFirstMatch;
if (!b.Search(text, context, anchored, longest, match, nmatch))
return false;
if (kind == kFullMatch && match[0].end() != text.end())
return false;
return true;
}
}