This source file includes the following definitions.
- maxjob_
- ShouldVisit
- GrowStack
- Push
- TrySearch
- Search
- SearchBitState
#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
// One entry on BitState's explicit backtracking stack.
struct Job {
// Index of the program instruction to run (looked up via prog_->inst(id)).
int id;
// Resume state for the instruction: 0 means a first visit; a nonzero value
// means the instruction is being revisited to finish work it deferred
// (see the kInstAlt and kInstCapture cases in BitState::TrySearch).
int arg;
// Text position at which to run the instruction. For a kInstCapture entry
// pushed with arg == 1 this instead holds the capture value that was in
// effect before it was overwritten.
const char* p;
};
// Backtracking matcher that keeps a bitmap of the (instruction, text
// position) pairs it has already explored so that no pair is explored twice.
class BitState {
 public:
  explicit BitState(Prog* prog);
  ~BitState();

  // Searches for a match of the program in text, which lies inside context
  // (an empty context with NULL data is treated as text itself).
  // anchored forces the match to begin at text.begin(); longest asks for
  // the longest match rather than the first one found.
  // On success returns true and, if nsubmatch > 0, fills submatch[0..nsubmatch).
  bool Search(const StringPiece& text, const StringPiece& context,
              bool anchored, bool longest,
              StringPiece* submatch, int nsubmatch);

 private:
  // The object owns visited_, job_ and cap_ and releases them with delete[]
  // in its destructor; a member-wise copy would free them twice. Copying is
  // therefore disabled (declared private, intentionally left undefined).
  BitState(const BitState&);
  BitState& operator=(const BitState&);

  // Marks (id, p) as visited; returns false if it already was.
  inline bool ShouldVisit(int id, const char* p);
  // Pushes (id, p, arg) onto the job stack unless it can be skipped.
  void Push(int id, const char* p, int arg);
  // Doubles the job stack capacity; returns false on failure.
  bool GrowStack();
  // Runs the backtracking search from instruction id at text position p.
  bool TrySearch(int id, const char* p);

  // Search parameters.
  Prog* prog_;             // program being run
  StringPiece text_;       // text being searched
  StringPiece context_;    // surrounding context, used for empty-width checks
  bool anchored_;          // match must start at text_.begin()
  bool longest_;           // want the longest match, not the first
  bool endmatch_;          // match must end at text_.end()
  StringPiece *submatch_;  // caller-provided output array
  int nsubmatch_;          // number of entries in submatch_

  // Search state.
  const char** cap_;       // current capture positions
  int ncap_;               // number of entries in cap_

  static const int VisitedBits = 32;  // bits per word of visited_
  uint32 *visited_;        // bitmap: one bit per (instruction, position) pair
  int nvisited_;           // number of words in visited_

  Job *job_;               // stack of pending work
  int njob_;               // number of entries currently on the stack
  int maxjob_;             // allocated capacity of job_
};
// Records the program to run and leaves every other field in an empty
// state; the working arrays are only allocated once Search() is called.
BitState::BitState(Prog* prog)
    : prog_(prog),
      anchored_(false), longest_(false), endmatch_(false),
      submatch_(NULL), nsubmatch_(0),
      cap_(NULL), ncap_(0),
      visited_(NULL), nvisited_(0),
      job_(NULL), njob_(0), maxjob_(0) {
}
// Releases the three heap arrays owned by the object. Each may still be
// NULL if Search() was never called; delete[] on NULL is a no-op.
BitState::~BitState() {
  delete[] cap_;
  delete[] job_;
  delete[] visited_;
}
bool BitState::ShouldVisit(int id, const char* p) {
uint n = id * (text_.size() + 1) + (p - text_.begin());
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
return false;
visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
return true;
}
bool BitState::GrowStack() {
maxjob_ *= 2;
Job* newjob = new Job[maxjob_];
memmove(newjob, job_, njob_*sizeof job_[0]);
delete[] job_;
job_ = newjob;
if (njob_ >= maxjob_) {
LOG(DFATAL) << "Job stack overflow.";
return false;
}
return true;
}
void BitState::Push(int id, const char* p, int arg) {
if (njob_ >= maxjob_) {
if (!GrowStack())
return;
}
int op = prog_->inst(id)->opcode();
if (op == kInstFail)
return;
if (arg == 0 && !ShouldVisit(id, p))
return;
Job* j = &job_[njob_++];
j->id = id;
j->p = p;
j->arg = arg;
}
bool BitState::TrySearch(int id0, const char* p0) {
bool matched = false;
const char* end = text_.end();
njob_ = 0;
Push(id0, p0, 0);
while (njob_ > 0) {
--njob_;
int id = job_[njob_].id;
const char* p = job_[njob_].p;
int arg = job_[njob_].arg;
if (0) {
CheckAndLoop:
if (!ShouldVisit(id, p))
continue;
}
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
case kInstFail:
default:
LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
return false;
case kInstAlt:
switch (arg) {
case 0:
Push(id, p, 1);
id = ip->out();
goto CheckAndLoop;
case 1:
arg = 0;
id = ip->out1();
goto CheckAndLoop;
}
LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
continue;
case kInstAltMatch:
if (ip->greedy(prog_)) {
Push(ip->out1(), p, 0);
id = ip->out1();
p = end;
goto CheckAndLoop;
}
Push(ip->out(), end, 0);
id = ip->out();
goto CheckAndLoop;
case kInstByteRange: {
int c = -1;
if (p < end)
c = *p & 0xFF;
if (ip->Matches(c)) {
id = ip->out();
p++;
goto CheckAndLoop;
}
continue;
}
case kInstCapture:
switch (arg) {
case 0:
if (0 <= ip->cap() && ip->cap() < ncap_) {
Push(id, cap_[ip->cap()], 1);
cap_[ip->cap()] = p;
}
id = ip->out();
goto CheckAndLoop;
case 1:
cap_[ip->cap()] = p;
continue;
}
LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
continue;
case kInstEmptyWidth:
if (ip->empty() & ~Prog::EmptyFlags(context_, p))
continue;
id = ip->out();
goto CheckAndLoop;
case kInstNop:
id = ip->out();
goto CheckAndLoop;
case kInstMatch: {
if (endmatch_ && p != text_.end())
continue;
if (nsubmatch_ == 0)
return true;
matched = true;
cap_[1] = p;
if (submatch_[0].data() == NULL ||
(longest_ && p > submatch_[0].end())) {
for (int i = 0; i < nsubmatch_; i++)
submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
}
if (!longest_)
return true;
if (p == text_.end())
return true;
continue;
}
}
}
return matched;
}
// Searches for a match of prog_ in text, which lies inside context.
//   anchored  - the match must begin at text.begin()
//   longest   - report the longest match instead of the first one found
//   submatch  - output array of nsubmatch entries (cleared on entry)
// Returns true if a match was found. Program-level start/end anchors are
// honored in addition to the anchored/longest arguments.
bool BitState::Search(const StringPiece& text, const StringPiece& context,
                      bool anchored, bool longest,
                      StringPiece* submatch, int nsubmatch) {
  // Search parameters.
  text_ = text;
  context_ = context;
  if (context_.begin() == NULL)
    context_ = text;

  // An anchored program can only match when text touches the corresponding
  // edge of the context.
  if (prog_->anchor_start() && context_.begin() != text.begin())
    return false;
  if (prog_->anchor_end() && context_.end() != text.end())
    return false;

  anchored_ = anchored || prog_->anchor_start();
  longest_ = longest || prog_->anchor_end();
  endmatch_ = prog_->anchor_end();
  submatch_ = submatch;
  nsubmatch_ = nsubmatch;
  for (int i = 0; i < nsubmatch_; i++)
    submatch_[i] = NULL;

  // Allocate scratch space. Arrays left over from an earlier call on the
  // same object are released first so that repeated searches do not leak.
  // Each pointer is reset to NULL before `new` so the destructor stays safe
  // if the allocation throws.

  // One visited bit per (instruction, text position) pair, rounded up to
  // whole words.
  nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
  delete[] visited_;
  visited_ = NULL;
  visited_ = new uint32[nvisited_];
  memset(visited_, 0, nvisited_*sizeof visited_[0]);

  // Two capture slots per submatch, and at least two because cap_[0] and
  // cap_[1] are always written.
  ncap_ = 2*nsubmatch;
  if (ncap_ < 2)
    ncap_ = 2;
  delete[] cap_;
  cap_ = NULL;
  cap_ = new const char*[ncap_];
  memset(cap_, 0, ncap_*sizeof cap_[0]);

  // Initial job stack; Push() grows it on demand.
  delete[] job_;
  job_ = NULL;
  maxjob_ = 256;
  job_ = new Job[maxjob_];

  // Anchored search: a single attempt at the beginning of the text.
  if (anchored_) {
    cap_[0] = text.begin();
    return TrySearch(prog_->start(), text.begin());
  }

  // Unanchored search: try every starting position, including end of text.
  // visited_ is deliberately not cleared between attempts, so work done for
  // an earlier starting position is not repeated.
  for (const char* p = text.begin(); p <= text.end(); p++) {
    cap_[0] = p;
    if (TrySearch(prog_->start(), p))
      return true;
    // An empty text may have NULL data; stop here rather than perform
    // arithmetic on a null pointer. This was the only iteration anyway.
    if (p == NULL)
      break;
  }
  return false;
}
bool Prog::SearchBitState(const StringPiece& text,
const StringPiece& context,
Anchor anchor,
MatchKind kind,
StringPiece* match,
int nmatch) {
StringPiece sp0;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch < 1) {
match = &sp0;
nmatch = 1;
}
}
BitState b(this);
bool anchored = anchor == kAnchored;
bool longest = kind != kFirstMatch;
if (!b.Search(text, context, anchored, longest, match, nmatch))
return false;
if (kind == kFullMatch && match[0].end() != text.end())
return false;
return true;
}
}