This source file includes following definitions.
- one_line_
- InitEmpty
- RegexpErrorToRE2
- trunc
- ParseFlags
- Init
- ReverseProg
- ProgramSize
- NamedCapturingGroups
- CapturingGroupNames
- FullMatchN
- PartialMatchN
- ConsumeN
- FindAndConsumeN
- MaxSubmatch
- Replace
- GlobalReplace
- Extract
- QuoteMeta
- PossibleMatchRange
- ascii_strcasecmp
- Match
- DoMatch
- Rewrite
- NumberOfCapturingGroups
- CheckRewriteString
- parse_null
- parse_string
- parse_stringpiece
- parse_char
- parse_uchar
- TerminateNumber
- parse_long_radix
- parse_ulong_radix
- parse_short_radix
- parse_ushort_radix
- parse_int_radix
- parse_uint_radix
- parse_longlong_radix
- parse_ulonglong_radix
- parse_double_float
- parse_double
- parse_float
#include "re2/re2.h"
#include <stdio.h>
#include <string>
#ifdef WIN32
#define strtoll _strtoi64
#define strtoull _strtoui64
#define strtof strtod
#else
#include <pthread.h>
#endif
#include <errno.h>
#include "util/util.h"
#include "util/flags.h"
#include "re2/prog.h"
#include "re2/regexp.h"
DEFINE_bool(trace_re2, false, "trace RE2 execution");
namespace re2 {
static const int kMaxArgs = 16;
static const int kVecSize = 1+kMaxArgs;
const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch = {};
const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch = {};
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {};
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {};
#define kDefaultMaxMem (8<<20)
RE2::Options::Options()
: encoding_(EncodingUTF8),
posix_syntax_(false),
longest_match_(false),
log_errors_(true),
max_mem_(kDefaultMaxMem),
literal_(false),
never_nl_(false),
never_capture_(false),
case_sensitive_(true),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
}
RE2::Options::Options(RE2::CannedOptions opt)
: encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
posix_syntax_(opt == RE2::POSIX_SYNTAX),
longest_match_(opt == RE2::POSIX_SYNTAX),
log_errors_(opt != RE2::Quiet),
max_mem_(kDefaultMaxMem),
literal_(false),
never_nl_(false),
never_capture_(false),
case_sensitive_(true),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
}
GLOBAL_MUTEX(empty_mutex);
static const string *empty_string;
static const map<string, int> *empty_named_groups;
static const map<int, string> *empty_group_names;
static void InitEmpty() {
GLOBAL_MUTEX_LOCK(empty_mutex);
if (empty_string == NULL) {
empty_string = new string;
empty_named_groups = new map<string, int>;
empty_group_names = new map<int, string>;
}
GLOBAL_MUTEX_UNLOCK(empty_mutex);
}
static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
switch (code) {
case re2::kRegexpSuccess:
return RE2::NoError;
case re2::kRegexpInternalError:
return RE2::ErrorInternal;
case re2::kRegexpBadEscape:
return RE2::ErrorBadEscape;
case re2::kRegexpBadCharClass:
return RE2::ErrorBadCharClass;
case re2::kRegexpBadCharRange:
return RE2::ErrorBadCharRange;
case re2::kRegexpMissingBracket:
return RE2::ErrorMissingBracket;
case re2::kRegexpMissingParen:
return RE2::ErrorMissingParen;
case re2::kRegexpTrailingBackslash:
return RE2::ErrorTrailingBackslash;
case re2::kRegexpRepeatArgument:
return RE2::ErrorRepeatArgument;
case re2::kRegexpRepeatSize:
return RE2::ErrorRepeatSize;
case re2::kRegexpRepeatOp:
return RE2::ErrorRepeatOp;
case re2::kRegexpBadPerlOp:
return RE2::ErrorBadPerlOp;
case re2::kRegexpBadUTF8:
return RE2::ErrorBadUTF8;
case re2::kRegexpBadNamedCapture:
return RE2::ErrorBadNamedCapture;
}
return RE2::ErrorInternal;
}
static string trunc(const StringPiece& pattern) {
if (pattern.size() < 100)
return pattern.as_string();
return pattern.substr(0, 100).as_string() + "...";
}
RE2::RE2(const char* pattern) {
Init(pattern, DefaultOptions);
}
RE2::RE2(const string& pattern) {
Init(pattern, DefaultOptions);
}
RE2::RE2(const StringPiece& pattern) {
Init(pattern, DefaultOptions);
}
RE2::RE2(const StringPiece& pattern, const Options& options) {
Init(pattern, options);
}
int RE2::Options::ParseFlags() const {
int flags = Regexp::ClassNL;
switch (encoding()) {
default:
if (log_errors())
LOG(ERROR) << "Unknown encoding " << encoding();
break;
case RE2::Options::EncodingUTF8:
break;
case RE2::Options::EncodingLatin1:
flags |= Regexp::Latin1;
break;
}
if (!posix_syntax())
flags |= Regexp::LikePerl;
if (literal())
flags |= Regexp::Literal;
if (never_nl())
flags |= Regexp::NeverNL;
if (never_capture())
flags |= Regexp::NeverCapture;
if (!case_sensitive())
flags |= Regexp::FoldCase;
if (perl_classes())
flags |= Regexp::PerlClasses;
if (word_boundary())
flags |= Regexp::PerlB;
if (one_line())
flags |= Regexp::OneLine;
return flags;
}
void RE2::Init(const StringPiece& pattern, const Options& options) {
mutex_ = new Mutex;
pattern_ = pattern.as_string();
options_.Copy(options);
InitEmpty();
error_ = empty_string;
error_code_ = NoError;
suffix_regexp_ = NULL;
entire_regexp_ = NULL;
prog_ = NULL;
rprog_ = NULL;
named_groups_ = NULL;
group_names_ = NULL;
num_captures_ = -1;
RegexpStatus status;
entire_regexp_ = Regexp::Parse(
pattern_,
static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
&status);
if (entire_regexp_ == NULL) {
if (error_ == empty_string)
error_ = new string(status.Text());
if (options_.log_errors()) {
LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
<< status.Text();
}
error_arg_ = status.error_arg().as_string();
error_code_ = RegexpErrorToRE2(status.code());
return;
}
prefix_.clear();
prefix_foldcase_ = false;
re2::Regexp* suffix;
if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
suffix_regexp_ = suffix;
else
suffix_regexp_ = entire_regexp_->Incref();
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
error_ = new string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
}
is_one_pass_ = prog_->IsOnePass();
}
re2::Prog* RE2::ReverseProg() const {
MutexLock l(mutex_);
if (rprog_ == NULL && error_ == empty_string) {
rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
if (rprog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
error_ = new string("pattern too large - reverse compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return NULL;
}
}
return rprog_;
}
RE2::~RE2() {
if (suffix_regexp_)
suffix_regexp_->Decref();
if (entire_regexp_)
entire_regexp_->Decref();
delete mutex_;
delete prog_;
delete rprog_;
if (error_ != empty_string)
delete error_;
if (named_groups_ != NULL && named_groups_ != empty_named_groups)
delete named_groups_;
if (group_names_ != NULL && group_names_ != empty_group_names)
delete group_names_;
}
int RE2::ProgramSize() const {
if (prog_ == NULL)
return -1;
return prog_->size();
}
const map<string, int>& RE2::NamedCapturingGroups() const {
MutexLock l(mutex_);
if (!ok())
return *empty_named_groups;
if (named_groups_ == NULL) {
named_groups_ = suffix_regexp_->NamedCaptures();
if (named_groups_ == NULL)
named_groups_ = empty_named_groups;
}
return *named_groups_;
}
const map<int, string>& RE2::CapturingGroupNames() const {
MutexLock l(mutex_);
if (!ok())
return *empty_group_names;
if (group_names_ == NULL) {
group_names_ = suffix_regexp_->CaptureNames();
if (group_names_ == NULL)
group_names_ = empty_group_names;
}
return *group_names_;
}
bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
const Arg* const args[], int n) {
return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
}
bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
const Arg* const args[], int n) {
return re.DoMatch(text, UNANCHORED, NULL, args, n);
}
bool RE2::ConsumeN(StringPiece* input, const RE2& re,
const Arg* const args[], int n) {
int consumed;
if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
input->remove_prefix(consumed);
return true;
} else {
return false;
}
}
bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
const Arg* const args[], int n) {
int consumed;
if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
input->remove_prefix(consumed);
return true;
} else {
return false;
}
}
int RE2::MaxSubmatch(const StringPiece& rewrite) {
int max = 0;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
if (*s == '\\') {
s++;
int c = (s < end) ? *s : -1;
if (isdigit(c)) {
int n = (c - '0');
if (n > max)
max = n;
}
}
}
return max;
}
bool RE2::Replace(string *str,
const RE2& re,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
return false;
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
string s;
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
assert(vec[0].begin() >= str->data());
assert(vec[0].end() <= str->data()+str->size());
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
return true;
}
int RE2::GlobalReplace(string *str,
const RE2& re,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
return false;
const char* p = str->data();
const char* ep = p + str->size();
const char* lastend = NULL;
string out;
int count = 0;
while (p <= ep) {
if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
break;
if (p < vec[0].begin())
out.append(p, vec[0].begin() - p);
if (vec[0].begin() == lastend && vec[0].size() == 0) {
if (p < ep)
out.append(p, 1);
p++;
continue;
}
re.Rewrite(&out, rewrite, vec, nvec);
p = vec[0].end();
lastend = p;
count++;
}
if (count == 0)
return 0;
if (p < ep)
out.append(p, ep - p);
swap(out, *str);
return count;
}
bool RE2::Extract(const StringPiece &text,
const RE2& re,
const StringPiece &rewrite,
string *out) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
return false;
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
return false;
out->clear();
return re.Rewrite(out, rewrite, vec, nvec);
}
string RE2::QuoteMeta(const StringPiece& unquoted) {
string result;
result.reserve(unquoted.size() << 1);
for (int ii = 0; ii < unquoted.length(); ++ii) {
if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
(unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
(unquoted[ii] < '0' || unquoted[ii] > '9') &&
unquoted[ii] != '_' &&
!(unquoted[ii] & 128)) {
if (unquoted[ii] == '\0') {
result += "\\x00";
continue;
}
result += '\\';
}
result += unquoted[ii];
}
return result;
}
bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
if (prog_ == NULL)
return false;
int n = prefix_.size();
if (n > maxlen)
n = maxlen;
string pmin, pmax;
pmin = prefix_.substr(0, n);
pmax = prefix_.substr(0, n);
if (prefix_foldcase_) {
for (int i = 0; i < n; i++) {
if ('a' <= pmin[i] && pmin[i] <= 'z')
pmin[i] += 'A' - 'a';
}
}
string dmin, dmax;
maxlen -= n;
if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
pmin += dmin;
pmax += dmax;
} else if (pmax.size() > 0) {
pmax = PrefixSuccessor(pmax);
} else {
*min = "";
*max = "";
return false;
}
*min = pmin;
*max = pmax;
return true;
}
static int ascii_strcasecmp(const char* a, const char* b, int len) {
const char *ae = a + len;
for (; a < ae; a++, b++) {
uint8 x = *a;
uint8 y = *b;
if ('A' <= y && y <= 'Z')
y += 'a' - 'A';
if (x != y)
return x - y;
}
return 0;
}
bool RE2::Match(const StringPiece& text,
int startpos,
int endpos,
Anchor re_anchor,
StringPiece* submatch,
int nsubmatch) const {
if (!ok() || suffix_regexp_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
if (startpos < 0 || startpos > endpos || endpos > text.size()) {
if (options_.log_errors())
LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
return false;
}
StringPiece subtext = text;
subtext.remove_prefix(startpos);
subtext.remove_suffix(text.size() - endpos);
StringPiece match;
StringPiece* matchp = &match;
if (nsubmatch == 0)
matchp = NULL;
int ncap = 1 + NumberOfCapturingGroups();
if (ncap > nsubmatch)
ncap = nsubmatch;
if (prog_->anchor_start() && startpos != 0)
return false;
if (prog_->anchor_start() && prog_->anchor_end())
re_anchor = ANCHOR_BOTH;
else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
re_anchor = ANCHOR_START;
int prefixlen = 0;
if (!prefix_.empty()) {
if (startpos != 0)
return false;
prefixlen = prefix_.size();
if (prefixlen > subtext.size())
return false;
if (prefix_foldcase_) {
if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
return false;
} else {
if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
return false;
}
subtext.remove_prefix(prefixlen);
if (re_anchor != ANCHOR_BOTH)
re_anchor = ANCHOR_START;
}
Prog::Anchor anchor = Prog::kUnanchored;
Prog::MatchKind kind = Prog::kFirstMatch;
if (options_.longest_match())
kind = Prog::kLongestMatch;
bool skipped_test = false;
bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
const int MaxBitStateProg = 500;
const int MaxBitStateVector = 256*1024;
bool can_bit_state = prog_->size() <= MaxBitStateProg;
int bit_state_text_max = MaxBitStateVector / prog_->size();
bool dfa_failed = false;
switch (re_anchor) {
default:
case UNANCHORED: {
if (!prog_->SearchDFA(subtext, text, anchor, kind,
matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
skipped_test = true;
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " DFA failed.";
break;
}
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " used DFA - no match.";
return false;
}
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " used DFA - match";
if (matchp == NULL)
return true;
Prog* prog = ReverseProg();
if (prog == NULL)
return false;
if (!prog->SearchDFA(match, text, Prog::kAnchored,
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
skipped_test = true;
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " reverse DFA failed.";
break;
}
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " DFA inconsistency.";
if (options_.log_errors())
LOG(ERROR) << "DFA inconsistency";
return false;
}
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " used reverse DFA.";
break;
}
case ANCHOR_BOTH:
case ANCHOR_START:
if (re_anchor == ANCHOR_BOTH)
kind = Prog::kFullMatch;
anchor = Prog::kAnchored;
if (can_one_pass && text.size() <= 4096 &&
(ncap > 1 || text.size() <= 8)) {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " skipping DFA for OnePass.";
skipped_test = true;
break;
}
if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " skipping DFA for BitState.";
skipped_test = true;
break;
}
if (!prog_->SearchDFA(subtext, text, anchor, kind,
&match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " DFA failed.";
skipped_test = true;
break;
}
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " used DFA - no match.";
return false;
}
break;
}
if (!skipped_test && ncap <= 1) {
if (ncap == 1)
submatch[0] = match;
} else {
StringPiece subtext1;
if (skipped_test) {
subtext1 = subtext;
} else {
subtext1 = match;
anchor = Prog::kAnchored;
kind = Prog::kFullMatch;
}
if (can_one_pass && anchor != Prog::kUnanchored) {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " using OnePass.";
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
} else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " using BitState.";
if (!prog_->SearchBitState(subtext1, text, anchor,
kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchBitState inconsistency";
return false;
}
} else {
if (FLAGS_trace_re2)
LOG(INFO) << "Match " << trunc(pattern_)
<< " [" << CEscape(subtext) << "]"
<< " using NFA.";
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchNFA inconsistency";
return false;
}
}
}
if (prefixlen > 0 && nsubmatch > 0)
submatch[0] = StringPiece(submatch[0].begin() - prefixlen,
submatch[0].size() + prefixlen);
for (int i = ncap; i < nsubmatch; i++)
submatch[i] = NULL;
return true;
}
bool RE2::DoMatch(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const* args,
int n) const {
if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
int nvec;
if (n == 0 && consumed == NULL)
nvec = 0;
else
nvec = n+1;
StringPiece* vec;
StringPiece stkvec[kVecSize];
StringPiece* heapvec = NULL;
if (nvec <= arraysize(stkvec)) {
vec = stkvec;
} else {
vec = new StringPiece[nvec];
heapvec = vec;
}
if (!Match(text, 0, text.size(), anchor, vec, nvec)) {
delete[] heapvec;
return false;
}
if(consumed != NULL)
*consumed = vec[0].end() - text.begin();
if (n == 0 || args == NULL) {
delete[] heapvec;
return true;
}
int ncap = NumberOfCapturingGroups();
if (ncap < n) {
VLOG(1) << "Asked for " << n << " but only have " << ncap;
delete[] heapvec;
return false;
}
for (int i = 0; i < n; i++) {
const StringPiece& s = vec[i+1];
if (!args[i]->Parse(s.data(), s.size())) {
VLOG(1) << "Parse error on #" << i << " " << s << " "
<< (void*)s.data() << "/" << s.size();
delete[] heapvec;
return false;
}
}
delete[] heapvec;
return true;
}
bool RE2::Rewrite(string *out, const StringPiece &rewrite,
const StringPiece *vec, int veclen) const {
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
int c = *s;
if (c == '\\') {
s++;
c = (s < end) ? *s : -1;
if (isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
LOG(ERROR) << "requested group " << n
<< " in regexp " << rewrite.data();
}
return false;
}
StringPiece snip = vec[n];
if (snip.size() > 0)
out->append(snip.data(), snip.size());
} else if (c == '\\') {
out->push_back('\\');
} else {
if (options_.log_errors())
LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
return false;
}
} else {
out->push_back(c);
}
}
return true;
}
int RE2::NumberOfCapturingGroups() const {
if (suffix_regexp_ == NULL)
return -1;
ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
" multiple threads end up doing the same work in parallel.");
if (num_captures_ == -1)
num_captures_ = suffix_regexp_->NumCaptures();
return num_captures_;
}
bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
int c = *s;
if (c != '\\') {
continue;
}
if (++s == end) {
*error = "Rewrite schema error: '\\' not allowed at end.";
return false;
}
c = *s;
if (c == '\\') {
continue;
}
if (!isdigit(c)) {
*error = "Rewrite schema error: "
"'\\' must be followed by a digit or '\\'.";
return false;
}
int n = (c - '0');
if (max_token < n) {
max_token = n;
}
}
if (max_token > NumberOfCapturingGroups()) {
SStringPrintf(error, "Rewrite schema requests %d matches, "
"but the regexp only has %d parenthesized subexpressions.",
max_token, NumberOfCapturingGroups());
return false;
}
return true;
}
bool RE2::Arg::parse_null(const char* str, int n, void* dest) {
return (dest == NULL);
}
bool RE2::Arg::parse_string(const char* str, int n, void* dest) {
if (dest == NULL) return true;
reinterpret_cast<string*>(dest)->assign(str, n);
return true;
}
bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) {
if (dest == NULL) return true;
reinterpret_cast<StringPiece*>(dest)->set(str, n);
return true;
}
bool RE2::Arg::parse_char(const char* str, int n, void* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
*(reinterpret_cast<char*>(dest)) = str[0];
return true;
}
bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
return true;
}
static const int kMaxNumberLength = 32;
static const char* TerminateNumber(char* buf, const char* str, int* np) {
int n = *np;
if (n <= 0) return "";
if (n > 0 && isspace(*str)) {
return "";
}
bool neg = false;
if (n >= 1 && str[0] == '-') {
neg = true;
n--;
str++;
}
if (n >= 3 && str[0] == '0' && str[1] == '0') {
while (n >= 3 && str[2] == '0') {
n--;
str++;
}
}
if (neg) {
n++;
str--;
}
if (n > kMaxNumberLength) return "";
memmove(buf, str, n);
if (neg) {
buf[0] = '-';
}
buf[n] = '\0';
*np = n;
return buf;
}
bool RE2::Arg::parse_long_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, &n);
char* end;
errno = 0;
long r = strtol(str, &end, radix);
if (end != str + n) return false;
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<long*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_ulong_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, &n);
if (str[0] == '-') {
return false;
}
char* end;
errno = 0;
unsigned long r = strtoul(str, &end, radix);
if (end != str + n) return false;
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned long*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_short_radix(const char* str,
int n,
void* dest,
int radix) {
long r;
if (!parse_long_radix(str, n, &r, radix)) return false;
if ((short)r != r) return false;
if (dest == NULL) return true;
*(reinterpret_cast<short*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_ushort_radix(const char* str,
int n,
void* dest,
int radix) {
unsigned long r;
if (!parse_ulong_radix(str, n, &r, radix)) return false;
if ((ushort)r != r) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned short*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_int_radix(const char* str,
int n,
void* dest,
int radix) {
long r;
if (!parse_long_radix(str, n, &r, radix)) return false;
if ((int)r != r) return false;
if (dest == NULL) return true;
*(reinterpret_cast<int*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_uint_radix(const char* str,
int n,
void* dest,
int radix) {
unsigned long r;
if (!parse_ulong_radix(str, n, &r, radix)) return false;
if ((uint)r != r) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned int*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_longlong_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, &n);
char* end;
errno = 0;
int64 r = strtoll(str, &end, radix);
if (end != str + n) return false;
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<int64*>(dest)) = r;
return true;
}
bool RE2::Arg::parse_ulonglong_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, &n);
if (str[0] == '-') {
return false;
}
char* end;
errno = 0;
uint64 r = strtoull(str, &end, radix);
if (end != str + n) return false;
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<uint64*>(dest)) = r;
return true;
}
static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
if (n == 0) return false;
static const int kMaxLength = 200;
char buf[kMaxLength];
if (n >= kMaxLength) return false;
memcpy(buf, str, n);
buf[n] = '\0';
errno = 0;
char* end;
double r;
if (isfloat) {
r = strtof(buf, &end);
} else {
r = strtod(buf, &end);
}
if (end != buf + n) return false;
if (errno) return false;
if (dest == NULL) return true;
if (isfloat) {
*(reinterpret_cast<float*>(dest)) = r;
} else {
*(reinterpret_cast<double*>(dest)) = r;
}
return true;
}
bool RE2::Arg::parse_double(const char* str, int n, void* dest) {
return parse_double_float(str, n, false, dest);
}
bool RE2::Arg::parse_float(const char* str, int n, void* dest) {
return parse_double_float(str, n, true, dest);
}
#define DEFINE_INTEGER_PARSERS(name) \
bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 10); \
} \
bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 16); \
} \
bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 8); \
} \
bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 0); \
}
DEFINE_INTEGER_PARSERS(short);
DEFINE_INTEGER_PARSERS(ushort);
DEFINE_INTEGER_PARSERS(int);
DEFINE_INTEGER_PARSERS(uint);
DEFINE_INTEGER_PARSERS(long);
DEFINE_INTEGER_PARSERS(ulong);
DEFINE_INTEGER_PARSERS(longlong);
DEFINE_INTEGER_PARSERS(ulonglong);
#undef DEFINE_INTEGER_PARSERS
}