root/third_party/re2/util/pcre.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. Init
  2. Compile
  3. Replace
  4. GlobalReplace
  5. Extract
  6. QuoteMeta
  7. HitLimit
  8. ClearHitLimit
  9. TryMatch
  10. DoMatchImpl
  11. DoMatch
  12. Rewrite
  13. CheckRewriteString
  14. NumberOfCapturingGroups
  15. parse_null
  16. parse_string
  17. parse_stringpiece
  18. parse_char
  19. parse_uchar
  20. TerminateNumber
  21. parse_long_radix
  22. parse_ulong_radix
  23. parse_short_radix
  24. parse_ushort_radix
  25. parse_int_radix
  26. parse_uint_radix
  27. parse_longlong_radix
  28. parse_ulonglong_radix
  29. parse_double
  30. parse_float

// Copyright 2003-2009 Google Inc.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
// The main changes are the addition of the HitLimit method and
// compilation as PCRE in namespace re2.

#include <errno.h>
#include "util/util.h"
#include "util/flags.h"
#include "util/pcre.h"

#ifdef WIN32
#define strtoll _strtoi64
#define strtoull _strtoui64
#endif

#define PCREPORT(level) LOG(level)

// Default PCRE limits.
// Defaults chosen to allow a plausible amount of CPU and
// not exceed main thread stacks.  Note that other threads
// often have smaller stacks, and therefore tightening
// regexp_stack_limit may frequently be necessary.
DEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)");
DEFINE_int32(regexp_match_limit, 1000000,
             "default PCRE match limit (function calls)");

namespace re2 {

// Maximum number of args we can set
static const int kMaxArgs = 16;
static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace

// Approximate size of a recursive invocation of PCRE's
// internal "match()" frame.  This varies depending on the
// compiler and architecture, of course, so the constant is
// just a conservative estimate.  To find the exact number,
// run regexp_unittest with --regexp_stack_limit=0 under
// a debugger and look at the frames when it crashes.
// The exact frame size was 656 in production on 2008/02/03.
static const int kPCREFrameSize = 700;

// Special name for missing C++ arguments.
PCRE::Arg PCRE::no_more_args((void*)NULL);

const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
const PCRE::ConsumeFunctor PCRE::Consume = { };
const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };

// If a regular expression has no error, its error_ field points here
static const string empty_string;

void PCRE::Init(const char* pattern, Option options, int match_limit,
              int stack_limit, bool report_errors) {
  pattern_ = pattern;
  options_ = options;
  match_limit_ = match_limit;
  stack_limit_ = stack_limit;
  hit_limit_ = false;
  error_ = &empty_string;
  report_errors_ = report_errors;
  re_full_ = NULL;
  re_partial_ = NULL;

  if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
    error_ = new string("illegal regexp option");
    PCREPORT(ERROR)
        << "Error compiling '" << pattern << "': illegal regexp option";
  } else {
    re_partial_ = Compile(UNANCHORED);
    if (re_partial_ != NULL) {
      re_full_ = Compile(ANCHOR_BOTH);
    }
  }
}

PCRE::PCRE(const char* pattern) {
  Init(pattern, None, 0, 0, true);
}
PCRE::PCRE(const char* pattern, Option option) {
  Init(pattern, option, 0, 0, true);
}
PCRE::PCRE(const string& pattern) {
  Init(pattern.c_str(), None, 0, 0, true);
}
PCRE::PCRE(const string& pattern, Option option) {
  Init(pattern.c_str(), option, 0, 0, true);
}
PCRE::PCRE(const string& pattern, const PCRE_Options& re_option) {
  Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
       re_option.stack_limit(), re_option.report_errors());
}

PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
  Init(pattern, re_option.option(), re_option.match_limit(),
       re_option.stack_limit(), re_option.report_errors());
}

PCRE::~PCRE() {
  if (re_full_ != NULL)         pcre_free(re_full_);
  if (re_partial_ != NULL)      pcre_free(re_partial_);
  if (error_ != &empty_string)  delete error_;
}

pcre* PCRE::Compile(Anchor anchor) {
  // Special treatment for anchoring.  This is needed because at
  // runtime pcre only provides an option for anchoring at the
  // beginning of a string.
  //
  // There are three types of anchoring we want:
  //    UNANCHORED      Compile the original pattern, and use
  //                    a pcre unanchored match.
  //    ANCHOR_START    Compile the original pattern, and use
  //                    a pcre anchored match.
  //    ANCHOR_BOTH     Tack a "\z" to the end of the original pattern
  //                    and use a pcre anchored match.

  const char* error;
  int eoffset;
  pcre* re;
  if (anchor != ANCHOR_BOTH) {
    re = pcre_compile(pattern_.c_str(),
                      (options_ & EnabledCompileOptions),
                      &error, &eoffset, NULL);
  } else {
    // Tack a '\z' at the end of PCRE.  Parenthesize it first so that
    // the '\z' applies to all top-level alternatives in the regexp.
    string wrapped = "(?:";  // A non-counting grouping operator
    wrapped += pattern_;
    wrapped += ")\\z";
    re = pcre_compile(wrapped.c_str(),
                      (options_ & EnabledCompileOptions),
                      &error, &eoffset, NULL);
  }
  if (re == NULL) {
    if (error_ == &empty_string) error_ = new string(error);
    PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
  }
  return re;
}

/***** Convenience interfaces *****/

bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
                                       const PCRE& re,
                                       const Arg& a0,
                                       const Arg& a1,
                                       const Arg& a2,
                                       const Arg& a3,
                                       const Arg& a4,
                                       const Arg& a5,
                                       const Arg& a6,
                                       const Arg& a7,
                                       const Arg& a8,
                                       const Arg& a9,
                                       const Arg& a10,
                                       const Arg& a11,
                                       const Arg& a12,
                                       const Arg& a13,
                                       const Arg& a14,
                                       const Arg& a15) const {
  const Arg* args[kMaxArgs];
  int n = 0;
  if (&a0 == &no_more_args)  goto done; args[n++] = &a0;
  if (&a1 == &no_more_args)  goto done; args[n++] = &a1;
  if (&a2 == &no_more_args)  goto done; args[n++] = &a2;
  if (&a3 == &no_more_args)  goto done; args[n++] = &a3;
  if (&a4 == &no_more_args)  goto done; args[n++] = &a4;
  if (&a5 == &no_more_args)  goto done; args[n++] = &a5;
  if (&a6 == &no_more_args)  goto done; args[n++] = &a6;
  if (&a7 == &no_more_args)  goto done; args[n++] = &a7;
  if (&a8 == &no_more_args)  goto done; args[n++] = &a8;
  if (&a9 == &no_more_args)  goto done; args[n++] = &a9;
  if (&a10 == &no_more_args) goto done; args[n++] = &a10;
  if (&a11 == &no_more_args) goto done; args[n++] = &a11;
  if (&a12 == &no_more_args) goto done; args[n++] = &a12;
  if (&a13 == &no_more_args) goto done; args[n++] = &a13;
  if (&a14 == &no_more_args) goto done; args[n++] = &a14;
  if (&a15 == &no_more_args) goto done; args[n++] = &a15;
done:

  int consumed;
  int vec[kVecSize];
  return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
}

bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
                                          const PCRE& re,
                                          const Arg& a0,
                                          const Arg& a1,
                                          const Arg& a2,
                                          const Arg& a3,
                                          const Arg& a4,
                                          const Arg& a5,
                                          const Arg& a6,
                                          const Arg& a7,
                                          const Arg& a8,
                                          const Arg& a9,
                                          const Arg& a10,
                                          const Arg& a11,
                                          const Arg& a12,
                                          const Arg& a13,
                                          const Arg& a14,
                                          const Arg& a15) const {
  const Arg* args[kMaxArgs];
  int n = 0;
  if (&a0 == &no_more_args)  goto done; args[n++] = &a0;
  if (&a1 == &no_more_args)  goto done; args[n++] = &a1;
  if (&a2 == &no_more_args)  goto done; args[n++] = &a2;
  if (&a3 == &no_more_args)  goto done; args[n++] = &a3;
  if (&a4 == &no_more_args)  goto done; args[n++] = &a4;
  if (&a5 == &no_more_args)  goto done; args[n++] = &a5;
  if (&a6 == &no_more_args)  goto done; args[n++] = &a6;
  if (&a7 == &no_more_args)  goto done; args[n++] = &a7;
  if (&a8 == &no_more_args)  goto done; args[n++] = &a8;
  if (&a9 == &no_more_args)  goto done; args[n++] = &a9;
  if (&a10 == &no_more_args) goto done; args[n++] = &a10;
  if (&a11 == &no_more_args) goto done; args[n++] = &a11;
  if (&a12 == &no_more_args) goto done; args[n++] = &a12;
  if (&a13 == &no_more_args) goto done; args[n++] = &a13;
  if (&a14 == &no_more_args) goto done; args[n++] = &a14;
  if (&a15 == &no_more_args) goto done; args[n++] = &a15;
done:

  int consumed;
  int vec[kVecSize];
  return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
}

bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
                                     const PCRE& pattern,
                                     const Arg& a0,
                                     const Arg& a1,
                                     const Arg& a2,
                                     const Arg& a3,
                                     const Arg& a4,
                                     const Arg& a5,
                                     const Arg& a6,
                                     const Arg& a7,
                                     const Arg& a8,
                                     const Arg& a9,
                                     const Arg& a10,
                                     const Arg& a11,
                                     const Arg& a12,
                                     const Arg& a13,
                                     const Arg& a14,
                                     const Arg& a15) const {
  const Arg* args[kMaxArgs];
  int n = 0;
  if (&a0 == &no_more_args)  goto done; args[n++] = &a0;
  if (&a1 == &no_more_args)  goto done; args[n++] = &a1;
  if (&a2 == &no_more_args)  goto done; args[n++] = &a2;
  if (&a3 == &no_more_args)  goto done; args[n++] = &a3;
  if (&a4 == &no_more_args)  goto done; args[n++] = &a4;
  if (&a5 == &no_more_args)  goto done; args[n++] = &a5;
  if (&a6 == &no_more_args)  goto done; args[n++] = &a6;
  if (&a7 == &no_more_args)  goto done; args[n++] = &a7;
  if (&a8 == &no_more_args)  goto done; args[n++] = &a8;
  if (&a9 == &no_more_args)  goto done; args[n++] = &a9;
  if (&a10 == &no_more_args) goto done; args[n++] = &a10;
  if (&a11 == &no_more_args) goto done; args[n++] = &a11;
  if (&a12 == &no_more_args) goto done; args[n++] = &a12;
  if (&a13 == &no_more_args) goto done; args[n++] = &a13;
  if (&a14 == &no_more_args) goto done; args[n++] = &a14;
  if (&a15 == &no_more_args) goto done; args[n++] = &a15;
done:

  int consumed;
  int vec[kVecSize];
  if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
                          args, n, vec, kVecSize)) {
    input->remove_prefix(consumed);
    return true;
  } else {
    return false;
  }
}

bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
                                            const PCRE& pattern,
                                            const Arg& a0,
                                            const Arg& a1,
                                            const Arg& a2,
                                            const Arg& a3,
                                            const Arg& a4,
                                            const Arg& a5,
                                            const Arg& a6,
                                            const Arg& a7,
                                            const Arg& a8,
                                            const Arg& a9,
                                            const Arg& a10,
                                            const Arg& a11,
                                            const Arg& a12,
                                            const Arg& a13,
                                            const Arg& a14,
                                            const Arg& a15) const {
  const Arg* args[kMaxArgs];
  int n = 0;
  if (&a0 == &no_more_args)  goto done; args[n++] = &a0;
  if (&a1 == &no_more_args)  goto done; args[n++] = &a1;
  if (&a2 == &no_more_args)  goto done; args[n++] = &a2;
  if (&a3 == &no_more_args)  goto done; args[n++] = &a3;
  if (&a4 == &no_more_args)  goto done; args[n++] = &a4;
  if (&a5 == &no_more_args)  goto done; args[n++] = &a5;
  if (&a6 == &no_more_args)  goto done; args[n++] = &a6;
  if (&a7 == &no_more_args)  goto done; args[n++] = &a7;
  if (&a8 == &no_more_args)  goto done; args[n++] = &a8;
  if (&a9 == &no_more_args)  goto done; args[n++] = &a9;
  if (&a10 == &no_more_args) goto done; args[n++] = &a10;
  if (&a11 == &no_more_args) goto done; args[n++] = &a11;
  if (&a12 == &no_more_args) goto done; args[n++] = &a12;
  if (&a13 == &no_more_args) goto done; args[n++] = &a13;
  if (&a14 == &no_more_args) goto done; args[n++] = &a14;
  if (&a15 == &no_more_args) goto done; args[n++] = &a15;
done:

  int consumed;
  int vec[kVecSize];
  if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
                          args, n, vec, kVecSize)) {
    input->remove_prefix(consumed);
    return true;
  } else {
    return false;
  }
}

bool PCRE::Replace(string *str,
                 const PCRE& pattern,
                 const StringPiece& rewrite) {
  int vec[kVecSize];
  int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
  if (matches == 0)
    return false;

  string s;
  if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
    return false;

  assert(vec[0] >= 0);
  assert(vec[1] >= 0);
  str->replace(vec[0], vec[1] - vec[0], s);
  return true;
}

int PCRE::GlobalReplace(string *str,
                      const PCRE& pattern,
                      const StringPiece& rewrite) {
  int count = 0;
  int vec[kVecSize];
  string out;
  int start = 0;
  bool last_match_was_empty_string = false;

  for (; start <= str->length();) {
    // If the previous match was for the empty string, we shouldn't
    // just match again: we'll match in the same way and get an
    // infinite loop.  Instead, we do the match in a special way:
    // anchored -- to force another try at the same position --
    // and with a flag saying that this time, ignore empty matches.
    // If this special match returns, that means there's a non-empty
    // match at this position as well, and we can continue.  If not,
    // we do what perl does, and just advance by one.
    // Notice that perl prints '@@@' for this;
    //    perl -le '$_ = "aa"; s/b*|aa/@/g; print'
    int matches;
    if (last_match_was_empty_string) {
      matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
                                 vec, kVecSize);
      if (matches <= 0) {
        if (start < str->length())
          out.push_back((*str)[start]);
        start++;
        last_match_was_empty_string = false;
        continue;
      }
    } else {
      matches = pattern.TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
      if (matches <= 0)
        break;
    }
    int matchstart = vec[0], matchend = vec[1];
    assert(matchstart >= start);
    assert(matchend >= matchstart);

    out.append(*str, start, matchstart - start);
    pattern.Rewrite(&out, rewrite, *str, vec, matches);
    start = matchend;
    count++;
    last_match_was_empty_string = (matchstart == matchend);
  }

  if (count == 0)
    return 0;

  if (start < str->length())
    out.append(*str, start, str->length() - start);
  swap(out, *str);
  return count;
}

bool PCRE::Extract(const StringPiece &text,
                 const PCRE& pattern,
                 const StringPiece &rewrite,
                 string *out) {
  int vec[kVecSize];
  int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
  if (matches == 0)
    return false;
  out->clear();
  return pattern.Rewrite(out, rewrite, text, vec, matches);
}

string PCRE::QuoteMeta(const StringPiece& unquoted) {
  string result;
  result.reserve(unquoted.size() << 1);

  // Escape any ascii character not in [A-Za-z_0-9].
  //
  // Note that it's legal to escape a character even if it has no
  // special meaning in a regular expression -- so this function does
  // that.  (This also makes it identical to the perl function of the
  // same name except for the null-character special case;
  // see `perldoc -f quotemeta`.)
  for (int ii = 0; ii < unquoted.length(); ++ii) {
    // Note that using 'isalnum' here raises the benchmark time from
    // 32ns to 58ns:
    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
        unquoted[ii] != '_' &&
        // If this is the part of a UTF8 or Latin1 character, we need
        // to copy this byte without escaping.  Experimentally this is
        // what works correctly with the regexp library.
        !(unquoted[ii] & 128)) {
      if (unquoted[ii] == '\0') {  // Special handling for null chars.
        // Can't use "\\0" since the next character might be a digit.
        result += "\\x00";
        continue;
      }
      result += '\\';
    }
    result += unquoted[ii];
  }

  return result;
}

/***** Actual matching and rewriting code *****/

bool PCRE::HitLimit() {
  return hit_limit_;
}

void PCRE::ClearHitLimit() {
  hit_limit_ = 0;
}

int PCRE::TryMatch(const StringPiece& text,
                 int startpos,
                 Anchor anchor,
                 bool empty_ok,
                 int *vec,
                 int vecsize) const {
  pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
  if (re == NULL) {
    PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
    return 0;
  }

  int match_limit = match_limit_;
  if (match_limit <= 0) {
    match_limit = FLAGS_regexp_match_limit;
  }

  int stack_limit = stack_limit_;
  if (stack_limit <= 0) {
    stack_limit = FLAGS_regexp_stack_limit;
  }

  pcre_extra extra = { 0 };
  if (match_limit > 0) {
    extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
    extra.match_limit = match_limit;
  }
  if (stack_limit > 0) {
    extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    extra.match_limit_recursion = stack_limit / kPCREFrameSize;
  }

  int options = 0;
  if (anchor != UNANCHORED)
    options |= PCRE_ANCHORED;
  if (!empty_ok)
    options |= PCRE_NOTEMPTY;

  int rc = pcre_exec(re,              // The regular expression object
                     &extra,
                     (text.data() == NULL) ? "" : text.data(),
                     text.size(),
                     startpos,
                     options,
                     vec,
                     vecsize);

  // Handle errors
  if (rc == 0) {
    // pcre_exec() returns 0 as a special case when the number of
    // capturing subpatterns exceeds the size of the vector.
    // When this happens, there is a match and the output vector
    // is filled, but we miss out on the positions of the extra subpatterns.
    rc = vecsize / 2;
  } else if (rc < 0) {
    switch (rc) {
      case PCRE_ERROR_NOMATCH:
        return 0;
      case PCRE_ERROR_MATCHLIMIT:
        // Writing to hit_limit is not safe if multiple threads
        // are using the PCRE, but the flag is only intended
        // for use by unit tests anyway, so we let it go.
        hit_limit_ = true;
        PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
                        << " when matching '" << pattern_ << "'"
                        << " against text that is " << text.size() << " bytes.";
        return 0;
      case PCRE_ERROR_RECURSIONLIMIT:
        // See comment about hit_limit above.
        hit_limit_ = true;
        PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
                        << " when matching '" << pattern_ << "'"
                        << " against text that is " << text.size() << " bytes.";
        return 0;
      default:
        // There are other return codes from pcre.h :
        // PCRE_ERROR_NULL           (-2)
        // PCRE_ERROR_BADOPTION      (-3)
        // PCRE_ERROR_BADMAGIC       (-4)
        // PCRE_ERROR_UNKNOWN_NODE   (-5)
        // PCRE_ERROR_NOMEMORY       (-6)
        // PCRE_ERROR_NOSUBSTRING    (-7)
        // ...
        PCREPORT(ERROR) << "Unexpected return code: " << rc
                      << " when matching '" << pattern_ << "'"
                      << ", re=" << re
                      << ", text=" << text
                      << ", vec=" << vec
                      << ", vecsize=" << vecsize;
        return 0;
    }
  }

  return rc;
}

bool PCRE::DoMatchImpl(const StringPiece& text,
                     Anchor anchor,
                     int* consumed,
                     const Arg* const* args,
                     int n,
                     int* vec,
                     int vecsize) const {
  assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
  int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
  assert(matches >= 0);  // TryMatch never returns negatives
  if (matches == 0)
    return false;

  *consumed = vec[1];

  if (n == 0 || args == NULL) {
    // We are not interested in results
    return true;
  }
  if (NumberOfCapturingGroups() < n) {
    // PCRE has fewer capturing groups than number of arg pointers passed in
    return false;
  }

  // If we got here, we must have matched the whole pattern.
  // We do not need (can not do) any more checks on the value of 'matches' here
  // -- see the comment for TryMatch.
  for (int i = 0; i < n; i++) {
    const int start = vec[2*(i+1)];
    const int limit = vec[2*(i+1)+1];
    if (!args[i]->Parse(text.data() + start, limit-start)) {
      // TODO: Should we indicate what the error was?
      return false;
    }
  }

  return true;
}

bool PCRE::DoMatch(const StringPiece& text,
                 Anchor anchor,
                 int* consumed,
                 const Arg* const args[],
                 int n) const {
  assert(n >= 0);
  size_t const vecsize = (1 + n) * 3;  // results + PCRE workspace
                                       // (as for kVecSize)
  int *vec = new int[vecsize];
  bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
  delete[] vec;
  return b;
}

bool PCRE::Rewrite(string *out, const StringPiece &rewrite,
                 const StringPiece &text, int *vec, int veclen) const {
  int number_of_capturing_groups = NumberOfCapturingGroups();
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
       s < end; s++) {
    int c = *s;
    if (c == '\\') {
      c = *++s;
      if (isdigit(c)) {
        int n = (c - '0');
        if (n >= veclen) {
          if (n <= number_of_capturing_groups) {
            // unmatched optional capturing group. treat
            // its value as empty string; i.e., nothing to append.
          } else {
            PCREPORT(ERROR) << "requested group " << n
                          << " in regexp " << rewrite.data();
            return false;
          }
        }
        int start = vec[2 * n];
        if (start >= 0)
          out->append(text.data() + start, vec[2 * n + 1] - start);
      } else if (c == '\\') {
        out->push_back('\\');
      } else {
        PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
        return false;
      }
    } else {
      out->push_back(c);
    }
  }
  return true;
}

bool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const {
  int max_token = -1;
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
       s < end; s++) {
    int c = *s;
    if (c != '\\') {
      continue;
    }
    if (++s == end) {
      *error = "Rewrite schema error: '\\' not allowed at end.";
      return false;
    }
    c = *s;
    if (c == '\\') {
      continue;
    }
    if (!isdigit(c)) {
      *error = "Rewrite schema error: "
               "'\\' must be followed by a digit or '\\'.";
      return false;
    }
    int n = (c - '0');
    if (max_token < n) {
      max_token = n;
    }
  }

  if (max_token > NumberOfCapturingGroups()) {
    SStringPrintf(error, "Rewrite schema requests %d matches, "
                  "but the regexp only has %d parenthesized subexpressions.",
                  max_token, NumberOfCapturingGroups());
    return false;
  }
  return true;
}


// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction.
int PCRE::NumberOfCapturingGroups() const {
  if (re_partial_ == NULL) return -1;

  int result;
  CHECK(pcre_fullinfo(re_partial_,       // The regular expression object
                      NULL,              // We did not study the pattern
                      PCRE_INFO_CAPTURECOUNT,
                      &result) == 0);
  return result;
}


/***** Parsers for various types *****/

bool PCRE::Arg::parse_null(const char* str, int n, void* dest) {
  // We fail if somebody asked us to store into a non-NULL void* pointer
  return (dest == NULL);
}

bool PCRE::Arg::parse_string(const char* str, int n, void* dest) {
  if (dest == NULL) return true;
  reinterpret_cast<string*>(dest)->assign(str, n);
  return true;
}

bool PCRE::Arg::parse_stringpiece(const char* str, int n, void* dest) {
  if (dest == NULL) return true;
  reinterpret_cast<StringPiece*>(dest)->set(str, n);
  return true;
}

bool PCRE::Arg::parse_char(const char* str, int n, void* dest) {
  if (n != 1) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<char*>(dest)) = str[0];
  return true;
}

bool PCRE::Arg::parse_uchar(const char* str, int n, void* dest) {
  if (n != 1) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<unsigned char*>(dest)) = str[0];
  return true;
}

// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1
// PCREQUIPCRES "n > 0"
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
static const char* TerminateNumber(char* buf, const char* str, int n) {
  if ((n > 0) && isspace(*str)) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit.
  if (isdigit(str[n]) ||
      ((str[n] >= 'a') && (str[n] <= 'f')) ||
      ((str[n] >= 'A') && (str[n] <= 'F'))) {
    if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
    memcpy(buf, str, n);
    buf[n] = '\0';
    return buf;
  } else {
    // We can parse right out of the supplied string, so return it.
    return str;
  }
}

bool PCRE::Arg::parse_long_radix(const char* str,
                               int n,
                               void* dest,
                               int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);
  char* end;
  errno = 0;
  long r = strtol(str, &end, radix);
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<long*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_ulong_radix(const char* str,
                                int n,
                                void* dest,
                                int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);
  if (str[0] == '-') {
   // strtoul() will silently accept negative numbers and parse
   // them.  This module is more strict and treats them as errors.
   return false;
  }

  char* end;
  errno = 0;
  unsigned long r = strtoul(str, &end, radix);
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<unsigned long*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_short_radix(const char* str,
                                int n,
                                void* dest,
                                int radix) {
  long r;
  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
  if ((short)r != r) return false;       // Out of range
  if (dest == NULL) return true;
  *(reinterpret_cast<short*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_ushort_radix(const char* str,
                                 int n,
                                 void* dest,
                                 int radix) {
  unsigned long r;
  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
  if ((ushort)r != r) return false;                      // Out of range
  if (dest == NULL) return true;
  *(reinterpret_cast<unsigned short*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_int_radix(const char* str,
                              int n,
                              void* dest,
                              int radix) {
  long r;
  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
  if ((int)r != r) return false;         // Out of range
  if (dest == NULL) return true;
  *(reinterpret_cast<int*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_uint_radix(const char* str,
                               int n,
                               void* dest,
                               int radix) {
  unsigned long r;
  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
  if ((uint)r != r) return false;                       // Out of range
  if (dest == NULL) return true;
  *(reinterpret_cast<unsigned int*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_longlong_radix(const char* str,
                                   int n,
                                   void* dest,
                                   int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);
  char* end;
  errno = 0;
  int64 r = strtoll(str, &end, radix);
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<int64*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_ulonglong_radix(const char* str,
                                    int n,
                                    void* dest,
                                    int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);
  if (str[0] == '-') {
    // strtoull() will silently accept negative numbers and parse
    // them.  This module is more strict and treats them as errors.
    return false;
  }
  char* end;
  errno = 0;
  uint64 r = strtoull(str, &end, radix);
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<uint64*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_double(const char* str, int n, void* dest) {
  if (n == 0) return false;
  static const int kMaxLength = 200;
  char buf[kMaxLength];
  if (n >= kMaxLength) return false;
  memcpy(buf, str, n);
  buf[n] = '\0';
  errno = 0;
  char* end;
  double r = strtod(buf, &end);
  if (end != buf + n) {
#ifdef COMPILER_MSVC
    // Microsoft's strtod() doesn't handle inf and nan, so we have to
    // handle it explicitly.  Speed is not important here because this
    // code is only called in unit tests.
    bool pos = true;
    const char* i = buf;
    if ('-' == *i) {
      pos = false;
      ++i;
    } else if ('+' == *i) {
      ++i;
    }
    if (0 == stricmp(i, "inf") || 0 == stricmp(i, "infinity")) {
      r = numeric_limits<double>::infinity();
      if (!pos)
        r = -r;
    } else if (0 == stricmp(i, "nan")) {
      r = numeric_limits<double>::quiet_NaN();
    } else {
      return false;
    }
#else
    return false;   // Leftover junk
#endif
  }
  if (errno) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<double*>(dest)) = r;
  return true;
}

bool PCRE::Arg::parse_float(const char* str, int n, void* dest) {
  double r;
  if (!parse_double(str, n, &r)) return false;
  if (dest == NULL) return true;
  *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
  return true;
}


#define DEFINE_INTEGER_PARSERS(name)                                        \
  bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) {          \
    return parse_##name##_radix(str, n, dest, 10);                          \
  }                                                                         \
  bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) {    \
    return parse_##name##_radix(str, n, dest, 16);                          \
  }                                                                         \
  bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) {  \
    return parse_##name##_radix(str, n, dest, 8);                           \
  }                                                                         \
  bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
    return parse_##name##_radix(str, n, dest, 0);                           \
  }

DEFINE_INTEGER_PARSERS(short);
DEFINE_INTEGER_PARSERS(ushort);
DEFINE_INTEGER_PARSERS(int);
DEFINE_INTEGER_PARSERS(uint);
DEFINE_INTEGER_PARSERS(long);
DEFINE_INTEGER_PARSERS(ulong);
DEFINE_INTEGER_PARSERS(longlong);
DEFINE_INTEGER_PARSERS(ulonglong);

#undef DEFINE_INTEGER_PARSERS

}  // namespace re2

/* [<][>][^][v][top][bottom][index][help] */