root/third_party/re2/re2/testing/exhaustive_tester.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. escape
  2. PrintResult
  3. HandleRegexp
  4. ExhaustiveTest
  5. EgrepTest

// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Exhaustive testing of regular expression matching.

// Each test picks an alphabet (e.g., "abc"), a maximum string length,
// a maximum regular expression length, and a maximum number of letters
// that can appear in the regular expression.  Given these parameters,
// it tries every possible regular expression and string, verifying that
// the NFA, DFA, and a trivial backtracking implementation agree about
// the location of the match.

#include <stdlib.h>
#include <stdio.h>

#ifndef LOGGING
#define LOGGING 0
#endif

#include "util/test.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/tester.h"

DEFINE_bool(show_regexps, false, "show regexps during testing");

DEFINE_int32(max_bad_regexp_inputs, 1,
             "Stop testing a regular expression after finding this many "
             "strings that break it.");

// Compiled in debug mode, the usual tests run for over an hour.
// Have to cut it down to make the unit test machines happy.
DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");

namespace re2 {

static char* escape(const StringPiece& sp) {
  static char buf[512];
  char* p = buf;
  *p++ = '\"';
  for (int i = 0; i < sp.size(); i++) {
    if(p+5 >= buf+sizeof buf)
      LOG(FATAL) << "ExhaustiveTester escape: too long";
    if(sp[i] == '\\' || sp[i] == '\"') {
      *p++ = '\\';
      *p++ = sp[i];
    } else if(sp[i] == '\n') {
      *p++ = '\\';
      *p++ = 'n';
    } else {
      *p++ = sp[i];
    }
  }
  *p++ = '\"';
  *p = '\0';
  return buf;
}

static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
  if (!re.Match(input, 0, input.size(), anchor, m, n)) {
    printf("-");
    return;
  }
  for (int i = 0; i < n; i++) {
    if (i > 0)
      printf(" ");
    if (m[i].begin() == NULL)
      printf("-");
    else
      printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
  }
}
        
// Processes a single generated regexp.
// Compiles it using Regexp interface and PCRE, and then
// checks that NFA, DFA, and PCRE all return the same results.
void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
  regexps_++;
  string regexp = const_regexp;
  if (!topwrapper_.empty())
    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());

  if (FLAGS_show_regexps) {
    printf("\r%s", regexp.c_str());
    fflush(stdout);
  }

  if (LOGGING) {
    // Write out test cases and answers for use in testing
    // other implementations, such as Go's regexp package.
    if (randomstrings_)
      LOG(ERROR) << "Cannot log with random strings.";
    if (regexps_ == 1) {  // first
      printf("strings\n");
      strgen_.Reset();
      while (strgen_.HasNext())
        printf("%s\n", escape(strgen_.Next()));
      printf("regexps\n");
    }
    printf("%s\n", escape(regexp));

    RE2 re(regexp);
    RE2::Options longest;
    longest.set_longest_match(true);
    RE2 relongest(regexp, longest);
    int ngroup = re.NumberOfCapturingGroups()+1;
    StringPiece* group = new StringPiece[ngroup];

    strgen_.Reset();
    while (strgen_.HasNext()) {
      StringPiece input = strgen_.Next();
      PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
      printf(";");
      PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
      printf(";");
      PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
      printf(";");
      PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
      printf("\n");
    }
    delete[] group;
    return;
  }

  Tester tester(regexp);
  if (tester.error())
    return;

  strgen_.Reset();
  strgen_.GenerateNULL();
  if (randomstrings_)
    strgen_.Random(stringseed_, stringcount_);
  int bad_inputs = 0;
  while (strgen_.HasNext()) {
    tests_++;
    if (!tester.TestInput(strgen_.Next())) {
      failures_++;
      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
        break;
    }
  }
}

// Runs an exhaustive test on the given parameters.
void ExhaustiveTest(int maxatoms, int maxops,
                    const vector<string>& alphabet,
                    const vector<string>& ops,
                    int maxstrlen, const vector<string>& stralphabet,
                    const string& wrapper,
                    const string& topwrapper) {
  if (DEBUG_MODE && FLAGS_quick_debug_mode) {
    if (maxatoms > 1)
      maxatoms--;
    if (maxops > 1)
      maxops--;
    if (maxstrlen > 1)
      maxstrlen--;
  }
  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
                     maxstrlen, stralphabet, wrapper,
                     topwrapper);
  t.Generate();
  if (!LOGGING) {
    printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
           t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
  }
  EXPECT_EQ(0, t.failures());
}

// Runs an exhaustive test using the given parameters and
// the basic egrep operators.
void EgrepTest(int maxatoms, int maxops, const string& alphabet,
               int maxstrlen, const string& stralphabet,
               const string& wrapper) {
  const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };

  for (int i = 0; i < arraysize(tops); i++) {
    ExhaustiveTest(maxatoms, maxops,
                   Split("", alphabet),
                   RegexpGenerator::EgrepOps(),
                   maxstrlen,
                   Split("", stralphabet),
                   wrapper,
                   tops[i]);
  }
}

}  // namespace re2

/* [<][>][^][v][top][bottom][index][help] */