This source file includes following definitions.
- SimplifyRegexp
- ComputeSimple
- Simplify
- Copy
- ShortVisit
- PreVisit
- PostVisit
- Concat2
- SimplifyRepeat
- SimplifyCharClass
#include "util/util.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
namespace re2 {
bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
                            string* dst,
                            RegexpStatus* status) {
  Regexp* re = Parse(src, flags, status);
  if (re == NULL)
    return false;
  Regexp* sre = re->Simplify();
  re->Decref();
  if (sre == NULL) {
    
    LOG(ERROR) << "Simplify failed on " << src;
    if (status) {
      status->set_code(kRegexpInternalError);
      status->set_error_arg(src);
    }
    return false;
  }
  *dst = sre->ToString();
  sre->Decref();
  return true;
}
bool Regexp::ComputeSimple() {
  Regexp** subs;
  switch (op_) {
    case kRegexpNoMatch:
    case kRegexpEmptyMatch:
    case kRegexpLiteral:
    case kRegexpLiteralString:
    case kRegexpBeginLine:
    case kRegexpEndLine:
    case kRegexpBeginText:
    case kRegexpWordBoundary:
    case kRegexpNoWordBoundary:
    case kRegexpEndText:
    case kRegexpAnyChar:
    case kRegexpAnyByte:
    case kRegexpHaveMatch:
      return true;
    case kRegexpConcat:
    case kRegexpAlternate:
      
      subs = sub();
      for (int i = 0; i < nsub_; i++)
        if (!subs[i]->simple_)
          return false;
      return true;
    case kRegexpCharClass:
      
      if (ccb_ != NULL)
        return !ccb_->empty() && !ccb_->full();
      return !cc_->empty() && !cc_->full();
    case kRegexpCapture:
      subs = sub();
      return subs[0]->simple_;
    case kRegexpStar:
    case kRegexpPlus:
    case kRegexpQuest:
      subs = sub();
      if (!subs[0]->simple_)
        return false;
      switch (subs[0]->op_) {
        case kRegexpStar:
        case kRegexpPlus:
        case kRegexpQuest:
        case kRegexpEmptyMatch:
        case kRegexpNoMatch:
          return false;
        default:
          break;
      }
      return true;
    case kRegexpRepeat:
      return false;
  }
  LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
  return false;
}
class SimplifyWalker : public Regexp::Walker<Regexp*> {
 public:
  SimplifyWalker() {}
  virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
  virtual Regexp* PostVisit(Regexp* re,
                            Regexp* parent_arg,
                            Regexp* pre_arg,
                            Regexp** child_args, int nchild_args);
  virtual Regexp* Copy(Regexp* re);
  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
 private:
  
  
  
  
  static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
  
  
  
  static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
                                Regexp::ParseFlags parse_flags);
  
  
  
  static Regexp* SimplifyCharClass(Regexp* re);
  DISALLOW_EVIL_CONSTRUCTORS(SimplifyWalker);
};
Regexp* Regexp::Simplify() {
  if (simple_)
    return Incref();
  SimplifyWalker w;
  return w.Walk(this, NULL);
}
#define Simplify DontCallSimplify  
Regexp* SimplifyWalker::Copy(Regexp* re) {
  return re->Incref();
}
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
  
  
  LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
  return re->Incref();
}
Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
  if (re->simple_) {
    *stop = true;
    return re->Incref();
  }
  return NULL;
}
Regexp* SimplifyWalker::PostVisit(Regexp* re,
                                  Regexp* parent_arg,
                                  Regexp* pre_arg,
                                  Regexp** child_args,
                                  int nchild_args) {
  switch (re->op()) {
    case kRegexpNoMatch:
    case kRegexpEmptyMatch:
    case kRegexpLiteral:
    case kRegexpLiteralString:
    case kRegexpBeginLine:
    case kRegexpEndLine:
    case kRegexpBeginText:
    case kRegexpWordBoundary:
    case kRegexpNoWordBoundary:
    case kRegexpEndText:
    case kRegexpAnyChar:
    case kRegexpAnyByte:
    case kRegexpHaveMatch:
      
      re->simple_ = true;
      return re->Incref();
    case kRegexpConcat:
    case kRegexpAlternate: {
      
      
      bool changed = false;
      Regexp** subs = re->sub();
      for (int i = 0; i < re->nsub_; i++) {
        Regexp* sub = subs[i];
        Regexp* newsub = child_args[i];
        if (newsub != sub) {
          changed = true;
          break;
        }
      }
      if (!changed) {
        for (int i = 0; i < re->nsub_; i++) {
          Regexp* newsub = child_args[i];
          newsub->Decref();
        }
        re->simple_ = true;
        return re->Incref();
      }
      Regexp* nre = new Regexp(re->op(), re->parse_flags());
      nre->AllocSub(re->nsub_);
      Regexp** nre_subs = nre->sub();
      for (int i = 0; i <re->nsub_; i++)
        nre_subs[i] = child_args[i];
      nre->simple_ = true;
      return nre;
    }
    case kRegexpCapture: {
      Regexp* newsub = child_args[0];
      if (newsub == re->sub()[0]) {
        newsub->Decref();
        re->simple_ = true;
        return re->Incref();
      }
      Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
      nre->AllocSub(1);
      nre->sub()[0] = newsub;
      nre->cap_ = re->cap_;
      nre->simple_ = true;
      return nre;
    }
    case kRegexpStar:
    case kRegexpPlus:
    case kRegexpQuest: {
      Regexp* newsub = child_args[0];
      
      
      if (newsub->op() == kRegexpEmptyMatch)
        return newsub;
      
      if (newsub == re->sub()[0]) {
        newsub->Decref();
        re->simple_ = true;
        return re->Incref();
      }
      
      if (re->op() == newsub->op() &&
          re->parse_flags() == newsub->parse_flags())
        return newsub;
      Regexp* nre = new Regexp(re->op(), re->parse_flags());
      nre->AllocSub(1);
      nre->sub()[0] = newsub;
      nre->simple_ = true;
      return nre;
    }
    case kRegexpRepeat: {
      Regexp* newsub = child_args[0];
      
      
      if (newsub->op() == kRegexpEmptyMatch)
        return newsub;
      Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
                                   re->parse_flags());
      newsub->Decref();
      nre->simple_ = true;
      return nre;
    }
    case kRegexpCharClass: {
      Regexp* nre = SimplifyCharClass(re);
      nre->simple_ = true;
      return nre;
    }
  }
  LOG(ERROR) << "Simplify case not handled: " << re->op();
  return re->Incref();
}
Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
                                Regexp::ParseFlags parse_flags) {
  Regexp* re = new Regexp(kRegexpConcat, parse_flags);
  re->AllocSub(2);
  Regexp** subs = re->sub();
  subs[0] = re1;
  subs[1] = re2;
  return re;
}
Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
                                       Regexp::ParseFlags f) {
  
  if (max == -1) {
    
    if (min == 0)
      return Regexp::Star(re->Incref(), f);
    
    if (min == 1)
      return Regexp::Plus(re->Incref(), f);
    
    Regexp* nre = new Regexp(kRegexpConcat, f);
    nre->AllocSub(min);
    VLOG(1) << "Simplify " << min;
    Regexp** nre_subs = nre->sub();
    for (int i = 0; i < min-1; i++)
      nre_subs[i] = re->Incref();
    nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
    return nre;
  }
  
  if (min == 0 && max == 0)
    return new Regexp(kRegexpEmptyMatch, f);
  
  if (min == 1 && max == 1)
    return re->Incref();
  
  
  
  
  Regexp* nre = NULL;
  if (min > 0) {
    nre = new Regexp(kRegexpConcat, f);
    nre->AllocSub(min);
    Regexp** nre_subs = nre->sub();
    for (int i = 0; i < min; i++)
      nre_subs[i] = re->Incref();
  }
  
  if (max > min) {
    Regexp* suf = Regexp::Quest(re->Incref(), f);
    for (int i = min+1; i < max; i++)
      suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
    if (nre == NULL)
      nre = suf;
    else
      nre = Concat2(nre, suf, f);
  }
  if (nre == NULL) {
    
    
    LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
    return new Regexp(kRegexpNoMatch, f);
  }
  return nre;
}
Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
  CharClass* cc = re->cc();
  
  if (cc->empty())
    return new Regexp(kRegexpNoMatch, re->parse_flags());
  if (cc->full())
    return new Regexp(kRegexpAnyChar, re->parse_flags());
  return re->Incref();
}
}