This source file includes the following definitions.
- url_encoded_pattern
- pattern
- transfer_padding_pattern
- crlf_pattern
- closing_pattern
- epilogue_pattern
- crlf_free_pattern
- preamble_pattern
- header_pattern
- content_disposition_pattern
- name_pattern
- value_pattern
- unquote_pattern
- Create
- CreateFromContentTypeHeader
- patterns_
- AllDataReadOK
- GetNextNameValue
- SetSource
- CreateBoundaryPatternFromLiteral
- StartsWithPattern
- patterns_
- AllDataReadOK
- FinishReadingPart
- GetNextNameValue
- SetSource
- TryReadHeader
#include "chrome/browser/extensions/api/web_request/form_data_parser.h"
#include <vector>
#include "base/lazy_instance.h"
#include "base/strings/string_util.h"
#include "base/values.h"
#include "net/base/escape.h"
#include "net/url_request/url_request.h"
#include "third_party/re2/re2/re2.h"
using base::DictionaryValue;
using base::ListValue;
using base::StringPiece;
using re2::RE2;
namespace extensions {
namespace {
// Lower-case name of the Content-Disposition header, including the trailing
// colon. It is a macro (rather than a constant) so that it can be pasted into
// other string literals and measured with sizeof; it is #undef'd after the
// Patterns constructor.
#define CONTENT_DISPOSITION "content-disposition:"
// Regular-expression text consisting of two backslashes followed by 'E', i.e.
// a pattern matching a literal backslash followed by the letter E. Used both
// as the source of Patterns::unquote_pattern and as a fragment appended to the
// boundary pattern in CreateBoundaryPatternFromLiteral.
static const char g_escape_closing_quote[] = "\\\\E";
// Number of characters in CONTENT_DISPOSITION, not counting the terminating
// NUL. Used as the start offset when searching a header for name/filename.
static const size_t g_content_disposition_length =
sizeof(CONTENT_DISPOSITION) - 1;
// Bundle of all pre-compiled regular expressions used by the parsers in this
// file. The members are const and are compiled once in the constructor; the
// declaration order below is also the order of the constructor's initializer
// list.
struct Patterns {
Patterns();
~Patterns();
// Patterns used by the multipart parser.
const RE2 transfer_padding_pattern;
const RE2 crlf_pattern;
const RE2 closing_pattern;
const RE2 epilogue_pattern;
const RE2 crlf_free_pattern;
const RE2 preamble_pattern;
const RE2 header_pattern;
const RE2 content_disposition_pattern;
const RE2 name_pattern;
const RE2 value_pattern;
// Matches the quote-terminating sequence inside a boundary literal; see
// FormDataParserMultipart::CreateBoundaryPatternFromLiteral.
const RE2 unquote_pattern;
// Pattern used by the URL-encoded parser to match one name=value pair.
const RE2 url_encoded_pattern;
};
// Compiles every regular expression used by the parsers.
Patterns::Patterns()
// Optional spaces/tabs followed by CRLF.
: transfer_padding_pattern("[ \\t]*\\r\\n"),
// A single CRLF.
crlf_pattern("\\r\\n"),
// "--" followed by optional spaces/tabs.
closing_pattern("--[ \\t]*"),
// Either nothing, or CRLF followed by arbitrary characters (the "s" flag lets
// '.' also match newlines).
// NOTE(review): the empty alternative is listed first; with RE2's default
// leftmost-first semantics this presumably always matches the empty string
// and never consumes the epilogue -- confirm.
epilogue_pattern("|\\r\\n(?s:.)*"),
// Zero or more of: a non-CR character, or a run of CRs followed by a
// character that is neither CR nor LF.
crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"),
// Non-greedy "one or more of any character"; used to step over the preamble.
preamble_pattern(".+?"),
// Field name made of characters '!'..'9' and ';'..'~' (i.e. excluding ':'),
// a colon, then any characters or folded continuations (CRLF followed by a
// tab or space), ending with CRLF.
header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"),
// Case-insensitive match of the Content-Disposition header name and colon.
content_disposition_pattern("(?i:" CONTENT_DISPOSITION ")"),
// name="..." at a word boundary; group 1 captures the text between quotes.
name_pattern("\\bname=\"([^\"]*)\""),
// filename="..." at a word boundary; group 1 captures the text between quotes.
value_pattern("\\bfilename=\"([^\"]*)\""),
// Literal backslash followed by 'E' (see g_escape_closing_quote).
unquote_pattern(g_escape_closing_quote),
// One character of a URL-encoded token: an ASCII letter or digit, one of the
// listed punctuation characters, '-', or a %XX hexadecimal escape.
#define CHARACTER "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))"
// "<name>=<value>", where both sides are (possibly empty) runs of CHARACTER;
// group 1 captures the name and group 2 the value.
url_encoded_pattern("(" CHARACTER "*)=(" CHARACTER "*)") {}
// The helper macros are only needed to build the literals above.
#undef CHARACTER
#undef CONTENT_DISPOSITION
// Out-of-line destructor; nothing to release beyond the members themselves.
Patterns::~Patterns() {}
// Shared Patterns instance, held in a base::LazyInstance of the Leaky variant.
// The parsers below obtain it through g_patterns.Get() / g_patterns.Pointer().
static base::LazyInstance<Patterns>::Leaky g_patterns =
LAZY_INSTANCE_INITIALIZER;
}
// FormDataParser implementation that reads "name=value" pairs separated by
// '&' from a single source, matching each pair with
// Patterns::url_encoded_pattern.
class FormDataParserUrlEncoded : public FormDataParser {
public:
FormDataParserUrlEncoded();
virtual ~FormDataParserUrlEncoded();
// FormDataParser implementation.
// True once a source was set, it has been consumed completely, and no
// malformed input was seen.
virtual bool AllDataReadOK() OVERRIDE;
// Consumes the next pair from the source and stores the unescaped name and
// value in |result|.
virtual bool GetNextNameValue(Result* result) OVERRIDE;
// Accepts the source only on the first call; later calls return false.
virtual bool SetSource(const base::StringPiece& source) OVERRIDE;
private:
// Convenience accessor for the shared name=value pattern.
const RE2& pattern() const {
return patterns_->url_encoded_pattern;
}
// Number of capture groups (name and value) passed to RE2::ConsumeN.
static const size_t args_size_ = 2u;
// Unescape rules applied to both the name and the value.
static const net::UnescapeRule::Type unescape_rules_;
// Not-yet-consumed remainder of the source.
re2::StringPiece source_;
// Whether SetSource() has been called successfully.
bool source_set_;
// Set when something other than '&' follows a parsed pair.
bool source_malformed_;
// Buffers that RE2 fills with the raw (still escaped) captures.
std::string name_;
std::string value_;
// RE2 argument wrappers pointing at |name_| and |value_|, and the array of
// pointers to them handed to RE2::ConsumeN.
const RE2::Arg arg_name_;
const RE2::Arg arg_value_;
const RE2::Arg* args_[args_size_];
// Shared pre-compiled patterns (points into g_patterns).
const Patterns* patterns_;
DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);
};
// FormDataParser implementation for bodies delimited by a boundary string.
// The body may be supplied in several pieces through repeated SetSource()
// calls; |state_| tracks the progress across those calls.
class FormDataParserMultipart : public FormDataParser {
public:
// |boundary_separator| is the boundary literal (without the leading "--").
explicit FormDataParserMultipart(const std::string& boundary_separator);
virtual ~FormDataParserMultipart();
// FormDataParser implementation.
// True only when the closing delimiter has been read (STATE_FINISHED).
virtual bool AllDataReadOK() OVERRIDE;
// Reads the headers and body of the next part and reports its name/value.
virtual bool GetNextNameValue(Result* result) OVERRIDE;
// Supplies the next piece of the body; rejected while the previous piece has
// not been consumed completely or when |source| has no data pointer.
virtual bool SetSource(const base::StringPiece& source) OVERRIDE;
private:
enum State {
// No source seen yet; the first SetSource() seeks the first boundary.
STATE_INIT,
// Positioned at the headers of a part; GetNextNameValue() may proceed.
STATE_READY,
// The closing delimiter has been read.
STATE_FINISHED,
// The headers of a part supplied a value and the source ran out; the next
// SetSource() resumes by skipping to the following boundary.
STATE_SUSPEND,
// Parsing failed.
STATE_ERROR
};
// Builds regular-expression text that matches "--" followed by |literal|
// verbatim.
static std::string CreateBoundaryPatternFromLiteral(
const std::string& literal);
// Whether |pattern| matches at the beginning of |input|.
static bool StartsWithPattern(const re2::StringPiece& input,
const RE2& pattern);
// Consumes one header line from |source_|. Returns false when no header could
// be read. For a Content-Disposition header, stores the name in |name| and,
// if a filename is present, stores it in |value| and sets |value_assigned|.
bool TryReadHeader(base::StringPiece* name,
base::StringPiece* value,
bool* value_assigned);
// Advances |source_| past the body of the current part and the boundary that
// follows it. If |data| is not NULL, it receives the body of the part.
bool FinishReadingPart(base::StringPiece* data);
// Convenience accessors for the shared pre-compiled patterns.
const RE2& transfer_padding_pattern() const {
return patterns_->transfer_padding_pattern;
}
const RE2& crlf_pattern() const {
return patterns_->crlf_pattern;
}
const RE2& closing_pattern() const {
return patterns_->closing_pattern;
}
const RE2& epilogue_pattern() const {
return patterns_->epilogue_pattern;
}
const RE2& crlf_free_pattern() const {
return patterns_->crlf_free_pattern;
}
const RE2& preamble_pattern() const {
return patterns_->preamble_pattern;
}
const RE2& header_pattern() const {
return patterns_->header_pattern;
}
const RE2& content_disposition_pattern() const {
return patterns_->content_disposition_pattern;
}
const RE2& name_pattern() const {
return patterns_->name_pattern;
}
const RE2& value_pattern() const {
return patterns_->value_pattern;
}
// Static because it is called from the static
// CreateBoundaryPatternFromLiteral(), so it reads g_patterns directly instead
// of going through |patterns_|.
static const RE2& unquote_pattern() {
return g_patterns.Get().unquote_pattern;
}
// Matches "--" followed by the boundary literal of this form.
const RE2 dash_boundary_pattern_;
// Current parsing state; see State.
State state_;
// Not-yet-consumed remainder of the current source piece.
re2::StringPiece source_;
// Shared pre-compiled patterns (points into g_patterns).
const Patterns* patterns_;
DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);
};
// Out-of-line, empty constructor and destructor of FormDataParser::Result,
// followed by the empty destructor of FormDataParser itself.
FormDataParser::Result::Result() {}
FormDataParser::Result::~Result() {}
FormDataParser::~FormDataParser() {}
// Creates a parser suited to |request| by looking at its Content-Type request
// header. When the header is absent, NULL is forwarded to
// CreateFromContentTypeHeader(), which decides what to do in that case.
scoped_ptr<FormDataParser> FormDataParser::Create(
    const net::URLRequest& request) {
  std::string content_type;
  if (!request.extra_request_headers().GetHeader(
          net::HttpRequestHeaders::kContentType, &content_type)) {
    return CreateFromContentTypeHeader(NULL);
  }
  return CreateFromContentTypeHeader(&content_type);
}
// Creates a parser based on the value of a Content-Type header.
//
// |content_type_header| may be NULL, in which case a URL-encoded parser is
// returned. Otherwise the media type (the text before the first ';') selects
// the parser:
//   * "application/x-www-form-urlencoded" -> FormDataParserUrlEncoded
//   * "multipart/form-data"               -> FormDataParserMultipart, using
//     the value of the "boundary=" parameter
// Returns an empty scoped_ptr for any other media type, or when a multipart
// header has no (or an empty) boundary parameter.
scoped_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader(
    const std::string* content_type_header) {
  enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE};
  ParserChoice choice = ERROR_CHOICE;
  std::string boundary;

  if (content_type_header == NULL) {
    choice = URL_ENCODED;
  } else {
    // The media type is everything before the first ';' (or the whole header
    // when there are no parameters).
    const std::string content_type(
        content_type_header->substr(0, content_type_header->find(';')));

    if (base::strcasecmp(
            content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
      choice = URL_ENCODED;
    } else if (base::strcasecmp(
                   content_type.c_str(), "multipart/form-data") == 0) {
      static const char kBoundaryString[] = "boundary=";
      size_t offset = content_type_header->find(kBoundaryString);
      if (offset == std::string::npos) {
        // Malformed multipart header: the boundary parameter is mandatory.
        return scoped_ptr<FormDataParser>();
      }
      offset += sizeof(kBoundaryString) - 1;

      // The boundary value extends to the next ';' or to the end of the
      // header. std::string::substr() takes a *length*, not an end position,
      // so the position returned by find() has to be converted first;
      // otherwise any parameters following the boundary would be included in
      // the boundary value.
      const size_t boundary_end = content_type_header->find(';', offset);
      const size_t boundary_length =
          (boundary_end == std::string::npos) ? std::string::npos
                                              : boundary_end - offset;
      boundary = content_type_header->substr(offset, boundary_length);
      if (!boundary.empty())
        choice = MULTIPART;
    }
  }

  switch (choice) {
    case URL_ENCODED:
      return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
    case MULTIPART:
      return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
    default:
      return scoped_ptr<FormDataParser>();
  }
}
// Out-of-line, empty base-class constructor.
FormDataParser::FormDataParser() {}
// Rules passed to net::UnescapeURLComponent() for both names and values in
// FormDataParserUrlEncoded::GetNextNameValue(): the URL_SPECIAL_CHARS,
// CONTROL_CHARS, SPACES and REPLACE_PLUS_WITH_SPACE flags combined.
const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ =
net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
// Starts with no source set. The RE2 argument wrappers are bound to |name_|
// and |value_| so that a successful match writes the two captures there.
FormDataParserUrlEncoded::FormDataParserUrlEncoded()
: source_(NULL),
source_set_(false),
source_malformed_(false),
arg_name_(&name_),
arg_value_(&value_),
patterns_(g_patterns.Pointer()) {
// Capture group 1 -> name, capture group 2 -> value.
args_[0] = &arg_name_;
args_[1] = &arg_value_;
}
// Out-of-line, empty destructor.
FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
// Reports whether parsing completed cleanly: a source must have been set, no
// malformed input may have been seen, and nothing may be left unconsumed.
bool FormDataParserUrlEncoded::AllDataReadOK() {
  if (!source_set_ || source_malformed_)
    return false;
  return source_.size() == 0;
}
// Consumes the next "name=value" pair from the source.
//
// On a successful match the unescaped name and value are stored in |result|.
// Whatever follows the pair must be either the end of the source or a single
// '&' separator (which is skipped); anything else marks the source as
// malformed. Returns true iff a pair was read and the source is not
// malformed.
bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
  if (source_malformed_ || !source_set_)
    return false;

  const bool matched =
      RE2::ConsumeN(&source_, pattern(), args_, args_size_);
  if (matched) {
    result->set_name(net::UnescapeURLComponent(name_, unescape_rules_));
    result->set_value(net::UnescapeURLComponent(value_, unescape_rules_));
  }

  // Nothing left: the outcome is just whether a pair was matched.
  if (source_.length() == 0)
    return matched;

  // Only a '&' separator may follow a pair.
  if (source_[0] != '&') {
    source_malformed_ = true;
    return false;
  }
  source_.remove_prefix(1);
  return matched;
}
// Installs |source| as the data to parse. Only the first call is accepted;
// any later call leaves the parser untouched and returns false.
bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {
  const bool first_call = !source_set_;
  if (first_call) {
    source_.set(source.data(), source.size());
    source_malformed_ = false;
    source_set_ = true;
  }
  return first_call;
}
std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral(
const std::string& literal) {
#define OPEN_QUOTE "\\Q"
static const char quote[] = OPEN_QUOTE;
static const char unquote[] = "\\E";
std::string result(OPEN_QUOTE "--");
#undef OPEN_QUOTE
re2::StringPiece seek_unquote(literal);
const char* copy_start = literal.data();
size_t copy_length = literal.size();
while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) {
copy_length = seek_unquote.data() - copy_start;
result.append(copy_start, copy_length);
result.append(g_escape_closing_quote);
result.append(quote);
copy_start = seek_unquote.data();
}
copy_length = (literal.data() + literal.size()) - copy_start;
result.append(copy_start, copy_length);
result.append(unquote);
return result;
}
// Returns whether |pattern| matches |input| when anchored at its start.
// |input| is not modified and no capture groups are requested.
bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input,
                                                const RE2& pattern) {
  const bool matches_at_start = pattern.Match(
      input, 0, input.size(), RE2::ANCHOR_START, NULL, 0);
  return matches_at_start;
}
// Compiles the boundary pattern for |boundary_separator|. If that pattern
// fails to compile, the parser starts out in STATE_ERROR instead of
// STATE_INIT.
FormDataParserMultipart::FormDataParserMultipart(
const std::string& boundary_separator)
: dash_boundary_pattern_(
CreateBoundaryPatternFromLiteral(boundary_separator)),
state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR),
patterns_(g_patterns.Pointer()) {}
// Out-of-line, empty destructor.
FormDataParserMultipart::~FormDataParserMultipart() {}
// Parsing is complete and successful only once the closing delimiter has been
// read, which is recorded as STATE_FINISHED.
bool FormDataParserMultipart::AllDataReadOK() {
  const bool reached_closing_delimiter = (state_ == STATE_FINISHED);
  return reached_closing_delimiter;
}
// Advances |source_| past the body of the current part and past the boundary
// line that terminates it.
//
// If |data| is not NULL it receives the body of the part, i.e. everything from
// the current position up to (but excluding) the CRLF that precedes the
// boundary; a part with no preceding line at all is an error in that case.
// After the boundary, either the closing marker ("--", then the epilogue) is
// read and the state becomes STATE_FINISHED, or the transfer padding and CRLF
// of an ordinary delimiter are consumed. Any failure sets STATE_ERROR.
// Returns true iff the state is not STATE_ERROR afterwards.
bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) {
  const char* const part_begin = source_.data();

  // Step over the body one CRLF-terminated line at a time until the boundary
  // is at the front of |source_|.
  for (;;) {
    if (StartsWithPattern(source_, dash_boundary_pattern_))
      break;
    const bool line_consumed = RE2::Consume(&source_, crlf_free_pattern()) &&
                               RE2::Consume(&source_, crlf_pattern());
    if (!line_consumed) {
      state_ = STATE_ERROR;
      return false;
    }
  }

  if (data != NULL) {
    const char* const boundary_begin = source_.data();
    if (boundary_begin == part_begin) {
      // No line (and hence no CRLF) precedes the boundary.
      state_ = STATE_ERROR;
      return false;
    }
    // Drop the two characters of the CRLF that precedes the boundary.
    data->set(part_begin, boundary_begin - part_begin - 2u);
  }

  // The loop above guarantees that the boundary is next.
  CHECK(RE2::Consume(&source_, dash_boundary_pattern_));

  if (!StartsWithPattern(source_, closing_pattern())) {
    // Ordinary delimiter: only padding and CRLF may follow on this line.
    if (!RE2::Consume(&source_, transfer_padding_pattern()))
      state_ = STATE_ERROR;
    return state_ != STATE_ERROR;
  }

  // Closing delimiter: consume the marker and then the epilogue.
  CHECK(RE2::Consume(&source_, closing_pattern()));
  state_ = RE2::Consume(&source_, epilogue_pattern()) ? STATE_FINISHED
                                                      : STATE_ERROR;
  return state_ != STATE_ERROR;
}
// Reads the next part of the form and stores its name and value in |result|.
//
// All headers of the part are read first; a Content-Disposition header
// supplies the name and, if it carries a filename, the value as well. After
// the blank line that ends the headers:
//   * if the value is already known and the source is exhausted, parsing is
//     suspended (STATE_SUSPEND) until more data arrives via SetSource();
//   * otherwise the body is read up to the next boundary, and it becomes the
//     value unless the headers already supplied one.
// Returns false without touching |result| when the parser is not ready, the
// source is empty, no name was found, or the header block is malformed.
bool FormDataParserMultipart::GetNextNameValue(Result* result) {
  if (state_ != STATE_READY || source_.size() == 0)
    return false;

  base::StringPiece part_name;
  base::StringPiece part_value;
  bool have_value = false;
  for (;;) {
    bool header_set_value = false;
    if (!TryReadHeader(&part_name, &part_value, &header_set_value))
      break;
    have_value = have_value || header_set_value;
  }

  if (state_ == STATE_ERROR || part_name.size() == 0) {
    state_ = STATE_ERROR;
    return false;
  }

  // The header block must be terminated by an empty line.
  if (!RE2::Consume(&source_, crlf_pattern())) {
    state_ = STATE_ERROR;
    return false;
  }

  bool part_ok = true;
  if (have_value && source_.size() == 0) {
    state_ = STATE_SUSPEND;
  } else {
    // The body is only captured when the headers did not provide a value.
    part_ok = FinishReadingPart(have_value ? NULL : &part_value);
  }

  result->set_name(net::UnescapeURLComponent(
      part_name.as_string(),
      net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS));
  result->set_value(part_value);
  return part_ok;
}
// Supplies the next piece of the form body.
//
// The call is rejected (returns false, state unchanged) when |source| has no
// data pointer or when the previous piece has not been consumed completely.
// Otherwise the effect depends on the current state:
//   * STATE_INIT: skips the preamble, then reads the first boundary line and
//     moves to STATE_READY.
//   * STATE_READY: nothing more to do; parts are read by GetNextNameValue().
//   * STATE_SUSPEND: finishes the part that was interrupted by the end of the
//     previous piece. If that part is followed by an ordinary delimiter the
//     parser becomes STATE_READY; if it is followed by the closing delimiter
//     the parser stays in STATE_FINISHED as set by FinishReadingPart().
//   * any other state: STATE_ERROR.
// Returns true iff the parser is not in STATE_ERROR afterwards.
bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {
  if (source.data() == NULL || source_.size() != 0)
    return false;
  source_.set(source.data(), source.size());

  switch (state_) {
    case STATE_INIT:
      // Seek behind the preamble.
      while (!StartsWithPattern(source_, dash_boundary_pattern_)) {
        if (!RE2::Consume(&source_, preamble_pattern())) {
          state_ = STATE_ERROR;
          break;
        }
      }
      // Read the boundary, the transfer padding, and the CRLF.
      if (state_ != STATE_ERROR) {
        if (!RE2::Consume(&source_, dash_boundary_pattern_) ||
            !RE2::Consume(&source_, transfer_padding_pattern()))
          state_ = STATE_ERROR;
        else
          state_ = STATE_READY;
      }
      break;
    case STATE_READY:
      break;
    case STATE_SUSPEND:
      if (!FinishReadingPart(NULL)) {
        state_ = STATE_ERROR;
      } else if (state_ == STATE_SUSPEND) {
        // FinishReadingPart() leaves the state untouched after an ordinary
        // delimiter, so resume normal reading. When it met the closing
        // delimiter it has already switched to STATE_FINISHED, which must not
        // be overwritten or AllDataReadOK() would never report success.
        state_ = STATE_READY;
      }
      break;
    default:
      state_ = STATE_ERROR;
  }
  return state_ != STATE_ERROR;
}
// Tries to consume one header line from |source_|.
//
// Returns false (consuming nothing) when the source does not start with a
// header; returns true otherwise. |value_assigned| is always reset to false
// first. Headers other than Content-Disposition are consumed and ignored. For
// Content-Disposition, the quoted name is stored in |name|; a missing name
// sets STATE_ERROR (the function still returns true). If a quoted filename is
// present it is stored in |value| and |value_assigned| is set to true.
bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name,
                                            base::StringPiece* value,
                                            bool* value_assigned) {
  *value_assigned = false;

  const char* const line_begin = source_.data();
  if (!RE2::Consume(&source_, header_pattern()))
    return false;

  // The consumed header without its terminating CRLF (two characters).
  const re2::StringPiece header_line(line_begin,
                                     source_.data() - line_begin - 2u);
  if (!StartsWithPattern(header_line, content_disposition_pattern()))
    return true;  // Some other header; skipped.

  // captures[0] is the whole match, captures[1] the text between the quotes.
  re2::StringPiece captures[2u];
  const bool has_name = name_pattern().Match(
      header_line, g_content_disposition_length, header_line.size(),
      RE2::UNANCHORED, captures, 2);
  if (!has_name) {
    state_ = STATE_ERROR;
    return true;
  }
  name->set(captures[1].data(), captures[1].size());

  const bool has_filename = value_pattern().Match(
      header_line, g_content_disposition_length, header_line.size(),
      RE2::UNANCHORED, captures, 2);
  if (has_filename) {
    value->set(captures[1].data(), captures[1].size());
    *value_assigned = true;
  }
  return true;
}
}