This source file includes the following definitions.
- headers_
- Reset
- ParseStateToString
- ErrorCodeToString
- ParseHTTPFirstLine
- ProcessFirstLine
- CleanUpKeyValueWhitespace
- FindColonsAndParseIntoKeyValue
- ProcessContentLengthLine
- ProcessTransferEncodingLine
- SplitStringPiece
- ProcessChunkExtensionsManual
- ProcessChunkExtensions
- ProcessHeaderLines
- AssignParseStateAfterHeadersHaveBeenParsed
- ProcessHeaders
- BytesSafeToSplice
- BytesSpliced
- ProcessInput
#include "net/tools/balsa/balsa_frame.h"
#include <assert.h>
#if __SSE2__
#include <emmintrin.h>
#endif
#include <strings.h>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "base/logging.h"
#include "base/port.h"
#include "base/strings/string_piece.h"
#include "net/tools/balsa/balsa_enums.h"
#include "net/tools/balsa/balsa_headers.h"
#include "net/tools/balsa/balsa_visitor_interface.h"
#include "net/tools/balsa/buffer_interface.h"
#include "net/tools/balsa/simple_buffer.h"
#include "net/tools/balsa/split.h"
#include "net/tools/balsa/string_piece_utils.h"
namespace net {
// Lower-case names of the two headers that drive message framing, together
// with their lengths (excluding the terminating NUL). ProcessHeaderLines()
// compares header keys against these with a case-insensitive comparison.
static const char kContentLength[] = "content-length";
static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const char kTransferEncoding[] = "transfer-encoding";
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;
// Constructs a framer that is ready to parse a request: it starts in
// READING_HEADER_AND_FIRSTLINE with no error, a 16 KiB header limit, a
// 2048-byte request-URI limit, and the built-in do-nothing visitor installed.
// No BalsaHeaders object is attached yet (headers_ is NULL); ProcessInput()
// returns 0 until one is provided.
BalsaFrame::BalsaFrame()
: last_char_was_slash_r_(false),
saw_non_newline_char_(false),
start_was_space_(true),
chunk_length_character_extracted_(false),
// Parse requests by default.
is_request_(true),
request_was_head_(false),
// Default limits: 16 KiB of headers, 2048 bytes of request URI.
max_header_length_(16 * 1024),
max_request_uri_length_(2048),
// Callbacks go to the no-op visitor until a real one is installed.
visitor_(&do_nothing_visitor_),
chunk_length_remaining_(0),
content_length_remaining_(0),
last_slash_n_loc_(NULL),
last_recorded_slash_n_loc_(NULL),
last_slash_n_idx_(0),
term_chars_(0),
parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
last_error_(BalsaFrameEnums::NO_ERROR),
headers_(NULL) {
}
// The destructor performs no explicit cleanup: nothing is deleted or released
// here, including the objects pointed to by headers_ and visitor_.
BalsaFrame::~BalsaFrame() {}
// Returns the framer to its initial parsing state so that it can be reused
// for another message. Configuration that is not per-message (is_request_,
// request_was_head_, the length limits and the visitor) is left untouched.
// If a BalsaHeaders object is attached it is cleared as well.
void BalsaFrame::Reset() {
  // State machine and error status.
  parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
  last_error_ = BalsaFrameEnums::NO_ERROR;

  // Header-scanning bookkeeping.
  lines_.clear();
  last_slash_n_idx_ = 0;
  last_slash_n_loc_ = NULL;
  last_recorded_slash_n_loc_ = NULL;
  term_chars_ = 0;
  last_char_was_slash_r_ = false;
  saw_non_newline_char_ = false;
  start_was_space_ = true;

  // Body-framing bookkeeping.
  chunk_length_character_extracted_ = false;
  chunk_length_remaining_ = 0;
  content_length_remaining_ = 0;

  if (headers_) {
    headers_->Clear();
  }
}
// Maps a ParseState value to the spelling of its enumerator. NUM_STATES and
// any value that is not a listed enumerator yield "UNKNOWN_STATE". The
// returned pointer refers to a string literal and must not be freed.
const char* BalsaFrameEnums::ParseStateToString(
    BalsaFrameEnums::ParseState error_code) {
  const char* name = "UNKNOWN_STATE";
  switch (error_code) {
    case PARSE_ERROR:
      name = "PARSE_ERROR";
      break;
    case READING_HEADER_AND_FIRSTLINE:
      name = "READING_HEADER_AND_FIRSTLINE";
      break;
    case READING_CHUNK_LENGTH:
      name = "READING_CHUNK_LENGTH";
      break;
    case READING_CHUNK_EXTENSION:
      name = "READING_CHUNK_EXTENSION";
      break;
    case READING_CHUNK_DATA:
      name = "READING_CHUNK_DATA";
      break;
    case READING_CHUNK_TERM:
      name = "READING_CHUNK_TERM";
      break;
    case READING_LAST_CHUNK_TERM:
      name = "READING_LAST_CHUNK_TERM";
      break;
    case READING_TRAILER:
      name = "READING_TRAILER";
      break;
    case READING_UNTIL_CLOSE:
      name = "READING_UNTIL_CLOSE";
      break;
    case READING_CONTENT:
      name = "READING_CONTENT";
      break;
    case MESSAGE_FULLY_READ:
      name = "MESSAGE_FULLY_READ";
      break;
    case NUM_STATES:
      // Not a real state; keep the "unknown" spelling.
      break;
  }
  return name;
}
// Maps an ErrorCode value to the spelling of its enumerator. NUM_ERROR_CODES
// and any value that is not a listed enumerator yield "UNKNOWN_ERROR". The
// returned pointer refers to a string literal and must not be freed.
const char* BalsaFrameEnums::ErrorCodeToString(
    BalsaFrameEnums::ErrorCode error_code) {
  const char* name = "UNKNOWN_ERROR";
  switch (error_code) {
    case NO_ERROR:
      name = "NO_ERROR";
      break;
    case NO_STATUS_LINE_IN_RESPONSE:
      name = "NO_STATUS_LINE_IN_RESPONSE";
      break;
    case NO_REQUEST_LINE_IN_REQUEST:
      name = "NO_REQUEST_LINE_IN_REQUEST";
      break;
    case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:
      name = "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";
      break;
    case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:
      name = "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";
      break;
    case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:
      name = "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";
      break;
    case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:
      name = "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";
      break;
    case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:
      name = "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";
      break;
    case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:
      name = "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";
      break;
    case FAILED_CONVERTING_STATUS_CODE_TO_INT:
      name = "FAILED_CONVERTING_STATUS_CODE_TO_INT";
      break;
    case REQUEST_URI_TOO_LONG:
      name = "REQUEST_URI_TOO_LONG";
      break;
    case HEADERS_TOO_LONG:
      name = "HEADERS_TOO_LONG";
      break;
    case UNPARSABLE_CONTENT_LENGTH:
      name = "UNPARSABLE_CONTENT_LENGTH";
      break;
    case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:
      name = "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";
      break;
    case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:
      name = "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";
      break;
    case HEADER_MISSING_COLON:
      name = "HEADER_MISSING_COLON";
      break;
    case INVALID_CHUNK_LENGTH:
      name = "INVALID_CHUNK_LENGTH";
      break;
    case CHUNK_LENGTH_OVERFLOW:
      name = "CHUNK_LENGTH_OVERFLOW";
      break;
    case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:
      name = "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";
      break;
    case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:
      name = "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";
      break;
    case MULTIPLE_CONTENT_LENGTH_KEYS:
      name = "MULTIPLE_CONTENT_LENGTH_KEYS";
      break;
    case MULTIPLE_TRANSFER_ENCODING_KEYS:
      name = "MULTIPLE_TRANSFER_ENCODING_KEYS";
      break;
    case UNKNOWN_TRANSFER_ENCODING:
      name = "UNKNOWN_TRANSFER_ENCODING";
      break;
    case INVALID_HEADER_FORMAT:
      name = "INVALID_HEADER_FORMAT";
      break;
    case INTERNAL_LOGIC_ERROR:
      name = "INTERNAL_LOGIC_ERROR";
      break;
    case NUM_ERROR_CODES:
      // Not a real error code; keep the "unknown" spelling.
      break;
  }
  return name;
}
// Splits the first line of an HTTP message, [begin, end), into its three
// whitespace-separated tokens and records the token boundaries in |headers|
// as offsets from |begin| (whitespace_N_idx_ / non_whitespace_N_idx_).
//
//   begin, end  - the first line; the byte at end[-1] must be '\n'.
//   is_request  - true to apply request rules, false for response rules.
//   max_request_uri_length - for requests, the largest allowed length of the
//                 second token.
//   headers     - receives the offsets and, for responses, the numeric value
//                 of the second token in parsed_response_code_.
//   error_code  - written when a problem is detected. It may be written even
//                 when the function returns true (see below); it is not
//                 cleared on success.
//
// Returns false on a fatal problem. Returns true otherwise, including the
// cases where a token was missing but parsing continued (request with no
// whitespace after the first token, or any message with no whitespace after
// the second token); in those cases |*error_code| has been set and the caller
// can treat it as a warning.
bool ParseHTTPFirstLine(const char* begin,
const char* end,
bool is_request,
size_t max_request_uri_length,
BalsaHeaders* headers,
BalsaFrameEnums::ErrorCode* error_code) {
const char* current = begin;
// The caller must hand us a complete line: step back onto the last byte and
// require it to be '\n'.
--end;
if (*end != '\n') {
*error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
<< headers->OriginalHeadersForDebugging();
return false;
}
// Trim trailing bytes that compare <= ' ' (this includes "\r\n"), stopping at
// |begin| at the latest.
while (begin < end && *end <= ' ') {
--end;
}
DCHECK(*end != '\n');
if (*end == '\n') {
*error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
<< headers->OriginalHeadersForDebugging();
return false;
}
// |end| now becomes one past the last kept byte.
++end;
if (end == begin) {
*error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
<< headers->OriginalHeadersForDebugging();
return false;
}
// Leading-whitespace skipping is compiled out (#if 0), so both of the first
// two offsets are always 0 here.
headers->whitespace_1_idx_ = current - begin;
#if 0
while (*current <= ' ') {
++current;
}
#endif
headers->non_whitespace_1_idx_ = current - begin;
// Scan the first token: advance until a byte <= ' ' or the end of the line.
do {
++current;
if (current == end) {
// Only one token on the line: collapse every remaining offset to the end.
headers->whitespace_2_idx_ = current - begin;
headers->non_whitespace_2_idx_ = current - begin;
headers->whitespace_3_idx_ = current - begin;
headers->non_whitespace_3_idx_ = current - begin;
headers->whitespace_4_idx_ = current - begin;
// Adding |is_request| (0 or 1) selects between the response and request
// variant of the error. NOTE(review): this presumably relies on the request
// enumerator being declared immediately after the response one - confirm
// against balsa_enums.h.
*error_code =
static_cast<BalsaFrameEnums::ErrorCode>(
BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
is_request);
// Fatal for responses; requests continue to the final checks.
if (!is_request) {
return false;
}
goto output_exhausted;
}
} while (*current > ' ');
headers->whitespace_2_idx_ = current - begin;
// Skip the separator run. This loop has no end check of its own; it stops on
// the first byte > ' ', and the trimming above left such a byte at end[-1].
do {
++current;
} while (*current <= ' ');
headers->non_whitespace_2_idx_ = current - begin;
// Scan the second token (request URI or response status code).
do {
++current;
if (current == end) {
// No third token: collapse the remaining offsets to the end. This is not
// treated as fatal here for either message kind.
headers->whitespace_3_idx_ = current - begin;
headers->non_whitespace_3_idx_ = current - begin;
headers->whitespace_4_idx_ = current - begin;
*error_code =
static_cast<BalsaFrameEnums::ErrorCode>(
BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE
+ is_request);
goto output_exhausted;
}
} while (*current > ' ');
headers->whitespace_3_idx_ = current - begin;
// Skip the second separator run (same termination argument as above).
do {
++current;
} while (*current <= ' ');
headers->non_whitespace_3_idx_ = current - begin;
// The third token runs to the trimmed end of the line.
headers->whitespace_4_idx_ = end - begin;
output_exhausted:
if (is_request) {
// Enforce the limit on the length of the second token (the request URI).
if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
max_request_uri_length) {
*error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
return false;
}
} else {
// Convert the second token to a number. Every byte must be a decimal digit;
// an empty token leaves the value at 0.
headers->parsed_response_code_ = 0;
{
const char* parsed_response_code_current =
begin + headers->non_whitespace_2_idx_;
const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
while (parsed_response_code_current < parsed_response_code_end) {
if (*parsed_response_code_current < '0' ||
*parsed_response_code_current > '9') {
*error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
return false;
}
size_t status_code_x_10 = headers->parsed_response_code_ * 10;
uint8 c = *parsed_response_code_current - '0';
// Reject the digit if either the multiply by 10 or the add would overflow
// size_t.
if ((headers->parsed_response_code_ > kMaxDiv10) ||
(std::numeric_limits<size_t>::max() - status_code_x_10) < c) {
*error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
return false;
}
headers->parsed_response_code_ = status_code_x_10 + c;
++parsed_response_code_current;
}
}
}
return true;
}
void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
BalsaFrameEnums::ErrorCode previous_error = last_error_;
if (!ParseHTTPFirstLine(begin,
end,
is_request_,
max_request_uri_length_,
headers_,
&last_error_)) {
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
visitor_->HandleHeaderError(this);
return;
}
if (previous_error != last_error_) {
visitor_->HandleHeaderWarning(this);
}
if (is_request_) {
int version_length =
headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;
visitor_->ProcessRequestFirstLine(
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
begin + headers_->non_whitespace_2_idx_,
headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
begin + headers_->non_whitespace_3_idx_,
version_length);
if (version_length == 0)
parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
} else {
visitor_->ProcessResponseFirstLine(
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
begin + headers_->non_whitespace_2_idx_,
headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
begin + headers_->non_whitespace_3_idx_,
headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
}
}
// Given a header line and the position of its ':' separator, records where
// the key ends and where the value begins, with the whitespace around the
// colon excluded.
//
//   stream_begin - base pointer; all stored indices are offsets from it.
//   line_begin   - first byte of the header line.
//   current      - must point at the ':' inside the line.
//   line_end     - end of the line; must point at a byte <= ' '.
//   current_header_line - receives key_end_idx and value_begin_idx.
void BalsaFrame::CleanUpKeyValueWhitespace(
    const char* stream_begin,
    const char* line_begin,
    const char* current,
    const char* line_end,
    HeaderLineDescription* current_header_line) {
  const char* const colon_loc = current;
  DCHECK_LT(colon_loc, line_end);
  DCHECK_EQ(':', *colon_loc);
  DCHECK_EQ(':', *current);
  DCHECK_GE(' ', *line_end)
      << "\"" << std::string(line_begin, line_end) << "\"";

  // Walk left from the colon over bytes <= ' ' to find the last key byte,
  // then step one past it to get the exclusive end of the key.
  const char* key_tail = colon_loc - 1;
  while (key_tail > line_begin && *key_tail <= ' ') {
    --key_tail;
  }
  if (key_tail != colon_loc) {
    ++key_tail;
  }
  current_header_line->key_end_idx = key_tail - stream_begin;

  // Walk right from the colon over bytes <= ' ' to find the first value byte
  // (or line_end if the value is empty).
  DCHECK_EQ(':', *colon_loc);
  const char* value_head = colon_loc + 1;
  while (value_head < line_end && *value_head <= ' ') {
    ++value_head;
  }
  current_header_line->value_begin_idx = value_head - stream_begin;

  DCHECK_GE(current_header_line->key_end_idx,
            current_header_line->first_char_idx);
  DCHECK_GE(current_header_line->value_begin_idx,
            current_header_line->key_end_idx);
  DCHECK_GE(current_header_line->last_char_idx,
            current_header_line->value_begin_idx);
}
// Turns the recorded line boundaries in lines_ into HeaderLineDescription
// entries in headers_->header_lines_.
//
// lines_[0] (the first line of the message) and the final lines_ entry are
// skipped. A line whose first byte is <= ' ' is folded into the preceding
// header as a continuation. For each resulting header the first ':' is
// located and CleanUpKeyValueWhitespace() fills in the key/value split. A
// header without a colon is still appended (with key end and value begin set
// to the end of the line), last_error_ is set to HEADER_MISSING_COLON and the
// visitor receives HandleHeaderWarning().
//
// The function indexes lines_[1], so the caller must ensure lines_ holds at
// least two entries (ProcessHeaderLines() only calls it with more than two).
inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {
  DCHECK(!lines_.empty());
  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
  // The last entry of lines_ is excluded from processing.
  const Lines::size_type lines_size_m1 = lines_.size() - 1;
#if __SSE2__
  const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':',
                           ':', ':', ':', ':', ':', ':', ':', ':'};
  // Vector loads are only issued while at least 16 bytes remain.
  const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;
#endif
  // |current| is the colon search cursor. It persists across header lines, so
  // a colon found while scanning ahead is not searched for again.
  const char* current = stream_begin + lines_[1].first;
  for (Lines::size_type i = 1; i < lines_size_m1;) {
    const char* line_begin = stream_begin + lines_[i].first;
    // Absorb continuation lines: advance |i| past every following line that
    // starts with a byte <= ' '.
    for (++i; i < lines_size_m1; ++i) {
      const char c = *(stream_begin + lines_[i].first);
      if (c > ' ') {
        break;
      }
    }
    const char* line_end = stream_begin + lines_[i - 1].second;
    DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
    // Step back onto the terminating '\n', then trim trailing bytes <= ' '.
    --line_end;
    DCHECK_EQ('\n', *line_end)
        << "\"" << std::string(line_begin, line_end) << "\"";
    while (*line_end <= ' ' && line_end > line_begin) {
      --line_end;
    }
    ++line_end;
    DCHECK_GE(' ', *line_end);
    DCHECK_LT(line_begin, line_end);
    // Append a provisional description: key end, value begin and last char
    // all point at the end of the line until a colon is found.
    headers_->header_lines_.push_back(
        HeaderLineDescription(line_begin - stream_begin,
                              line_end - stream_begin,
                              line_end - stream_begin,
                              line_end - stream_begin,
                              0));
    if (current >= line_end) {
      // The cursor is already past this line, so it contains no colon.
      last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
      visitor_->HandleHeaderWarning(this);
      continue;
    } else if (current < line_begin) {
      current = line_begin;
    }
#if __SSE2__
    while (current < header_lines_end_m16) {
      __m128i header_bytes =
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));
      __m128i colon_cmp =
          _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons));
      int colon_msk = _mm_movemask_epi8(colon_cmp);
      if (colon_msk == 0) {
        current += 16;
        continue;
      }
      // Move to the first colon in this 16-byte window.
      current += (ffs(colon_msk) - 1);
      if (current > line_end) {
        // The colon belongs to a later line; this line has none.
        break;
      }
      goto found_colon;
    }
#endif
    // Byte-wise scan (the only scan without SSE2, the tail scan with it).
    for (; current < line_end; ++current) {
      if (*current != ':') {
        continue;
      }
      goto found_colon;
    }
    last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
    visitor_->HandleHeaderWarning(this);
    continue;
 found_colon:
    DCHECK_EQ(*current, ':');
    DCHECK_LE(current - stream_begin, line_end - stream_begin);
    DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
    HeaderLineDescription& current_header_line = headers_->header_lines_.back();
    current_header_line.key_end_idx = current - stream_begin;
    current_header_line.value_begin_idx = current_header_line.key_end_idx;
    if (current < line_end) {
      ++current_header_line.key_end_idx;
      // Fix: the argument had been corrupted into a mis-encoded token; it is
      // the address of the description appended above.
      CleanUpKeyValueWhitespace(stream_begin,
                                line_begin,
                                current,
                                line_end,
                                &current_header_line);
    }
  }
}
// Parses the value of header line |line_idx| as an unsigned decimal number.
//
//   status - set to VALID_CONTENT_LENGTH on success, INVALID_CONTENT_LENGTH
//            if the value is empty or contains a non-digit, or
//            CONTENT_LENGTH_OVERFLOW if it does not fit in size_t.
//   length - receives the parsed value on success. It is not written when the
//            value is empty; on the other failures it holds the partial value
//            accumulated before the offending byte.
void BalsaFrame::ProcessContentLengthLine(
    HeaderLines::size_type line_idx,
    BalsaHeadersEnums::ContentLengthStatus* status,
    size_t* length) {
  const HeaderLineDescription& line_desc = headers_->header_lines_[line_idx];
  const char* const base = headers_->OriginalHeaderStreamBegin();
  const char* const value_end = base + line_desc.last_char_idx;
  const char* cursor = base + line_desc.value_begin_idx;

  if (cursor >= value_end) {
#if DEBUGFRAMER
    LOG(INFO) << "invalid content-length -- no non-whitespace value data";
#endif
    *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
    return;
  }

  const size_t kSizeMax = std::numeric_limits<size_t>::max();
  const size_t kMaxDiv10 = kSizeMax / 10;

  *length = 0;
  for (; cursor < value_end; ++cursor) {
    const bool is_digit = (*cursor >= '0' && *cursor <= '9');
    if (!is_digit) {
      *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
#if DEBUGFRAMER
      LOG(INFO) << "invalid content-length - non numeric character detected";
#endif
      return;
    }
    const size_t times_ten = *length * 10;
    const unsigned char digit = *cursor - '0';
    // Reject the digit if either the multiply or the add would overflow.
    const bool multiply_overflows = *length > kMaxDiv10;
    if (multiply_overflows || (kSizeMax - times_ten) < digit) {
      *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
#if DEBUGFRAMER
      LOG(INFO) << "content-length overflow";
#endif
      return;
    }
    *length = times_ten + digit;
  }
#if DEBUGFRAMER
  LOG(INFO) << "content_length parsed: " << *length;
#endif
  *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
}
// Interprets the value of header line |line_idx| as a transfer-encoding.
// Exactly "chunked" or "identity" (compared case-insensitively) are accepted
// and recorded in headers_->transfer_encoding_is_chunked_. Any other value
// sets UNKNOWN_TRANSFER_ENCODING, moves the framer to PARSE_ERROR and calls
// HandleHeaderError().
void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
  const HeaderLineDescription& line_desc = headers_->header_lines_[line_idx];
  const char* const base = headers_->OriginalHeaderStreamBegin();
  const char* const value = base + line_desc.value_begin_idx;
  const size_t value_length = (base + line_desc.last_char_idx) - value;

  const bool is_chunked =
      (value_length == 7) && strncasecmp(value, "chunked", 7) == 0;
  const bool is_identity =
      (value_length == 8) && strncasecmp(value, "identity", 8) == 0;

  if (!is_chunked && !is_identity) {
    last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;
    parse_state_ = BalsaFrameEnums::PARSE_ERROR;
    visitor_->HandleHeaderError(this);
    return;
  }
  headers_->transfer_encoding_is_chunked_ = is_chunked;
}
namespace {
// Splits |original| at the first |delim| that follows the first
// non-delimiter byte; leading delimiters are skipped.
//
// On success returns true with |*before| set to the text up to that
// delimiter and |*after| set to everything following it (empty if there was
// no delimiter), both passed through RemoveWhitespaceContext().
// If |original| is empty or consists only of delimiters, returns false with
// |*before| set to |original| unchanged and |*after| set to "".
bool SplitStringPiece(base::StringPiece original, char delim,
                      base::StringPiece* before, base::StringPiece* after) {
  const char* cursor = original.data();
  const char* const limit = cursor + original.size();

  // Skip any leading delimiters.
  while (cursor != limit && *cursor == delim) {
    ++cursor;
  }
  if (cursor == limit) {
    *before = original;
    *after = "";
    return false;
  }

  // |cursor| is on a non-delimiter byte; find where the token stops.
  const char* const token_begin = cursor;
  do {
    ++cursor;
  } while (cursor != limit && *cursor != delim);

  *before = base::StringPiece(token_begin, cursor - token_begin);
  if (cursor == limit) {
    *after = base::StringPiece("");
  } else {
    *after = base::StringPiece(cursor + 1, limit - (cursor + 1));
  }
  StringPieceUtils::RemoveWhitespaceContext(before);
  StringPieceUtils::RemoveWhitespaceContext(after);
  return true;
}
// Parses a chunk-extension string of ';'-separated "key=value" items and
// appends each item to |extensions| via AppendHeader(key, value). One leading
// and one trailing double quote are stripped from a value when present. An
// item without '=' is appended with an empty value.
void ProcessChunkExtensionsManual(base::StringPiece all_extensions,
                                  BalsaHeaders* extensions) {
  base::StringPiece item;
  base::StringPiece rest;
  StringPieceUtils::RemoveWhitespaceContext(&all_extensions);
  for (SplitStringPiece(all_extensions, ';', &item, &rest);
       !item.empty();
       SplitStringPiece(rest, ';', &item, &rest)) {
    base::StringPiece key;
    base::StringPiece value;
    SplitStringPiece(item, '=', &key, &value);
    // Strip the surrounding quotes, re-checking emptiness after each strip so
    // a value consisting of a single quote is handled.
    if (!value.empty() && value[0] == '"') {
      value.remove_prefix(1);
    }
    if (!value.empty() && value[value.length() - 1] == '"') {
      value.remove_suffix(1);
    }
    extensions->AppendHeader(key, value);
    StringPieceUtils::RemoveWhitespaceContext(&rest);
  }
}
}
void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,
BalsaHeaders* extensions) {
ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);
}
// Builds the key/value descriptions for all header lines and then inspects
// them for the two headers that control framing: content-length and
// transfer-encoding.
//
// On a framing conflict (differing duplicate content-length headers, more
// than one transfer-encoding header, or a malformed first header line) this
// sets last_error_, moves to PARSE_ERROR, calls HandleHeaderError() and
// returns early.
void BalsaFrame::ProcessHeaderLines() {
// Both indices store (line index + 1) so that 0 can mean "not seen yet".
HeaderLines::size_type content_length_idx = 0;
HeaderLines::size_type transfer_encoding_idx = 0;
DCHECK(!lines_.empty());
#if DEBUGFRAMER
LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
#endif
// Header processing only happens when lines_ holds more than two entries.
if (lines_.size() > 2) {
const char* stream_begin = headers_->OriginalHeaderStreamBegin();
FindColonsAndParseIntoKeyValue();
const HeaderLines::size_type
header_lines_size = headers_->header_lines_.size();
for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
const HeaderLineDescription& current_header_line =
headers_->header_lines_[i];
const char* key_begin =
(stream_begin + current_header_line.first_char_idx);
const char* key_end = (stream_begin + current_header_line.key_end_idx);
const size_t key_len = key_end - key_begin;
// The first byte of the key is used as a cheap pre-filter before the full
// case-insensitive comparison.
const char c = *key_begin;
#if DEBUGFRAMER
LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)
<< " c: '" << c << "' key_len: " << key_len;
#endif
if (c == 'c' || c == 'C') {
if ((key_len == kContentLengthSize) &&
0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {
BalsaHeadersEnums::ContentLengthStatus content_length_status =
BalsaHeadersEnums::NO_CONTENT_LENGTH;
size_t length = 0;
ProcessContentLengthLine(i, &content_length_status, &length);
if (content_length_idx != 0) {
// A repeated content-length header is tolerated only if it has the same
// status as the first one and, when valid, the same value.
if ((headers_->content_length_status_ != content_length_status) ||
((headers_->content_length_status_ ==
BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
length != headers_->content_length_)) {
last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
visitor_->HandleHeaderError(this);
return;
}
continue;
} else {
// First content-length header: record its status and value.
content_length_idx = i + 1;
headers_->content_length_status_ = content_length_status;
headers_->content_length_ = length;
content_length_remaining_ = length;
}
}
} else if (c == 't' || c == 'T') {
if ((key_len == kTransferEncodingSize) &&
0 == strncasecmp(key_begin, kTransferEncoding,
kTransferEncodingSize)) {
// More than one transfer-encoding header is always an error.
if (transfer_encoding_idx != 0) {
last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
visitor_->HandleHeaderError(this);
return;
}
transfer_encoding_idx = i + 1;
}
} else if (i == 0 && (key_len == 0 || c == ' ')) {
// The first header line has an empty key or starts with a space. Because
// this is the last arm of the else-if chain, it is only evaluated when the
// line does not start with c/C/t/T.
last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
visitor_->HandleHeaderError(this);
return;
}
}
// When the message is chunked, any content-length information is discarded.
// NOTE(review): this test runs before ProcessTransferEncodingLine() below,
// which is the only place in this file that sets
// transfer_encoding_is_chunked_, so it appears to see the flag's previous
// value rather than this message's - confirm the intended order.
if (headers_->transfer_encoding_is_chunked_) {
headers_->content_length_ = 0;
headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
content_length_remaining_ = 0;
}
if (transfer_encoding_idx != 0) {
ProcessTransferEncodingLine(transfer_encoding_idx - 1);
}
}
}
// Chooses the parse state that follows the headers, i.e. how (and whether) a
// body will be read.
//
// Responses to HEAD requests and responses with status 1xx, 204 or 304 have
// no body. Otherwise chunked transfer-encoding takes precedence over
// content-length. With neither present, a POST or PUT request is an error,
// any other request has no body, and a response is read until close (with a
// warning). An unparsable content-length is an error.
void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
  parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

  if (!is_request_) {
    const bool response_without_body =
        request_was_head_ ||
        (headers_->parsed_response_code_ >= 100 &&
         headers_->parsed_response_code_ < 200) ||
        (headers_->parsed_response_code_ == 204) ||
        (headers_->parsed_response_code_ == 304);
    if (response_without_body) {
      return;
    }
  }

  if (headers_->transfer_encoding_is_chunked_) {
    parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
    return;
  }

  switch (headers_->content_length_status_) {
    case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
      parse_state_ = (headers_->content_length_ == 0)
                         ? BalsaFrameEnums::MESSAGE_FULLY_READ
                         : BalsaFrameEnums::READING_CONTENT;
      break;
    case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
    case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
      parse_state_ = BalsaFrameEnums::PARSE_ERROR;
      last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;
      visitor_->HandleHeaderError(this);
      break;
    case BalsaHeadersEnums::NO_CONTENT_LENGTH:
      if (!is_request_) {
        // A response without framing information runs until the connection
        // closes.
        parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
        last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;
        visitor_->HandleHeaderWarning(this);
      } else {
        base::StringPiece method = headers_->request_method();
        const bool is_post =
            method.size() == 4 && strncmp(method.data(), "POST", 4) == 0;
        const bool is_put =
            method.size() == 3 && strncmp(method.data(), "PUT", 3) == 0;
        if (is_post || is_put) {
          parse_state_ = BalsaFrameEnums::PARSE_ERROR;
          last_error_ = BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;
          visitor_->HandleHeaderError(this);
        } else {
          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
        }
      }
      break;
    default:
      LOG(FATAL) << "Saw a content_length_status: "
                 << headers_->content_length_status_ << " which is unknown.";
  }
}
// Consumes header bytes from [message_start, message_start + message_length).
//
// The input is scanned for '\n'. Each line's (begin, end) offsets into the
// header stream are appended to lines_, and the scanned bytes are copied into
// headers_ with WriteFromFramer(). The first completed line is parsed
// immediately with ProcessFirstLine(). When the blank line that terminates
// the headers is found, the header lines are processed, the next parse state
// is chosen and the visitor is notified.
//
// Returns the number of input bytes consumed. State is kept in members, so
// the headers may arrive split across several calls.
size_t BalsaFrame::ProcessHeaders(const char* message_start,
size_t message_length) {
const char* const original_message_start = message_start;
const char* const message_end = message_start + message_length;
const char* message_current = message_start;
// First byte that has not yet been copied into headers_.
const char* checkpoint = message_start;
if (message_length == 0) {
goto bottom;
}
while (message_current < message_end) {
// Bytes already stored in the header stream; added to offsets within this
// buffer to form stream-relative indices.
size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
if (!saw_non_newline_char_) {
// Nothing but CR/LF has been seen so far: skip leading CR/LF bytes. Any
// other leading byte <= ' ' is rejected.
do {
const char c = *message_current;
if (c != '\r' && c != '\n') {
if (c <= ' ') {
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;
visitor_->HandleHeaderError(this);
goto bottom;
} else {
// First real byte of the message: the skipped bytes are dropped by moving
// both the logical start and the copy checkpoint here.
saw_non_newline_char_ = true;
checkpoint = message_start = message_current;
goto read_real_message;
}
}
++message_current;
} while (message_current < message_end);
goto bottom;
} else {
read_real_message:
#if __SSE2__
{
// Vectorized newline search: compares 16 bytes at a time while more than 16
// bytes remain. The body of this loop mirrors the byte-wise loop below.
const char* const message_end_m16 = message_end - 16;
__v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' };
while (message_current < message_end_m16) {
__m128i msg_bytes =
_mm_loadu_si128(const_cast<__m128i *>(
reinterpret_cast<const __m128i *>(message_current)));
__m128i newline_cmp =
_mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines));
int newline_msk = _mm_movemask_epi8(newline_cmp);
if (newline_msk == 0) {
message_current += 16;
continue;
}
// Move to the first '\n' in this 16-byte window.
message_current += (ffs(newline_msk) - 1);
const size_t relative_idx = message_current - message_start;
// Stream index one past the '\n'.
const size_t message_current_idx = 1 + base_idx + relative_idx;
lines_.push_back(std::make_pair(last_slash_n_idx_,
message_current_idx));
if (lines_.size() == 1) {
// The first line is complete: store it and parse it right away.
headers_->WriteFromFramer(checkpoint,
1 + message_current - checkpoint);
checkpoint = message_current + 1;
const char* begin = headers_->OriginalHeaderStreamBegin();
#if DEBUGFRAMER
LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
LOG(INFO) << "is_request_: " << is_request_;
#endif
ProcessFirstLine(begin, begin + lines_[0].second);
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
goto process_lines;
else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
goto bottom;
}
const size_t chars_since_last_slash_n = (message_current_idx -
last_slash_n_idx_);
last_slash_n_idx_ = message_current_idx;
// More than two bytes since the previous '\n': an ordinary line.
if (chars_since_last_slash_n > 2) {
++message_current;
continue;
}
// A line of exactly one byte ("\n"), or a short line whose '\n' is preceded
// by '\r' (in this buffer, or as the last byte of the previous call), ends
// the headers.
if ((chars_since_last_slash_n == 1) ||
(((message_current > message_start) &&
(*(message_current - 1) == '\r')) ||
(last_char_was_slash_r_))) {
goto process_lines;
}
++message_current;
}
}
#endif
// Byte-wise newline search (the only search without SSE2; with SSE2 it
// handles the bytes the vector loop did not cover).
while (message_current < message_end) {
if (*message_current != '\n') {
++message_current;
continue;
}
const size_t relative_idx = message_current - message_start;
const size_t message_current_idx = 1 + base_idx + relative_idx;
lines_.push_back(std::make_pair(last_slash_n_idx_,
message_current_idx));
if (lines_.size() == 1) {
// The first line is complete: store it and parse it right away.
headers_->WriteFromFramer(checkpoint,
1 + message_current - checkpoint);
checkpoint = message_current + 1;
const char* begin = headers_->OriginalHeaderStreamBegin();
#if DEBUGFRAMER
LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
LOG(INFO) << "is_request_: " << is_request_;
#endif
ProcessFirstLine(begin, begin + lines_[0].second);
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
goto process_lines;
else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
goto bottom;
}
const size_t chars_since_last_slash_n = (message_current_idx -
last_slash_n_idx_);
last_slash_n_idx_ = message_current_idx;
if (chars_since_last_slash_n > 2) {
++message_current;
continue;
}
// Same end-of-headers test as in the vectorized loop above.
if ((chars_since_last_slash_n == 1) ||
(((message_current > message_start) &&
(*(message_current - 1) == '\r')) ||
(last_char_was_slash_r_))) {
goto process_lines;
}
++message_current;
}
}
// The buffer was scanned without finding the end of the headers.
continue;
process_lines:
// End of headers found. Consume the final '\n' and store whatever has not
// been copied yet.
++message_current;
DCHECK(message_current >= message_start);
if (message_current > message_start) {
headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
}
if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
parse_state_ = BalsaFrameEnums::PARSE_ERROR;
last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
visitor_->HandleHeaderError(this);
goto bottom;
}
headers_->DoneWritingFromFramer();
{
// Hand the raw header bytes to the visitor before interpreting them.
const char* readable_ptr = NULL;
size_t readable_size = 0;
headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);
visitor_->ProcessHeaderInput(readable_ptr, readable_size);
}
ProcessHeaderLines();
if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
goto bottom;
}
AssignParseStateAfterHeadersHaveBeenParsed();
if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
goto bottom;
}
visitor_->ProcessHeaders(*headers_);
visitor_->HeaderDone();
// A message without a body is complete as soon as its headers are.
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
visitor_->MessageDone();
}
goto bottom;
}
// The input ran out before the headers ended. Remember whether it ended in
// '\r' (so a "\r\n" split across calls is recognized) and store the bytes
// scanned so far.
last_char_was_slash_r_ = (*(message_end - 1) == '\r');
DCHECK(message_current >= message_start);
if (message_current > message_start) {
headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
}
bottom:
return message_current - original_message_start;
}
// Returns how many upcoming bytes are pure body data in the current state:
// the rest of the current chunk, the rest of the content-length body, or the
// maximum size_t value when reading until close. Returns 0 in every other
// state.
size_t BalsaFrame::BytesSafeToSplice() const {
  if (parse_state_ == BalsaFrameEnums::READING_CHUNK_DATA) {
    return chunk_length_remaining_;
  }
  if (parse_state_ == BalsaFrameEnums::READING_CONTENT) {
    return content_length_remaining_;
  }
  if (parse_state_ == BalsaFrameEnums::READING_UNTIL_CLOSE) {
    return std::numeric_limits<size_t>::max();
  }
  return 0;
}
// Informs the framer that |bytes_spliced| body bytes were transferred without
// passing through ProcessInput(), and advances the framing state to match.
//
// Finishing a chunk moves to READING_CHUNK_TERM; finishing a content-length
// body moves to MESSAGE_FULLY_READ and calls MessageDone(). Reporting more
// bytes than remain, or calling this in a state that has no body data,
// records the matching error, moves to PARSE_ERROR and calls
// HandleBodyError().
void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
  switch (parse_state_) {
    case BalsaFrameEnums::READING_UNTIL_CLOSE:
      // No accounting is needed when the body runs until close.
      return;

    case BalsaFrameEnums::READING_CHUNK_DATA:
      if (bytes_spliced <= chunk_length_remaining_) {
        chunk_length_remaining_ -= bytes_spliced;
        if (chunk_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
        }
        return;
      }
      last_error_ =
          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
      break;

    case BalsaFrameEnums::READING_CONTENT:
      if (bytes_spliced <= content_length_remaining_) {
        content_length_remaining_ -= bytes_spliced;
        if (content_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
          visitor_->MessageDone();
        }
        return;
      }
      last_error_ =
          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
      break;

    default:
      last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;
      break;
  }

  // Only the failure paths reach this point.
  parse_state_ = BalsaFrameEnums::PARSE_ERROR;
  visitor_->HandleBodyError(this);
}
// Feeds |size| bytes at |input| to the framer and returns how many of them
// were consumed.
//
// While headers are being read, at most the bytes remaining under
// max_header_length_ are passed to ProcessHeaders(); reaching the limit
// without completing the headers is reported as HEADERS_TOO_LONG. Nothing is
// consumed once the framer is in MESSAGE_FULLY_READ or PARSE_ERROR. In the
// body states the input is run through the chunked / content-length /
// until-close state machine below and the visitor is called as data is
// recognized.
//
// Returns 0 without doing anything if no BalsaHeaders object is attached.
size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
  const char* current = input;
  // Start of the input bytes that have not yet been reported through
  // ProcessBodyInput() / ProcessTrailerInput().
  const char* on_entry = current;
  const char* end = current + size;
#if DEBUGFRAMER
  LOG(INFO) << "\n=============="
            << BalsaFrameEnums::ParseStateToString(parse_state_)
            << "===============\n";
#endif
  DCHECK(headers_ != NULL);
  if (headers_ == NULL) return 0;
  if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
    const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
    // Already at or over the limit and more data is arriving.
    if (header_length > max_header_length_ ||
        (header_length == max_header_length_ && size > 0)) {
      parse_state_ = BalsaFrameEnums::PARSE_ERROR;
      last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
      visitor_->HandleHeaderError(this);
      goto bottom;
    }
    // Never hand ProcessHeaders() more than the remaining header budget.
    size_t bytes_to_process = max_header_length_ - header_length;
    if (bytes_to_process > size) {
      bytes_to_process = size;
    }
    current += ProcessHeaders(input, bytes_to_process);
    // Still in the headers after using up the whole budget: too long.
    if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
      const size_t header_length_after =
          headers_->GetReadableBytesFromHeaderStream();
      if (header_length_after >= max_header_length_) {
        parse_state_ = BalsaFrameEnums::PARSE_ERROR;
        last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
        visitor_->HandleHeaderError(this);
      }
    }
    goto bottom;
  } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
             parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
    goto bottom;
  }
  while (current < end) {
    switch (parse_state_) {
 label_reading_chunk_length:
      case BalsaFrameEnums::READING_CHUNK_LENGTH:
        {
          // Classification of the 7-bit ASCII bytes for chunk-length parsing:
          //   0..15  value of a hex digit
          //   -2     a byte that may end the length (tab, LF, CR, space, ';')
          //   -1     a byte that is not allowed in a chunk length
          static const signed char buf[] = {
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -2, -1, -1, -1, -1,
            -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
          };
          static const size_t kTableSize = sizeof(buf) / sizeof(buf[0]);
          static const signed char kInvalidByte = -1;
          while (current < end) {
            const char c = *current;
            ++current;
            // The table only covers 0..127. Index through unsigned char and
            // treat every other byte as invalid, so that a byte >= 0x80 can
            // neither produce a negative index (signed char) nor one past
            // the end of the table (unsigned char).
            const unsigned char table_idx = static_cast<unsigned char>(c);
            const signed char addition =
                (table_idx < kTableSize) ? buf[table_idx] : kInvalidByte;
            if (addition >= 0) {
              chunk_length_character_extracted_ = true;
              size_t length_x_16 = chunk_length_remaining_ * 16;
              const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
              // Reject the digit if either the multiply by 16 or the add
              // would overflow size_t.
              if ((chunk_length_remaining_ > kMaxDiv16) ||
                  ((std::numeric_limits<size_t>::max() - length_x_16) <
                   static_cast<size_t>(addition))) {
                parse_state_ = BalsaFrameEnums::PARSE_ERROR;
                last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;
                visitor_->ProcessBodyInput(on_entry, current - on_entry);
                visitor_->HandleChunkingError(this);
                goto bottom;
              }
              chunk_length_remaining_ = length_x_16 + addition;
              continue;
            }
            // A non-digit: an error if no digit has been read yet, or if the
            // byte is not one of the permitted terminators.
            if (!chunk_length_character_extracted_ || addition == -1) {
              parse_state_ = BalsaFrameEnums::PARSE_ERROR;
              last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;
              visitor_->ProcessBodyInput(on_entry, current - on_entry);
              visitor_->HandleChunkingError(this);
              goto bottom;
            }
            // Put the terminator back; the extension state consumes it.
            --current;
            parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
            visitor_->ProcessChunkLength(chunk_length_remaining_);
            goto label_reading_chunk_extension;
          }
        }
        // Input ended in the middle of the chunk length.
        visitor_->ProcessBodyInput(on_entry, current - on_entry);
        goto bottom;
 label_reading_chunk_extension:
      case BalsaFrameEnums::READING_CHUNK_EXTENSION:
        {
          const char* extensions_start = current;
          size_t extensions_length = 0;
          while (current < end) {
            const char c = *current;
            if (c == '\r' || c == '\n') {
              extensions_length =
                  (extensions_start == current) ?
                  0 :
                  current - extensions_start - 1;
            }
            ++current;
            if (c == '\n') {
              // End of the chunk-size line.
              chunk_length_character_extracted_ = false;
              visitor_->ProcessChunkExtensions(
                  extensions_start, extensions_length);
              if (chunk_length_remaining_ != 0) {
                parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
                goto label_reading_chunk_data;
              }
              // A zero-length chunk is the last one. Feed the '\n' just seen
              // to the terminator detector before looking for the end of the
              // message.
              HeaderFramingFound('\n');
              parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
              goto label_reading_last_chunk_term;
            }
          }
          // Input ended before the line did; report what was seen so far.
          visitor_->ProcessChunkExtensions(
              extensions_start, extensions_length);
        }
        visitor_->ProcessBodyInput(on_entry, current - on_entry);
        goto bottom;
 label_reading_chunk_data:
      case BalsaFrameEnums::READING_CHUNK_DATA:
        while (current < end) {
          if (chunk_length_remaining_ == 0) {
            break;
          }
          // Deliver as much of the chunk as this buffer holds.
          size_t bytes_remaining = end - current;
          size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?
              chunk_length_remaining_ : bytes_remaining;
          const char* tmp_current = current + consumed_bytes;
          visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);
          visitor_->ProcessBodyData(current, consumed_bytes);
          on_entry = current = tmp_current;
          chunk_length_remaining_ -= consumed_bytes;
        }
        if (chunk_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
          goto label_reading_chunk_term;
        }
        visitor_->ProcessBodyInput(on_entry, current - on_entry);
        goto bottom;
 label_reading_chunk_term:
      case BalsaFrameEnums::READING_CHUNK_TERM:
        // Skip everything up to and including the '\n' that follows the
        // chunk data, then read the next chunk length.
        while (current < end) {
          const char c = *current;
          ++current;
          if (c == '\n') {
            parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
            goto label_reading_chunk_length;
          }
        }
        visitor_->ProcessBodyInput(on_entry, current - on_entry);
        goto bottom;
 label_reading_last_chunk_term:
      case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
        while (current < end) {
          const char c = *current;
          if (!HeaderFramingFound(c)) {
            if (HeaderFramingMayBeFound()) {
              // Still inside a possible terminator sequence.
              ++current;
              continue;
            } else {
              // Something other than a terminator follows the last chunk:
              // it is the start of a trailer. |c| is not consumed here.
              parse_state_ = BalsaFrameEnums::READING_TRAILER;
              visitor_->ProcessBodyInput(on_entry, current - on_entry);
              on_entry = current;
              goto label_reading_trailer;
            }
          } else {
            // Terminator complete: the message is done.
            ++current;
            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
            visitor_->ProcessBodyInput(on_entry, current - on_entry);
            visitor_->MessageDone();
            goto bottom;
          }
          break;
        }
        visitor_->ProcessBodyInput(on_entry, current - on_entry);
        goto bottom;
 label_reading_trailer:
      case BalsaFrameEnums::READING_TRAILER:
        // Trailer bytes are reported to the visitor as raw input; the message
        // ends when the terminator is found.
        while (current < end) {
          const char c = *current;
          ++current;
          if (HeaderFramingFound(c)) {
            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
            visitor_->ProcessTrailerInput(on_entry, current - on_entry);
            visitor_->MessageDone();
            goto bottom;
          }
        }
        visitor_->ProcessTrailerInput(on_entry, current - on_entry);
        break;
      case BalsaFrameEnums::READING_UNTIL_CLOSE:
        {
          // Everything that arrives is body data.
          const size_t bytes_remaining = end - current;
          if (bytes_remaining > 0) {
            visitor_->ProcessBodyInput(current, bytes_remaining);
            visitor_->ProcessBodyData(current, bytes_remaining);
            current += bytes_remaining;
          }
        }
        goto bottom;
      case BalsaFrameEnums::READING_CONTENT:
#if DEBUGFRAMER
        LOG(INFO) << "ReadingContent: " << content_length_remaining_;
#endif
        while (content_length_remaining_ && current < end) {
          // Deliver as much of the body as this buffer holds.
          const size_t bytes_remaining = end - current;
          const size_t consumed_bytes =
              (content_length_remaining_ < bytes_remaining) ?
              content_length_remaining_ : bytes_remaining;
          visitor_->ProcessBodyInput(current, consumed_bytes);
          visitor_->ProcessBodyData(current, consumed_bytes);
          current += consumed_bytes;
          content_length_remaining_ -= consumed_bytes;
        }
        if (content_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
          visitor_->MessageDone();
        }
        goto bottom;
      default:
        LOG(FATAL) << "Unknown state: " << parse_state_
                   << " memory corruption?!";
    }
  }
 bottom:
#if DEBUGFRAMER
  LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"
            << std::string(input, current)
            << "\n$$$$$$$$$$$$$$"
            << BalsaFrameEnums::ParseStateToString(parse_state_)
            << "$$$$$$$$$$$$$$$"
            << " consumed: " << (current - input);
  if (Error()) {
    LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());
  }
#endif
  return current - input;
}
// Namespace-scope definitions of BalsaFrame's static constant members. No
// initializer is given here, so the values presumably come from the in-class
// declarations in balsa_frame.h (TODO confirm); these definitions give the
// constants storage.
const uint32 BalsaFrame::kValidTerm1;
const uint32 BalsaFrame::kValidTerm1Mask;
const uint32 BalsaFrame::kValidTerm2;
const uint32 BalsaFrame::kValidTerm2Mask;
}