This source file includes the following definitions.
- IntToHex
- Escape
- UnescapeUnsignedCharAtIndex
- UnescapeURLWithOffsetsImpl
- AppendEscapedCharForHTMLImpl
- EscapeForHTMLImpl
- EscapeQueryParamValue
- EscapePath
- EscapeUrlEncodedData
- EscapeNonASCII
- EscapeExternalHandlerValue
- AppendEscapedCharForHTML
- EscapeForHTML
- EscapeForHTML
- UnescapeURLComponent
- UnescapeURLComponent
- UnescapeAndDecodeUTF8URLComponent
- UnescapeAndDecodeUTF8URLComponentWithOffsets
- UnescapeForHTML
#include "net/base/escape.h"
#include <algorithm>
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
namespace net {
namespace {
// Uppercase hexadecimal digits, indexed by nibble value (0-15).
const char kHexString[] = "0123456789ABCDEF";

// Returns the uppercase hexadecimal digit for |i|.
// |i| must be in [0, 15]; this is DCHECKed.
inline char IntToHex(int i) {
  DCHECK_GE(i, 0) << i << " not a hex value";
  DCHECK_LE(i, 15) << i << " not a hex value";
  const char hex_digit = kHexString[i];
  return hex_digit;
}
struct Charmap {
bool Contains(unsigned char c) const {
return ((map[c >> 5] & (1 << (c & 31))) != 0);
}
uint32 map[8];
};
// Returns a copy of |text| in which every byte contained in |charmap| is
// replaced by "%XX" (two uppercase hex digits). If |use_plus| is true, a
// space is emitted as '+' instead; this check happens before |charmap| is
// consulted. All other bytes are copied through unchanged.
std::string Escape(const std::string& text, const Charmap& charmap,
                   bool use_plus) {
  std::string escaped;
  // Worst case: every input byte expands to three output bytes.
  escaped.reserve(text.length() * 3);
  // Iterate with iterators rather than an "unsigned int" index, which would
  // wrap around before reaching length() for inputs larger than UINT_MAX.
  for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
    const unsigned char c = static_cast<unsigned char>(*it);
    if (use_plus && ' ' == c) {
      escaped.push_back('+');
    } else if (charmap.Contains(c)) {
      escaped.push_back('%');
      escaped.push_back(IntToHex(c >> 4));
      escaped.push_back(IntToHex(c & 0xf));
    } else {
      escaped.push_back(*it);
    }
  }
  return escaped;
}
// Lookup table indexed by 7-bit character code (16 entries per row). A
// nonzero entry means UnescapeURLWithOffsetsImpl decodes a "%XX" escape of
// that character whenever it unescapes at all, independent of the SPACES /
// URL_SPECIAL_CHARS / CONTROL_CHARS rule flags. Entries that are zero are
// decoded only when the matching rule flag is set.
const char kUrlUnescape[128] = {
// 0x00 - 0x0F (control characters)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// 0x10 - 0x1F (control characters)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// 0x20 - 0x2F: space ! " # $ % & ' ( ) * + , - . /   (set: '"' and '-')
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
// 0x30 - 0x3F: 0-9 : ; < = > ?   (set: digits, '<' and '>')
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
// 0x40 - 0x4F: @ A-O   (set: letters)
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x50 - 0x5F: P-Z [ \ ] ^ _   (set: letters and '_')
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
// 0x60 - 0x6F: ` a-o   (set: letters)
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x70 - 0x7F: p-z { | } ~ DEL   (set: letters and '~')
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
};
// Attempts to decode a "%XX" escape sequence that starts at |index| in
// |escaped_text|. On success stores the decoded byte in |*value| and returns
// true. Returns false (leaving |*value| untouched) when fewer than three
// characters are available at |index|, when the character at |index| is not
// '%', or when either of the following two characters is not a hex digit.
template<typename STR>
bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,
                                 size_t index,
                                 unsigned char* value) {
  typedef typename STR::value_type Char;

  // The bounds check must come first so that no out-of-range element is read.
  const bool has_three_chars = (index + 2) < escaped_text.size();
  if (!has_three_chars || escaped_text[index] != '%')
    return false;

  const Char high_digit = static_cast<Char>(escaped_text[index + 1]);
  const Char low_digit = static_cast<Char>(escaped_text[index + 2]);
  if (!IsHexDigit(high_digit) || !IsHexDigit(low_digit))
    return false;

  *value = HexDigitToInt(high_digit) * 16 + HexDigitToInt(low_digit);
  return true;
}
// Returns |escaped_text| with "%XX" escape sequences decoded as permitted by
// |rules|. If |offsets_for_adjustment| is non-NULL, each offset in it is
// updated in place to account for the characters removed by decoding (see
// net::internal::AdjustEncodingOffset).
template<typename STR>
STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
UnescapeRule::Type rules,
std::vector<size_t>* offsets_for_adjustment) {
// First pass every offset through base::LimitOffset, constructed with the
// input length. NOTE(review): presumably this invalidates offsets that lie
// beyond the end of the string - confirm against base::LimitOffset.
if (offsets_for_adjustment) {
std::for_each(offsets_for_adjustment->begin(),
offsets_for_adjustment->end(),
base::LimitOffset<STR>(escaped_text.length()));
}
// With no rules set the text is returned unchanged.
if (rules == UnescapeRule::NONE)
return escaped_text;
// The output is never longer than the input, so one reservation suffices.
STR result;
result.reserve(escaped_text.length());
// Input indices of every "%XX" sequence that gets collapsed to one character.
net::internal::AdjustEncodingOffset::Adjustments adjustments;
for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
// Characters whose low byte is >= 128 are copied through untouched.
if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
result.push_back(escaped_text[i]);
continue;
}
unsigned char first_byte;
if (UnescapeUnsignedCharAtIndex(escaped_text, i, &first_byte)) {
// "%D8%9C" is the UTF-8 encoding of U+061C; it is kept escaped by copying
// all six characters and skipping past them.
unsigned char second_byte;
if ((first_byte == 0xD8) &&
UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
(second_byte == 0x9c)) {
result.append(escaped_text, i, 6);
i += 5;
continue;
}
// Three-byte sequences "%E2%80%8E", "%E2%80%8F", "%E2%80%AA".."%E2%80%AE"
// (U+200E, U+200F, U+202A..U+202E) and "%E2%81%A6".."%E2%81%A9"
// (U+2066..U+2069) are likewise kept escaped: all nine characters are
// copied verbatim.
if ((first_byte == 0xE2) &&
UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
((second_byte == 0x80) || (second_byte == 0x81))) {
unsigned char third_byte;
if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
((second_byte == 0x80) ?
((third_byte == 0x8E) || (third_byte == 0x8F) ||
((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
result.append(escaped_text, i, 9);
i += 8;
continue;
}
}
// Decode when the byte is >= 0x80, when kUrlUnescape allows it
// unconditionally, or when the rule flag matching its class (space,
// other printable, control) is set.
if (first_byte >= 0x80 ||
(kUrlUnescape[first_byte] ||
(first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
(first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
(first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
// Record where the sequence started, emit the decoded byte, and skip the
// two hex digits (the loop's ++i consumes the '%').
adjustments.push_back(i);
result.push_back(first_byte);
i += 2;
} else {
// Keep the sequence escaped: emit only the '%' here; the two hex digits
// are copied by the following iterations.
result.push_back('%');
}
} else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
escaped_text[i] == '+') {
result.push_back(' ');
} else {
// Ordinary character: copy as is.
result.push_back(escaped_text[i]);
}
}
// Shift the caller's offsets to reflect the sequences that were collapsed.
if (offsets_for_adjustment && !adjustments.empty()) {
std::for_each(offsets_for_adjustment->begin(),
offsets_for_adjustment->end(),
net::internal::AdjustEncodingOffset(adjustments));
}
return result;
}
// Appends |c| to |output|, HTML-escaped. The five characters '<', '>', '&',
// '"' and '\'' are written as "&lt;", "&gt;", "&amp;", "&quot;" and "&#39;"
// respectively; every other character is appended unchanged. |str| may be
// any string type whose value_type is an integral character type.
template <class str>
void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
  // The replacement text is pure ASCII, so it can be widened one character
  // at a time into whatever character type |str| uses.
  const char* replacement = NULL;
  switch (c) {
    case '<':
      replacement = "&lt;";
      break;
    case '>':
      replacement = "&gt;";
      break;
    case '&':
      replacement = "&amp;";
      break;
    case '"':
      replacement = "&quot;";
      break;
    case '\'':
      replacement = "&#39;";
      break;
    default:
      break;
  }

  if (!replacement) {
    output->push_back(c);
    return;
  }
  for (const char* p = replacement; *p; ++p)
    output->push_back(*p);
}
// Returns a copy of |input| in which every character has been passed through
// AppendEscapedCharForHTMLImpl.
template <class str>
str EscapeForHTMLImpl(const str& input) {
  str escaped;
  // Lower bound only: the output grows beyond this if anything is escaped.
  escaped.reserve(input.size());
  typename str::const_iterator cursor = input.begin();
  while (cursor != input.end()) {
    AppendEscapedCharForHTMLImpl(*cursor, &escaped);
    ++cursor;
  }
  return escaped;
}
// Escaping bitmaps consumed by Escape(). Word N covers byte values
// 32*N .. 32*N+31, least significant bit first; a set bit means the byte is
// written as "%XX". In every map except kNonASCIICharmap, word 0 is all ones
// (bytes 0-31 are always escaped). In all five maps, words 4-7 are all ones
// (bytes 128-255 are always escaped).

// Used by EscapeQueryParamValue().
static const Charmap kQueryCharmap = {{
0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
}};
// Used by EscapePath().
static const Charmap kPathCharmap = {{
0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L,
0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
}};
// Used by EscapeUrlEncodedData().
static const Charmap kUrlEscape = {{
0xffffffffL, 0xf80008fdL, 0x78000001L, 0xb8000001L,
0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
}};
// Used by EscapeNonASCII(): only bytes 128-255 are escaped; every 7-bit
// value, including control characters, passes through.
static const Charmap kNonASCIICharmap = {{
0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L,
0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
}};
// Used by EscapeExternalHandlerValue().
static const Charmap kExternalHandlerCharmap = {{
0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L,
0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
}};
}
std::string EscapeQueryParamValue(const std::string& text, bool use_plus) {
return Escape(text, kQueryCharmap, use_plus);
}
// Percent-escapes |path| using kPathCharmap. Spaces are never turned into
// '+'.
std::string EscapePath(const std::string& path) {
  const bool kUsePlus = false;
  return Escape(path, kPathCharmap, kUsePlus);
}
std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) {
return Escape(path, kUrlEscape, use_plus);
}
// Percent-escapes the bytes of |input| selected by kNonASCIICharmap (values
// 128-255) and leaves every other byte unchanged.
std::string EscapeNonASCII(const std::string& input) {
  const bool kUsePlus = false;
  return Escape(input, kNonASCIICharmap, kUsePlus);
}
// Percent-escapes |text| using kExternalHandlerCharmap. Spaces are never
// turned into '+'.
std::string EscapeExternalHandlerValue(const std::string& text) {
  const bool kUsePlus = false;
  return Escape(text, kExternalHandlerCharmap, kUsePlus);
}
void AppendEscapedCharForHTML(char c, std::string* output) {
AppendEscapedCharForHTMLImpl(c, output);
}
std::string EscapeForHTML(const std::string& input) {
return EscapeForHTMLImpl(input);
}
// Returns the HTML-escaped form of the 16-bit string |input|.
base::string16 EscapeForHTML(const base::string16& input) {
  return EscapeForHTMLImpl<base::string16>(input);
}
// Unescapes the 8-bit string |escaped_text| according to |rules|. No offsets
// are tracked.
std::string UnescapeURLComponent(const std::string& escaped_text,
                                 UnescapeRule::Type rules) {
  std::vector<size_t>* const no_offsets = NULL;
  return UnescapeURLWithOffsetsImpl(escaped_text, rules, no_offsets);
}
// Unescapes the 16-bit string |escaped_text| according to |rules|. No offsets
// are tracked.
base::string16 UnescapeURLComponent(const base::string16& escaped_text,
                                    UnescapeRule::Type rules) {
  std::vector<size_t>* const no_offsets = NULL;
  return UnescapeURLWithOffsetsImpl(escaped_text, rules, no_offsets);
}
// Single-offset convenience wrapper around
// UnescapeAndDecodeUTF8URLComponentWithOffsets(). If |offset_for_adjustment|
// is non-NULL, the offset it points to is passed through that function and
// the adjusted value is written back; if it is NULL, no offset is tracked.
base::string16 UnescapeAndDecodeUTF8URLComponent(
    const std::string& text,
    UnescapeRule::Type rules,
    size_t* offset_for_adjustment) {
  if (!offset_for_adjustment) {
    // The callee still receives a (here empty) vector.
    std::vector<size_t> no_offsets;
    return UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules,
                                                        &no_offsets);
  }

  std::vector<size_t> single_offset(1, *offset_for_adjustment);
  base::string16 decoded =
      UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules,
                                                   &single_offset);
  *offset_for_adjustment = single_offset[0];
  return decoded;
}
// Unescapes |text| according to |rules|, then converts the result to UTF-16
// via base::UTF8ToUTF16AndAdjustOffsets(), keeping |offsets_for_adjustment|
// (which may be NULL) in step with both transformations. If that conversion
// reports failure, the offsets are restored to their incoming values and the
// original, still-escaped |text| is converted instead.
base::string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
    const std::string& text,
    UnescapeRule::Type rules,
    std::vector<size_t>* offsets_for_adjustment) {
  // Snapshot the offsets so the fallback path can start from a clean slate.
  std::vector<size_t> saved_offsets;
  if (offsets_for_adjustment)
    saved_offsets = *offsets_for_adjustment;

  const std::string unescaped =
      UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment);

  base::string16 converted;
  const bool conversion_ok = base::UTF8ToUTF16AndAdjustOffsets(
      unescaped.data(), unescaped.length(), &converted,
      offsets_for_adjustment);
  if (!conversion_ok) {
    // NOTE(review): presumably the unescaped bytes were not valid UTF-8 -
    // confirm against base::UTF8ToUTF16AndAdjustOffsets.
    if (offsets_for_adjustment)
      *offsets_for_adjustment = saved_offsets;
    return base::UTF8ToUTF16AndAdjustOffsets(text, offsets_for_adjustment);
  }
  return converted;
}
// Returns a copy of |input| in which each occurrence of "&lt;", "&gt;",
// "&amp;", "&quot;" and "&#39;" is replaced by the single character it
// stands for. Anything else, including an unrecognized "&...;" sequence, is
// left as is. Text produced by a replacement is not scanned again, so
// "&amp;lt;" becomes "&lt;", not "<".
base::string16 UnescapeForHTML(const base::string16& input) {
  static const struct {
    const char* ampersand_code;
    const char replacement;
  } kEscapeToChars[] = {
    { "&lt;", '<' },
    { "&gt;", '>' },
    { "&amp;", '&' },
    { "&quot;", '"' },
    { "&#39;", '\''},
  };

  // Fast path: without an ampersand there is nothing to unescape.
  if (input.find('&') == base::string16::npos)
    return input;

  // UTF-16 copies of the entity strings, converted lazily on first use.
  base::string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)];
  base::string16 text(input);
  // Walk by index rather than by iterator: replace() may invalidate
  // iterators into |text|, whereas an index stays meaningful. length() is
  // re-read every iteration because replacements shorten the string.
  for (size_t index = 0; index < text.length(); ++index) {
    if (text[index] != '&')
      continue;
    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); ++i) {
      if (ampersand_chars[i].empty()) {
        ampersand_chars[i] =
            base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);
      }
      const base::string16& code = ampersand_chars[i];
      // compare() only inspects the characters at |index|, so a miss does not
      // rescan the remainder of the string.
      if (text.compare(index, code.length(), code) == 0) {
        text.replace(index, code.length(), 1, kEscapeToChars[i].replacement);
        break;
      }
    }
  }
  return text;
}
namespace internal {

// Initializes the |adjustments| member from the given list of positions at
// which a three-character "%XX" sequence was collapsed into one character.
AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
    : adjustments(adjustments) {
}

// Maps |offset|, an index into the escaped text, to the matching index in the
// unescaped text. An offset of npos is left alone. An offset that falls on
// the second or third character of a collapsed sequence no longer has a
// counterpart and becomes npos. Otherwise the offset is reduced by two for
// every collapsed sequence that lies entirely before it.
void AdjustEncodingOffset::operator()(size_t& offset) {
  if (offset == base::string16::npos)
    return;

  const size_t original_offset = offset;
  size_t chars_removed = 0;
  Adjustments::const_iterator it = adjustments.begin();
  for (; it != adjustments.end(); ++it) {
    const size_t sequence_start = *it;
    // This sequence, and every later one, is not before the offset.
    if (original_offset <= sequence_start)
      break;
    // The offset points into the interior of this sequence.
    if (original_offset <= (sequence_start + 2)) {
      offset = base::string16::npos;
      return;
    }
    chars_removed += 2;
  }
  offset = original_offset - chars_removed;
}

}
}