This source file includes the following definitions.
- IsRemovableURLWhitespace
- DoRemoveURLWhitespace
- IsSchemeFirstChar
- DoScheme
- DoUserInfo
- WritePortInt
- DoPort
- DoCanonicalizeRef
- RemoveURLWhitespace
- RemoveURLWhitespace
- CanonicalSchemeChar
- CanonicalizeScheme
- CanonicalizeScheme
- CanonicalizeUserInfo
- CanonicalizeUserInfo
- CanonicalizePort
- CanonicalizePort
- CanonicalizeRef
- CanonicalizeRef
#include <string.h>
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
namespace url_canon {
namespace {
// Returns true when |ch| is a carriage return, a line feed, or a horizontal
// tab. Every other value, including the plain space character, yields false.
inline bool IsRemovableURLWhitespace(int ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
// Produces a view of |input| with every character accepted by
// IsRemovableURLWhitespace dropped.
//
// When the input holds no such character, |buffer| is left untouched, |input|
// itself is returned and |*output_len| receives |input_len|. Otherwise all
// remaining characters are appended to |buffer| in order, and the buffer's
// data pointer and length are reported instead.
template<typename CHAR>
const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len,
                                  CanonOutputT<CHAR>* buffer,
                                  int* output_len) {
  // Locate the first removable character, if any, so that input without
  // whitespace can be returned without copying anything.
  int first_hit = 0;
  while (first_hit < input_len &&
         !IsRemovableURLWhitespace(input[first_hit])) {
    ++first_hit;
  }

  if (first_hit >= input_len) {
    // Nothing to strip: hand the caller's own storage back.
    *output_len = input_len;
    return input;
  }

  // Something must be stripped: copy over everything that is kept.
  for (int pos = 0; pos < input_len; ++pos) {
    const CHAR current = input[pos];
    if (IsRemovableURLWhitespace(current))
      continue;
    buffer->push_back(current);
  }
  *output_len = buffer->length();
  return buffer->data();
}
// Lookup table indexed by a 7-bit ASCII value (0x00-0x7f). Each entry is the
// character to emit for that input when it appears in a scheme, or 0 when the
// input has no entry. Uppercase letters map to their lowercase counterparts;
// lowercase letters, digits, '+', '-' and '.' map to themselves.
const char kSchemeCanonical[0x80] = {
// 0x00-0x1f: no entries.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// 0x20-0x2f: only '+' (0x2b), '-' (0x2d) and '.' (0x2e) have entries.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '+', 0, '-', '.', 0,
// 0x30-0x3f: the digits '0'-'9', then ':' through '?' with no entries.
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 , 0 , 0 , 0 , 0 , 0 ,
// 0x40-0x5f: '@' has no entry; 'A'-'Z' are lowercased; '[' through '_' have
// no entries.
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0 , 0, 0 , 0,
// 0x60-0x7f: '`' has no entry; 'a'-'z' map to themselves; '{' through 0x7f
// have no entries.
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 };
// Returns true when |c| is an ASCII letter of either case. DoScheme uses this
// as the extra restriction on the first character of a scheme.
inline bool IsSchemeFirstChar(unsigned char c) {
  const bool is_lowercase_letter = c >= 'a' && c <= 'z';
  const bool is_uppercase_letter = c >= 'A' && c <= 'Z';
  return is_lowercase_letter || is_uppercase_letter;
}
// Writes the canonical form of the scheme found at |scheme| inside |spec| to
// |output|, followed by a ':' separator, and records in |out_scheme| where the
// scheme text (excluding the colon) landed in the output.
//
// An absent or empty scheme produces just the colon, a zero-length
// |out_scheme| and a true result. Otherwise every input character is handled
// in one of three ways:
//   - characters with an entry in kSchemeCanonical are emitted through that
//     table (the first character must additionally pass IsSchemeFirstChar);
//   - a '%' is emitted unchanged, but marks the scheme as invalid;
//   - anything else is passed to AppendUTF8EscapedChar and marks the scheme
//     as invalid.
//
// Returns true only if no character took one of the two invalid paths.
template<typename CHAR, typename UCHAR>
bool DoScheme(const CHAR* spec,
              const url_parse::Component& scheme,
              CanonOutput* output,
              url_parse::Component* out_scheme) {
  if (scheme.len <= 0) {
    *out_scheme = url_parse::Component(output->length(), 0);
    output->push_back(':');
    return true;
  }

  out_scheme->begin = output->length();

  bool all_valid = true;
  const int scheme_end = scheme.end();
  int pos = scheme.begin;
  while (pos < scheme_end) {
    const UCHAR current = static_cast<UCHAR>(spec[pos]);

    // Only 7-bit values are looked up in the table, and the first character
    // is only looked up when it is a letter.
    char canonical = 0;
    if (current < 0x80 &&
        (pos != scheme.begin ||
         IsSchemeFirstChar(static_cast<unsigned char>(current)))) {
      canonical = kSchemeCanonical[current];
    }

    if (canonical != 0) {
      output->push_back(canonical);
    } else {
      // No canonical form: something is still emitted for this character,
      // but the scheme as a whole is reported as invalid.
      all_valid = false;
      if (current == '%') {
        output->push_back('%');
      } else {
        // The helper receives &pos and so may move the cursor itself; the
        // increment below then continues from wherever it left off.
        AppendUTF8EscapedChar(spec, &pos, scheme_end, output);
      }
    }
    ++pos;
  }

  out_scheme->len = output->length() - out_scheme->begin;
  output->push_back(':');
  return all_valid;
}
// Writes the "username[:password]@" portion of a URL to |output|.
//
// |username| and |password| locate the two pieces inside |username_spec| and
// |password_spec| respectively. When neither has a positive length, nothing
// is written and both output components are reset to default-constructed
// values. Otherwise the username (possibly empty) is written via
// AppendStringOfType with CHAR_USERINFO, then ':' plus the password if the
// password is non-empty, then a trailing '@'. |out_username| and
// |out_password| receive the positions of the written pieces; a password with
// non-positive length yields a default-constructed |out_password|.
//
// Always returns true.
template<typename CHAR, typename UCHAR>
bool DoUserInfo(const CHAR* username_spec,
                const url_parse::Component& username,
                const CHAR* password_spec,
                const url_parse::Component& password,
                CanonOutput* output,
                url_parse::Component* out_username,
                url_parse::Component* out_password) {
  if (!(username.len > 0 || password.len > 0)) {
    // No user info at all: emit nothing.
    *out_username = url_parse::Component();
    *out_password = url_parse::Component();
    return true;
  }

  // Username. The component is recorded even when it ends up empty.
  const int username_start = output->length();
  out_username->begin = username_start;
  if (username.len > 0) {
    AppendStringOfType(&username_spec[username.begin], username.len,
                       CHAR_USERINFO, output);
  }
  out_username->len = output->length() - username_start;

  // Password. The ':' separator is only written when there is password text.
  if (password.len <= 0) {
    *out_password = url_parse::Component();
  } else {
    output->push_back(':');
    const int password_start = output->length();
    out_password->begin = password_start;
    AppendStringOfType(&password_spec[password.begin], password.len,
                       CHAR_USERINFO, output);
    out_password->len = output->length() - password_start;
  }

  output->push_back('@');
  return true;
}
// Formats |port| in base 10 into |output|, a buffer of |output_len| chars, by
// delegating to _itoa_s. The result of _itoa_s is not inspected.
// NOTE(review): the caller in this file reads the buffer up to the first zero
// byte, so _itoa_s is presumably expected to NUL-terminate - confirm.
inline void WritePortInt(char* output, int output_len, int port) {
  const int kDecimalRadix = 10;
  _itoa_s(port, output, output_len, kDecimalRadix);
}
// Writes the canonical ":port" portion of a URL to |output| and records the
// location of the port text (without the colon) in |out_port|.
//
// The port is obtained from url_parse::ParsePort and then handled as follows:
//   - PORT_UNSPECIFIED, or equal to |default_port_for_scheme|: nothing is
//     written, |out_port| is default-constructed, returns true.
//   - PORT_INVALID: a colon is written and the original port text is handed to
//     AppendInvalidNarrowString; returns false.
//   - anything else: a colon and the decimal digits of the number are written;
//     returns true.
template<typename CHAR, typename UCHAR>
bool DoPort(const CHAR* spec,
            const url_parse::Component& port,
            int default_port_for_scheme,
            CanonOutput* output,
            url_parse::Component* out_port) {
  const int parsed_port = url_parse::ParsePort(spec, port);

  const bool omit_port = parsed_port == url_parse::PORT_UNSPECIFIED ||
                         parsed_port == default_port_for_scheme;
  if (omit_port) {
    *out_port = url_parse::Component();
    return true;
  }

  if (parsed_port == url_parse::PORT_INVALID) {
    output->push_back(':');
    const int invalid_start = output->length();
    out_port->begin = invalid_start;
    AppendInvalidNarrowString(spec, port.begin, port.end(), output);
    out_port->len = output->length() - invalid_start;
    return false;
  }

  // Room for five digits plus a terminator.
  // NOTE(review): assumes ParsePort never yields a value needing more than
  // five digits - confirm against url_parse.
  const int kPortBufferSize = 6;
  char digits[kPortBufferSize];
  WritePortInt(digits, kPortBufferSize, parsed_port);

  output->push_back(':');
  const int digits_start = output->length();
  out_port->begin = digits_start;
  // Copy digits up to the first zero byte, never past the end of the buffer.
  const char* const digits_limit = digits + kPortBufferSize;
  for (const char* cursor = digits; cursor != digits_limit && *cursor;
       ++cursor) {
    output->push_back(*cursor);
  }
  out_port->len = output->length() - digits_start;
  return true;
}
// Writes the canonical "#ref" portion of a URL to |output| and records the
// location of the ref text (without the '#') in |out_ref|.
//
// A ref with negative length writes nothing and yields a default-constructed
// |out_ref|. A zero-length ref still writes the '#'. Each input character is
// handled according to its value:
//   - 0: dropped;
//   - below 0x20: passed to AppendEscapedChar;
//   - below 0x80: appended unchanged;
//   - otherwise: decoded with ReadUTFChar and re-emitted with AppendUTF8Value.
template<typename CHAR, typename UCHAR>
void DoCanonicalizeRef(const CHAR* spec,
                       const url_parse::Component& ref,
                       CanonOutput* output,
                       url_parse::Component* out_ref) {
  if (ref.len < 0) {
    *out_ref = url_parse::Component();
    return;
  }

  output->push_back('#');
  out_ref->begin = output->length();

  const int ref_end = ref.end();
  int pos = ref.begin;
  while (pos < ref_end) {
    const UCHAR current = static_cast<UCHAR>(spec[pos]);
    if (current == 0) {
      // Embedded NULs produce no output at all.
    } else if (current < 0x20) {
      AppendEscapedChar(static_cast<unsigned char>(current), output);
    } else if (current < 0x80) {
      output->push_back(static_cast<char>(current));
    } else {
      // ReadUTFChar receives &pos and so may consume more than one input
      // unit. Its result is not checked here.
      // NOTE(review): presumably |code_point| holds a usable substitute when
      // decoding fails - confirm against ReadUTFChar.
      unsigned code_point;
      ReadUTFChar(spec, &pos, ref_end, &code_point);
      AppendUTF8Value(code_point, output);
    }
    ++pos;
  }

  out_ref->len = output->length() - out_ref->begin;
}
}
const char* RemoveURLWhitespace(const char* input, int input_len,
CanonOutputT<char>* buffer,
int* output_len) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len);
}
// 16-bit overload: forwards to DoRemoveURLWhitespace for |base::char16| input
// and returns whatever pointer that helper reports, with the matching length
// in |*output_len|.
const base::char16* RemoveURLWhitespace(const base::char16* input,
                                        int input_len,
                                        CanonOutputT<base::char16>* buffer,
                                        int* output_len) {
  const base::char16* const stripped =
      DoRemoveURLWhitespace<base::char16>(input, input_len, buffer,
                                          output_len);
  return stripped;
}
// Returns the kSchemeCanonical entry for |ch|, or 0 when |ch| lies outside the
// table's 7-bit range. A 0 result also occurs for in-range characters that
// have no entry in the table.
char CanonicalSchemeChar(base::char16 ch) {
  return ch < 0x80 ? kSchemeCanonical[ch] : '\0';
}
// 8-bit overload: canonicalizes the scheme by forwarding to DoScheme with
// |char| input, and returns DoScheme's result unchanged.
bool CanonicalizeScheme(const char* spec,
                        const url_parse::Component& scheme,
                        CanonOutput* output,
                        url_parse::Component* out_scheme) {
  const bool scheme_ok =
      DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
  return scheme_ok;
}
// 16-bit overload: canonicalizes the scheme by forwarding to DoScheme with
// |base::char16| input, and returns DoScheme's result unchanged.
bool CanonicalizeScheme(const base::char16* spec,
                        const url_parse::Component& scheme,
                        CanonOutput* output,
                        url_parse::Component* out_scheme) {
  const bool scheme_ok =
      DoScheme<base::char16, base::char16>(spec, scheme, output, out_scheme);
  return scheme_ok;
}
// 8-bit overload: canonicalizes the username/password pair by forwarding to
// DoUserInfo with |char| input, and returns DoUserInfo's result unchanged.
bool CanonicalizeUserInfo(const char* username_source,
                          const url_parse::Component& username,
                          const char* password_source,
                          const url_parse::Component& password,
                          CanonOutput* output,
                          url_parse::Component* out_username,
                          url_parse::Component* out_password) {
  const bool user_info_ok = DoUserInfo<char, unsigned char>(
      username_source, username,
      password_source, password,
      output,
      out_username, out_password);
  return user_info_ok;
}
// 16-bit overload: canonicalizes the username/password pair by forwarding to
// DoUserInfo with |base::char16| input, and returns DoUserInfo's result
// unchanged.
bool CanonicalizeUserInfo(const base::char16* username_source,
                          const url_parse::Component& username,
                          const base::char16* password_source,
                          const url_parse::Component& password,
                          CanonOutput* output,
                          url_parse::Component* out_username,
                          url_parse::Component* out_password) {
  const bool user_info_ok = DoUserInfo<base::char16, base::char16>(
      username_source, username,
      password_source, password,
      output,
      out_username, out_password);
  return user_info_ok;
}
// 8-bit overload: canonicalizes the port by forwarding to DoPort with |char|
// input, and returns DoPort's result unchanged.
bool CanonicalizePort(const char* spec,
                      const url_parse::Component& port,
                      int default_port_for_scheme,
                      CanonOutput* output,
                      url_parse::Component* out_port) {
  const bool port_ok = DoPort<char, unsigned char>(
      spec, port, default_port_for_scheme, output, out_port);
  return port_ok;
}
// 16-bit overload: canonicalizes the port by forwarding to DoPort with
// |base::char16| input, and returns DoPort's result unchanged.
bool CanonicalizePort(const base::char16* spec,
                      const url_parse::Component& port,
                      int default_port_for_scheme,
                      CanonOutput* output,
                      url_parse::Component* out_port) {
  const bool port_ok = DoPort<base::char16, base::char16>(
      spec, port, default_port_for_scheme, output, out_port);
  return port_ok;
}
// 8-bit overload: canonicalizes the ref by forwarding to DoCanonicalizeRef
// with |char| input, using |unsigned char| as the unsigned character type.
void CanonicalizeRef(const char* spec,
                     const url_parse::Component& ref,
                     CanonOutput* output,
                     url_parse::Component* out_ref) {
  typedef char InputChar;
  typedef unsigned char UnsignedInputChar;
  DoCanonicalizeRef<InputChar, UnsignedInputChar>(spec, ref, output, out_ref);
}
// 16-bit overload: canonicalizes the ref by forwarding to DoCanonicalizeRef
// with |base::char16| as both the input and the unsigned character type.
void CanonicalizeRef(const base::char16* spec,
                     const url_parse::Component& ref,
                     CanonOutput* output,
                     url_parse::Component* out_ref) {
  typedef base::char16 InputChar;
  typedef base::char16 UnsignedInputChar;
  DoCanonicalizeRef<InputChar, UnsignedInputChar>(spec, ref, output, out_ref);
}
}