This source file includes the following definitions.
- ClassifyAfterDot
- BackUpToPreviousSlash
- DoPartialPath
- DoPath
- CanonicalizePath
- CanonicalizePath
- CanonicalizePartialPath
- CanonicalizePartialPath
#include "base/logging.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_parse_internal.h"
namespace url_canon {
namespace {
// Handling classes stored in kPathCharLookup. The values are bit flags so
// that DoPartialPath can test a single bit (SPECIAL) to decide between a
// plain copy and the slower per-character handling.
enum CharacterFlags {
  // No bits set: the character is copied to the output as-is.
  PASS = 0,

  // Bit 0: the character needs handling beyond a plain copy. It is folded
  // into ESCAPE and INVALID below so those also take the slow path.
  SPECIAL = 1 << 0,

  // Bit 1: the character is written in escaped form.
  ESCAPE_BIT = 1 << 1,
  ESCAPE = SPECIAL | ESCAPE_BIT,

  // Bit 2: consulted only for the decoded value of a %XX sequence; when set,
  // the decoded character is written instead of the escape. SPECIAL is not
  // set, so such a character appearing literally is just copied.
  UNESCAPE = 1 << 2,

  // Bit 3: the character is escaped and canonicalization reports failure.
  INVALID_BIT = 1 << 3,
  INVALID = SPECIAL | INVALID_BIT,
};
// Lookup table giving the CharacterFlags value for each 8-bit character value
// (index 0x00-0xFF). Each row below holds 16 consecutive entries.
const unsigned char kPathCharLookup[0x100] = {
// 0x00-0x0F: NUL is INVALID; every other entry in this row is ESCAPE.
INVALID, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
// 0x10-0x1F
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
// 0x20-0x2F: space ! " # $ % & ' ( ) * + , - . /
// '.' is SPECIAL only: it takes the slow path for dot handling but has
// neither the escape nor the invalid bit.
ESCAPE, PASS, ESCAPE, ESCAPE, PASS, ESCAPE, PASS, PASS, PASS, PASS, PASS, PASS, PASS, UNESCAPE,SPECIAL, PASS,
// 0x30-0x3F: 0-9 : ; < = > ?
UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, PASS, ESCAPE, PASS, ESCAPE, ESCAPE,
// 0x40-0x4F: @ A-O
PASS, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
// 0x50-0x5F: P-Z [ backslash ] ^ _
// The backslash is ESCAPE here, but DoPartialPath tests for it explicitly
// and writes '/' instead.
UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, ESCAPE, PASS, ESCAPE, UNESCAPE,
// 0x60-0x6F: ` a-o
ESCAPE, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
// 0x70-0x7F: p-z { | } ~ DEL
UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,ESCAPE, ESCAPE, ESCAPE, UNESCAPE,ESCAPE,
// 0x80-0xFF: every entry in the upper half of the table is ESCAPE.
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE};
// Result of ClassifyAfterDot: what a dot that directly follows a slash in the
// output turns out to be.
enum DotDisposition {
  // The dot is ordinary path text and is written to the output.
  NOT_A_DIRECTORY = 0,

  // The dot is a "current directory" segment; nothing is written for it.
  DIRECTORY_CUR = 1,

  // The dots are a "parent directory" segment; the output is backed up to
  // the previous slash.
  DIRECTORY_UP = 2
};
// Classifies what follows a dot whose end is at |after_dot| in |spec| (which
// is valid up to, but not including, |end|).
//
// Returns DIRECTORY_CUR when the dot is followed by the end of input or a
// slash, DIRECTORY_UP when it is followed by a second dot (as recognized by
// IsDot) that is itself followed by the end of input or a slash, and
// NOT_A_DIRECTORY otherwise.
//
// |*consumed_len| receives the number of input characters at and after
// |after_dot| that belong to the directory marker (second dot and/or trailing
// slash); it is 0 for NOT_A_DIRECTORY.
template<typename CHAR>
DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot,
                                int end, int* consumed_len) {
  // Start from the "ordinary text" answer and upgrade it below.
  DotDisposition disposition = NOT_A_DIRECTORY;
  int consumed = 0;

  if (after_dot == end) {
    // "." at the very end of the input.
    disposition = DIRECTORY_CUR;
  } else if (url_parse::IsURLSlash(spec[after_dot])) {
    // "./" - the slash is consumed along with the dot.
    disposition = DIRECTORY_CUR;
    consumed = 1;
  } else {
    const int second_dot_len = IsDot(spec, after_dot, end);
    if (second_dot_len) {
      const int after_second_dot = after_dot + second_dot_len;
      if (after_second_dot == end) {
        // ".." at the very end of the input.
        disposition = DIRECTORY_UP;
        consumed = second_dot_len;
      } else if (url_parse::IsURLSlash(spec[after_second_dot])) {
        // "../" - the second dot and the slash are both consumed.
        disposition = DIRECTORY_UP;
        consumed = second_dot_len + 1;
      }
    }
  }

  *consumed_len = consumed;
  return disposition;
}
// Truncates |output| so that it ends just after the slash preceding the
// current trailing slash, i.e. it removes the last path segment.
//
// |output| is expected to be non-empty and to end with '/' (checked with
// DCHECK only). If that trailing slash is the one at |path_begin_in_output|,
// the output is left untouched. The search never moves before
// |path_begin_in_output|.
void BackUpToPreviousSlash(int path_begin_in_output,
                           CanonOutput* output) {
  DCHECK(output->length() > 0);

  const int trailing_slash = output->length() - 1;
  DCHECK(output->at(trailing_slash) == '/');
  if (trailing_slash == path_begin_in_output)
    return;  // Already at the first slash of the path; nothing to remove.

  // Walk backwards from the character before the trailing slash until a
  // slash is found or the beginning of the path is reached.
  int pos = trailing_slash - 1;
  while (pos > path_begin_in_output && output->at(pos) != '/')
    --pos;

  // Keep everything up to and including the position we stopped at.
  output->set_length(pos + 1);
}
// Canonicalizes the characters of |spec| covered by |path| and appends the
// result to |output|. |path_begin_in_output| is the index in |output| where
// the path starts; "parent directory" segments never back up before it.
//
// Returns false if any character was flagged invalid (by kPathCharLookup or
// by AppendUTF8EscapedChar); output is still produced in that case.
template<typename CHAR, typename UCHAR>
bool DoPartialPath(const CHAR* spec,
                   const url_parse::Component& path,
                   int path_begin_in_output,
                   CanonOutput* output) {
  const int end = path.end();
  bool success = true;

  for (int i = path.begin; i < end; i++) {
    const UCHAR uch = static_cast<UCHAR>(spec[i]);

    // Wide input only: values outside ASCII are handed to
    // AppendUTF8EscapedChar, which receives &i and so may advance it.
    if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) {
      success &= AppendUTF8EscapedChar(spec, &i, end, output);
      continue;
    }

    const unsigned char out_ch = static_cast<unsigned char>(uch);
    const unsigned char flags = kPathCharLookup[out_ch];

    // Fast path: nothing special about this character, copy it.
    if (!(flags & SPECIAL)) {
      output->push_back(out_ch);
      continue;
    }

    // Dots are checked first, before any other special handling.
    const int dotlen = IsDot(spec, i, end);
    if (dotlen > 0) {
      DCHECK(output->length() > path_begin_in_output);
      const bool follows_slash =
          output->length() > path_begin_in_output &&
          output->at(output->length() - 1) == '/';
      if (!follows_slash) {
        // A dot in the middle of a segment is ordinary text.
        output->push_back('.');
        i += dotlen - 1;
        continue;
      }

      int consumed_len;
      const DotDisposition disposition =
          ClassifyAfterDot<CHAR>(spec, i + dotlen, end, &consumed_len);
      if (disposition == NOT_A_DIRECTORY) {
        // The segment merely starts with a dot; keep it.
        output->push_back('.');
        i += dotlen - 1;
      } else {
        // "." or "..": write nothing, and for ".." also drop the previous
        // segment. Then skip everything the marker consumed (the -1
        // compensates for the loop's own increment).
        if (disposition == DIRECTORY_UP)
          BackUpToPreviousSlash(path_begin_in_output, output);
        i += dotlen + consumed_len - 1;
      }
      continue;
    }

    // Backslashes are written as forward slashes.
    if (out_ch == '\\') {
      output->push_back('/');
      continue;
    }

    if (out_ch == '%') {
      unsigned char unescaped_value;
      if (!DecodeEscaped(spec, &i, end, &unescaped_value)) {
        // Not a decodable escape sequence: pass the percent sign through.
        output->push_back('%');
        continue;
      }

      const char unescaped_flags = kPathCharLookup[unescaped_value];
      if (unescaped_flags & UNESCAPE) {
        // The decoded character is written in place of the escape.
        output->push_back(unescaped_value);
      } else {
        // Keep the sequence escaped, copying the two input characters
        // verbatim. This relies on DecodeEscaped having left i on the last
        // character of the sequence.
        output->push_back('%');
        output->push_back(static_cast<char>(spec[i - 1]));
        output->push_back(static_cast<char>(spec[i]));
        if (unescaped_flags & INVALID_BIT)
          success = false;
      }
      continue;
    }

    // Remaining special characters are escaped; invalid ones additionally
    // mark the result as a failure. A character with neither bit set writes
    // nothing here.
    if (flags & (INVALID_BIT | ESCAPE_BIT)) {
      AppendEscapedChar(out_ch, output);
      if (flags & INVALID_BIT)
        success = false;
    }
  }
  return success;
}
// Canonicalizes the complete path component |path| of |spec| into |output|
// and records where it landed in |out_path|.
//
// The written path always begins with '/': one is added when the input path
// is empty or does not start with a slash. Returns the result of
// DoPartialPath, or true when the input path is empty.
template<typename CHAR, typename UCHAR>
bool DoPath(const CHAR* spec,
            const url_parse::Component& path,
            CanonOutput* output,
            url_parse::Component* out_path) {
  // Record the start of the path before anything is read from |path|.
  out_path->begin = output->length();

  bool ok = true;
  if (path.len <= 0) {
    // No input path: the canonical form is a lone slash.
    output->push_back('/');
  } else {
    // Supply the leading slash if the input lacks one, then canonicalize the
    // input characters themselves.
    if (!url_parse::IsURLSlash(spec[path.begin]))
      output->push_back('/');
    ok = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output);
  }

  out_path->len = output->length() - out_path->begin;
  return ok;
}
}
// 8-bit entry point for full path canonicalization. Forwards to DoPath with
// |char| input treated as |unsigned char|, and returns its result.
bool CanonicalizePath(const char* spec,
                      const url_parse::Component& path,
                      CanonOutput* output,
                      url_parse::Component* out_path) {
  const bool canonicalized =
      DoPath<char, unsigned char>(spec, path, output, out_path);
  return canonicalized;
}
// 16-bit entry point for full path canonicalization. Forwards to DoPath with
// base::char16 used for both the character and its unsigned counterpart, and
// returns its result.
bool CanonicalizePath(const base::char16* spec,
                      const url_parse::Component& path,
                      CanonOutput* output,
                      url_parse::Component* out_path) {
  const bool canonicalized =
      DoPath<base::char16, base::char16>(spec, path, output, out_path);
  return canonicalized;
}
// 8-bit entry point for canonicalizing part of a path. Forwards to
// DoPartialPath; |path_begin_in_output| is the index in |output| where the
// path begins. No leading slash is added here.
bool CanonicalizePartialPath(const char* spec,
                             const url_parse::Component& path,
                             int path_begin_in_output,
                             CanonOutput* output) {
  const bool canonicalized = DoPartialPath<char, unsigned char>(
      spec, path, path_begin_in_output, output);
  return canonicalized;
}
// 16-bit entry point for canonicalizing part of a path. Forwards to
// DoPartialPath; |path_begin_in_output| is the index in |output| where the
// path begins. No leading slash is added here.
bool CanonicalizePartialPath(const base::char16* spec,
                             const url_parse::Component& path,
                             int path_begin_in_output,
                             CanonOutput* output) {
  const bool canonicalized = DoPartialPath<base::char16, base::char16>(
      spec, path, path_begin_in_output, output);
  return canonicalized;
}
}