This source file includes the following definitions:
- WriteRules
- NormalizeRule
- NormalizeDataToRuleMap
- NormalizeFile
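// Helpers used by net/tools/tld_cleanup to normalize the effective-TLD
// (public suffix) data and write it back out as a gperf input file.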
#include "net/tools/tld_cleanup/tld_cleanup_util.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "url/gurl.h"
#include "url/url_parse.h"
namespace {
const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS===";
const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS===";
const int kExceptionRule = 1;
const int kWildcardRule = 2;
const int kPrivateRule = 4;
}
namespace net {
namespace tld_cleanup {
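
// Writes the given rules as a gperf input file to 'outfile', one rule per
// line in the form "<domain>, <flags>". Returns true if the whole file was
// written.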
bool WriteRules(const RuleMap& rules, const base::FilePath& outfile) {
  std::string data;
  data.append("%{\n"
              "// Copyright 2012 The Chromium Authors. All rights reserved.\n"
              "// Use of this source code is governed by a BSD-style license "
              "that can be\n"
              "// found in the LICENSE file.\n\n"
              "// This file is generated by net/tools/tld_cleanup/.\n"
              "// DO NOT MANUALLY EDIT!\n"
              "%}\n"
              "struct DomainRule {\n"
              " int name_offset;\n"
              " int type; // flags: 1: exception, 2: wildcard, 4: private\n"
              "};\n"
              "%%\n");

  for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) {
    data.append(i->first);
    data.append(", ");
    int type = 0;
    if (i->second.exception) {
      type = kExceptionRule;
    } else if (i->second.wildcard) {
      type = kWildcardRule;
    }
    if (i->second.is_private) {
      type += kPrivateRule;
    }
    data.append(base::IntToString(type));
    data.append("\n");
  }
  data.append("%%\n");

  int written = base::WriteFile(outfile,
                                data.data(),
                                static_cast<int>(data.size()));
  return written == static_cast<int>(data.size());
}
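
// Adjusts a rule to a standard form: strips a single leading and trailing
// dot, moves the '!' (exception) and '*.' (wildcard) markers into the Rule
// flags, and canonicalizes the remaining domain with GURL. Returns kSuccess
// for a clean rule, kWarning for a rule that is kept despite looking
// suspicious, and kError for a rule that is dropped.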
NormalizeResult NormalizeRule(std::string* domain, Rule* rule) {
  NormalizeResult result = kSuccess;

  // Strip single leading and trailing dots.
  if (domain->at(0) == '.')
    domain->erase(0, 1);
  if (domain->empty()) {
    LOG(WARNING) << "Ignoring empty rule";
    return kWarning;
  }
  if (domain->at(domain->size() - 1) == '.')
    domain->erase(domain->size() - 1, 1);
  if (domain->empty()) {
    LOG(WARNING) << "Ignoring empty rule";
    return kWarning;
  }

  // An exception rule is prefixed with '!', a wildcard rule with '*.'.
  size_t start_offset = 0;
  if (domain->at(0) == '!') {
    domain->erase(0, 1);
    rule->exception = true;
  } else if (domain->find("*.") == 0) {
    domain->erase(0, 2);
    rule->wildcard = true;
  }
  if (domain->empty()) {
    LOG(WARNING) << "Ignoring empty rule";
    return kWarning;
  }

  // Warn about, but keep, rules that still contain a wildcard or exception
  // marker after the prefix has been stripped.
  if (domain->find("*.", start_offset) != std::string::npos ||
      domain->find('!', start_offset) != std::string::npos) {
    LOG(WARNING) << "Keeping probably invalid rule: " << *domain;
    result = kWarning;
  }

  // Canonicalize the rule by parsing it as the host of an http:// URL.
  std::string url = "http://";
  url.append(*domain);
  GURL gurl(url);
  const std::string& spec = gurl.possibly_invalid_spec();
  url_parse::Component host = gurl.parsed_for_possibly_invalid_spec().host;
  if (host.len < 0) {
    LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *domain;
    return kError;
  }
  if (!gurl.is_valid()) {
    LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *domain;
    result = kWarning;
  }
  domain->assign(spec.substr(host.begin, host.len));

  return result;
}
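
// Parses the effective-TLD data in 'data' into 'rules'. Lines may be rules,
// '//' comments, or the markers that delimit the private-domain section.
// Returns the most severe NormalizeResult encountered while parsing.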
NormalizeResult NormalizeDataToRuleMap(const std::string data,
                                       RuleMap* rules) {
  CHECK(rules);
  std::string domain;
  NormalizeResult result = kSuccess;
  size_t line_start = 0;
  size_t line_end = 0;
  bool is_private = false;
  RuleMap extra_rules;
  int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1;
  int end_private_length = arraysize(kEndPrivateDomainsComment) - 1;

  while (line_start < data.size()) {
    if (line_start + begin_private_length < data.size() &&
        !data.compare(line_start, begin_private_length,
                      kBeginPrivateDomainsComment)) {
      // Everything between the BEGIN and END markers is private.
      is_private = true;
      line_end = line_start + begin_private_length;
    } else if (line_start + end_private_length < data.size() &&
               !data.compare(line_start, end_private_length,
                             kEndPrivateDomainsComment)) {
      is_private = false;
      line_end = line_start + end_private_length;
    } else if (line_start + 1 < data.size() &&
               data[line_start] == '/' &&
               data[line_start + 1] == '/') {
      // Skip '//' comment lines.
      line_end = data.find_first_of("\r\n", line_start);
      if (line_end == std::string::npos)
        line_end = data.size();
    } else {
      // A rule line: take everything up to the first whitespace.
      line_end = data.find_first_of("\r\n \t", line_start);
      if (line_end == std::string::npos)
        line_end = data.size();
      domain.assign(data.data(), line_start, line_end - line_start);

      Rule rule;
      rule.wildcard = false;
      rule.exception = false;
      rule.is_private = is_private;
      NormalizeResult new_result = NormalizeRule(&domain, &rule);
      if (new_result != kError) {
        CHECK(rules->find(domain) == rules->end())
            << "Duplicate rule found for " << domain;
        (*rules)[domain] = rule;

        // Remember the bare TLD of every multi-level rule so that a plain
        // rule for the TLD can be added later if the file doesn't list one
        // explicitly.
        size_t tld_start = domain.find_last_of('.');
        if (tld_start != std::string::npos && tld_start + 1 < domain.size()) {
          std::string extra_rule_domain = domain.substr(tld_start + 1);
          RuleMap::const_iterator iter = extra_rules.find(extra_rule_domain);
          Rule extra_rule;
          extra_rule.exception = false;
          extra_rule.wildcard = false;
          if (iter == extra_rules.end()) {
            extra_rule.is_private = is_private;
          } else {
            // The implicit TLD rule is public as soon as any rule that
            // produced it is public.
            extra_rule.is_private = is_private && iter->second.is_private;
          }
          extra_rules[extra_rule_domain] = extra_rule;
        }
      }
      result = std::max(result, new_result);
    }

    // Advance to the start of the next non-empty line.
    line_start = data.find_first_of("\r\n", line_end);
    if (line_start == std::string::npos)
      line_start = data.size();
    line_start = data.find_first_not_of("\r\n", line_start);
    if (line_start == std::string::npos)
      line_start = data.size();
  }

  // Only add the implicit TLD rules that the file didn't define explicitly.
  for (RuleMap::const_iterator iter = extra_rules.begin();
       iter != extra_rules.end();
       ++iter) {
    if (rules->find(iter->first) == rules->end()) {
      (*rules)[iter->first] = iter->second;
    }
  }

  return result;
}
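
// Reads the rule file at 'in_filename', normalizes the rules, and writes the
// gperf input to 'out_filename'. Returns the most severe result from
// normalization, or kError if the output file could not be written.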
NormalizeResult NormalizeFile(const base::FilePath& in_filename,
                              const base::FilePath& out_filename) {
  RuleMap rules;
  std::string data;
  if (!base::ReadFileToString(in_filename, &data)) {
    LOG(ERROR) << "Unable to read file";
    // Return success since the error has already been reported.
    return kSuccess;
  }
  NormalizeResult result = NormalizeDataToRuleMap(data, &rules);
  if (!WriteRules(rules, out_filename)) {
    LOG(ERROR) << "Error(s) writing output file";
    result = kError;
  }
  return result;
}

}  // namespace tld_cleanup
}  // namespace net