This source file includes following definitions.
- EstimateFinalCount
- PrefixLess
- Exists
- GetPrefixes
- LoadFile
- WriteFile
- AddRun
- GetPrefixSet
- EmitRun
- AddPrefix
#include "chrome/browser/safe_browsing/prefix_set.h"
#include <algorithm>
#include <math.h>
#include "base/file_util.h"
#include "base/files/scoped_file.h"
#include "base/logging.h"
#include "base/md5.h"
#include "base/metrics/histogram.h"
namespace {
static uint32 kMagic = 0x864088dd;
static uint32 kVersion = 0x2;
typedef struct {
uint32 magic;
uint32 version;
uint32 index_size;
uint32 deltas_size;
} FileHeader;
const SBPrefix kEstimateThreshold = 1 << 30;
size_t EstimateFinalCount(SBPrefix current_prefix, size_t current_count) {
const size_t estimated_prefix_count = static_cast<size_t>(
(static_cast<uint64>(current_count) << 32) / current_prefix);
return estimated_prefix_count + estimated_prefix_count / 100;
}
}
namespace safe_browsing {
bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) {
return a.first < b.first;
}
PrefixSet::PrefixSet() {
}
PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) {
DCHECK(index && deltas);
index_.swap(*index);
deltas_.swap(*deltas);
}
PrefixSet::~PrefixSet() {}
bool PrefixSet::Exists(SBPrefix prefix) const {
if (index_.empty())
return false;
IndexVector::const_iterator iter =
std::upper_bound(index_.begin(), index_.end(),
IndexPair(prefix, 0), PrefixLess);
if (iter == index_.begin())
return false;
const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second);
--iter;
SBPrefix current = iter->first;
if (current == prefix)
return true;
for (size_t di = iter->second; di < bound && current < prefix; ++di) {
current += deltas_[di];
}
return current == prefix;
}
void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const {
prefixes->reserve(index_.size() + deltas_.size());
for (size_t ii = 0; ii < index_.size(); ++ii) {
const size_t deltas_end =
(ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();
SBPrefix current = index_[ii].first;
prefixes->push_back(current);
for (size_t di = index_[ii].second; di < deltas_end; ++di) {
current += deltas_[di];
prefixes->push_back(current);
}
}
}
scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) {
int64 size_64;
if (!base::GetFileSize(filter_name, &size_64))
return scoped_ptr<PrefixSet>();
using base::MD5Digest;
if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))
return scoped_ptr<PrefixSet>();
base::ScopedFILE file(base::OpenFile(filter_name, "rb"));
if (!file.get())
return scoped_ptr<PrefixSet>();
FileHeader header;
size_t read = fread(&header, sizeof(header), 1, file.get());
if (read != 1)
return scoped_ptr<PrefixSet>();
if (header.magic != kMagic ||
(header.version != kVersion && header.version != 1)) {
return scoped_ptr<PrefixSet>();
}
IndexVector index;
const size_t index_bytes = sizeof(index[0]) * header.index_size;
std::vector<uint16> deltas;
const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size;
const size_t expected_bytes =
sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest);
if (static_cast<int64>(expected_bytes) != size_64)
return scoped_ptr<PrefixSet>();
base::MD5Context context;
base::MD5Init(&context);
base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
sizeof(header)));
if (header.index_size) {
index.resize(header.index_size);
read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get());
if (read != index.size())
return scoped_ptr<PrefixSet>();
base::MD5Update(&context,
base::StringPiece(reinterpret_cast<char*>(&(index[0])),
index_bytes));
}
if (header.deltas_size) {
deltas.resize(header.deltas_size);
read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get());
if (read != deltas.size())
return scoped_ptr<PrefixSet>();
base::MD5Update(&context,
base::StringPiece(reinterpret_cast<char*>(&(deltas[0])),
deltas_bytes));
}
base::MD5Digest calculated_digest;
base::MD5Final(&calculated_digest, &context);
base::MD5Digest file_digest;
read = fread(&file_digest, sizeof(file_digest), 1, file.get());
if (read != 1)
return scoped_ptr<PrefixSet>();
if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
return scoped_ptr<PrefixSet>();
if (header.version == 1) {
std::vector<SBPrefix> prefixes;
PrefixSet(&index, &deltas).GetPrefixes(&prefixes);
std::sort(prefixes.begin(), prefixes.end());
return PrefixSetBuilder(prefixes).GetPrefixSet().Pass();
}
return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas));
}
bool PrefixSet::WriteFile(const base::FilePath& filter_name) const {
FileHeader header;
header.magic = kMagic;
header.version = kVersion;
header.index_size = static_cast<uint32>(index_.size());
header.deltas_size = static_cast<uint32>(deltas_.size());
if (static_cast<size_t>(header.index_size) != index_.size() ||
static_cast<size_t>(header.deltas_size) != deltas_.size()) {
NOTREACHED();
return false;
}
base::ScopedFILE file(base::OpenFile(filter_name, "wb"));
if (!file.get())
return false;
base::MD5Context context;
base::MD5Init(&context);
size_t written = fwrite(&header, sizeof(header), 1, file.get());
if (written != 1)
return false;
base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
sizeof(header)));
if (index_.size()) {
const size_t index_bytes = sizeof(index_[0]) * index_.size();
written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(),
file.get());
if (written != index_.size())
return false;
base::MD5Update(&context,
base::StringPiece(
reinterpret_cast<const char*>(&(index_[0])),
index_bytes));
}
if (deltas_.size()) {
const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size();
written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(),
file.get());
if (written != deltas_.size())
return false;
base::MD5Update(&context,
base::StringPiece(
reinterpret_cast<const char*>(&(deltas_[0])),
deltas_bytes));
}
base::MD5Digest digest;
base::MD5Final(&digest, &context);
written = fwrite(&digest, sizeof(digest), 1, file.get());
if (written != 1)
return false;
file.reset();
return true;
}
void PrefixSet::AddRun(SBPrefix index_prefix,
const uint16* run_begin, const uint16* run_end) {
if (index_prefix > kEstimateThreshold &&
deltas_.capacity() < deltas_.size() + (run_end - run_begin)) {
deltas_.reserve(EstimateFinalCount(index_prefix, deltas_.size()));
}
index_.push_back(std::make_pair(index_prefix, deltas_.size()));
deltas_.insert(deltas_.end(), run_begin, run_end);
}
PrefixSetBuilder::PrefixSetBuilder()
: prefix_set_(new PrefixSet()) {
}
PrefixSetBuilder::PrefixSetBuilder(const std::vector<SBPrefix>& prefixes)
: prefix_set_(new PrefixSet()) {
for (size_t i = 0; i < prefixes.size(); ++i) {
AddPrefix(prefixes[i]);
}
}
PrefixSetBuilder::~PrefixSetBuilder() {
}
scoped_ptr<PrefixSet> PrefixSetBuilder::GetPrefixSet() {
DCHECK(prefix_set_.get());
while (!buffer_.empty()) {
EmitRun();
}
PrefixSet::IndexVector(prefix_set_->index_).swap(prefix_set_->index_);
return prefix_set_.Pass();
}
void PrefixSetBuilder::EmitRun() {
DCHECK(prefix_set_.get());
SBPrefix prev_prefix = buffer_[0];
uint16 run[PrefixSet::kMaxRun];
size_t run_pos = 0;
size_t i;
for (i = 1; i < buffer_.size() && run_pos < PrefixSet::kMaxRun; ++i) {
DCHECK_GT(buffer_[i], prev_prefix);
const unsigned delta = buffer_[i] - prev_prefix;
const uint16 delta16 = static_cast<uint16>(delta);
if (delta != static_cast<unsigned>(delta16))
break;
run[run_pos++] = delta16;
DCHECK_EQ(static_cast<unsigned>(run[run_pos - 1]), delta);
prev_prefix = buffer_[i];
}
prefix_set_->AddRun(buffer_[0], run, run + run_pos);
buffer_.erase(buffer_.begin(), buffer_.begin() + i);
}
void PrefixSetBuilder::AddPrefix(SBPrefix prefix) {
DCHECK(prefix_set_.get());
if (buffer_.empty()) {
DCHECK(prefix_set_->index_.empty());
DCHECK(prefix_set_->deltas_.empty());
} else {
if (buffer_.back() == prefix)
return;
DCHECK_LT(buffer_.back(), prefix);
}
buffer_.push_back(prefix);
if (buffer_.size() > PrefixSet::kMaxRun + 2)
EmitRun();
}
}