This source file includes following definitions.
- LengthGreater
- succeeded_
- RunOnDBThread
- DoneRunOnMainThread
- post_scoring_item_count_
- HistoryItemsForTerms
- UpdateURL
- UpdateRecentVisits
- ScheduleUpdateRecentVisits
- DeleteURL
- RestoreFromFile
- RebuildFromHistory
- WritePrivateDataToCacheFileTask
- CancelPendingUpdates
- Duplicate
- Empty
- Clear
- HistoryIDSetFromWords
- HistoryIDsForTerm
- WordIDSetForTermChars
- IndexRow
- AddRowWordsToIndex
- AddWordToIndex
- AddWordHistory
- UpdateWordHistory
- AddToHistoryIDWordMap
- RemoveRowFromIndex
- RemoveRowWordsFromIndex
- ResetSearchTermCache
- SaveToFile
- SavePrivateData
- SaveWordList
- SaveWordMap
- SaveCharWordMap
- SaveWordIDHistoryMap
- SaveHistoryInfoMap
- SaveWordStartsMap
- RestorePrivateData
- RestoreWordList
- RestoreWordMap
- RestoreCharWordMap
- RestoreWordIDHistoryMap
- RestoreHistoryInfoMap
- RestoreWordStartsMap
- URLSchemeIsWhitelisted
- used_
- now_
#include "chrome/browser/history/url_index_private_data.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <string>
#include <vector>
#include "base/basictypes.h"
#include "base/file_util.h"
#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "chrome/browser/autocomplete/autocomplete_provider.h"
#include "chrome/browser/autocomplete/url_prefix.h"
#include "chrome/browser/bookmarks/bookmark_service.h"
#include "chrome/browser/history/history_database.h"
#include "chrome/browser/history/history_db_task.h"
#include "chrome/browser/history/history_service.h"
#include "chrome/browser/history/in_memory_url_index.h"
#include "content/public/browser/notification_details.h"
#include "content/public/browser/notification_service.h"
#include "content/public/browser/notification_source.h"
#include "net/base/net_util.h"
#if defined(USE_SYSTEM_PROTOBUF)
#include <google/protobuf/repeated_field.h>
#else
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
#endif
using google::protobuf::RepeatedField;
using google::protobuf::RepeatedPtrField;
using in_memory_url_index::InMemoryURLIndexCacheItem;
namespace {
static const size_t kMaxVisitsToStoreInCache = 10u;
}
namespace history {
typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;
typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry
CharWordMapEntry;
typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
WordIDHistoryMapItem;
typedef imui::
InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
WordIDHistoryMapEntry;
typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
HistoryInfoMapEntry;
typedef imui::
InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo
HistoryInfoMapEntry_VisitInfo;
typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;
typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
WordStartsMapEntry;
bool LengthGreater(const base::string16& string_a,
const base::string16& string_b) {
return string_a.length() > string_b.length();
}
class UpdateRecentVisitsFromHistoryDBTask : public HistoryDBTask {
public:
explicit UpdateRecentVisitsFromHistoryDBTask(
URLIndexPrivateData* private_data,
URLID url_id);
virtual bool RunOnDBThread(HistoryBackend* backend,
history::HistoryDatabase* db) OVERRIDE;
virtual void DoneRunOnMainThread() OVERRIDE;
private:
virtual ~UpdateRecentVisitsFromHistoryDBTask();
URLIndexPrivateData* private_data_;
URLID url_id_;
bool succeeded_;
VisitVector recent_visits_;
DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);
};
UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(
URLIndexPrivateData* private_data,
URLID url_id)
: private_data_(private_data),
url_id_(url_id),
succeeded_(false) {
}
bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(
HistoryBackend* backend,
HistoryDatabase* db) {
DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
succeeded_ = db->GetMostRecentVisitsForURL(url_id_,
kMaxVisitsToStoreInCache,
&recent_visits_);
if (!succeeded_)
recent_visits_.clear();
return true;
}
void UpdateRecentVisitsFromHistoryDBTask::DoneRunOnMainThread() {
if (succeeded_)
private_data_->UpdateRecentVisits(url_id_, recent_visits_);
}
UpdateRecentVisitsFromHistoryDBTask::~UpdateRecentVisitsFromHistoryDBTask() {
}
URLIndexPrivateData::URLIndexPrivateData()
: restored_cache_version_(0),
saved_cache_version_(kCurrentCacheFileVersion),
pre_filter_item_count_(0),
post_filter_item_count_(0),
post_scoring_item_count_(0) {
}
ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
base::string16 search_string,
size_t cursor_position,
const std::string& languages,
BookmarkService* bookmark_service) {
if ((cursor_position != base::string16::npos) &&
(cursor_position < search_string.length()) &&
(cursor_position > 0)) {
search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
}
pre_filter_item_count_ = 0;
post_filter_item_count_ = 0;
post_scoring_item_count_ = 0;
base::string16 lower_raw_string(base::i18n::ToLower(search_string));
base::string16 lower_unescaped_string =
net::UnescapeURLComponent(lower_raw_string,
net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
String16Vector lower_words(
history::String16VectorFromString16(lower_unescaped_string, false, NULL));
ScoredHistoryMatches scored_items;
if (word_list_.empty() || lower_words.empty()) {
search_term_cache_.clear();
return scored_items;
}
ResetSearchTermCache();
HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
const size_t kItemsToScoreLimit = 500;
pre_filter_item_count_ = history_id_set.size();
bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
if (was_trimmed) {
HistoryIDVector history_ids;
std::copy(history_id_set.begin(), history_id_set.end(),
std::back_inserter(history_ids));
HistoryItemFactorGreater
item_factor_functor(history_info_map_);
std::partial_sort(history_ids.begin(),
history_ids.begin() + kItemsToScoreLimit,
history_ids.end(),
item_factor_functor);
history_id_set.clear();
std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
std::inserter(history_id_set, history_id_set.end()));
post_filter_item_count_ = history_id_set.size();
}
history::String16Vector lower_raw_terms;
if (Tokenize(lower_raw_string, base::kWhitespaceUTF16,
&lower_raw_terms) == 0) {
return scored_items;
}
scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(*this, languages, bookmark_service, lower_raw_string,
lower_raw_terms, base::Time::Now())).ScoredMatches();
if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
std::partial_sort(scored_items.begin(),
scored_items.begin() +
AutocompleteProvider::kMaxMatches,
scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater);
scored_items.resize(AutocompleteProvider::kMaxMatches);
} else {
std::sort(scored_items.begin(), scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater);
}
post_scoring_item_count_ = scored_items.size();
if (was_trimmed) {
search_term_cache_.clear();
} else {
for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
cache_iter != search_term_cache_.end(); ) {
if (!cache_iter->second.used_)
search_term_cache_.erase(cache_iter++);
else
++cache_iter;
}
}
return scored_items;
}
bool URLIndexPrivateData::UpdateURL(
HistoryService* history_service,
const URLRow& row,
const std::string& languages,
const std::set<std::string>& scheme_whitelist) {
bool row_was_updated = false;
URLID row_id = row.id();
HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
if (row_pos == history_info_map_.end()) {
URLRow new_row(row);
new_row.set_id(row_id);
row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
IndexRow(NULL, history_service, new_row, languages, scheme_whitelist);
} else if (RowQualifiesAsSignificant(row, base::Time())) {
URLRow& row_to_update = row_pos->second.url_row;
bool title_updated = row_to_update.title() != row.title();
if (row_to_update.visit_count() != row.visit_count() ||
row_to_update.typed_count() != row.typed_count() ||
row_to_update.last_visit() != row.last_visit() || title_updated) {
row_to_update.set_visit_count(row.visit_count());
row_to_update.set_typed_count(row.typed_count());
row_to_update.set_last_visit(row.last_visit());
ScheduleUpdateRecentVisits(history_service, row_id);
if (title_updated) {
RemoveRowWordsFromIndex(row_to_update);
row_to_update.set_title(row.title());
RowWordStarts word_starts;
AddRowWordsToIndex(row_to_update, &word_starts, languages);
word_starts_map_[row_id] = word_starts;
}
row_was_updated = true;
}
} else {
RemoveRowFromIndex(row);
row_was_updated = true;
}
if (row_was_updated)
search_term_cache_.clear();
return row_was_updated;
}
void URLIndexPrivateData::UpdateRecentVisits(
URLID url_id,
const VisitVector& recent_visits) {
HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);
if (row_pos != history_info_map_.end()) {
VisitInfoVector* visits = &row_pos->second.visits;
visits->clear();
const size_t size =
std::min(recent_visits.size(), kMaxVisitsToStoreInCache);
visits->reserve(size);
for (size_t i = 0; i < size; i++) {
visits->push_back(std::make_pair(recent_visits[i].visit_time,
recent_visits[i].transition));
}
}
}
void URLIndexPrivateData::ScheduleUpdateRecentVisits(
HistoryService* history_service,
URLID url_id) {
history_service->ScheduleDBTask(
new UpdateRecentVisitsFromHistoryDBTask(this, url_id),
&recent_visits_consumer_);
}
class HistoryInfoMapItemHasURL {
public:
explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
bool operator()(const std::pair<const HistoryID, HistoryInfoMapValue>& item) {
return item.second.url_row.url() == url_;
}
private:
const GURL& url_;
};
bool URLIndexPrivateData::DeleteURL(const GURL& url) {
HistoryInfoMap::iterator pos = std::find_if(
history_info_map_.begin(),
history_info_map_.end(),
HistoryInfoMapItemHasURL(url));
if (pos == history_info_map_.end())
return false;
RemoveRowFromIndex(pos->second.url_row);
search_term_cache_.clear();
return true;
}
scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
const base::FilePath& file_path,
const std::string& languages) {
base::TimeTicks beginning_time = base::TimeTicks::Now();
if (!base::PathExists(file_path))
return NULL;
std::string data;
if (!base::ReadFileToString(file_path, &data))
return NULL;
scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
InMemoryURLIndexCacheItem index_cache;
if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
<< file_path.value();
return restored_data;
}
if (!restored_data->RestorePrivateData(index_cache, languages))
return NULL;
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
base::TimeTicks::Now() - beginning_time);
UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
restored_data->history_id_word_map_.size());
UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
restored_data->word_map_.size());
UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
restored_data->char_word_map_.size());
if (restored_data->Empty())
return NULL;
return restored_data;
}
scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
HistoryDatabase* history_db,
const std::string& languages,
const std::set<std::string>& scheme_whitelist) {
if (!history_db)
return NULL;
base::TimeTicks beginning_time = base::TimeTicks::Now();
scoped_refptr<URLIndexPrivateData>
rebuilt_data(new URLIndexPrivateData);
URLDatabase::URLEnumerator history_enum;
if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
return NULL;
rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();
for (URLRow row; history_enum.GetNextURL(&row); ) {
rebuilt_data->IndexRow(history_db, NULL, row, languages,
scheme_whitelist);
}
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
base::TimeTicks::Now() - beginning_time);
UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
rebuilt_data->history_id_word_map_.size());
UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
rebuilt_data->word_map_.size());
UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
rebuilt_data->char_word_map_.size());
return rebuilt_data;
}
bool URLIndexPrivateData::WritePrivateDataToCacheFileTask(
scoped_refptr<URLIndexPrivateData> private_data,
const base::FilePath& file_path) {
DCHECK(private_data.get());
DCHECK(!file_path.empty());
return private_data->SaveToFile(file_path);
}
void URLIndexPrivateData::CancelPendingUpdates() {
recent_visits_consumer_.CancelAllRequests();
}
scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
data_copy->last_time_rebuilt_from_history_ = last_time_rebuilt_from_history_;
data_copy->word_list_ = word_list_;
data_copy->available_words_ = available_words_;
data_copy->word_map_ = word_map_;
data_copy->char_word_map_ = char_word_map_;
data_copy->word_id_history_map_ = word_id_history_map_;
data_copy->history_id_word_map_ = history_id_word_map_;
data_copy->history_info_map_ = history_info_map_;
data_copy->word_starts_map_ = word_starts_map_;
return data_copy;
};
bool URLIndexPrivateData::Empty() const {
return history_info_map_.empty();
}
void URLIndexPrivateData::Clear() {
last_time_rebuilt_from_history_ = base::Time();
word_list_.clear();
available_words_.clear();
word_map_.clear();
char_word_map_.clear();
word_id_history_map_.clear();
history_id_word_map_.clear();
history_info_map_.clear();
word_starts_map_.clear();
}
URLIndexPrivateData::~URLIndexPrivateData() {}
HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
const String16Vector& unsorted_words) {
HistoryIDSet history_id_set;
String16Vector words(unsorted_words);
std::sort(words.begin(), words.end(), LengthGreater);
for (String16Vector::iterator iter = words.begin(); iter != words.end();
++iter) {
base::string16 uni_word = *iter;
HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
if (term_history_set.empty()) {
history_id_set.clear();
break;
}
if (iter == words.begin()) {
history_id_set.swap(term_history_set);
} else {
HistoryIDSet new_history_id_set;
std::set_intersection(history_id_set.begin(), history_id_set.end(),
term_history_set.begin(), term_history_set.end(),
std::inserter(new_history_id_set,
new_history_id_set.begin()));
history_id_set.swap(new_history_id_set);
}
}
return history_id_set;
}
HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
const base::string16& term) {
if (term.empty())
return HistoryIDSet();
size_t term_length = term.length();
WordIDSet word_id_set;
if (term_length > 1) {
SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
cache_iter != search_term_cache_.end(); ++cache_iter) {
if (StartsWith(term, cache_iter->first, false) &&
(best_prefix == search_term_cache_.end() ||
cache_iter->first.length() > best_prefix->first.length()))
best_prefix = cache_iter;
}
Char16Set prefix_chars;
base::string16 leftovers(term);
if (best_prefix != search_term_cache_.end()) {
size_t prefix_length = best_prefix->first.length();
if (prefix_length == term_length) {
best_prefix->second.used_ = true;
return best_prefix->second.history_id_set_;
}
if (best_prefix->second.history_id_set_.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
return HistoryIDSet();
}
word_id_set = best_prefix->second.word_id_set_;
prefix_chars = Char16SetFromString16(best_prefix->first);
leftovers = term.substr(prefix_length);
}
Char16Set leftover_chars = Char16SetFromString16(leftovers);
Char16Set unique_chars =
base::STLSetDifference<Char16Set>(leftover_chars, prefix_chars);
if (!unique_chars.empty()) {
WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
if (leftover_set.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
return HistoryIDSet();
}
if (prefix_chars.empty()) {
word_id_set.swap(leftover_set);
} else {
WordIDSet new_word_id_set;
std::set_intersection(word_id_set.begin(), word_id_set.end(),
leftover_set.begin(), leftover_set.end(),
std::inserter(new_word_id_set,
new_word_id_set.begin()));
word_id_set.swap(new_word_id_set);
}
}
for (WordIDSet::iterator word_set_iter = word_id_set.begin();
word_set_iter != word_id_set.end(); ) {
if (word_list_[*word_set_iter].find(term) == base::string16::npos)
word_id_set.erase(word_set_iter++);
else
++word_set_iter;
}
} else {
word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
}
HistoryIDSet history_id_set;
if (!word_id_set.empty()) {
for (WordIDSet::iterator word_id_iter = word_id_set.begin();
word_id_iter != word_id_set.end(); ++word_id_iter) {
WordID word_id = *word_id_iter;
WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
if (word_iter != word_id_history_map_.end()) {
HistoryIDSet& word_history_id_set(word_iter->second);
history_id_set.insert(word_history_id_set.begin(),
word_history_id_set.end());
}
}
}
if (term_length > 1)
search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
return history_id_set;
}
WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
const Char16Set& term_chars) {
WordIDSet word_id_set;
for (Char16Set::const_iterator c_iter = term_chars.begin();
c_iter != term_chars.end(); ++c_iter) {
CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
if (char_iter == char_word_map_.end()) {
word_id_set.clear();
break;
}
WordIDSet& char_word_id_set(char_iter->second);
if (char_word_id_set.empty()) {
word_id_set.clear();
break;
}
if (c_iter == term_chars.begin()) {
word_id_set = char_word_id_set;
} else {
WordIDSet new_word_id_set;
std::set_intersection(word_id_set.begin(), word_id_set.end(),
char_word_id_set.begin(), char_word_id_set.end(),
std::inserter(new_word_id_set,
new_word_id_set.begin()));
word_id_set.swap(new_word_id_set);
}
}
return word_id_set;
}
bool URLIndexPrivateData::IndexRow(
HistoryDatabase* history_db,
HistoryService* history_service,
const URLRow& row,
const std::string& languages,
const std::set<std::string>& scheme_whitelist) {
const GURL& gurl(row.url());
if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
return false;
URLID row_id = row.id();
base::string16 url(net::FormatUrl(gurl, languages,
net::kFormatUrlOmitUsernamePassword,
net::UnescapeRule::NONE,
NULL, NULL, NULL));
HistoryID history_id = static_cast<HistoryID>(row_id);
DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
URLRow new_row(GURL(url), row_id);
new_row.set_visit_count(row.visit_count());
new_row.set_typed_count(row.typed_count());
new_row.set_last_visit(row.last_visit());
new_row.set_title(row.title());
history_info_map_[history_id].url_row = new_row;
RowWordStarts word_starts;
AddRowWordsToIndex(new_row, &word_starts, languages);
word_starts_map_[history_id] = word_starts;
if (history_db) {
VisitVector recent_visits;
DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
if (history_db->GetMostRecentVisitsForURL(row_id,
kMaxVisitsToStoreInCache,
&recent_visits))
UpdateRecentVisits(row_id, recent_visits);
} else {
DCHECK(history_service);
ScheduleUpdateRecentVisits(history_service, row_id);
}
return true;
}
void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
RowWordStarts* word_starts,
const std::string& languages) {
HistoryID history_id = static_cast<HistoryID>(row.id());
const GURL& gurl(row.url());
const base::string16& url = CleanUpUrlForMatching(gurl, languages);
String16Set url_words = String16SetFromString16(url,
word_starts ? &word_starts->url_word_starts_ : NULL);
const base::string16& title = CleanUpTitleForMatching(row.title());
String16Set title_words = String16SetFromString16(title,
word_starts ? &word_starts->title_word_starts_ : NULL);
String16Set words;
std::set_union(url_words.begin(), url_words.end(),
title_words.begin(), title_words.end(),
std::insert_iterator<String16Set>(words, words.begin()));
for (String16Set::iterator word_iter = words.begin();
word_iter != words.end(); ++word_iter)
AddWordToIndex(*word_iter, history_id);
search_term_cache_.clear();
}
void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
HistoryID history_id) {
WordMap::iterator word_pos = word_map_.find(term);
if (word_pos != word_map_.end())
UpdateWordHistory(word_pos->second, history_id);
else
AddWordHistory(term, history_id);
}
void URLIndexPrivateData::AddWordHistory(const base::string16& term,
HistoryID history_id) {
WordID word_id = word_list_.size();
if (available_words_.empty()) {
word_list_.push_back(term);
} else {
word_id = *(available_words_.begin());
word_list_[word_id] = term;
available_words_.erase(word_id);
}
word_map_[term] = word_id;
HistoryIDSet history_id_set;
history_id_set.insert(history_id);
word_id_history_map_[word_id] = history_id_set;
AddToHistoryIDWordMap(history_id, word_id);
Char16Set characters = Char16SetFromString16(term);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
base::char16 uni_char = *uni_char_iter;
CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
if (char_iter != char_word_map_.end()) {
WordIDSet& word_id_set(char_iter->second);
word_id_set.insert(word_id);
} else {
WordIDSet word_id_set;
word_id_set.insert(word_id);
char_word_map_[uni_char] = word_id_set;
}
}
}
void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
HistoryID history_id) {
WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
DCHECK(history_pos != word_id_history_map_.end());
HistoryIDSet& history_id_set(history_pos->second);
history_id_set.insert(history_id);
AddToHistoryIDWordMap(history_id, word_id);
}
void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
WordID word_id) {
HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
if (iter != history_id_word_map_.end()) {
WordIDSet& word_id_set(iter->second);
word_id_set.insert(word_id);
} else {
WordIDSet word_id_set;
word_id_set.insert(word_id);
history_id_word_map_[history_id] = word_id_set;
}
}
void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {
RemoveRowWordsFromIndex(row);
HistoryID history_id = static_cast<HistoryID>(row.id());
history_info_map_.erase(history_id);
word_starts_map_.erase(history_id);
}
void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {
HistoryID history_id = static_cast<HistoryID>(row.id());
WordIDSet word_id_set = history_id_word_map_[history_id];
history_id_word_map_.erase(history_id);
for (WordIDSet::iterator word_id_iter = word_id_set.begin();
word_id_iter != word_id_set.end(); ++word_id_iter) {
WordID word_id = *word_id_iter;
word_id_history_map_[word_id].erase(history_id);
if (!word_id_history_map_[word_id].empty())
continue;
base::string16 word = word_list_[word_id];
Char16Set characters = Char16SetFromString16(word);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
base::char16 uni_char = *uni_char_iter;
char_word_map_[uni_char].erase(word_id);
if (char_word_map_[uni_char].empty())
char_word_map_.erase(uni_char);
}
word_id_history_map_.erase(word_id);
word_map_.erase(word);
word_list_[word_id] = base::string16();
available_words_.insert(word_id);
}
}
void URLIndexPrivateData::ResetSearchTermCache() {
for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
iter != search_term_cache_.end(); ++iter)
iter->second.used_ = false;
}
bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
base::TimeTicks beginning_time = base::TimeTicks::Now();
InMemoryURLIndexCacheItem index_cache;
SavePrivateData(&index_cache);
std::string data;
if (!index_cache.SerializeToString(&data)) {
LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
return false;
}
int size = data.size();
if (base::WriteFile(file_path, data.c_str(), size) != size) {
LOG(WARNING) << "Failed to write " << file_path.value();
return false;
}
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
base::TimeTicks::Now() - beginning_time);
return true;
}
void URLIndexPrivateData::SavePrivateData(
InMemoryURLIndexCacheItem* cache) const {
DCHECK(cache);
cache->set_last_rebuild_timestamp(
last_time_rebuilt_from_history_.ToInternalValue());
cache->set_version(saved_cache_version_);
cache->set_history_item_count(0);
SaveWordList(cache);
SaveWordMap(cache);
SaveCharWordMap(cache);
SaveWordIDHistoryMap(cache);
SaveHistoryInfoMap(cache);
SaveWordStartsMap(cache);
}
void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
if (word_list_.empty())
return;
WordListItem* list_item = cache->mutable_word_list();
list_item->set_word_count(word_list_.size());
for (String16Vector::const_iterator iter = word_list_.begin();
iter != word_list_.end(); ++iter)
list_item->add_word(base::UTF16ToUTF8(*iter));
}
void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
if (word_map_.empty())
return;
WordMapItem* map_item = cache->mutable_word_map();
map_item->set_item_count(word_map_.size());
for (WordMap::const_iterator iter = word_map_.begin();
iter != word_map_.end(); ++iter) {
WordMapEntry* map_entry = map_item->add_word_map_entry();
map_entry->set_word(base::UTF16ToUTF8(iter->first));
map_entry->set_word_id(iter->second);
}
}
void URLIndexPrivateData::SaveCharWordMap(
InMemoryURLIndexCacheItem* cache) const {
if (char_word_map_.empty())
return;
CharWordMapItem* map_item = cache->mutable_char_word_map();
map_item->set_item_count(char_word_map_.size());
for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
iter != char_word_map_.end(); ++iter) {
CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
map_entry->set_char_16(iter->first);
const WordIDSet& word_id_set(iter->second);
map_entry->set_item_count(word_id_set.size());
for (WordIDSet::const_iterator set_iter = word_id_set.begin();
set_iter != word_id_set.end(); ++set_iter)
map_entry->add_word_id(*set_iter);
}
}
void URLIndexPrivateData::SaveWordIDHistoryMap(
InMemoryURLIndexCacheItem* cache) const {
if (word_id_history_map_.empty())
return;
WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
map_item->set_item_count(word_id_history_map_.size());
for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
iter != word_id_history_map_.end(); ++iter) {
WordIDHistoryMapEntry* map_entry =
map_item->add_word_id_history_map_entry();
map_entry->set_word_id(iter->first);
const HistoryIDSet& history_id_set(iter->second);
map_entry->set_item_count(history_id_set.size());
for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
set_iter != history_id_set.end(); ++set_iter)
map_entry->add_history_id(*set_iter);
}
}
void URLIndexPrivateData::SaveHistoryInfoMap(
InMemoryURLIndexCacheItem* cache) const {
if (history_info_map_.empty())
return;
HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
map_item->set_item_count(history_info_map_.size());
for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
iter != history_info_map_.end(); ++iter) {
HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
map_entry->set_history_id(iter->first);
const URLRow& url_row(iter->second.url_row);
map_entry->set_visit_count(url_row.visit_count());
map_entry->set_typed_count(url_row.typed_count());
map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
map_entry->set_url(url_row.url().spec());
map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
const VisitInfoVector& visits(iter->second.visits);
for (VisitInfoVector::const_iterator visit_iter = visits.begin();
visit_iter != visits.end(); ++visit_iter) {
HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
visit_info->set_visit_time(visit_iter->first.ToInternalValue());
visit_info->set_transition_type(visit_iter->second);
}
}
}
void URLIndexPrivateData::SaveWordStartsMap(
InMemoryURLIndexCacheItem* cache) const {
if (word_starts_map_.empty())
return;
if (saved_cache_version_ < 1)
return;
WordStartsMapItem* map_item = cache->mutable_word_starts_map();
map_item->set_item_count(word_starts_map_.size());
for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
iter != word_starts_map_.end(); ++iter) {
WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
map_entry->set_history_id(iter->first);
const RowWordStarts& word_starts(iter->second);
for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
i != word_starts.url_word_starts_.end(); ++i)
map_entry->add_url_word_starts(*i);
for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
i != word_starts.title_word_starts_.end(); ++i)
map_entry->add_title_word_starts(*i);
}
}
bool URLIndexPrivateData::RestorePrivateData(
const InMemoryURLIndexCacheItem& cache,
const std::string& languages) {
last_time_rebuilt_from_history_ =
base::Time::FromInternalValue(cache.last_rebuild_timestamp());
const base::TimeDelta rebuilt_ago =
base::Time::Now() - last_time_rebuilt_from_history_;
if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
(rebuilt_ago < base::TimeDelta::FromDays(-1))) {
return false;
}
if (cache.has_version()) {
if (cache.version() < kCurrentCacheFileVersion) {
return false;
}
restored_cache_version_ = cache.version();
}
return RestoreWordList(cache) && RestoreWordMap(cache) &&
RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
}
bool URLIndexPrivateData::RestoreWordList(
const InMemoryURLIndexCacheItem& cache) {
if (!cache.has_word_list())
return false;
const WordListItem& list_item(cache.word_list());
uint32 expected_item_count = list_item.word_count();
uint32 actual_item_count = list_item.word_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<std::string>& words(list_item.word());
for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
iter != words.end(); ++iter)
word_list_.push_back(base::UTF8ToUTF16(*iter));
return true;
}
bool URLIndexPrivateData::RestoreWordMap(
const InMemoryURLIndexCacheItem& cache) {
if (!cache.has_word_map())
return false;
const WordMapItem& list_item(cache.word_map());
uint32 expected_item_count = list_item.item_count();
uint32 actual_item_count = list_item.word_map_entry_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
iter != entries.end(); ++iter)
word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
return true;
}
bool URLIndexPrivateData::RestoreCharWordMap(
const InMemoryURLIndexCacheItem& cache) {
if (!cache.has_char_word_map())
return false;
const CharWordMapItem& list_item(cache.char_word_map());
uint32 expected_item_count = list_item.item_count();
uint32 actual_item_count = list_item.char_word_map_entry_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<CharWordMapEntry>&
entries(list_item.char_word_map_entry());
for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
entries.begin(); iter != entries.end(); ++iter) {
expected_item_count = iter->item_count();
actual_item_count = iter->word_id_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
base::char16 uni_char = static_cast<base::char16>(iter->char_16());
WordIDSet word_id_set;
const RepeatedField<int32>& word_ids(iter->word_id());
for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
jiter != word_ids.end(); ++jiter)
word_id_set.insert(*jiter);
char_word_map_[uni_char] = word_id_set;
}
return true;
}
bool URLIndexPrivateData::RestoreWordIDHistoryMap(
const InMemoryURLIndexCacheItem& cache) {
if (!cache.has_word_id_history_map())
return false;
const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
uint32 expected_item_count = list_item.item_count();
uint32 actual_item_count = list_item.word_id_history_map_entry_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<WordIDHistoryMapEntry>&
entries(list_item.word_id_history_map_entry());
for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
entries.begin(); iter != entries.end(); ++iter) {
expected_item_count = iter->item_count();
actual_item_count = iter->history_id_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
WordID word_id = iter->word_id();
HistoryIDSet history_id_set;
const RepeatedField<int64>& history_ids(iter->history_id());
for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
jiter != history_ids.end(); ++jiter) {
history_id_set.insert(*jiter);
AddToHistoryIDWordMap(*jiter, word_id);
}
word_id_history_map_[word_id] = history_id_set;
}
return true;
}
bool URLIndexPrivateData::RestoreHistoryInfoMap(
const InMemoryURLIndexCacheItem& cache) {
if (!cache.has_history_info_map())
return false;
const HistoryInfoMapItem& list_item(cache.history_info_map());
uint32 expected_item_count = list_item.item_count();
uint32 actual_item_count = list_item.history_info_map_entry_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<HistoryInfoMapEntry>&
entries(list_item.history_info_map_entry());
for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
entries.begin(); iter != entries.end(); ++iter) {
HistoryID history_id = iter->history_id();
GURL url(iter->url());
URLRow url_row(url, history_id);
url_row.set_visit_count(iter->visit_count());
url_row.set_typed_count(iter->typed_count());
url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
if (iter->has_title()) {
base::string16 title(base::UTF8ToUTF16(iter->title()));
url_row.set_title(title);
}
history_info_map_[history_id].url_row = url_row;
VisitInfoVector visits;
visits.reserve(iter->visits_size());
for (int i = 0; i < iter->visits_size(); ++i) {
visits.push_back(std::make_pair(
base::Time::FromInternalValue(iter->visits(i).visit_time()),
static_cast<content::PageTransition>(iter->visits(i).
transition_type())));
}
history_info_map_[history_id].visits = visits;
}
return true;
}
bool URLIndexPrivateData::RestoreWordStartsMap(
const InMemoryURLIndexCacheItem& cache,
const std::string& languages) {
if (cache.has_word_starts_map()) {
const WordStartsMapItem& list_item(cache.word_starts_map());
uint32 expected_item_count = list_item.item_count();
uint32 actual_item_count = list_item.word_starts_map_entry_size();
if (actual_item_count == 0 || actual_item_count != expected_item_count)
return false;
const RepeatedPtrField<WordStartsMapEntry>&
entries(list_item.word_starts_map_entry());
for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
entries.begin(); iter != entries.end(); ++iter) {
HistoryID history_id = iter->history_id();
RowWordStarts word_starts;
const RepeatedField<int32>& url_starts(iter->url_word_starts());
for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
jiter != url_starts.end(); ++jiter)
word_starts.url_word_starts_.push_back(*jiter);
const RepeatedField<int32>& title_starts(iter->title_word_starts());
for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();
jiter != title_starts.end(); ++jiter)
word_starts.title_word_starts_.push_back(*jiter);
word_starts_map_[history_id] = word_starts;
}
} else {
for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
iter != history_info_map_.end(); ++iter) {
RowWordStarts word_starts;
const URLRow& row(iter->second.url_row);
const base::string16& url = CleanUpUrlForMatching(row.url(), languages);
String16VectorFromString16(url, false, &word_starts.url_word_starts_);
const base::string16& title = CleanUpTitleForMatching(row.title());
String16VectorFromString16(title, false, &word_starts.title_word_starts_);
word_starts_map_[iter->first] = word_starts;
}
}
return true;
}
bool URLIndexPrivateData::URLSchemeIsWhitelisted(
const GURL& gurl,
const std::set<std::string>& whitelist) {
return whitelist.find(gurl.scheme()) != whitelist.end();
}
URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
const WordIDSet& word_id_set,
const HistoryIDSet& history_id_set)
: word_id_set_(word_id_set),
history_id_set_(history_id_set),
used_(true) {}
URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()
: used_(true) {}
URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}
URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
const URLIndexPrivateData& private_data,
const std::string& languages,
BookmarkService* bookmark_service,
const base::string16& lower_string,
const String16Vector& lower_terms,
const base::Time now)
: private_data_(private_data),
languages_(languages),
bookmark_service_(bookmark_service),
lower_string_(lower_string),
lower_terms_(lower_terms),
now_(now) {
lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);
for (size_t i = 0; i < lower_terms_.size(); ++i) {
base::i18n::BreakIterator iter(lower_terms_[i],
base::i18n::BreakIterator::BREAK_WORD);
if (!iter.Init())
continue;
while (iter.Advance() && !iter.IsWord()) {}
if (iter.IsWord())
lower_terms_to_word_starts_offsets_[i] = iter.prev();
}
}
URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
void URLIndexPrivateData::AddHistoryMatch::operator()(
const HistoryID history_id) {
HistoryInfoMap::const_iterator hist_pos =
private_data_.history_info_map_.find(history_id);
if (hist_pos != private_data_.history_info_map_.end()) {
const URLRow& hist_item = hist_pos->second.url_row;
const VisitInfoVector& visits = hist_pos->second.visits;
WordStartsMap::const_iterator starts_pos =
private_data_.word_starts_map_.find(history_id);
DCHECK(starts_pos != private_data_.word_starts_map_.end());
ScoredHistoryMatch match(hist_item, visits, languages_, lower_string_,
lower_terms_, lower_terms_to_word_starts_offsets_,
starts_pos->second, now_, bookmark_service_);
if (match.raw_score() > 0)
scored_matches_.push_back(match);
}
}
URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
const HistoryInfoMap& history_info_map)
: history_info_map_(history_info_map) {
}
URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
const HistoryID h1,
const HistoryID h2) {
HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
if (entry1 == history_info_map_.end())
return false;
HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
if (entry2 == history_info_map_.end())
return true;
const URLRow& r1(entry1->second.url_row);
const URLRow& r2(entry2->second.url_row);
if (r1.typed_count() != r2.typed_count())
return (r1.typed_count() > r2.typed_count());
if (r1.visit_count() != r2.visit_count())
return (r1.visit_count() > r2.visit_count());
return (r1.last_visit() > r2.last_visit());
}
}