This source file includes following definitions.
- CompareMatchPosition
- SnippetIntersects
- CoalesceMatchesFrom
- CoalseAndSortMatchPositions
- IsQueryQuote
- word
- set_literal
- literal_
- AppendToSQLiteQuery
- IsWord
- Matches
- HasMatchIn
- HasMatchIn
- AppendWords
- children
- AddChild
- RemoveEmptySubnodes
- AppendToSQLiteQuery
- IsWord
- Matches
- HasMatchIn
- HasMatchIn
- AppendWords
- AppendChildrenToString
- AppendToSQLiteQuery
- MatchesAll
- HasMatchIn
- HasMatchIn
- IsWordLongEnoughForPrefixSearch
- ParseQuery
- ParseQueryWords
- ParseQueryNodes
- DoesQueryMatch
- DoesQueryMatch
- ParseQueryImpl
- ExtractQueryWords
#include "chrome/browser/history/query_parser.h"
#include <algorithm>
#include "base/compiler_specific.h"
#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h"
namespace {
int CompareMatchPosition(const Snippet::MatchPosition& mp1,
const Snippet::MatchPosition& mp2) {
return mp1.first < mp2.first;
}
bool SnippetIntersects(const Snippet::MatchPosition& mp1,
const Snippet::MatchPosition& mp2) {
return mp2.first >= mp1.first && mp2.first <= mp1.second;
}
void CoalesceMatchesFrom(size_t index, Snippet::MatchPositions* matches) {
Snippet::MatchPosition& mp = (*matches)[index];
for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1;
i != matches->end(); ) {
if (SnippetIntersects(mp, *i)) {
mp.second = std::max(mp.second, i->second);
i = matches->erase(i);
} else {
return;
}
}
}
void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) {
std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
for (size_t i = 0; i < matches->size(); ++i)
CoalesceMatchesFrom(i, matches);
}
bool IsQueryQuote(wchar_t ch) {
return ch == '"' ||
ch == 0xab ||
ch == 0xbb ||
ch == 0x201c ||
ch == 0x201d ||
ch == 0x201e;
}
}
class QueryNodeWord : public QueryNode {
public:
explicit QueryNodeWord(const base::string16& word);
virtual ~QueryNodeWord();
const base::string16& word() const { return word_; }
void set_literal(bool literal) { literal_ = literal; }
virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
virtual bool IsWord() const OVERRIDE;
virtual bool Matches(const base::string16& word, bool exact) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
virtual void AppendWords(std::vector<base::string16>* words) const OVERRIDE;
private:
base::string16 word_;
bool literal_;
DISALLOW_COPY_AND_ASSIGN(QueryNodeWord);
};
QueryNodeWord::QueryNodeWord(const base::string16& word)
: word_(word),
literal_(false) {}
QueryNodeWord::~QueryNodeWord() {}
int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const {
query->append(word_);
if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
*query += L'*';
return 1;
}
bool QueryNodeWord::IsWord() const {
return true;
}
bool QueryNodeWord::Matches(const base::string16& word, bool exact) const {
if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
return word == word_;
return word.size() >= word_.size() &&
(word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
}
bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const {
bool matched = false;
for (size_t i = 0; i < words.size(); ++i) {
if (Matches(words[i].word, false)) {
size_t match_start = words[i].position;
match_positions->push_back(
Snippet::MatchPosition(match_start,
match_start + static_cast<int>(word_.size())));
matched = true;
}
}
return matched;
}
bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words) const {
for (size_t i = 0; i < words.size(); ++i) {
if (Matches(words[i].word, false))
return true;
}
return false;
}
void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const {
words->push_back(word_);
}
class QueryNodeList : public QueryNode {
public:
typedef std::vector<QueryNode*> QueryNodeVector;
QueryNodeList();
virtual ~QueryNodeList();
QueryNodeVector* children() { return &children_; }
void AddChild(QueryNode* node);
void RemoveEmptySubnodes();
virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
virtual bool IsWord() const OVERRIDE;
virtual bool Matches(const base::string16& word, bool exact) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
virtual void AppendWords(std::vector<base::string16>* words) const OVERRIDE;
protected:
int AppendChildrenToString(base::string16* query) const;
QueryNodeVector children_;
private:
DISALLOW_COPY_AND_ASSIGN(QueryNodeList);
};
QueryNodeList::QueryNodeList() {}
QueryNodeList::~QueryNodeList() {
STLDeleteElements(&children_);
}
void QueryNodeList::AddChild(QueryNode* node) {
children_.push_back(node);
}
void QueryNodeList::RemoveEmptySubnodes() {
for (size_t i = 0; i < children_.size(); ++i) {
if (children_[i]->IsWord())
continue;
QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]);
list_node->RemoveEmptySubnodes();
if (list_node->children()->empty()) {
children_.erase(children_.begin() + i);
--i;
delete list_node;
}
}
}
int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const {
return AppendChildrenToString(query);
}
bool QueryNodeList::IsWord() const {
return false;
}
bool QueryNodeList::Matches(const base::string16& word, bool exact) const {
NOTREACHED();
return false;
}
bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const {
NOTREACHED();
return false;
}
bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words) const {
NOTREACHED();
return false;
}
void QueryNodeList::AppendWords(std::vector<base::string16>* words) const {
for (size_t i = 0; i < children_.size(); ++i)
children_[i]->AppendWords(words);
}
int QueryNodeList::AppendChildrenToString(base::string16* query) const {
int num_words = 0;
for (QueryNodeVector::const_iterator node = children_.begin();
node != children_.end(); ++node) {
if (node != children_.begin())
query->push_back(L' ');
num_words += (*node)->AppendToSQLiteQuery(query);
}
return num_words;
}
class QueryNodePhrase : public QueryNodeList {
public:
QueryNodePhrase();
virtual ~QueryNodePhrase();
virtual int AppendToSQLiteQuery(base::string16* query) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
private:
bool MatchesAll(const std::vector<QueryWord>& words,
const QueryWord** first_word,
const QueryWord** last_word) const;
DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase);
};
QueryNodePhrase::QueryNodePhrase() {}
QueryNodePhrase::~QueryNodePhrase() {}
int QueryNodePhrase::AppendToSQLiteQuery(base::string16* query) const {
query->push_back(L'"');
int num_words = AppendChildrenToString(query);
query->push_back(L'"');
return num_words;
}
bool QueryNodePhrase::MatchesAll(const std::vector<QueryWord>& words,
const QueryWord** first_word,
const QueryWord** last_word) const {
if (words.size() < children_.size())
return false;
for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) {
bool matched_all = true;
for (size_t j = 0; j < children_.size(); ++j) {
if (!children_[j]->Matches(words[i + j].word, true)) {
matched_all = false;
break;
}
}
if (matched_all) {
*first_word = &words[i];
*last_word = &words[i + children_.size() - 1];
return true;
}
}
return false;
}
bool QueryNodePhrase::HasMatchIn(
const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const {
const QueryWord* first_word;
const QueryWord* last_word;
if (MatchesAll(words, &first_word, &last_word)) {
match_positions->push_back(
Snippet::MatchPosition(first_word->position,
last_word->position + last_word->word.length()));
return true;
}
return false;
}
bool QueryNodePhrase::HasMatchIn(const std::vector<QueryWord>& words) const {
const QueryWord* first_word;
const QueryWord* last_word;
return MatchesAll(words, &first_word, &last_word);
}
QueryParser::QueryParser() {}
bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) {
DCHECK(!word.empty());
size_t minimum_length = 3;
if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
minimum_length = 2;
return word.size() >= minimum_length;
}
int QueryParser::ParseQuery(const base::string16& query,
base::string16* sqlite_query) {
QueryNodeList root;
if (!ParseQueryImpl(query, &root))
return 0;
return root.AppendToSQLiteQuery(sqlite_query);
}
void QueryParser::ParseQueryWords(const base::string16& query,
std::vector<base::string16>* words) {
QueryNodeList root;
if (!ParseQueryImpl(query, &root))
return;
root.AppendWords(words);
}
void QueryParser::ParseQueryNodes(const base::string16& query,
std::vector<QueryNode*>* nodes) {
QueryNodeList root;
if (ParseQueryImpl(base::i18n::ToLower(query), &root))
nodes->swap(*root.children());
}
bool QueryParser::DoesQueryMatch(const base::string16& text,
const std::vector<QueryNode*>& query_nodes,
Snippet::MatchPositions* match_positions) {
if (query_nodes.empty())
return false;
std::vector<QueryWord> query_words;
base::string16 lower_text = base::i18n::ToLower(text);
ExtractQueryWords(lower_text, &query_words);
if (query_words.empty())
return false;
Snippet::MatchPositions matches;
for (size_t i = 0; i < query_nodes.size(); ++i) {
if (!query_nodes[i]->HasMatchIn(query_words, &matches))
return false;
}
if (lower_text.length() != text.length()) {
match_positions->clear();
} else {
CoalseAndSortMatchPositions(&matches);
match_positions->swap(matches);
}
return true;
}
bool QueryParser::DoesQueryMatch(const std::vector<QueryWord>& query_words,
const std::vector<QueryNode*>& query_nodes) {
if (query_nodes.empty() || query_words.empty())
return false;
for (size_t i = 0; i < query_nodes.size(); ++i) {
if (!query_nodes[i]->HasMatchIn(query_words))
return false;
}
return true;
}
bool QueryParser::ParseQueryImpl(const base::string16& query,
QueryNodeList* root) {
base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
if (!iter.Init())
return false;
std::vector<QueryNodeList*> query_stack;
query_stack.push_back(root);
bool in_quotes = false;
while (iter.Advance()) {
if (iter.IsWord()) {
QueryNodeWord* word_node = new QueryNodeWord(iter.GetString());
if (in_quotes)
word_node->set_literal(true);
query_stack.back()->AddChild(word_node);
} else {
if (IsQueryQuote(query[iter.prev()])) {
if (!in_quotes) {
QueryNodeList* quotes_node = new QueryNodePhrase;
query_stack.back()->AddChild(quotes_node);
query_stack.push_back(quotes_node);
in_quotes = true;
} else {
query_stack.pop_back();
in_quotes = false;
}
}
}
}
root->RemoveEmptySubnodes();
return true;
}
void QueryParser::ExtractQueryWords(const base::string16& text,
std::vector<QueryWord>* words) {
base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
if (!iter.Init())
return;
while (iter.Advance()) {
if (iter.IsWord()) {
base::string16 word = iter.GetString();
if (!word.empty()) {
words->push_back(QueryWord());
words->back().word = word;
words->back().position = iter.prev();
}
}
}
}