This source file includes following definitions.
- ComparePair1st
- BuildSnippet
- TEST
- TEST
- TEST
- TEST
- TEST
#include "chrome/browser/history/snippet.h"
#include <algorithm>
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
const char* kSampleDocument = "Google\xe2\x84\xa2 Terms of Service "
"Welcome to Google! "
"1. Your relationship with Google "
"1.1 Your use of Google's products, software, services and web sites "
"(referred to collectively as the \"Services\" in this document and excluding "
"any services provided to you by Google under a separate written agreement) "
"is subject to the terms of a legal agreement between you and Google. "
"\"Google\" means Google Inc., whose principal place of business is at 1600 "
"Amphitheatre Parkway, Mountain View, CA 94043, United States. This document "
"explains how the agreement is made up, and sets out some of the terms of "
"that agreement.";
};
const char* kThaiSample = "Google \xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87"
"\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7"
"\xE0\xB8\xA1 \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A"
"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5 \xE0\xB9\x80\xE0\xB8\xA1"
"\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93"
"\xE0\xB8\xA5\xE0\xB8\x87\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0\xB8\x9A"
"\xE0\xB8\xB5\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7"
"\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB8\x9A"
"\xE0\xB8\xA3\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x82"
"\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7"
"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x82\xE0\xB9\x89"
"\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\x94\xE0\xB8\xB1"
"\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0\xB9\x88\xE0\xB8\xB2\xE0\xB8\xA7"
"\xE0\xB9\x82\xE0\xB8\x94\xE0\xB8\xA2\xE0\xB8\xAA\xE0\xB8\xA1\xE0\xB8\xB1"
"\xE0\xB8\x84\xE0\xB8\xA3\xE0\xB9\x83\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3"
"\xE0\xB8\xB2\xE0\xB8\xAD\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3\xE0\xB8\xA7"
"\xE0\xB8\xA1\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9"
"\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A"
"\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5"
"\xE0\xB9\x88\xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3"
"\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x88"
"\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0\xB9\x80"
"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A"
"\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5"
"\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4"
"\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD\xE0\xB8\xB7\xE0\xB9\x88"
"\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD\xE0\xB8\x87 Google \xE0\xB8\xAB"
"\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8\xAD\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84"
"\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA"
"\xE0\xB8\xB2\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88"
"\xE0\xB8\xAD\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89\xE0\xB8\x9C\xE0\xB8\xB9"
"\xE0\xB9\x89\xE0\xB9\x83\xE0\xB8\x8A\xE0\xB9\x89\xE0\xB9\x84\xE0\xB8\x94"
"\xE0\xB9\x89\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3"
"\xE0\xB8\xB0\xE0\xB8\xAA\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3"
"\xE0\xB8\x93\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\x94"
"\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0\xB9\x89\xE0\xB8\x99 \xE0\xB8\xA3"
"\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x97\xE0\xB8\xB1\xE0\xB9\x89\xE0\xB8\x87"
"\xE0\xB8\x9B\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95"
"\xE0\xB9\x88\xE0\xB8\x87\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7\xE0\xB9\x89"
"\xE0\xB8\xAD\xE0\xB8\xAB\xE0\xB8\xB2\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89"
"\xE0\xB9\x80\xE0\xB8\xAB\xE0\xB8\xA1\xE0\xB8\xB2\xE0\xB8\xB0\xE0\xB8\xAA"
"\xE0\xB8\xB3\xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x84"
"\xE0\xB8\xB8\xE0\xB8\x93";
bool ComparePair1st(const Snippet::MatchPosition& a,
const Snippet::MatchPosition& b) {
return a.first < b.first;
}
base::string16 BuildSnippet(const std::string& document,
const std::string& query) {
const std::string document_folded = StringToLowerASCII(std::string(document));
std::vector<std::string> query_words;
base::SplitString(query, ' ', &query_words);
Snippet::MatchPositions match_positions;
match_positions.clear();
for (std::vector<std::string>::iterator qw = query_words.begin();
qw != query_words.end(); ++qw) {
size_t ofs = 0;
while ((ofs = document_folded.find(*qw, ofs)) != std::string::npos) {
match_positions.push_back(std::make_pair(ofs, ofs + qw->size()));
ofs += qw->size();
}
}
std::sort(match_positions.begin(), match_positions.end(), ComparePair1st);
Snippet snippet;
snippet.ComputeSnippet(match_positions, document);
base::string16 star_snippet;
Snippet::MatchPositions::const_iterator match;
size_t pos = 0;
for (match = snippet.matches().begin();
match != snippet.matches().end(); ++match) {
star_snippet += snippet.text().substr(pos, match->first - pos);
star_snippet += base::UTF8ToUTF16("**");
star_snippet += snippet.text().substr(match->first,
match->second - match->first);
star_snippet += base::UTF8ToUTF16("**");
pos = match->second;
}
star_snippet += snippet.text().substr(pos);
return star_snippet;
}
TEST(Snippets, SimpleQuery) {
ASSERT_EQ(" ... eferred to collectively as the \"Services\" in this "
"**document** and excluding any services provided to you by "
"Goo ... ... way, Mountain View, CA 94043, United States. This "
"**document** explains how the agreement is made up, and sets "
"o ... ",
base::UTF16ToUTF8(BuildSnippet(kSampleDocument, "document")));
}
TEST(Snippets, NearbyWords) {
ASSERT_EQ(" ... lace of business is at 1600 Amphitheatre Parkway, "
"**Mountain** **View**, CA 94043, United States. This "
"document explains ... ",
base::UTF16ToUTF8(BuildSnippet(kSampleDocument, "mountain view")));
}
TEST(Snippets, UTF8) {
ASSERT_EQ(" ... ogle\xe2\x84\xa2 Terms of Service Welcome to Google! "
"1. Your **relationship** with Google 1.1 Your use of Google's "
"products, so ... ",
base::UTF16ToUTF8(BuildSnippet(kSampleDocument, "relationship")));
}
TEST(Snippets, ThaiUTF8) {
ASSERT_EQ(" ... "
"\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7"
"\xE0\xB8\xA1 \xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8"
"\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88\xE0\xB8"
"\xA7\xE0\xB8\x99\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8"
"\x84\xE0\xB8\xA5 \xE0\xB9\x80\xE0\xB8\xA1\xE0\xB8\xB7\xE0"
"\xB9\x88\xE0\xB8\xAD\xE0\xB8\x84\xE0\xB8\xB8\xE0\xB8\x93\xE0"
"\xB8\xA5\xE0\xB8\x87\xE0\xB8\x97\xE0\xB8\xB0\xE0\xB9\x80\xE0"
"\xB8\x9A\xE0\xB8\xB5\xE0\xB8\xA2\xE0\xB8\x99\xE0\xB9\x80\xE0"
"\xB8\x9E\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\xAD\xE0\xB9\x83\xE0"
"\xB8\x8A\xE0\xB9\x89\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xB4\xE0"
"\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\x82\xE0\xB8\xAD\xE0"
"\xB8\x87 Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0\xB8"
"\xAD**\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89**\xE0\xB8\x82\xE0"
"\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0"
"\xB8\x94\xE0\xB8\xB1\xE0\xB8\x87\xE0\xB8\x81\xE0\xB8\xA5\xE0"
"\xB9\x88\xE0\xB8\xB2\xE0\xB8\xA7\xE0\xB9\x82\xE0\xB8\x94\xE0"
"\xB8\xA2\xE0\xB8\xAA\xE0\xB8\xA1\xE0\xB8\xB1\xE0\xB8\x84\xE0"
"\xB8\xA3\xE0\xB9\x83\xE0\xB8\x88 \xE0\xB9\x80\xE0\xB8\xA3"
"\xE0\xB8\xB2\xE0\xB8\xAD\xE0\xB8\xB2\xE0\xB8\x88\xE0\xB8\xA3"
"\xE0\xB8\xA7\xE0\xB8\xA1\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xAD"
"\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5\xE0\xB8\xAA\xE0\xB9\x88"
"\xE0\xB8\xA7\xE0\xB8\x99\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84"
"\xE0\xB8\x84\xE0\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88"
"\xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87\xE0\xB8\x9A\xE0\xB8\xA3"
"\xE0\xB8\xA7\xE0\xB8\x9A\xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1"
"\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x84\xE0\xB8\xB8"
"\xE0\xB8\x93\xE0\xB9\x80\xE0\xB8\x82\xE0\xB9\x89\xE0\xB8\xB2"
"\xE0\xB8\x81\xE0\xB8\xB1\xE0\xB8\x9A ... ... \xE0\xB8\x82"
"\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xA1\xE0\xB8\xB9\xE0\xB8\xA5"
"\xE0\xB8\x88\xE0\xB8\xB2\xE0\xB8\x81\xE0\xB8\x9A\xE0\xB8\xA3"
"\xE0\xB8\xB4\xE0\xB8\x81\xE0\xB8\xB2\xE0\xB8\xA3\xE0\xB8\xAD"
"\xE0\xB8\xB7\xE0\xB9\x88\xE0\xB8\x99\xE0\xB8\x82\xE0\xB8\xAD"
"\xE0\xB8\x87 Google \xE0\xB8\xAB\xE0\xB8\xA3\xE0\xB8\xB7\xE0"
"\xB8\xAD\xE0\xB8\x9A\xE0\xB8\xB8\xE0\xB8\x84\xE0\xB8\x84\xE0"
"\xB8\xA5\xE0\xB8\x97\xE0\xB8\xB5\xE0\xB9\x88\xE0\xB8\xAA\xE0"
"\xB8\xB2\xE0\xB8\xA1 \xE0\xB9\x80\xE0\xB8\x9E\xE0\xB8\xB7"
"\xE0\xB9\x88\xE0\xB8\xAD**\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9"
"\x89**\xE0\xB8\x9C\xE0\xB8\xB9\xE0\xB9\x89\xE0\xB9\x83\xE0"
"\xB8\x8A\xE0\xB9\x89\xE0\xB9\x84\xE0\xB8\x94\xE0\xB9\x89\xE0"
"\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB8\x9B\xE0\xB8\xA3\xE0"
"\xB8\xB0\xE0\xB8\xAA\xE0\xB8\x9A\xE0\xB8\x81\xE0\xB8\xB2\xE0"
"\xB8\xA3\xE0\xB8\x93\xE0\xB9\x8C\xE0\xB8\x97\xE0\xB8\xB5\xE0"
"\xB9\x88\xE0\xB8\x94\xE0\xB8\xB5\xE0\xB8\x82\xE0\xB8\xB6\xE0"
"\xB9\x89\xE0\xB8\x99 \xE0\xB8\xA3\xE0\xB8\xA7\xE0\xB8\xA1"
"\xE0\xB8\x97\xE0\xB8\xB1\xE0\xB9\x89\xE0\xB8\x87\xE0\xB8\x9B"
"\xE0\xB8\xA3\xE0\xB8\xB1\xE0\xB8\x9A\xE0\xB9\x81\xE0\xB8\x95"
"\xE0\xB9\x88\xE0\xB8\x87\xE0\xB9\x80\xE0\xB8\x99\xE0\xB8\xB7"
"\xE0\xB9\x89\xE0\xB8\xAD\xE0\xB8\xAB\xE0\xB8\xB2",
base::UTF16ToUTF8(BuildSnippet(kThaiSample,
"\xE0\xB9\x83\xE0\xB8\xAB\xE0\xB9\x89")));
}
TEST(Snippets, ExtractMatchPositions) {
struct TestData {
const std::string offsets_string;
const size_t expected_match_count;
const size_t expected_matches[10];
} data[] = {
{ "0 0 1 2 0 0 4 1 0 0 1 5", 1, { 1, 6 } },
{ "0 0 1 4 0 0 2 1", 1, { 1, 5 } },
{ "0 0 4 1 0 0 2 1", 2, { 2, 3, 4, 5 } },
{ "0 0 0 1", 1, { 0, 1 } },
{ "0 0 0 1 0 0 0 2", 1, { 0, 2 } },
{ "0 0 1 1 0 0 1 2", 1, { 1, 3 } },
{ "0 0 1 2 0 0 4 3 0 0 3 1", 1, { 1, 7 } },
{ "0 0 1 4 0 0 2 5", 1, { 1, 7 } },
{ "0 0 1 2 0 0 1 1", 1, { 1, 3 } },
{ "0 0 1 1 0 0 5 2 0 0 10 1 0 0 3 10", 2, { 1, 2, 3, 13 } },
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
Snippet::MatchPositions matches;
Snippet::ExtractMatchPositions(data[i].offsets_string, "0", &matches);
EXPECT_EQ(data[i].expected_match_count, matches.size());
for (size_t j = 0; j < data[i].expected_match_count; ++j) {
EXPECT_EQ(data[i].expected_matches[2 * j], matches[j].first);
EXPECT_EQ(data[i].expected_matches[2 * j + 1], matches[j].second);
}
}
}