This source file includes the following definitions.
- SetUp
- ExtractFeatures
- PartialExtractFeatures
- ExtractionDone
- QuitExtraction
- TEST_F
- TEST_F
- TEST_F
#include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
#include <string>
#include "base/bind.h"
#include "base/callback.h"
#include "base/containers/hash_tables.h"
#include "base/memory/scoped_ptr.h"
#include "base/message_loop/message_loop.h"
#include "base/strings/string16.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "crypto/sha2.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
using base::ASCIIToUTF16;
using ::testing::Return;
namespace safe_browsing {
class PhishingTermFeatureExtractorTest : public ::testing::Test {
protected:
virtual void SetUp() {
base::hash_set<std::string> terms;
terms.insert("one");
terms.insert("one one");
terms.insert("two");
terms.insert("multi word test");
terms.insert("capitalization");
terms.insert("space");
terms.insert("separator");
terms.insert("punctuation");
terms.insert("\xe4\xbd\xa0\xe5\xa5\xbd");
terms.insert("\xe5\x86\x8d\xe8\xa7\x81");
for (base::hash_set<std::string>::iterator it = terms.begin();
it != terms.end(); ++it) {
term_hashes_.insert(crypto::SHA256HashString(*it));
}
base::hash_set<std::string> words;
words.insert("one");
words.insert("two");
words.insert("multi");
words.insert("word");
words.insert("test");
words.insert("capitalization");
words.insert("space");
words.insert("separator");
words.insert("punctuation");
words.insert("\xe4\xbd\xa0\xe5\xa5\xbd");
words.insert("\xe5\x86\x8d\xe8\xa7\x81");
static const uint32 kMurmurHash3Seed = 2777808611U;
for (base::hash_set<std::string>::iterator it = words.begin();
it != words.end(); ++it) {
word_hashes_.insert(MurmurHash3String(*it, kMurmurHash3Seed));
}
extractor_.reset(new PhishingTermFeatureExtractor(
&term_hashes_,
&word_hashes_,
3 ,
kMurmurHash3Seed,
&clock_));
}
bool ExtractFeatures(const base::string16* page_text, FeatureMap* features) {
success_ = false;
extractor_->ExtractFeatures(
page_text,
features,
base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone,
base::Unretained(this)));
msg_loop_.Run();
return success_;
}
void PartialExtractFeatures(const base::string16* page_text,
FeatureMap* features) {
extractor_->ExtractFeatures(
page_text,
features,
base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone,
base::Unretained(this)));
msg_loop_.PostTask(
FROM_HERE,
base::Bind(&PhishingTermFeatureExtractorTest::QuitExtraction,
base::Unretained(this)));
msg_loop_.RunUntilIdle();
}
void ExtractionDone(bool success) {
success_ = success;
msg_loop_.Quit();
}
void QuitExtraction() {
extractor_->CancelPendingExtraction();
msg_loop_.Quit();
}
base::MessageLoop msg_loop_;
MockFeatureExtractorClock clock_;
scoped_ptr<PhishingTermFeatureExtractor> extractor_;
base::hash_set<std::string> term_hashes_;
base::hash_set<uint32> word_hashes_;
bool success_;
};
// Feeds a series of page texts through the extractor and checks, for each
// one, that exactly the expected page-term features are produced.
TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) {
  // Every clock query in this test returns the same instant.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  // Every expected feature name is this prefix followed by the term.
  const std::string term_prefix(features::kPageTerm);

  base::string16 text = ASCIIToUTF16("blah");
  FeatureMap expected;
  FeatureMap actual;

  // A word that is not a known term yields no features.
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // A repeated word matches both the one-word and the two-word term.
  text = ASCIIToUTF16("one one");
  expected.Clear();
  expected.AddBooleanFeature(term_prefix + "one");
  expected.AddBooleanFeature(term_prefix + "one one");
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // A multi-word term surrounded by unrelated words is found.
  text = ASCIIToUTF16("bla bla multi word test bla");
  expected.Clear();
  expected.AddBooleanFeature(term_prefix + "multi word test");
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // The same words in a different order do not form the term.
  text = ASCIIToUTF16("bla bla test word multi bla");
  expected.Clear();
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // Upper case, a hyphen, a newline and punctuation around the words still
  // produce the lower-case single-word terms.
  text = ASCIIToUTF16("Capitalization plus non-space\n"
                      "separator... punctuation!");
  expected.Clear();
  expected.AddBooleanFeature(term_prefix + "capitalization");
  expected.AddBooleanFeature(term_prefix + "space");
  expected.AddBooleanFeature(term_prefix + "separator");
  expected.AddBooleanFeature(term_prefix + "punctuation");
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // Empty text succeeds and yields no features.
  text = base::string16();
  expected.Clear();
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);

  // Two non-ASCII terms written back to back with no separator between them
  // are both reported.
  text = base::UTF8ToUTF16("\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x86\x8d\xe8\xa7\x81");
  expected.Clear();
  expected.AddBooleanFeature(term_prefix + "\xe4\xbd\xa0\xe5\xa5\xbd");
  expected.AddBooleanFeature(term_prefix + "\xe5\x86\x8d\xe8\xa7\x81");
  actual.Clear();
  ASSERT_TRUE(ExtractFeatures(&text, &actual));
  ExpectFeatureMapsAreEqual(actual, expected);
}
// Scripts the mock clock so that time advances while a 30-word page is being
// processed, first slowly (extraction must succeed and see the whole text)
// and then quickly (extraction must fail).
//
// NOTE(review): the number of Now() calls scripted below presumably mirrors
// the extractor's clock-check interval and its per-chunk / total time
// budgets, none of which are visible in this file -- confirm against the
// extractor implementation before changing any of the values.
TEST_F(PhishingTermFeatureExtractorTest, Continuation) {
  using base::TimeDelta;

  // "one", then the numbers 0..27, then "two": 30 space-separated words, with
  // the two known terms at opposite ends of the text.
  std::string ascii_text("one ");
  for (int n = 0; n < 28; ++n) {
    ascii_text += base::StringPrintf("%d ", n);
  }
  ascii_text += "two";
  base::string16 page_text(ASCIIToUTF16(ascii_text));

  const base::TimeTicks start = base::TimeTicks::Now();

  // First run: ten clock queries, ending 30 ms after the start.
  EXPECT_CALL(clock_, Now())
      .WillOnce(Return(start))
      .WillOnce(Return(start))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(3)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(6)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(9)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(12)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(22)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(25)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(28)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(30)));

  const std::string term_prefix(features::kPageTerm);
  FeatureMap expected;
  expected.AddBooleanFeature(term_prefix + "one");
  expected.AddBooleanFeature(term_prefix + "two");

  FeatureMap extracted;
  ASSERT_TRUE(ExtractFeatures(&page_text, &extracted));
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Check the first script was consumed and drop it before installing the
  // next one.
  ::testing::Mock::VerifyAndClearExpectations(&clock_);

  // Second run over the same text: six clock queries, ending 620 ms after
  // the start.  This time the extraction is expected to report failure.
  EXPECT_CALL(clock_, Now())
      .WillOnce(Return(start))
      .WillOnce(Return(start))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(300)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(350)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(600)))
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(620)));
  extracted.Clear();
  EXPECT_FALSE(ExtractFeatures(&page_text, &extracted));
}
// Starts an extraction, cancels it from a posted task, frees the text it was
// working on, and then checks that a fresh extraction over different text
// still reports exactly the features of that new text.
TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) {
  // First page: "one" followed by the numbers 0..27.
  std::string first_ascii("one ");
  for (int n = 0; n < 28; ++n) {
    first_ascii += base::StringPrintf("%d ", n);
  }
  scoped_ptr<base::string16> page_text(
      new base::string16(ASCIIToUTF16(first_ascii)));

  // NOTE(review): four clock queries, 7 ms apart after the first two, are
  // scripted here; presumably that is as far as the extractor gets before it
  // yields and the posted cancellation runs -- confirm against the extractor.
  const base::TimeTicks start = base::TimeTicks::Now();
  EXPECT_CALL(clock_, Now())
      .WillOnce(Return(start))
      .WillOnce(Return(start))
      .WillOnce(Return(start + base::TimeDelta::FromMilliseconds(7)))
      .WillOnce(Return(start + base::TimeDelta::FromMilliseconds(14)));

  FeatureMap extracted;
  PartialExtractFeatures(page_text.get(), &extracted);

  // Second page: the numbers 30..57 followed by a known three-word term.
  // Resetting the pointer destroys the first page's text before the next
  // extraction starts.
  std::string second_ascii;
  for (int n = 30; n < 58; ++n) {
    second_ascii += base::StringPrintf("%d ", n);
  }
  second_ascii += "multi word test ";
  page_text.reset(new base::string16(ASCIIToUTF16(second_ascii)));
  extracted.Clear();

  // From here on every clock query returns the same instant.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
  EXPECT_TRUE(ExtractFeatures(page_text.get(), &extracted));

  // Only the term from the second page is reported; "one" from the cancelled
  // first page is not.
  FeatureMap expected;
  expected.AddBooleanFeature(std::string(features::kPageTerm) +
                             "multi word test");
  ExpectFeatureMapsAreEqual(extracted, expected);
}
}