This source file includes the following definitions.
- RecordScorerCreationStatus
- LogOdds2Prob
- Create
- ComputeScore
- model_version
- page_terms
- page_words
- max_words_per_term
- murmurhash3_seed
- ComputeRuleScore
#include "chrome/renderer/safe_browsing/scorer.h"
#include <math.h>
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_piece.h"
#include "chrome/common/safe_browsing/client_model.pb.h"
#include "chrome/renderer/safe_browsing/features.h"
namespace {
// Outcome of an attempt to build a Scorer.  These values are reported to the
// "SBClientPhishing.ScorerCreationStatus" enumeration histogram by
// RecordScorerCreationStatus(), so the numbering is spelled out explicitly.
// NOTE(review): since the values are logged, existing entries presumably
// should not be renumbered or reused -- confirm against histogram policy.
enum ScorerCreationStatus {
  SCORER_SUCCESS = 0,
  SCORER_FAIL_MODEL_OPEN_FAIL = 1,
  SCORER_FAIL_MODEL_FILE_EMPTY = 2,
  SCORER_FAIL_MODEL_FILE_TOO_LARGE = 3,
  SCORER_FAIL_MODEL_PARSE_ERROR = 4,
  SCORER_FAIL_MODEL_MISSING_FIELDS = 5,
  // Histogram boundary value; must stay one past the last real status.
  SCORER_STATUS_MAX = 6
};
// Reports |status| to the "SBClientPhishing.ScorerCreationStatus" enumeration
// histogram, using SCORER_STATUS_MAX as the histogram's boundary value.
void RecordScorerCreationStatus(ScorerCreationStatus status) {
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.ScorerCreationStatus",
status,
SCORER_STATUS_MAX);
}
}
namespace safe_browsing {
// Converts a log-odds value into a probability: p = e^x / (e^x + 1).
//
// exp() overflows a double slightly above 709, which would turn the quotient
// below into inf / inf.  Inputs at or beyond that point are therefore reported
// as a probability of exactly 1.0 without calling exp().
static double LogOdds2Prob(double log_odds) {
  const double kSaturationLogOdds = 709.0;
  const bool saturated = log_odds >= kSaturationLogOdds;
  if (saturated) {
    return 1.0;
  }
  const double odds = exp(log_odds);
  return odds / (odds + 1.0);
}
// Constructs an empty Scorer.  The constructor body does no work; Create()
// is what parses the model and fills in the page term / page word sets.
Scorer::Scorer() {}
// Destructor with an empty body; members are destroyed by their own
// destructors.
Scorer::~Scorer() {}
Scorer* Scorer::Create(const base::StringPiece& model_str) {
scoped_ptr<Scorer> scorer(new Scorer());
ClientSideModel& model = scorer->model_;
if (!model.ParseFromArray(model_str.data(), model_str.size())) {
DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is "
<< "invalid.";
RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR);
return NULL;
} else if (!model.IsInitialized()) {
DLOG(ERROR) << "Unable to parse phishing model. The model is missing "
<< "some required fields. Maybe the .proto file changed?";
RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS);
return NULL;
}
RecordScorerCreationStatus(SCORER_SUCCESS);
for (int i = 0; i < model.page_term_size(); ++i) {
scorer->page_terms_.insert(model.hashes(model.page_term(i)));
}
for (int i = 0; i < model.page_word_size(); ++i) {
scorer->page_words_.insert(model.page_word(i));
}
return scorer.release();
}
// Scores |features| against the model.  The per-rule scores returned by
// ComputeRuleScore() are summed in rule order as a log-odds total, which is
// then mapped to a probability with LogOdds2Prob().
double Scorer::ComputeScore(const FeatureMap& features) const {
  const int rule_count = model_.rule_size();
  double total_log_odds = 0.0;
  for (int rule_index = 0; rule_index != rule_count; ++rule_index) {
    const double rule_score =
        ComputeRuleScore(model_.rule(rule_index), features);
    total_log_odds += rule_score;
  }
  return LogOdds2Prob(total_log_odds);
}
// Returns the version field of the parsed model.
int Scorer::model_version() const {
return model_.version();
}
// Returns the set of page term hashes that Create() looked up in the model's
// hashes table.  The reference points at a member of this Scorer.
const base::hash_set<std::string>& Scorer::page_terms() const {
return page_terms_;
}
// Returns the set of page word values that Create() copied from the model.
// The reference points at a member of this Scorer.
const base::hash_set<uint32>& Scorer::page_words() const {
return page_words_;
}
// Returns the model's max_words_per_term field, converted to size_t.
size_t Scorer::max_words_per_term() const {
return model_.max_words_per_term();
}
// Returns the model's murmur_hash_seed field.
uint32 Scorer::murmurhash3_seed() const {
return model_.murmur_hash_seed();
}
// Computes the contribution of a single |rule| for the given |features|.
//
// Each feature listed in the rule is an index into the model's hashes table;
// the hash found there is the key looked up in the feature map.  The score is
// the product of all of those feature values multiplied by the rule's weight.
// If any feature is absent from the map, or has a value of exactly 0.0, the
// rule contributes 0.0.
double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
                                const FeatureMap& features) const {
  typedef base::hash_map<std::string, double> FeatureValues;
  const FeatureValues& values = features.features();
  const int feature_count = rule.feature_size();
  double product = 1.0;
  for (int index = 0; index < feature_count; ++index) {
    const FeatureValues::const_iterator found =
        values.find(model_.hashes(rule.feature(index)));
    if (found == values.end()) {
      return 0.0;
    }
    const double value = found->second;
    if (value == 0.0) {
      return 0.0;
    }
    product *= value;
  }
  return product * rule.weight();
}
}