This source file includes the following definitions.
- FilterBenignIpsOnIOThread
- resource_type
- AddFeature
- AddMalwareIpUrlInfo
- AddNavigationFeatures
- weak_factory_
- ExtractFeatures
- ExtractMalwareFeatures
- ExtractBrowseInfoFeatures
- StartExtractFeatures
- QueryUrlHistoryDone
- QueryHttpHostVisitsDone
- QueryHttpsHostVisitsDone
- SetHostVisitsFeatures
- StorePendingQuery
- GetPendingQuery
- GetHistoryService
- FinishExtractMalwareFeatures
#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
#include <map>
#include <utility>
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/format_macros.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "chrome/browser/common/cancelable_request.h"
#include "chrome/browser/history/history_service.h"
#include "chrome/browser/history/history_service_factory.h"
#include "chrome/browser/history/history_types.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/browser_features.h"
#include "chrome/browser/safe_browsing/client_side_detection_host.h"
#include "chrome/browser/safe_browsing/database_manager.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/page_transition_types.h"
#include "url/gurl.h"
using content::BrowserThread;
using content::NavigationController;
using content::NavigationEntry;
using content::WebContents;
namespace safe_browsing {
namespace {

// Cap on the number of IP entries that FinishExtractMalwareFeatures() copies
// into a single ClientMalwareRequest.
const int kMaxMalwareIPPerRequest = 5;

// Erases from |ips| every entry whose key (the IP) is not reported as a match
// by |database_manager|->MatchMalwareIP().  If |database_manager| holds no
// object, nothing can match and every entry is erased.  Must run on the IO
// thread.
void FilterBenignIpsOnIOThread(
    scoped_refptr<SafeBrowsingDatabaseManager> database_manager,
    IPUrlMap* ips) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  IPUrlMap::iterator current = ips->begin();
  while (current != ips->end()) {
    const bool is_bad_ip = database_manager.get() &&
                           database_manager->MatchMalwareIP(current->first);
    if (is_bad_ip) {
      ++current;
      continue;
    }
    // Step past the entry before erasing it so |current| stays usable.
    IPUrlMap::iterator benign = current++;
    ips->erase(benign);
  }
}

}  // namespace
// Stores copies of |url|, |method|, |referrer| and |resource_type| in the
// members of the same names.
IPUrlInfo::IPUrlInfo(const std::string& url,
                     const std::string& method,
                     const std::string& referrer,
                     const ResourceType::Type& resource_type)
    : url(url),
      method(method),
      referrer(referrer),
      resource_type(resource_type) {
}

// No explicit cleanup is performed here.
IPUrlInfo::~IPUrlInfo() {
}
// Starts with |http_status_code| set to 0; ExtractBrowseInfoFeatures() skips
// the status-code feature while the value is still 0.
BrowseInfo::BrowseInfo()
    : http_status_code(0) {
}

// No explicit cleanup is performed here.
BrowseInfo::~BrowseInfo() {
}
static void AddFeature(const std::string& feature_name,
double feature_value,
ClientPhishingRequest* request) {
DCHECK(request);
ClientPhishingRequest::Feature* feature =
request->add_non_model_feature_map();
feature->set_name(feature_name);
feature->set_value(feature_value);
VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
}
static void AddMalwareIpUrlInfo(const std::string& ip,
const std::vector<IPUrlInfo>& meta_infos,
ClientMalwareRequest* request) {
DCHECK(request);
for (std::vector<IPUrlInfo>::const_iterator it = meta_infos.begin();
it != meta_infos.end(); ++it) {
ClientMalwareRequest::UrlInfo* urlinfo =
request->add_bad_ip_url_info();
urlinfo->set_ip(ip);
urlinfo->set_url(it->url);
urlinfo->set_method(it->method);
urlinfo->set_referrer(it->referrer);
urlinfo->set_resource_type(static_cast<int>(it->resource_type));
}
DVLOG(2) << "Added url info for bad ip: " << ip;
}
static void AddNavigationFeatures(
const std::string& feature_prefix,
const NavigationController& controller,
int index,
const std::vector<GURL>& redirect_chain,
ClientPhishingRequest* request) {
NavigationEntry* entry = controller.GetEntryAtIndex(index);
bool is_secure_referrer = entry->GetReferrer().url.SchemeIsSecure();
if (!is_secure_referrer) {
AddFeature(base::StringPrintf("%s%s=%s",
feature_prefix.c_str(),
features::kReferrer,
entry->GetReferrer().url.spec().c_str()),
1.0,
request);
}
AddFeature(feature_prefix + features::kHasSSLReferrer,
is_secure_referrer ? 1.0 : 0.0,
request);
AddFeature(feature_prefix + features::kPageTransitionType,
static_cast<double>(
content::PageTransitionStripQualifier(
entry->GetTransitionType())),
request);
AddFeature(feature_prefix + features::kIsFirstNavigation,
index == 0 ? 1.0 : 0.0,
request);
if (redirect_chain.empty()) {
NOTREACHED();
return;
}
if (redirect_chain.back() != entry->GetURL()) {
DLOG(WARNING) << "Expected:" << entry->GetURL()
<< " Actual:" << redirect_chain.back();
AddFeature(feature_prefix + features::kRedirectUrlMismatch,
1.0,
request);
return;
}
for (size_t i = 0; i < redirect_chain.size() - 1; i++) {
std::string printable_redirect = redirect_chain[i].spec();
if (redirect_chain[i].SchemeIsSecure()) {
printable_redirect = features::kSecureRedirectValue;
}
AddFeature(base::StringPrintf("%s%s[%" PRIuS "]=%s",
feature_prefix.c_str(),
features::kRedirect,
i,
printable_redirect.c_str()),
1.0,
request);
}
}
// Remembers |tab| and |host| and sets up the weak pointer factory for this
// object.  |tab| must not be NULL.
BrowserFeatureExtractor::BrowserFeatureExtractor(
    WebContents* tab,
    ClientSideDetectionHost* host)
    : tab_(tab),
      host_(host),
      weak_factory_(this) {
  DCHECK(tab);
}
// Tears down all outstanding work:
//   * invalidates weak pointers so that posted tasks bound to them no longer
//     run against this object,
//   * deletes the request objects used as keys in |pending_extractions_|,
//   * cancels every pending history query (when a history service can still
//     be obtained) and deletes the request object stored with it.
BrowserFeatureExtractor::~BrowserFeatureExtractor() {
  weak_factory_.InvalidateWeakPtrs();

  STLDeleteContainerPairFirstPointers(pending_extractions_.begin(),
                                      pending_extractions_.end());

  // GetHistoryService() always writes |history| (NULL on failure), so the
  // pointer is safe to test below even when the lookup fails.
  HistoryService* history;
  bool success = GetHistoryService(&history);
  DCHECK(success || pending_queries_.empty());

  PendingQueriesMap::iterator query = pending_queries_.begin();
  while (query != pending_queries_.end()) {
    if (history) {
      history->CancelRequest(query->first);
    }
    // The stored pair's first member is the request object.
    delete query->second.first;
    ++query;
  }
  pending_queries_.clear();
}
// Starts feature extraction for |request|.
//
// Synchronously adds navigation features (for the most recent navigation
// entry whose URL equals the request URL, and for the earliest contiguous
// preceding entry on the same host) and the features derived from |info|.
// Then records |callback| in |pending_extractions_| and posts
// StartExtractFeatures() to the current message loop to continue.
//
// Must be called on the UI thread.  |info| and |request| must not be NULL,
// |callback| must not be null, and the request URL must start with "http:".
void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info,
                                              ClientPhishingRequest* request,
                                              const DoneCallback& callback) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK(request);
  DCHECK(info);
  DCHECK_EQ(0U, request->url().find("http:"));
  DCHECK(!callback.is_null());

  const NavigationController& controller = tab_->GetController();
  int url_index = -1;
  int first_host_index = -1;
  GURL request_url(request->url());

  const int current_index = controller.GetCurrentEntryIndex();
  DCHECK_NE(current_index, -1);

  // Walk the navigation entries from newest to oldest.
  for (int i = current_index; i >= 0; --i) {
    NavigationEntry* entry = controller.GetEntryAtIndex(i);

    if (url_index == -1) {
      // Still looking for the newest entry that matches the request URL.
      if (entry->GetURL() == request_url) {
        url_index = i;
      }
      continue;
    }

    // From here on |i| is strictly below |url_index|: keep moving back while
    // the entries stay on the request's host, and stop at the first one that
    // does not.
    if (entry->GetURL().host() != request_url.host()) {
      break;
    }
    first_host_index = i;
  }

  if (url_index != -1) {
    AddNavigationFeatures(
        std::string(), controller, url_index, info->url_redirects, request);
  }
  if (first_host_index != -1) {
    AddNavigationFeatures(features::kHostPrefix,
                          controller,
                          first_host_index,
                          info->host_redirects,
                          request);
  }

  ExtractBrowseInfoFeatures(*info, request);

  // StartExtractFeatures() removes this entry again when it runs.
  pending_extractions_[request] = callback;
  base::MessageLoop::current()->PostTask(
      FROM_HERE,
      base::Bind(&BrowserFeatureExtractor::StartExtractFeatures,
                 weak_factory_.GetWeakPtr(),
                 request,
                 callback));
}
// Starts malware feature extraction for |request|.
//
// Moves the contents of |info->ips| into a freshly allocated map (leaving
// |info->ips| with whatever the new map held, i.e. empty), takes ownership of
// |request|, and posts FilterBenignIpsOnIOThread() to the IO thread.  When
// that task completes, FinishExtractMalwareFeatures() is invoked through a
// weak pointer with the filtered map, |callback| and the request.
//
// Must be called on the UI thread; |callback| must not be null.
void BrowserFeatureExtractor::ExtractMalwareFeatures(
    BrowseInfo* info,
    ClientMalwareRequest* request,
    const MalwareDoneCallback& callback) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK(!callback.is_null());

  scoped_ptr<IPUrlMap> detached_ips(new IPUrlMap);
  detached_ips->swap(info->ips);

  // Capture the raw pointer now: base::Passed() below hands the scoped_ptr
  // over to the reply callback, while the IO-thread task still needs to reach
  // the same map.
  IPUrlMap* detached_ips_raw = detached_ips.get();

  scoped_ptr<ClientMalwareRequest> owned_request(request);

  BrowserThread::PostTaskAndReply(
      BrowserThread::IO,
      FROM_HERE,
      base::Bind(&FilterBenignIpsOnIOThread,
                 host_->database_manager(),
                 detached_ips_raw),
      base::Bind(&BrowserFeatureExtractor::FinishExtractMalwareFeatures,
                 weak_factory_.GetWeakPtr(),
                 base::Passed(&detached_ips),
                 callback,
                 base::Passed(&owned_request)));
}
// Adds to |request| the features that can be read directly from |info|:
//   * when |info.unsafe_resource| is set: the resource URL, its original URL,
//     whether it is a subresource, and its threat type (as a double),
//   * when |info.http_status_code| is non-zero: the status code.
void BrowserFeatureExtractor::ExtractBrowseInfoFeatures(
    const BrowseInfo& info,
    ClientPhishingRequest* request) {
  if (info.unsafe_resource.get()) {
    const std::string malicious_url_feature =
        features::kSafeBrowsingMaliciousUrl + info.unsafe_resource->url.spec();
    AddFeature(malicious_url_feature, 1.0, request);

    const std::string original_url_feature =
        features::kSafeBrowsingOriginalUrl +
        info.unsafe_resource->original_url.spec();
    AddFeature(original_url_feature, 1.0, request);

    const double is_subresource =
        info.unsafe_resource->is_subresource ? 1.0 : 0.0;
    AddFeature(features::kSafeBrowsingIsSubresource, is_subresource, request);

    const double threat_type =
        static_cast<double>(info.unsafe_resource->threat_type);
    AddFeature(features::kSafeBrowsingThreatType, threat_type, request);
  }

  if (info.http_status_code != 0) {
    AddFeature(features::kHttpStatusCode, info.http_status_code, request);
  }
}
// Second stage of ExtractFeatures(), run as a posted task on the UI thread.
//
// Removes |request| from |pending_extractions_| (exactly one entry is
// expected).  If |request| is NULL or not initialized, or no history service
// is available, |callback| is run with false.  Otherwise a URL history query
// is issued and (request, callback) is stored under the query handle so that
// QueryUrlHistoryDone() can pick it up.
void BrowserFeatureExtractor::StartExtractFeatures(
    ClientPhishingRequest* request,
    const DoneCallback& callback) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  size_t removed = pending_extractions_.erase(request);
  DCHECK_EQ(1U, removed);

  const bool request_usable = request && request->IsInitialized();
  HistoryService* history = NULL;
  if (!request_usable || !GetHistoryService(&history)) {
    callback.Run(false, request);
    return;
  }

  // NOTE(review): the literal |true| presumably asks QueryURL() to also return
  // the visit list that QueryUrlHistoryDone() iterates over — confirm against
  // HistoryService::QueryURL().
  CancelableRequestProvider::Handle query_handle = history->QueryURL(
      GURL(request->url()),
      true,
      &request_consumer_,
      base::Bind(&BrowserFeatureExtractor::QueryUrlHistoryDone,
                 base::Unretained(this)));
  StorePendingQuery(query_handle, request, callback);
}
// Completion handler for the URL history query issued by
// StartExtractFeatures().
//
// Looks up the (request, callback) pair stored under |handle|.  On failure
// (|success| false, or no history service for the follow-up query) the
// callback is run with false.  Otherwise adds the total visit count from
// |row| plus three counts computed over the main-frame visits in |visits|
// (older than 24 hours, typed, link), and then issues the host visit-count
// query whose result arrives in QueryHttpHostVisitsDone().
void BrowserFeatureExtractor::QueryUrlHistoryDone(
    CancelableRequestProvider::Handle handle,
    bool success,
    const history::URLRow* row,
    history::VisitVector* visits) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  ClientPhishingRequest* request;
  DoneCallback callback;
  if (!GetPendingQuery(handle, &request, &callback)) {
    DLOG(FATAL) << "No pending history query found";
    return;
  }
  DCHECK(request);
  DCHECK(!callback.is_null());

  if (!success) {
    callback.Run(false, request);
    return;
  }

  AddFeature(features::kUrlHistoryVisitCount,
             static_cast<double>(row->visit_count()),
             request);

  const base::Time one_day_ago =
      base::Time::Now() - base::TimeDelta::FromDays(1);
  int older_than_a_day = 0;
  int typed_visits = 0;
  int link_visits = 0;
  for (history::VisitVector::const_iterator visit = visits->begin();
       visit != visits->end();
       ++visit) {
    // Only main-frame visits are counted.
    if (content::PageTransitionIsMainFrame(visit->transition)) {
      if (visit->visit_time < one_day_ago) {
        ++older_than_a_day;
      }
      const content::PageTransition core_transition =
          content::PageTransitionStripQualifier(visit->transition);
      if (core_transition == content::PAGE_TRANSITION_TYPED) {
        ++typed_visits;
      } else if (core_transition == content::PAGE_TRANSITION_LINK) {
        ++link_visits;
      }
    }
  }

  AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
             static_cast<double>(older_than_a_day),
             request);
  AddFeature(features::kUrlHistoryTypedCount,
             static_cast<double>(typed_visits),
             request);
  AddFeature(features::kUrlHistoryLinkCount,
             static_cast<double>(link_visits),
             request);

  HistoryService* history;
  if (!GetHistoryService(&history)) {
    callback.Run(false, request);
    return;
  }

  CancelableRequestProvider::Handle host_query_handle =
      history->GetVisibleVisitCountToHost(
          GURL(request->url()),
          &request_consumer_,
          base::Bind(&BrowserFeatureExtractor::QueryHttpHostVisitsDone,
                     base::Unretained(this)));
  StorePendingQuery(host_query_handle, request, callback);
}
// Completion handler for the host visit-count query made with the request's
// own (http) URL.
//
// Looks up the (request, callback) pair stored under |handle|.  On failure
// the callback is run with false.  Otherwise records the http host visit
// features and issues the same query again for the URL with its first five
// characters replaced by "https:"; that result arrives in
// QueryHttpsHostVisitsDone().
void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
    CancelableRequestProvider::Handle handle,
    bool success,
    int num_visits,
    base::Time first_visit) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  ClientPhishingRequest* request;
  DoneCallback callback;
  if (!GetPendingQuery(handle, &request, &callback)) {
    DLOG(FATAL) << "No pending history query found";
    return;
  }
  DCHECK(request);
  DCHECK(!callback.is_null());

  if (!success) {
    callback.Run(false, request);
    return;
  }

  SetHostVisitsFeatures(num_visits, first_visit, true, request);

  HistoryService* history;
  if (!GetHistoryService(&history)) {
    callback.Run(false, request);
    return;
  }

  // ExtractFeatures() DCHECKs that the URL starts with "http:", so swapping
  // the first five characters turns the scheme into https.
  std::string https_url = request->url();
  https_url.replace(0, 5, "https:");

  CancelableRequestProvider::Handle https_query_handle =
      history->GetVisibleVisitCountToHost(
          GURL(https_url),
          &request_consumer_,
          base::Bind(&BrowserFeatureExtractor::QueryHttpsHostVisitsDone,
                     base::Unretained(this)));
  StorePendingQuery(https_query_handle, request, callback);
}
// Completion handler for the host visit-count query made with the https
// variant of the request URL; this is the last step of the extraction chain.
//
// Looks up the (request, callback) pair stored under |handle|, records the
// https host visit features when |success| is true, and runs the callback
// with |success| as its result.
void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
    CancelableRequestProvider::Handle handle,
    bool success,
    int num_visits,
    base::Time first_visit) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  ClientPhishingRequest* request;
  DoneCallback callback;
  if (!GetPendingQuery(handle, &request, &callback)) {
    DLOG(FATAL) << "No pending history query found";
    return;
  }
  DCHECK(request);
  DCHECK(!callback.is_null());

  if (success) {
    SetHostVisitsFeatures(num_visits, first_visit, false, request);
    callback.Run(true, request);
  } else {
    callback.Run(false, request);
  }
}
// Adds the host visit features for one scheme to |request|.
//
// |is_http_query| selects between the http and https feature names.  The
// visit count is always added.  The "first visit more than 24h ago" feature
// is added only when |num_visits| is positive; it is 1.0 when |first_visit|
// lies more than one day before the current time and 0.0 otherwise.
void BrowserFeatureExtractor::SetHostVisitsFeatures(
    int num_visits,
    base::Time first_visit,
    bool is_http_query,
    ClientPhishingRequest* request) {
  DCHECK(request);

  const char* count_feature = is_http_query ? features::kHttpHostVisitCount
                                            : features::kHttpsHostVisitCount;
  AddFeature(count_feature, static_cast<double>(num_visits), request);

  if (num_visits <= 0) {
    return;
  }

  const char* first_visit_feature =
      is_http_query ? features::kFirstHttpHostVisitMoreThan24hAgo
                    : features::kFirstHttpsHostVisitMoreThan24hAgo;
  const bool visited_over_a_day_ago =
      first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1));
  AddFeature(first_visit_feature,
             visited_over_a_day_ago ? 1.0 : 0.0,
             request);
}
// Records (|request|, |callback|) in |pending_queries_| under |handle| so the
// matching completion handler can retrieve it with GetPendingQuery().
// |handle| is expected not to be present yet.
void BrowserFeatureExtractor::StorePendingQuery(
    CancelableRequestProvider::Handle handle,
    ClientPhishingRequest* request,
    const DoneCallback& callback) {
  DCHECK_EQ(0U, pending_queries_.count(handle));
  pending_queries_[handle] = ExtractionData(request, callback);
}
// Retrieves and removes the entry stored under |handle| in
// |pending_queries_|.
//
// On success writes the stored request and callback to |*request| and
// |*callback| and returns true.  Returns false, leaving both outputs
// untouched, when no entry exists for |handle| (which also trips a DCHECK).
bool BrowserFeatureExtractor::GetPendingQuery(
    CancelableRequestProvider::Handle handle,
    ClientPhishingRequest** request,
    DoneCallback* callback) {
  PendingQueriesMap::iterator found = pending_queries_.find(handle);
  DCHECK(found != pending_queries_.end());
  if (found == pending_queries_.end()) {
    return false;
  }

  *request = found->second.first;
  *callback = found->second.second;
  pending_queries_.erase(found);
  return true;
}
// Looks up the history service for the profile behind |tab_|'s browser
// context, requesting Profile::EXPLICIT_ACCESS.
//
// |*history| is always written: it is the service on success and NULL
// otherwise.  Returns true only when a non-NULL service was obtained; a
// failure is logged at verbosity level 2.
bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
  *history = NULL;

  const bool has_browser_context = tab_ && tab_->GetBrowserContext();
  if (has_browser_context) {
    Profile* profile = Profile::FromBrowserContext(tab_->GetBrowserContext());
    *history = HistoryServiceFactory::GetForProfile(profile,
                                                    Profile::EXPLICIT_ACCESS);
  }

  if (*history) {
    return true;
  }
  VLOG(2) << "Unable to query history. No history service available.";
  return false;
}
// Reply half of ExtractMalwareFeatures(), run on the UI thread after the
// IO-thread filtering has finished.
//
// Copies the URL infos of the entries in |bad_ips| into |request|, stopping
// once kMaxMalwareIPPerRequest entries have been copied, and then runs
// |callback| with true, handing over ownership of |request|.
void BrowserFeatureExtractor::FinishExtractMalwareFeatures(
    scoped_ptr<IPUrlMap> bad_ips,
    MalwareDoneCallback callback,
    scoped_ptr<ClientMalwareRequest> request) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  int reported_ips = 0;
  IPUrlMap::const_iterator bad_ip = bad_ips->begin();
  while (bad_ip != bad_ips->end()) {
    AddMalwareIpUrlInfo(bad_ip->first, bad_ip->second, request.get());
    // The cap counts IP entries, not the individual URL infos added for them.
    if (++reported_ips >= kMaxMalwareIPPerRequest) {
      break;
    }
    ++bad_ip;
  }

  callback.Run(true, request.Pass());
}
}