root/chrome/browser/safe_browsing/safe_browsing_util.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. SBFullHashEqual

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_

#include <cstring>
#include <deque>
#include <set>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/strings/string_piece.h"
#include "chrome/browser/safe_browsing/chunk_range.h"

class GURL;

class SBEntry;

// A truncated hash's type.
typedef uint32 SBPrefix;

// Container for holding a chunk URL and the list it belongs to.
struct ChunkUrl {
  std::string url;
  std::string list_name;
};

// A full hash.
union SBFullHash {
  char full_hash[32];
  SBPrefix prefix;
};

inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
  return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
}

// Generate full hash for the given string.
SBFullHash SBFullHashForString(const base::StringPiece& str);

// Container for information about a specific host in an add/sub chunk.
struct SBChunkHost {
  SBPrefix host;
  SBEntry* entry;
};

// Container for an add/sub chunk.
struct SBChunk {
  SBChunk();
  ~SBChunk();

  int chunk_number;
  int list_id;
  bool is_add;
  std::deque<SBChunkHost> hosts;
};

// Container for a set of chunks.  Interim wrapper to replace use of
// |std::deque<SBChunk>| with something having safer memory semantics.
// management.
// TODO(shess): |SBEntry| is currently a very roundabout way to hold
// things pending storage.  It could be replaced with the structures
// used in SafeBrowsingStore, then lots of bridging code could
// dissappear.
class SBChunkList {
 public:
  SBChunkList();
  ~SBChunkList();

  // Implement that subset of the |std::deque<>| interface which
  // callers expect.
  bool empty() const { return chunks_.empty(); }
  size_t size() { return chunks_.size(); }

  void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
  SBChunk& back() { return chunks_.back(); }
  SBChunk& front() { return chunks_.front(); }
  const SBChunk& front() const { return chunks_.front(); }

  typedef std::vector<SBChunk>::const_iterator const_iterator;
  const_iterator begin() const { return chunks_.begin(); }
  const_iterator end() const { return chunks_.end(); }

  typedef std::vector<SBChunk>::iterator iterator;
  iterator begin() { return chunks_.begin(); }
  iterator end() { return chunks_.end(); }

  SBChunk& operator[](size_t n) { return chunks_[n]; }
  const SBChunk& operator[](size_t n) const { return chunks_[n]; }

  // Calls |SBEvent::Destroy()| before clearing |chunks_|.
  void clear();

 private:
  std::vector<SBChunk> chunks_;

  DISALLOW_COPY_AND_ASSIGN(SBChunkList);
};

// Used when we get a gethash response.
struct SBFullHashResult {
  SBFullHash hash;
  std::string list_name;
  int add_chunk_id;
};

// Contains information about a list in the database.
struct SBListChunkRanges {
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // The list name.
  std::string adds;  // The ranges for add chunks.
  std::string subs;  // The ranges for sub chunks.
};

// Container for deleting chunks from the database.
struct SBChunkDelete {
  SBChunkDelete();
  ~SBChunkDelete();

  std::string list_name;
  bool is_sub_del;
  std::vector<ChunkRange> chunk_del;
};

// Different types of threats that SafeBrowsing protects against.
enum SBThreatType {
  // No threat at all.
  SB_THREAT_TYPE_SAFE,

  // The URL is being used for phishing.
  SB_THREAT_TYPE_URL_PHISHING,

  // The URL hosts malware.
  SB_THREAT_TYPE_URL_MALWARE,

  // The download URL is malware.
  SB_THREAT_TYPE_BINARY_MALWARE_URL,

  // Url detected by the client-side phishing model.  Note that unlike the
  // above values, this does not correspond to a downloaded list.
  SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,

  // The Chrome extension or app (given by its ID) is malware.
  SB_THREAT_TYPE_EXTENSION,

  // Url detected by the client-side malware IP list. This IP list is part
  // of the client side detection model.
  SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
};

// SBEntry ---------------------------------------------------------------------

// Holds information about the prefixes for a hostkey.  prefixes can either be
// 4 bytes (truncated hash) or 32 bytes (full hash).
// For adds:
//   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
// For subs:
//   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
//       [add chunk][prefix][add chunk][prefix]
class SBEntry {
 public:
  enum Type {
    ADD_PREFIX,     // 4 byte add entry.
    SUB_PREFIX,     // 4 byte sub entry.
    ADD_FULL_HASH,  // 32 byte add entry.
    SUB_FULL_HASH,  // 32 byte sub entry.
  };

  // Creates a SBEntry with the necessary size for the given number of prefixes.
  // Caller ownes the object and needs to free it by calling Destroy.
  static SBEntry* Create(Type type, int prefix_count);

  // Frees the entry's memory.
  void Destroy();

  void set_list_id(int list_id) { data_.list_id = list_id; }
  int list_id() const { return data_.list_id; }
  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
  int chunk_id() const { return data_.chunk_id; }
  int prefix_count() const { return data_.prefix_count; }

  // Returns true if this is a prefix as opposed to a full hash.
  bool IsPrefix() const {
    return type() == ADD_PREFIX || type() == SUB_PREFIX;
  }

  // Returns true if this is an add entry.
  bool IsAdd() const {
    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
  }

  // Returns true if this is a sub entry.
  bool IsSub() const {
    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
  }

  // Helper to return the size of the prefixes.
  int HashLen() const {
    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
  }

  // For add entries, returns the add chunk id.  For sub entries, returns the
  // add_chunk id for the prefix at the given index.
  int ChunkIdAtPrefix(int index) const;

  // Used for sub chunks to set the chunk id at a given index.
  void SetChunkIdAtPrefix(int index, int chunk_id);

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  const SBPrefix& PrefixAt(int index) const;
  const SBFullHash& FullHashAt(int index) const;

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  void SetPrefixAt(int index, const SBPrefix& prefix);
  void SetFullHashAt(int index, const SBFullHash& full_hash);

 private:
  // Container for a sub prefix.
  struct SBSubPrefix {
    int add_chunk;
    SBPrefix prefix;
  };

  // Container for a sub full hash.
  struct SBSubFullHash {
    int add_chunk;
    SBFullHash prefix;
  };

  // Keep the fixed data together in one struct so that we can get its size
  // easily.  If any of this is modified, the database will have to be cleared.
  struct Data {
    int list_id;
    // For adds, this is the add chunk number.
    // For subs: if prefix_count is 0 then this is the add chunk that this sub
    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    //     or sub_full_hashes is used for each corresponding prefix.
    int chunk_id;
    Type type;
    int prefix_count;
  };

  SBEntry();
  ~SBEntry();

  // Helper to return the size of each prefix entry (i.e. for subs this
  // includes an add chunk id).
  static int PrefixSize(Type type);

  // Helper to return how much memory a given Entry would require.
  static int Size(Type type, int prefix_count);

  // Returns how many bytes this entry is.
  int Size() const;

  Type type() const { return data_.type; }

  void set_prefix_count(int count) { data_.prefix_count = count; }
  void set_type(Type type) { data_.type = type; }

  // The prefixes union must follow the fixed data so that they're contiguous
  // in memory.
  Data data_;
  union {
    SBPrefix add_prefixes_[1];
    SBSubPrefix sub_prefixes_[1];
    SBFullHash add_full_hashes_[1];
    SBSubFullHash sub_full_hashes_[1];
  };
};


// Utility functions -----------------------------------------------------------

namespace safe_browsing_util {

// SafeBrowsing list names.
extern const char kMalwareList[];
extern const char kPhishingList[];
// Binary Download list name.
extern const char kBinUrlList[];
// SafeBrowsing client-side detection whitelist list name.
extern const char kCsdWhiteList[];
// SafeBrowsing download whitelist list name.
extern const char kDownloadWhiteList[];
// SafeBrowsing extension list name.
extern const char kExtensionBlacklist[];
// SafeBrowsing side-effect free whitelist name.
extern const char kSideEffectFreeWhitelist[];
// SafeBrowsing csd malware IP blacklist name.
extern const char kIPBlacklist[];

// This array must contain all Safe Browsing lists.
extern const char* kAllLists[8];

enum ListType {
  INVALID = -1,
  MALWARE = 0,
  PHISH = 1,
  BINURL = 2,
  // Obsolete BINHASH = 3,
  CSDWHITELIST = 4,
  // SafeBrowsing lists are stored in pairs.  Keep ListType 5
  // available for a potential second list that we would store in the
  // csd-whitelist store file.
  DOWNLOADWHITELIST = 6,
  // See above comment. Leave 7 available.
  EXTENSIONBLACKLIST = 8,
  // See above comment. Leave 9 available.
  SIDEEFFECTFREEWHITELIST = 10,
  // See above comment. Leave 11 available.
  IPBLACKLIST = 12,
  // See above comment.  Leave 13 available.
};

// Maps a list name to ListType.
ListType GetListId(const std::string& name);

// Maps a ListId to list name. Return false if fails.
bool GetListName(ListType list_id, std::string* list);

// Canonicalizes url as per Google Safe Browsing Specification.
// See section 6.1 in
// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
                     std::string* canonicalized_path,
                     std::string* canonicalized_query);

// Given a URL, returns all the hosts we need to check.  They are returned
// in order of size (i.e. b.c is first, then a.b.c).
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);

// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);

// Given a URL, returns all the patterns we need to check.
void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);

int GetHashIndex(const SBFullHash& hash,
                 const std::vector<SBFullHashResult>& full_hashes);

// Given a URL, compare all the possible host + path full hashes to the set of
// provided full hashes.  Returns the index of the match if one is found, or -1
// otherwise.
int GetUrlHashIndex(const GURL& url,
                    const std::vector<SBFullHashResult>& full_hashes);

bool IsPhishingList(const std::string& list_name);
bool IsMalwareList(const std::string& list_name);
bool IsBadbinurlList(const std::string& list_name);
bool IsExtensionList(const std::string& list_name);

GURL GeneratePhishingReportUrl(const std::string& report_page,
                               const std::string& url_to_report,
                               bool is_client_side_detection);

SBFullHash StringToSBFullHash(const std::string& hash_in);
std::string SBFullHashToString(const SBFullHash& hash_out);

}  // namespace safe_browsing_util

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_

/* [<][>][^][v][top][bottom][index][help] */