root/chrome/browser/history/top_sites_cache.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_

#include <map>
#include <utility>

#include "base/memory/ref_counted.h"
#include "chrome/browser/history/history_types.h"
#include "chrome/browser/history/url_utils.h"
#include "url/gurl.h"

class GURL;

namespace history {

// TopSitesCache caches thumbnails for visited pages. Retrieving thumbnails from
// a given input URL is a two-stage process:
//
//   input URL --(map 1)--> canonical URL --(map 2)--> image.
//
// (map 1) searches for a URL in |canonical_urls_| that "matches" (see below)
// the input URL. If found, the canonical URL is assigned to the result.
// Otherwise the input URL is considered to already be a canonical URL.
//
// (map 2) simply looks up the canonical URL in |images_|.
//
// The rule to "match" a URL in |canonical_urls_| always favors exact match.
// - In GetCanonicalURL(), exact match is the only case examined.
// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
//   i.e., stored URLs in |canonical_urls_| that are prefixes of the input URL,
//   ignoring "?query#ref".
// For such generalized (URL prefix) matches, we prefer the match that is
// closest to the input URL, w.r.t. path hierarchy.

// TopSitesCache caches the top sites and thumbnails for TopSites.
class TopSitesCache {
 public:
  TopSitesCache();
  ~TopSitesCache();

  // Sets the top sites. In |top_sites| all forced URLs must appear before
  // non-forced URLs. This is only checked in debug.
  void SetTopSites(const MostVisitedURLList& top_sites);
  // Read-only access to the currently stored top sites.
  const MostVisitedURLList& top_sites() const { return top_sites_; }

  // Sets the thumbnails.
  void SetThumbnails(const URLToImagesMap& images);
  // Read-only access to the currently stored thumbnails.
  const URLToImagesMap& images() const { return images_; }

  // Returns the thumbnail as an Image for the specified url. This adds an entry
  // for |url| if one has not yet been added.
  Images* GetImage(const GURL& url);

  // Fetches the thumbnail for the specified url. Returns true if there is a
  // thumbnail for the specified url, with the result delivered through |bytes|.
  // It is possible for a URL to be in TopSites but not have a thumbnail.
  bool GetPageThumbnail(const GURL& url,
                        scoped_refptr<base::RefCountedMemory>* bytes) const;

  // Fetches the thumbnail score for the specified url. Returns true if
  // there is a thumbnail score for the specified url, with the result
  // delivered through |score|.
  bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;

  // Returns the canonical URL for |url|.
  // NOTE(review): a reference is returned; when |url| has no stored match the
  // result presumably refers to |url| itself, so do not keep it beyond the
  // lifetime of the argument -- confirm against the implementation.
  const GURL& GetCanonicalURL(const GURL& url) const;

  // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
  // Prefers an exact match if it exists, or the least generalized match while
  // ignoring "?query#ref". Returns the resulting canonical URL if match is
  // found, otherwise returns an empty GURL.
  GURL GetGeneralizedCanonicalURL(const GURL& url) const;

  // Returns true if |url| is known.
  bool IsKnownURL(const GURL& url) const;

  // Returns the index into |top_sites_| for |url|.
  // NOTE(review): the behavior for a |url| that is not known is not visible
  // from this header; presumably callers should check IsKnownURL() first --
  // confirm against the implementation.
  size_t GetURLIndex(const GURL& url) const;

  // Returns the number of non-forced URLs in the cache.
  size_t GetNumNonForcedURLs() const;

  // Returns the number of forced URLs in the cache.
  size_t GetNumForcedURLs() const;

 private:
  // The entries in CanonicalURLs, see CanonicalURLs for details. The second
  // argument gives the index of the URL into MostVisitedURLs redirects.
  typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;

  // Comparator used for CanonicalURLs. Orders two entries by passing the specs
  // of the redirect URLs they designate (|first->redirects[second]|) to
  // CanonicalURLStringCompare().
  class CanonicalURLComparator {
   public:
    // |e1| and |e2| are dereferenced and indexed without any checking, so each
    // must carry a valid MostVisitedURL pointer and an in-range redirect index.
    bool operator()(const CanonicalURLEntry& e1,
                    const CanonicalURLEntry& e2) const {
      return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
                                       e2.first->redirects[e2.second].spec());
    }
  };

  // Creates the object needed to form std::map queries into |canonical_urls_|,
  // wrapping all required temporary data to allow inlining.
  //
  // NOTE(review): |entry_| stores a raw MostVisitedURL*. Presumably the
  // constructor points it at |most_visited_url_|; if so, a copy of this object
  // would still refer to the original's storage. Avoid copying instances --
  // confirm against the constructor.
  class CanonicalURLQuery {
   public:
    explicit CanonicalURLQuery(const GURL& url);
    ~CanonicalURLQuery();

    // Returns the key to hand to lookups on |canonical_urls_|. This is a pure
    // read accessor, so it is const like the other accessors in this file and
    // may be called on temporaries and through const references alike.
    const CanonicalURLEntry& entry() const { return entry_; }

   private:
    // Backing storage owned by the query object.
    MostVisitedURL most_visited_url_;
    // The (MostVisitedURL*, redirect index) key exposed through entry().
    CanonicalURLEntry entry_;
  };

  // This is used to map from redirect url to the MostVisitedURL the redirect is
  // from. Ideally this would be map<GURL, size_t> (second param indexing into
  // top_sites_), but this results in duplicating all redirect urls. As some
  // sites have a lot of redirects, we instead use the MostVisitedURL* and the
  // index of the redirect as the key, and the index into top_sites_ as the
  // value. This way we aren't duplicating GURLs. CanonicalURLComparator
  // enforces the ordering as if we were using GURLs.
  typedef std::map<CanonicalURLEntry, size_t,
                   CanonicalURLComparator> CanonicalURLs;

  // Counts the number of forced URLs.
  void CountForcedURLs();

  // Generates the set of canonical urls from |top_sites_|.
  void GenerateCanonicalURLs();

  // Stores a set of redirects. This is used by GenerateCanonicalURLs.
  void StoreRedirectChain(const RedirectList& redirects, size_t destination);

  // Returns the iterator into |canonical_urls_| for the |url|.
  CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;

  // Returns the GURL corresponding to an iterator in |canonical_urls_|.
  const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;

  // The number of top sites with forced URLs.
  size_t num_forced_urls_;

  // The top sites. This list must always contain the forced URLs first followed
  // by the non-forced URLs. This is not strictly enforced but is checked in
  // debug.
  MostVisitedURLList top_sites_;

  // The images. These map from canonical url to image.
  URLToImagesMap images_;

  // Generated from the redirects to and from the most visited pages. See
  // description above typedef for details.
  CanonicalURLs canonical_urls_;

  // Helper to clear "?query#ref" from any GURL. This is set in the constructor
  // and never modified after.
  GURL::Replacements clear_query_ref_;

  // Helper to clear "/path?query#ref" from any GURL. This is set in the
  // constructor and never modified after.
  GURL::Replacements clear_path_query_ref_;

  DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
};

}  // namespace history

#endif  // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_

/* [<][>][^][v][top][bottom][index][help] */