root/content/browser/storage_partition_impl_map.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. file_system_context_
  2. MaybeCreateJob
  3. GetStoragePartitionDomainPath
  4. ObliterateOneDirectory
  5. BlockingObliteratePath
  6. BlockingGarbageCollect
  7. GetStoragePartitionPath
  8. resource_context_initialized_
  9. Get
  10. AsyncObliterate
  11. GarbageCollect
  12. ForEach
  13. PostCreateInitialization

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/storage_partition_impl_map.h"

#include "base/bind.h"
#include "base/callback.h"
#include "base/file_util.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/threading/sequenced_worker_pool.h"
#include "content/browser/appcache/chrome_appcache_service.h"
#include "content/browser/fileapi/browser_file_system_helper.h"
#include "content/browser/fileapi/chrome_blob_storage_context.h"
#include "content/browser/loader/resource_request_info_impl.h"
#include "content/browser/resource_context_impl.h"
#include "content/browser/service_worker/service_worker_request_handler.h"
#include "content/browser/storage_partition_impl.h"
#include "content/browser/streams/stream.h"
#include "content/browser/streams/stream_context.h"
#include "content/browser/streams/stream_registry.h"
#include "content/browser/streams/stream_url_request_job.h"
#include "content/browser/webui/url_data_manager_backend.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/content_browser_client.h"
#include "content/public/browser/storage_partition.h"
#include "content/public/common/content_constants.h"
#include "content/public/common/url_constants.h"
#include "crypto/sha2.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_context_getter.h"
#include "webkit/browser/blob/blob_storage_context.h"
#include "webkit/browser/blob/blob_url_request_job_factory.h"
#include "webkit/browser/fileapi/file_system_url_request_job_factory.h"
#include "webkit/common/blob/blob_data.h"

using appcache::AppCacheService;
using fileapi::FileSystemContext;
using webkit_blob::BlobStorageContext;

namespace content {

namespace {

// A derivative that knows about Streams too.
class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler {
 public:
  BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context,
                      StreamContext* stream_context,
                      fileapi::FileSystemContext* file_system_context)
      : blob_storage_context_(blob_storage_context),
        stream_context_(stream_context),
        file_system_context_(file_system_context) {
  }

  virtual ~BlobProtocolHandler() {
  }

  virtual net::URLRequestJob* MaybeCreateJob(
      net::URLRequest* request,
      net::NetworkDelegate* network_delegate) const OVERRIDE {
    scoped_refptr<Stream> stream =
        stream_context_->registry()->GetStream(request->url());
    if (stream.get())
      return new StreamURLRequestJob(request, network_delegate, stream);

    if (!blob_protocol_handler_) {
      // Construction is deferred because 'this' is constructed on
      // the main thread but we want blob_protocol_handler_ constructed
      // on the IO thread.
      blob_protocol_handler_.reset(
          new webkit_blob::BlobProtocolHandler(
              blob_storage_context_->context(),
              file_system_context_,
              BrowserThread::GetMessageLoopProxyForThread(
                  BrowserThread::FILE).get()));
    }
    return blob_protocol_handler_->MaybeCreateJob(request, network_delegate);
  }

 private:
  const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_;
  const scoped_refptr<StreamContext> stream_context_;
  const scoped_refptr<fileapi::FileSystemContext> file_system_context_;
  mutable scoped_ptr<webkit_blob::BlobProtocolHandler> blob_protocol_handler_;
  DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler);
};

// These constants are used to create the directory structure under the profile
// where renderers with a non-default storage partition keep their persistent
// state. This will contain a set of directories that partially mirror the
// directory structure of BrowserContext::GetPath().
//
// The kStoragePartitionDirname contains an extensions directory which is
// further partitioned by extension id, followed by another level of directories
// for the "default" extension storage partition and one directory for each
// persistent partition used by a webview tag. Example:
//
//   Storage/ext/ABCDEF/def
//   Storage/ext/ABCDEF/hash(partition name)
//
// The code in GetStoragePartitionPath() constructs these path names.
//
// TODO(nasko): Move extension related path code out of content.
const base::FilePath::CharType kStoragePartitionDirname[] =
    FILE_PATH_LITERAL("Storage");
const base::FilePath::CharType kExtensionsDirname[] =
    FILE_PATH_LITERAL("ext");
const base::FilePath::CharType kDefaultPartitionDirname[] =
    FILE_PATH_LITERAL("def");
const base::FilePath::CharType kTrashDirname[] =
    FILE_PATH_LITERAL("trash");

// Because partition names are user specified, they can be arbitrarily long
// which makes them unsuitable for paths names. We use a truncation of a
// SHA256 hash to perform a deterministic shortening of the string. The
// kPartitionNameHashBytes constant controls the length of the truncation.
// We use 6 bytes, which gives us 99.999% reliability against collisions over
// 1 million partition domains.
//
// Analysis:
// We assume that all partition names within one partition domain are
// controlled by the the same entity. Thus there is no chance for adverserial
// attack and all we care about is accidental collision. To get 5 9s over
// 1 million domains, we need the probability of a collision in any one domain
// to be
//
//    p < nroot(1000000, .99999) ~= 10^-11
//
// We use the following birthday attack approximation to calculate the max
// number of unique names for this probability:
//
//    n(p,H) = sqrt(2*H * ln(1/(1-p)))
//
// For a 6-byte hash, H = 2^(6*8).  n(10^-11, H) ~= 75
//
// An average partition domain is likely to have less than 10 unique
// partition names which is far lower than 75.
//
// Note, that for 4 9s of reliability, the limit is 237 partition names per
// partition domain.
const int kPartitionNameHashBytes = 6;

// Needed for selecting all files in ObliterateOneDirectory() below.
#if defined(OS_POSIX)
const int kAllFileTypes = base::FileEnumerator::FILES |
                          base::FileEnumerator::DIRECTORIES |
                          base::FileEnumerator::SHOW_SYM_LINKS;
#else
const int kAllFileTypes = base::FileEnumerator::FILES |
                          base::FileEnumerator::DIRECTORIES;
#endif

base::FilePath GetStoragePartitionDomainPath(
    const std::string& partition_domain) {
  CHECK(IsStringUTF8(partition_domain));

  return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname)
      .Append(base::FilePath::FromUTF8Unsafe(partition_domain));
}

// Helper function for doing a depth-first deletion of the data on disk.
// Examines paths directly in |current_dir| (no recursion) and tries to
// delete from disk anything that is in, or isn't a parent of something in
// |paths_to_keep|. Paths that need further expansion are added to
// |paths_to_consider|.
void ObliterateOneDirectory(const base::FilePath& current_dir,
                            const std::vector<base::FilePath>& paths_to_keep,
                            std::vector<base::FilePath>* paths_to_consider) {
  CHECK(current_dir.IsAbsolute());

  base::FileEnumerator enumerator(current_dir, false, kAllFileTypes);
  for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty();
       to_delete = enumerator.Next()) {
    // Enum tracking which of the 3 possible actions to take for |to_delete|.
    enum { kSkip, kEnqueue, kDelete } action = kDelete;

    for (std::vector<base::FilePath>::const_iterator to_keep =
             paths_to_keep.begin();
         to_keep != paths_to_keep.end();
         ++to_keep) {
      if (to_delete == *to_keep) {
        action = kSkip;
        break;
      } else if (to_delete.IsParent(*to_keep)) {
        // |to_delete| contains a path to keep. Add to stack for further
        // processing.
        action = kEnqueue;
        break;
      }
    }

    switch (action) {
      case kDelete:
        base::DeleteFile(to_delete, true);
        break;

      case kEnqueue:
        paths_to_consider->push_back(to_delete);
        break;

      case kSkip:
        break;
    }
  }
}

// Synchronously attempts to delete |unnormalized_root|, preserving only
// entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on
// disk, then it completely removes |unnormalized_root|. All paths must be
// absolute paths.
void BlockingObliteratePath(
    const base::FilePath& unnormalized_browser_context_root,
    const base::FilePath& unnormalized_root,
    const std::vector<base::FilePath>& paths_to_keep,
    const scoped_refptr<base::TaskRunner>& closure_runner,
    const base::Closure& on_gc_required) {
  // Early exit required because MakeAbsoluteFilePath() will fail on POSIX
  // if |unnormalized_root| does not exist. This is safe because there is
  // nothing to do in this situation anwyays.
  if (!base::PathExists(unnormalized_root)) {
    return;
  }

  // Never try to obliterate things outside of the browser context root or the
  // browser context root itself. Die hard.
  base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root);
  base::FilePath browser_context_root =
      base::MakeAbsoluteFilePath(unnormalized_browser_context_root);
  CHECK(!root.empty());
  CHECK(!browser_context_root.empty());
  CHECK(browser_context_root.IsParent(root) && browser_context_root != root);

  // Reduce |paths_to_keep| set to those under the root and actually on disk.
  std::vector<base::FilePath> valid_paths_to_keep;
  for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin();
       it != paths_to_keep.end();
       ++it) {
    if (root.IsParent(*it) && base::PathExists(*it))
      valid_paths_to_keep.push_back(*it);
  }

  // If none of the |paths_to_keep| are valid anymore then we just whack the
  // root and be done with it.  Otherwise, signal garbage collection and do
  // a best-effort delete of the on-disk structures.
  if (valid_paths_to_keep.empty()) {
    base::DeleteFile(root, true);
    return;
  }
  closure_runner->PostTask(FROM_HERE, on_gc_required);

  // Otherwise, start at the root and delete everything that is not in
  // |valid_paths_to_keep|.
  std::vector<base::FilePath> paths_to_consider;
  paths_to_consider.push_back(root);
  while(!paths_to_consider.empty()) {
    base::FilePath path = paths_to_consider.back();
    paths_to_consider.pop_back();
    ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider);
  }
}

// Deletes all entries inside the |storage_root| that are not in the
// |active_paths|.  Deletion is done in 2 steps:
//
//   (1) Moving all garbage collected paths into a trash directory.
//   (2) Asynchronously deleting the trash directory.
//
// The deletion is asynchronous because after (1) completes, calling code can
// safely continue to use the paths that had just been garbage collected
// without fear of race conditions.
//
// This code also ignores failed moves rather than attempting a smarter retry.
// Moves shouldn't fail here unless there is some out-of-band error (eg.,
// FS corruption). Retry logic is dangerous in the general case because
// there is not necessarily a guaranteed case where the logic may succeed.
//
// This function is still named BlockingGarbageCollect() because it does
// execute a few filesystem operations synchronously.
void BlockingGarbageCollect(
    const base::FilePath& storage_root,
    const scoped_refptr<base::TaskRunner>& file_access_runner,
    scoped_ptr<base::hash_set<base::FilePath> > active_paths) {
  CHECK(storage_root.IsAbsolute());

  base::FileEnumerator enumerator(storage_root, false, kAllFileTypes);
  base::FilePath trash_directory;
  if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname,
                                     &trash_directory)) {
    // Unable to continue without creating the trash directory so give up.
    return;
  }
  for (base::FilePath path = enumerator.Next(); !path.empty();
       path = enumerator.Next()) {
    if (active_paths->find(path) == active_paths->end() &&
        path != trash_directory) {
      // Since |trash_directory| is unique for each run of this function there
      // can be no colllisions on the move.
      base::Move(path, trash_directory.Append(path.BaseName()));
    }
  }

  file_access_runner->PostTask(
      FROM_HERE,
      base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true));
}

}  // namespace

// static
base::FilePath StoragePartitionImplMap::GetStoragePartitionPath(
    const std::string& partition_domain,
    const std::string& partition_name) {
  if (partition_domain.empty())
    return base::FilePath();

  base::FilePath path = GetStoragePartitionDomainPath(partition_domain);

  // TODO(ajwong): Mangle in-memory into this somehow, either by putting
  // it into the partition_name, or by manually adding another path component
  // here.  Otherwise, it's possible to have an in-memory StoragePartition and
  // a persistent one that return the same FilePath for GetPath().
  if (!partition_name.empty()) {
    // For analysis of why we can ignore collisions, see the comment above
    // kPartitionNameHashBytes.
    char buffer[kPartitionNameHashBytes];
    crypto::SHA256HashString(partition_name, &buffer[0],
                             sizeof(buffer));
    return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer)));
  }

  return path.Append(kDefaultPartitionDirname);
}

StoragePartitionImplMap::StoragePartitionImplMap(
    BrowserContext* browser_context)
    : browser_context_(browser_context),
      resource_context_initialized_(false) {
  // Doing here instead of initializer list cause it's just too ugly to read.
  base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool();
  file_access_runner_ =
      blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken());
}

StoragePartitionImplMap::~StoragePartitionImplMap() {
  STLDeleteContainerPairSecondPointers(partitions_.begin(),
                                       partitions_.end());
}

StoragePartitionImpl* StoragePartitionImplMap::Get(
    const std::string& partition_domain,
    const std::string& partition_name,
    bool in_memory) {
  // Find the previously created partition if it's available.
  StoragePartitionConfig partition_config(
      partition_domain, partition_name, in_memory);

  PartitionMap::const_iterator it = partitions_.find(partition_config);
  if (it != partitions_.end())
    return it->second;

  base::FilePath partition_path =
      browser_context_->GetPath().Append(
          GetStoragePartitionPath(partition_domain, partition_name));
  StoragePartitionImpl* partition =
      StoragePartitionImpl::Create(browser_context_, in_memory,
                                   partition_path);
  partitions_[partition_config] = partition;

  ChromeBlobStorageContext* blob_storage_context =
      ChromeBlobStorageContext::GetFor(browser_context_);
  StreamContext* stream_context = StreamContext::GetFor(browser_context_);
  ProtocolHandlerMap protocol_handlers;
  protocol_handlers[kBlobScheme] =
      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
          new BlobProtocolHandler(blob_storage_context,
                                  stream_context,
                                  partition->GetFileSystemContext()));
  protocol_handlers[kFileSystemScheme] =
      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
          CreateFileSystemProtocolHandler(partition_domain,
                                          partition->GetFileSystemContext()));
  protocol_handlers[kChromeUIScheme] =
      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
          URLDataManagerBackend::CreateProtocolHandler(
              browser_context_->GetResourceContext(),
              browser_context_->IsOffTheRecord(),
              partition->GetAppCacheService(),
              blob_storage_context));
  std::vector<std::string> additional_webui_schemes;
  GetContentClient()->browser()->GetAdditionalWebUISchemes(
      &additional_webui_schemes);
  for (std::vector<std::string>::const_iterator it =
           additional_webui_schemes.begin();
       it != additional_webui_schemes.end();
       ++it) {
    protocol_handlers[*it] =
        linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
            URLDataManagerBackend::CreateProtocolHandler(
                browser_context_->GetResourceContext(),
                browser_context_->IsOffTheRecord(),
                partition->GetAppCacheService(),
                blob_storage_context));
  }
  protocol_handlers[kChromeDevToolsScheme] =
      linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
          CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(),
                                        browser_context_->IsOffTheRecord()));

  ProtocolHandlerScopedVector protocol_interceptors;
  protocol_interceptors.push_back(
      ServiceWorkerRequestHandler::CreateInterceptor().release());

  // These calls must happen after StoragePartitionImpl::Create().
  if (partition_domain.empty()) {
    partition->SetURLRequestContext(
        GetContentClient()->browser()->CreateRequestContext(
            browser_context_,
            &protocol_handlers,
            protocol_interceptors.Pass()));
  } else {
    partition->SetURLRequestContext(
        GetContentClient()->browser()->CreateRequestContextForStoragePartition(
            browser_context_,
            partition->GetPath(),
            in_memory,
            &protocol_handlers,
            protocol_interceptors.Pass()));
  }
  partition->SetMediaURLRequestContext(
      partition_domain.empty() ?
      browser_context_->GetMediaRequestContext() :
      browser_context_->GetMediaRequestContextForStoragePartition(
          partition->GetPath(), in_memory));

  PostCreateInitialization(partition, in_memory);

  return partition;
}

void StoragePartitionImplMap::AsyncObliterate(
    const GURL& site,
    const base::Closure& on_gc_required) {
  // This method should avoid creating any StoragePartition (which would
  // create more open file handles) so that it can delete as much of the
  // data off disk as possible.
  std::string partition_domain;
  std::string partition_name;
  bool in_memory = false;
  GetContentClient()->browser()->GetStoragePartitionConfigForSite(
      browser_context_, site, false, &partition_domain,
      &partition_name, &in_memory);

  // Find the active partitions for the domain. Because these partitions are
  // active, it is not possible to just delete the directories that contain
  // the backing data structures without causing the browser to crash. Instead,
  // of deleteing the directory, we tell each storage context later to
  // remove any data they have saved. This will leave the directory structure
  // intact but it will only contain empty databases.
  std::vector<StoragePartitionImpl*> active_partitions;
  std::vector<base::FilePath> paths_to_keep;
  for (PartitionMap::const_iterator it = partitions_.begin();
       it != partitions_.end();
       ++it) {
    const StoragePartitionConfig& config = it->first;
    if (config.partition_domain == partition_domain) {
      it->second->ClearData(
          // All except shader cache.
          StoragePartition::REMOVE_DATA_MASK_ALL &
            (~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE),
          StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL,
          GURL(),
          StoragePartition::OriginMatcherFunction(),
          base::Time(), base::Time::Max(),
          base::Bind(&base::DoNothing));
      if (!config.in_memory) {
        paths_to_keep.push_back(it->second->GetPath());
      }
    }
  }

  // Start a best-effort delete of the on-disk storage excluding paths that are
  // known to still be in use. This is to delete any previously created
  // StoragePartition state that just happens to not have been used during this
  // run of the browser.
  base::FilePath domain_root = browser_context_->GetPath().Append(
      GetStoragePartitionDomainPath(partition_domain));

  BrowserThread::PostBlockingPoolTask(
      FROM_HERE,
      base::Bind(&BlockingObliteratePath, browser_context_->GetPath(),
                 domain_root, paths_to_keep,
                 base::MessageLoopProxy::current(), on_gc_required));
}

void StoragePartitionImplMap::GarbageCollect(
    scoped_ptr<base::hash_set<base::FilePath> > active_paths,
    const base::Closure& done) {
  // Include all paths for current StoragePartitions in the active_paths since
  // they cannot be deleted safely.
  for (PartitionMap::const_iterator it = partitions_.begin();
       it != partitions_.end();
       ++it) {
    const StoragePartitionConfig& config = it->first;
    if (!config.in_memory)
      active_paths->insert(it->second->GetPath());
  }

  // Find the directory holding the StoragePartitions and delete everything in
  // there that isn't considered active.
  base::FilePath storage_root = browser_context_->GetPath().Append(
      GetStoragePartitionDomainPath(std::string()));
  file_access_runner_->PostTaskAndReply(
      FROM_HERE,
      base::Bind(&BlockingGarbageCollect, storage_root,
                 file_access_runner_,
                 base::Passed(&active_paths)),
      done);
}

void StoragePartitionImplMap::ForEach(
    const BrowserContext::StoragePartitionCallback& callback) {
  for (PartitionMap::const_iterator it = partitions_.begin();
       it != partitions_.end();
       ++it) {
    callback.Run(it->second);
  }
}

void StoragePartitionImplMap::PostCreateInitialization(
    StoragePartitionImpl* partition,
    bool in_memory) {
  // TODO(ajwong): ResourceContexts no longer have any storage related state.
  // We should move this into a place where it is called once per
  // BrowserContext creation rather than piggybacking off the default context
  // creation.
  // Note: moving this into Get() before partitions_[] is set causes reentrency.
  if (!resource_context_initialized_) {
    resource_context_initialized_ = true;
    InitializeResourceContext(browser_context_);
  }

  // Check first to avoid memory leak in unittests.
  if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) {
    BrowserThread::PostTask(
        BrowserThread::IO, FROM_HERE,
        base::Bind(&ChromeAppCacheService::InitializeOnIOThread,
                   partition->GetAppCacheService(),
                   in_memory ? base::FilePath() :
                       partition->GetPath().Append(kAppCacheDirname),
                   browser_context_->GetResourceContext(),
                   make_scoped_refptr(partition->GetURLRequestContext()),
                   make_scoped_refptr(
                       browser_context_->GetSpecialStoragePolicy())));

    // We do not call InitializeURLRequestContext() for media contexts because,
    // other than the HTTP cache, the media contexts share the same backing
    // objects as their associated "normal" request context.  Thus, the previous
    // call serves to initialize the media request context for this storage
    // partition as well.
  }
}

}  // namespace content

/* [<][>][^][v][top][bottom][index][help] */