root/net/tools/dump_cache/url_to_filename_encoder_unittest.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. dir_sep_
  2. CheckSegmentLength
  3. CheckValidChars
  4. Validate
  5. ValidateAllSegmentsSmall
  6. ValidateNoChange
  7. ValidateEscaped
  8. ValidateUrl
  9. ValidateUrlOldNew
  10. ValidateEncodeSame
  11. TEST_F
  12. TEST_F
  13. TEST_F
  14. TEST_F
  15. TEST_F
  16. TEST_F
  17. TEST_F
  18. TEST_F
  19. TEST_F
  20. TEST_F
  21. TEST_F
  22. TEST_F

// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/tools/dump_cache/url_to_filename_encoder.h"

#include <string>
#include <vector>

#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"

using base::StringPiece;
using std::string;

namespace net {

// kDirSeparator is the path separator the tests expect in encoded filenames on
// the build platform; kOtherDirSeparator is the opposite slash, which must
// never show up in an encoded filename.  Both are compile-time constants, so
// they are declared const (giving them internal linkage in this test file).
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif

class UrlToFilenameEncoderTest : public ::testing::Test {
 protected:
  UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar),
                               dir_sep_(1, kDirSeparator) {
  }

  void CheckSegmentLength(const StringPiece& escaped_word) {
    std::vector<StringPiece> components;
    Tokenize(escaped_word, StringPiece("/"), &components);
    for (size_t i = 0; i < components.size(); ++i) {
      EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
                components[i].size());
    }
  }

  void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) {
    // These characters are invalid in Windows.  We add in ', as that's pretty
    // inconvenient in a Unix filename.
    //
    // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
    const string kInvalidChars = "<>:\"|?*'";
    for (size_t i = 0; i < escaped_word.size(); ++i) {
      char c = escaped_word[i];
      EXPECT_EQ(string::npos, kInvalidChars.find(c));
      EXPECT_NE(invalid_slash, c);
      EXPECT_NE('\0', c);  // only invalid character in Posix
      EXPECT_GT(0x7E, c);  // only English printable characters
    }
  }

  void Validate(const string& in_word, const string& gold_word) {
    string escaped_word, url;
    UrlToFilenameEncoder::EncodeSegment(
        std::string(), in_word, '/', &escaped_word);
    EXPECT_EQ(gold_word, escaped_word);
    CheckSegmentLength(escaped_word);
    CheckValidChars(escaped_word, '\\');
    UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
    EXPECT_EQ(in_word, url);
  }

  void ValidateAllSegmentsSmall(const string& in_word) {
    string escaped_word, url;
    UrlToFilenameEncoder::EncodeSegment(
        std::string(), in_word, '/', &escaped_word);
    CheckSegmentLength(escaped_word);
    CheckValidChars(escaped_word, '\\');
    UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
    EXPECT_EQ(in_word, url);
  }

  void ValidateNoChange(const string& word) {
    // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
    Validate(word, word + escape_);
  }

  void ValidateEscaped(unsigned char ch) {
    // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
    char escaped[100];
    const char escape = UrlToFilenameEncoder::kEscapeChar;
    base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape);
    Validate(string(1, ch), escaped);
  }

  void ValidateUrl(const string& url, const string& base_path,
                   bool legacy_escape, const string& gold_filename) {
    string encoded_filename = UrlToFilenameEncoder::Encode(
        url, base_path, legacy_escape);
    EXPECT_EQ(gold_filename, encoded_filename);
    if (!legacy_escape) {
      CheckSegmentLength(encoded_filename);
      CheckValidChars(encoded_filename, kOtherDirSeparator);
      string decoded_url;
      UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator,
                                   &decoded_url);
      if (url != decoded_url) {
        EXPECT_EQ(url, "http://" + decoded_url);
      }
    }
  }

  void ValidateUrlOldNew(const string& url, const string& gold_old_filename,
                         const string& gold_new_filename) {
    ValidateUrl(url, std::string(), true, gold_old_filename);
    ValidateUrl(url, std::string(), false, gold_new_filename);
  }

  void ValidateEncodeSame(const string& url1, const string& url2) {
    string filename1 = UrlToFilenameEncoder::Encode(url1, std::string(), false);
    string filename2 = UrlToFilenameEncoder::Encode(url2, std::string(), false);
    EXPECT_EQ(filename1, filename2);
  }

  string escape_;
  string dir_sep_;
};

// Words made only of pass-through characters must come back unchanged, apart
// from the trailing escape character that ValidateNoChange() accounts for.
TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) {
  static const char* const kPassThroughWords[] = {
    "",
    "abcdefg",
    "abcdefghijklmnopqrstuvwxyz",
    "ZYXWVUT",
    "ZYXWVUTSRQPONMLKJIHGFEDCBA",
    "01234567689",
    "_.=+-",
    "abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA"
    "01234567689_.=+-",
    "index.html",
    "/",
    "/.",
    ".",
    "..",
  };
  const size_t word_count =
      sizeof(kPassThroughWords) / sizeof(kPassThroughWords[0]);
  for (size_t n = 0; n < word_count; ++n) {
    ValidateNoChange(kPassThroughWords[n]);
  }
}

// Every character listed here, plus NUL and all bytes from 127 upward, must
// be replaced by an escape sequence.
TEST_F(UrlToFilenameEncoderTest, Escapes) {
  const string must_escape =
      "<>:\"\\|?*"      // Illegal on Windows
      "~`!$^&(){}[]';"  // Bad for Unix shells
      "^@"              // Build tool doesn't like
      "#%"              // Tool doesn't like
      ",";              // The escape char has to be escaped

  for (string::const_iterator it = must_escape.begin();
       it != must_escape.end(); ++it) {
    ValidateEscaped(*it);
  }

  // Non-printable characters: NUL first, then DEL and the whole high range.
  ValidateEscaped('\0');
  for (int code = 127; code <= 255; ++code) {
    ValidateEscaped(static_cast<char>(code));
  }
}

// Spot checks of complete encodings, including "." / ".." path components,
// doubled slashes and query strings.
TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) {
  const string& esc = escape_;
  const string amp = esc + "26";       // encoded '&'
  const string question = esc + "3F";  // encoded '?'
  const string slash = esc + "2F";     // encoded '/'

  Validate("mysite.com&x", "mysite.com" + amp + "x" + esc);

  // "." and ".." directory components get an escape character in front.
  Validate("/./", "/" + esc + "./" + esc);
  Validate("/../", "/" + esc + "../" + esc);
  Validate("//", "/" + slash + esc);
  Validate("/./leaf", "/" + esc + "./leaf" + esc);
  Validate("/../leaf", "/" + esc + "../leaf" + esc);
  Validate("//leaf", "/" + slash + "leaf" + esc);

  Validate("mysite/u?param1=x&param2=y",
           "mysite/u" + question + "param1=x" + amp + "param2=y" + esc);

  // From Latency Labs bing test.
  Validate("search?q=dogs&go=&form=QBLH&qs=n",
           "search" + question + "q=dogs" + amp + "go=" + amp + "form=QBLH" +
           amp + "qs=n" + esc);

  Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true",
           esc + "7Ejoebob/my_neeto-website+with_stuff.asp" + question +
           "id=138" + amp + "content=true" + esc);
}

// Checks UrlToFilenameEncoder::Encode() on complete URLs.  For each
// ValidateUrlOldNew() call the first gold string is the expected filename with
// legacy escaping enabled and the second is the expected filename with it
// disabled; only the latter is also decoded and compared with the URL.
TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) {
  ValidateUrlOldNew("http://www.google.com/index.html",
                    "www.google.com" + dir_sep_ + "indexx2Ehtml",
                    "www.google.com" + dir_sep_ + "index.html" + escape_);
  ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=",
                    "www.google.com" + dir_sep_ + "x" + dir_sep_ +
                    "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D",

                    "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" +
                    escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ +
                    "26oq=" + escape_);
  // Trailing double slash: the second slash is escaped instead of producing
  // an empty path component.
  ValidateUrlOldNew("http://www.foo.com/a//",
                    "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml",
                    "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" +
                    escape_);

  // From bug: Double slash preserved.
  // Only the non-legacy encoding is checked for this URL.
  ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html",
              std::string(),
              false,
              "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" +
              escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" +
              dir_sep_ + "index.html" + escape_);
  ValidateUrlOldNew(
      "http://blogutils.net/olct/online.php?"
      "site=http://thelwordfanfics.blogspot.&interval=600",

      "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F"
      "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600",

      "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ +
      "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ +
      "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_);
}

// From bug: Escapes treated the same as normal char.
// A %XX sequence in the input must encode to the same filename as the
// character it stands for.
TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) {
  // Every 7-bit character, literal versus percent-encoded.
  for (int code = 0; code < 128; ++code) {
    const string literal(1, static_cast<char>(code));
    const string percent_encoded = base::StringPrintf("%%%02X", code);
    ValidateEncodeSame(literal, percent_encoded);
  }

  // A real-world URL that differs only in "!" versus "%21".
  const string url_with_literal_bang =
      "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot"
      "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch";
  const string url_with_escaped_bang =
      "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot"
      "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch";
  ValidateEncodeSame(url_with_literal_bang, url_with_escaped_bang);
}

// From bug: Filename encoding is not prefix-free.
// In a run of slashes, the slash directly after the leading one is emitted as
// an escape sequence rather than as an empty path component.
TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) {
  const string escaped_slash = escape_ + "2F";
  Validate("/", "/" + escape_);
  Validate("//", "/" + escaped_slash + escape_);
  Validate("///", "/" + escaped_slash + "/" + escape_);
}

// A final path component longer than kMaximumSubdirectoryLength must be split
// into several components, each split marked by escape_ + "-" before the
// inserted '/'.
TEST_F(UrlToFilenameEncoderTest, LongTail) {
  static char long_word[] =
      "~joebob/briggs/12345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890"
      "1234567890123456789012345678901234567890123456789012345678901234567890";

  // the long lines in the string below are 64 characters, so we can see
  // the slashes every 128.
  string gold_long_word =
      escape_ + "7Ejoebob/briggs/"
      "1234567890123456789012345678901234567890123456789012345678901234"
      "56789012345678901234567890123456789012345678901234567890123456" +
      escape_ + "-/"
      "7890123456789012345678901234567890123456789012345678901234567890"
      "12345678901234567890123456789012345678901234567890123456789012" +
      escape_ + "-/"
      "3456789012345678901234567890123456789012345678901234567890123456"
      "78901234567890123456789012345678901234567890123456789012345678" +
      escape_ + "-/"
      "9012345678901234567890" + escape_;
  // Sanity check: the input really is longer than one subdirectory may be.
  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
            sizeof(long_word));
  Validate(long_word, gold_long_word);
}

// Like LongTail, but the long final component is full of '?' characters, so
// the split points depend on the escaped length rather than the input length.
TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) {
  // Here the '?' in the last path segment expands to escape_ + "3F", making
  // it hit 128 chars before the input segment gets that big.
  static char long_word[] =
      "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?";

  // Notice that at the end of the third segment, we avoid splitting
  // the (escape_ + "3F") that was generated from the "?", so that segment is
  // only 127 characters.
  string pattern = "1234567" + escape_ + "3F";  // 10 characters
  string gold_long_word =
      escape_ + "7Ejoebob/briggs/" +
      pattern + pattern + pattern + pattern + pattern + pattern + "1234"
      "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
       "123456" + escape_ + "-/"
      "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
      pattern + pattern + pattern + pattern + pattern + pattern + pattern +
      "12" +
      escape_ + "-/"
      "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern
      + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern
      + pattern + "1234567" +
      escape_ + "-/" +
      escape_ + "3F" + pattern + pattern + escape_;
  // Sanity check: the input really is longer than one subdirectory may be.
  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
            sizeof(long_word));
  Validate(long_word, gold_long_word);
}

// Round-trips runs of 'x' whose length is within 4 characters either side of
// kMaximumSubdirectoryLength.
TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) {
  for (int delta = -4; delta <= 4; ++delta) {
    const string input(
        UrlToFilenameEncoder::kMaximumSubdirectoryLength + delta, 'x');
    ValidateAllSegmentsSmall(input);
  }
}

// Same lengths as CornerCasesNearMaxLenNoEscape, but the final 'x' is
// replaced by a '.', so that truncation with '/' is exercised with a
// different last character.
//
// NOTE(review): this test was written on the assumption that '.' expands to a
// three-character escape sequence.  The DoesNotEscape test in this file
// expects '.' to pass through unchanged, so this case may no longer cover
// truncation *after* expansion -- confirm, and consider a character that is
// really escaped.
TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) {
  for (int delta = -4; delta <= 4; ++delta) {
    string input(
        UrlToFilenameEncoder::kMaximumSubdirectoryLength + delta - 1, 'x');
    input += '.';
    ValidateAllSegmentsSmall(input);
  }
}

// The same name may be a leaf in one path and a directory in another.  Only
// the leaf gets the escape character appended, so in the first path "c" is
// stored as the file "c" + escape character, while in the second it is the
// plain directory "c".
TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) {
  ValidateNoChange("/a/b/c");
  ValidateNoChange("/a/b/c/d");
  ValidateNoChange("/a/b/c/d/");
}


// Splitting an over-long segment must use the separator handed to
// EncodeSegment(), here a backslash.
TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) {
  const size_t max_len = UrlToFilenameEncoder::kMaximumSubdirectoryLength;
  const string one_too_long(max_len + 1, 'x');

  string encoded;
  UrlToFilenameEncoder::EncodeSegment(
      std::string(), one_too_long, '\\', &encoded);

  // Four extra characters are expected: the inserted backslash, the two
  // character truncation marker (escape character followed by '-') and the
  // escape character that ends the leaf.
  EXPECT_EQ(one_too_long.size() + 4, encoded.size());

  // Stop here if the result is too short to index at |max_len|.
  ASSERT_LT(max_len, encoded.size());

  // The backslash has to sit right after a maximum-length first component.
  EXPECT_EQ('\\', encoded[max_len]);
}

}  // namespace net


/* [<][>][^][v][top][bottom][index][help] */