root/net/base/mime_sniffer_unittest.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. TestArray
  2. SniffMimeType
  3. TEST
  4. TEST
  5. TEST
  6. TEST
  7. TEST
  8. TEST
  9. TEST
  10. TEST
  11. TEST
  12. TEST
  13. TEST
  14. TEST

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/basictypes.h"
#include "net/base/mime_sniffer.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"

namespace net {

struct SnifferTest {
  const char* content;
  size_t content_len;
  std::string url;
  std::string type_hint;
  const char* mime_type;
};

static void TestArray(SnifferTest* tests, size_t count) {
  std::string mime_type;

  for (size_t i = 0; i < count; ++i) {
    SniffMimeType(tests[i].content,
                       tests[i].content_len,
                       GURL(tests[i].url),
                       tests[i].type_hint,
                       &mime_type);
    EXPECT_EQ(tests[i].mime_type, mime_type);
  }
}

// TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
// so the error messages produced by test failures are more useful.
static std::string SniffMimeType(const std::string& content,
                                 const std::string& url,
                                 const std::string& mime_type_hint) {
  std::string mime_type;
  SniffMimeType(content.data(), content.size(), GURL(url),
                     mime_type_hint, &mime_type);
  return mime_type;
}

TEST(MimeSnifferTest, BoundaryConditionsTest) {
  std::string mime_type;
  std::string type_hint;

  char buf[] = {
    'd', '\x1f', '\xFF'
  };

  GURL url;

  SniffMimeType(buf, 0, url, type_hint, &mime_type);
  EXPECT_EQ("text/plain", mime_type);
  SniffMimeType(buf, 1, url, type_hint, &mime_type);
  EXPECT_EQ("text/plain", mime_type);
  SniffMimeType(buf, 2, url, type_hint, &mime_type);
  EXPECT_EQ("application/octet-stream", mime_type);
}

TEST(MimeSnifferTest, BasicSniffingTest) {
  SnifferTest tests[] = {
    { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
      "http://www.example.com/",
      "", "text/html" },
    { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
      "http://www.example.com/foo.gif",
      "application/octet-stream", "application/octet-stream" },
    { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
      "http://www.example.com/foo",
      "text/plain", "image/gif" },
    { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
      "http://www.example.com/foo?param=tt.gif",
      "", "application/octet-stream" },
    { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
      "http://www.example.com/foo",
      "text/plain", "text/plain" },
    { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
      "http://www.example.com/foo",
      "application/octet-stream", "application/octet-stream" },
    { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
      "http://www.example.com/foo",
      "", "image/jpeg" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, ChromeExtensionsTest) {
  SnifferTest tests[] = {
    // schemes
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx",
      "", "application/x-chrome-extension" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "https://www.example.com/foo.crx",
      "", "application/x-chrome-extension" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "ftp://www.example.com/foo.crx",
      "", "application/x-chrome-extension" },

    // some other mimetypes that should get converted
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx",
      "text/plain", "application/x-chrome-extension" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx",
      "application/octet-stream", "application/x-chrome-extension" },

    // success edge cases
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx?query=string",
      "", "application/x-chrome-extension" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo..crx",
      "", "application/x-chrome-extension" },

    // wrong file extension
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.bin",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.bin?monkey",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "invalid-url",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foocrx",
      "", "application/octet-stream" },
    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx.blech",
      "", "application/octet-stream" },

    // wrong magic
    { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
      "http://www.example.com/foo.crx?monkey",
      "", "application/octet-stream" },
    { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
      "http://www.example.com/foo.crx?monkey",
      "", "application/octet-stream" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, MozillaCompatibleTest) {
  SnifferTest tests[] = {
    { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
      "http://www.example.com/",
      "", "text/html" },
    { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
      "http://www.example.com/",
      "text/plain", "text/plain" },
    { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
      "http://www.example.com/foo",
      "", "image/bmp" },
    { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
      "http://www.example.com/favicon.ico",
      "", "application/octet-stream" },
    { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "From: Fred\nTo: Bob\n\nHi\n.\n",
      sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
      sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
      "http://www.example.com/foo",
      "", "text/xml" },
    { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
      sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
      "http://www.example.com/foo",
      "application/octet-stream", "application/octet-stream" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) {
  SnifferTest tests[] = {
    { "GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo",
      "", "image/gif" },
    { "GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo?q=ttt.html",
      "", "image/gif" },
    { "GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("GIF87a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo#ttt.html",
      "", "image/gif" },
    { "a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo?q=ttt.html",
      "", "text/plain" },
    { "a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo#ttt.html",
      "", "text/plain" },
    { "a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n",
      sizeof("a\n<html>\n<body>"
        "<script>alert('haxorzed');\n</script>"
        "</body></html>\n")-1,
      "http://www.example.com/foo.html",
      "", "text/plain" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, UnicodeTest) {
  SnifferTest tests[] = {
    { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
      sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
      sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
    { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
      sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
      "http://www.example.com/foo",
      "", "text/plain" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, FlashTest) {
  SnifferTest tests[] = {
    { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
      "http://www.example.com/foo",
      "", "application/octet-stream" },
    { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
      "http://www.example.com/foo?q=ttt.swf",
      "", "application/octet-stream" },
    { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
      "http://www.example.com/foo#ttt.swf",
      "", "application/octet-stream" },
    { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
      "http://www.example.com/foo.swf",
      "", "text/plain" },
    { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
      "http://www.example.com/foo/bar.swf",
      "", "application/octet-stream" },
    { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
      "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
      "", "application/octet-stream" },
    { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
      "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
      "text/plain", "application/octet-stream" },
  };

  TestArray(tests, arraysize(tests));
}

TEST(MimeSnifferTest, XMLTest) {
  // An easy feed to identify.
  EXPECT_EQ("application/atom+xml",
            SniffMimeType("<?xml?><feed", std::string(), "text/xml"));
  // Don't sniff out of plain text.
  EXPECT_EQ("text/plain",
            SniffMimeType("<?xml?><feed", std::string(), "text/plain"));
  // Simple RSS.
  EXPECT_EQ("application/rss+xml",
            SniffMimeType(
                "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml"));

  // The top of CNN's RSS feed, which we'd like to recognize as RSS.
  static const char kCNNRSS[] =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
      "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
      "type=\"text/xsl\" media=\"screen\"?>"
      "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
      "type=\"text/css\" media=\"screen\"?>"
      "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
      "version=\"2.0\">";
  // CNN's RSS
  EXPECT_EQ("application/rss+xml",
            SniffMimeType(kCNNRSS, std::string(), "text/xml"));
  EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS, std::string(), "text/plain"));

  // Don't sniff random XML as something different.
  EXPECT_EQ("text/xml",
            SniffMimeType("<?xml?><notafeed", std::string(), "text/xml"));
  // Don't sniff random plain-text as something different.
  EXPECT_EQ("text/plain",
            SniffMimeType("<?xml?><notafeed", std::string(), "text/plain"));

  // Positive test for the two instances we upgrade to XHTML.
  EXPECT_EQ("application/xhtml+xml",
            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
                          std::string(),
                          "text/xml"));
  EXPECT_EQ("application/xhtml+xml",
            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
                          std::string(),
                          "application/xml"));

  // Following our behavior with HTML, don't call other mime types XHTML.
  EXPECT_EQ("text/plain",
            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
                          std::string(),
                          "text/plain"));
  EXPECT_EQ("application/rss+xml",
            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
                          std::string(),
                          "application/rss+xml"));

  // Don't sniff other HTML-looking bits as HTML.
  EXPECT_EQ("text/xml",
            SniffMimeType("<html><head>", std::string(), "text/xml"));
  EXPECT_EQ("text/xml",
            SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
                          std::string(),
                          "text/xml"));
}

// Test content which is >= 1024 bytes, and includes no open angle bracket.
// http://code.google.com/p/chromium/issues/detail?id=3521
TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
  // Make a large input, with 1024 bytes of "x".
  std::string content;
  content.resize(1024);
  std::fill(content.begin(), content.end(), 'x');

  // content.size() >= 1024 so the sniff is unambiguous.
  std::string mime_type;
  EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
                            "text/xml", &mime_type));
  EXPECT_EQ("text/xml", mime_type);
}

// Test content which is >= 1024 bytes, and includes a binary looking byte.
// http://code.google.com/p/chromium/issues/detail?id=15314
TEST(MimeSnifferTest, LooksBinary) {
  // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
  std::string content;
  content.resize(1024);
  std::fill(content.begin(), content.end(), 'x');
  content[1000] = 0x01;

  // content.size() >= 1024 so the sniff is unambiguous.
  std::string mime_type;
  EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
                            "text/plain", &mime_type));
  EXPECT_EQ("application/octet-stream", mime_type);
}

TEST(MimeSnifferTest, OfficeTest) {
  SnifferTest tests[] = {
    // Check for URLs incorrectly reported as Microsoft Office files.
    { "Hi there",
      sizeof("Hi there")-1,
      "http://www.example.com/foo.doc",
      "application/msword", "application/octet-stream" },
    { "Hi there",
      sizeof("Hi there")-1,
      "http://www.example.com/foo.xls",
      "application/vnd.ms-excel", "application/octet-stream" },
    { "Hi there",
      sizeof("Hi there")-1,
      "http://www.example.com/foo.ppt",
      "application/vnd.ms-powerpoint", "application/octet-stream" },
    // Check for Microsoft Office files incorrectly reported as text.
    { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
      sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
      "http://www.example.com/foo.doc",
      "text/plain", "application/msword" },
    { "PK\x03\x04" "Hi there",
      sizeof("PK\x03\x04" "Hi there")-1,
      "http://www.example.com/foo.doc",
      "text/plain",
      "application/vnd.openxmlformats-officedocument."
      "wordprocessingml.document" },
    { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
      sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
      "http://www.example.com/foo.xls",
      "text/plain", "application/vnd.ms-excel" },
    { "PK\x03\x04" "Hi there",
      sizeof("PK\x03\x04" "Hi there")-1,
      "http://www.example.com/foo.xls",
      "text/plain",
      "application/vnd.openxmlformats-officedocument."
      "spreadsheetml.sheet" },
    { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
      sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
      "http://www.example.com/foo.ppt",
      "text/plain", "application/vnd.ms-powerpoint" },
    { "PK\x03\x04" "Hi there",
      sizeof("PK\x03\x04" "Hi there")-1,
      "http://www.example.com/foo.ppt",
      "text/plain",
      "application/vnd.openxmlformats-officedocument."
      "presentationml.presentation" },
  };

  TestArray(tests, arraysize(tests));
}

// TODO(thestig) Add more tests for other AV formats. Add another test case for
// RAW images.
TEST(MimeSnifferTest, AudioVideoTest) {
  std::string mime_type;
  const char kFlacTestData[] =
      "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
  EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData,
                                         sizeof(kFlacTestData),
                                         &mime_type));
  EXPECT_EQ("audio/x-flac", mime_type);
  mime_type.clear();

  const char kWMATestData[] =
      "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
  EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData,
                                         sizeof(kWMATestData),
                                         &mime_type));
  EXPECT_EQ("video/x-ms-asf", mime_type);
  mime_type.clear();

  // mp4a, m4b, m4p, and alac extension files which share the same container
  // format.
  const char kMP4TestData[] =
      "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
  EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData,
                                         sizeof(kMP4TestData),
                                         &mime_type));
  EXPECT_EQ("video/mp4", mime_type);
  mime_type.clear();

  const char kAACTestData[] =
      "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
  EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData,
                                         sizeof(kAACTestData),
                                         &mime_type));
  EXPECT_EQ("audio/mpeg", mime_type);
  mime_type.clear();
}

}  // namespace net

/* [<][>][^][v][top][bottom][index][help] */