This source file includes the following definitions.
- isMultipart
- contentType
- charset
- contentTransferEncoding
- contentLocation
- multiPartType
- endOfPartBoundary
- endOfDocumentBoundary
- retrieveKeyValuePairs
- parseHeader
- parseContentTransferEncoding
- skipLinesUntilBoundaryFound
- parseArchive
- parseArchiveWithHeader
- addResourceToArchive
- parseNextPart
- frameCount
- frameAt
- subResourceCount
- subResourceAt
#include "config.h"
#include "platform/mhtml/MHTMLParser.h"
#include "platform/MIMETypeRegistry.h"
#include "platform/mhtml/MHTMLArchive.h"
#include "platform/network/ParsedContentType.h"
#include "platform/text/QuotedPrintable.h"
#include "wtf/HashMap.h"
#include "wtf/RefCounted.h"
#include "wtf/text/Base64.h"
#include "wtf/text/CString.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/StringConcatenate.h"
#include "wtf/text/StringHash.h"
#include "wtf/text/WTFString.h"
namespace WebCore {
// Holds the fields of one MIME header block (that of the whole document or of
// a single part) that the MHTML parser cares about. Instances are ref-counted
// and can only be created through parseHeader(); the constructor is private.
class MIMEHeader : public RefCounted<MIMEHeader> {
public:
    // Content-Transfer-Encoding values understood by the parser. Unknown is
    // the value a freshly constructed header starts with.
    enum Encoding {
        QuotedPrintable,
        Base64,
        EightBit,
        SevenBit,
        Binary,
        Unknown
    };

    // Builds a header from the key/value lines read from |crLFLineReader|.
    // Returns null when a multipart content type carries no boundary.
    static PassRefPtr<MIMEHeader> parseHeader(SharedBufferChunkReader* crLFLineReader);

    // True when the content type starts with "multipart/".
    bool isMultipart() const { return m_contentType.startsWith("multipart/"); }

    // Plain accessors for the parsed header fields.
    String contentType() const { return m_contentType; }
    String charset() const { return m_charset; }
    Encoding contentTransferEncoding() const { return m_contentTransferEncoding; }
    String contentLocation() const { return m_contentLocation; }

    // Multipart headers only.
    String multiPartType() const { return m_multipartType; }
    // The boundary parameter prefixed with "--".
    String endOfPartBoundary() const { return m_endOfPartBoundary; }
    // The end-of-part boundary followed by a trailing "--".
    String endOfDocumentBoundary() const { return m_endOfDocumentBoundary; }

private:
    MIMEHeader();

    // Maps the text of a Content-Transfer-Encoding header to an Encoding.
    static Encoding parseContentTransferEncoding(const String&);

    String m_contentType;
    String m_charset;
    Encoding m_contentTransferEncoding;
    String m_contentLocation;
    String m_multipartType;
    String m_endOfPartBoundary;
    String m_endOfDocumentBoundary;
};
typedef HashMap<String, String> KeyValueMap;

// Reads "key: value" header lines from |buffer| until an empty line or the end
// of the data is reached, and returns them as a map.
//
// - Keys are lower-cased; keys and values are stripped of surrounding
//   whitespace.
// - A line starting with a tab is treated as a continuation of the value of
//   the header line before it.
// - Lines without a ':' are ignored.
// - When the same key appears several times, the last value wins (this is
//   what the logged message has always announced).
//
// NOTE(review): only a leading tab is recognised as a continuation marker; a
// folded line starting with a space is handled like a new header line.
static KeyValueMap retrieveKeyValuePairs(WebCore::SharedBufferChunkReader* buffer)
{
    KeyValueMap keyValuePairs;
    String line;
    String key;
    StringBuilder value;
    while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
        if (line.isEmpty())
            break; // An empty line marks the end of the key/value section.

        if (line[0] == '\t') {
            // Continuation line. The data is untrusted, so a continuation
            // with no header line in front of it is dropped rather than being
            // allowed to leak into the value of whichever header comes next.
            if (key.isEmpty()) {
                WTF_LOG_ERROR("Continuation line found in MIME header without a preceding key, ignored.");
                continue;
            }
            value.append(line.substring(1));
            continue;
        }

        // A new header line starts here: commit the pending pair, if any.
        if (!key.isEmpty()) {
            if (keyValuePairs.find(key) != keyValuePairs.end())
                WTF_LOG_ERROR("Key duplicate found in MIME header. Key is '%s', previous value replaced.", key.ascii().data());
            // set() overwrites an existing entry, add() would silently keep
            // the old one and contradict the message above.
            keyValuePairs.set(key, value.toString().stripWhiteSpace());
        }
        // Always start from a clean slate so that text collected for a line
        // that turned out to have an empty key cannot pollute the next value.
        key = String();
        value.clear();

        size_t colonIndex = line.find(':');
        if (colonIndex == kNotFound) {
            // Not a key/value pair, ignore.
            continue;
        }
        key = line.substring(0, colonIndex).lower().stripWhiteSpace();
        value.append(line.substring(colonIndex + 1));
    }

    // Store the last pair if there is one.
    if (!key.isEmpty())
        keyValuePairs.set(key, value.toString().stripWhiteSpace());
    return keyValuePairs;
}
// Parses the header block at the current position of |buffer|.
//
// Picks up the content-type, content-transfer-encoding and content-location
// fields; every other field is ignored. For multipart content types the "type"
// and "boundary" parameters are extracted and the two boundary strings are
// derived ("--" + boundary, and that followed by "--").
//
// Returns null if the content type is multipart but has no boundary parameter.
PassRefPtr<MIMEHeader> MIMEHeader::parseHeader(SharedBufferChunkReader* buffer)
{
    RefPtr<MIMEHeader> header = adoptRef(new MIMEHeader);
    KeyValueMap fields = retrieveKeyValuePairs(buffer);

    KeyValueMap::iterator field = fields.find("content-type");
    if (field != fields.end()) {
        ParsedContentType parsedContentType(field->value);
        header->m_contentType = parsedContentType.mimeType();
        if (header->isMultipart()) {
            header->m_multipartType = parsedContentType.parameterValueForName("type");
            header->m_endOfPartBoundary = parsedContentType.parameterValueForName("boundary");
            if (header->m_endOfPartBoundary.isNull()) {
                WTF_LOG_ERROR("No boundary found in multipart MIME header.");
                return nullptr;
            }
            // Boundary lines in the body are the parameter prefixed with "--";
            // the closing boundary additionally ends with "--".
            header->m_endOfPartBoundary.insert("--", 0);
            header->m_endOfDocumentBoundary = header->m_endOfPartBoundary;
            header->m_endOfDocumentBoundary.append("--");
        } else {
            header->m_charset = parsedContentType.charset().stripWhiteSpace();
        }
    }

    field = fields.find("content-transfer-encoding");
    if (field != fields.end())
        header->m_contentTransferEncoding = parseContentTransferEncoding(field->value);

    field = fields.find("content-location");
    if (field != fields.end())
        header->m_contentLocation = field->value;

    return header.release();
}
// Translates the text of a Content-Transfer-Encoding header into an Encoding.
// The comparison ignores case and surrounding whitespace. Unrecognised values
// are logged and reported as Unknown.
MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(const String& text)
{
    static const struct {
        const char* name;
        Encoding encoding;
    } knownEncodings[] = {
        { "base64", Base64 },
        { "quoted-printable", QuotedPrintable },
        { "8bit", EightBit },
        { "7bit", SevenBit },
        { "binary", Binary },
    };

    const String normalized = text.stripWhiteSpace().lower();
    for (size_t i = 0; i < sizeof(knownEncodings) / sizeof(knownEncodings[0]); ++i) {
        if (normalized == knownEncodings[i].name)
            return knownEncodings[i].encoding;
    }

    WTF_LOG_ERROR("Unknown encoding '%s' found in MIME header.", text.ascii().data());
    return Unknown;
}
// A new header has no known transfer encoding until parseHeader() finds one;
// all string fields start out as null strings.
MIMEHeader::MIMEHeader()
    : m_contentTransferEncoding(Unknown)
{
}
// Consumes lines from |lineReader| up to and including the first one that is
// exactly |boundary|. Returns true if such a line was found, false if the
// reader ran out of data first.
static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, const String& boundary)
{
    for (;;) {
        String currentLine = lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
        if (currentLine.isNull())
            return false;
        if (currentLine == boundary)
            return true;
    }
}
// Creates a parser over |data|. The internal reader initially splits the data
// on CRLF, i.e. it hands out one line at a time.
MHTMLParser::MHTMLParser(SharedBuffer* data)
    : m_lineReader(data, "\r\n")
{
}
// Entry point: parses the top-level MIME header and then the archive it
// describes. Returns null when parsing fails.
PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive()
{
    // The temporary returned by parseHeader() keeps the header alive for the
    // duration of the call below.
    return parseArchiveWithHeader(MIMEHeader::parseHeader(&m_lineReader).get());
}
// Parses the archive (or nested sub-archive) described by |header|, reading
// from the parser's line reader.
//
// - A null |header| is an error.
// - A non-multipart header yields an archive whose main resource is the single
//   part that follows.
// - A multipart header yields one resource per part; parts whose content type
//   is "multipart/alternative" are parsed recursively as subframe archives.
//
// Returns null (after logging) as soon as any header or part fails to parse.
PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
{
    if (!header) {
        WTF_LOG_ERROR("Failed to parse MHTML part: no header.");
        return nullptr;
    }

    RefPtr<MHTMLArchive> archive = MHTMLArchive::create();

    if (!header->isMultipart()) {
        // Single-part document: there are no boundaries to look for, so the
        // end-of-archive flag is of no interest here.
        bool ignoredEndOfArchive = false;
        RefPtr<ArchiveResource> mainResource = parseNextPart(*header, String(), String(), ignoredEndOfArchive);
        if (!mainResource)
            return nullptr;
        archive->setMainResource(mainResource);
        return archive;
    }

    const String partBoundary = header->endOfPartBoundary();
    const String documentBoundary = header->endOfDocumentBoundary();

    // Skip whatever precedes the first part.
    skipLinesUntilBoundaryFound(m_lineReader, partBoundary);

    bool reachedEndOfArchive = false;
    do {
        RefPtr<MIMEHeader> partHeader = MIMEHeader::parseHeader(&m_lineReader);
        if (!partHeader) {
            WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header.");
            return nullptr;
        }

        if (partHeader->contentType() == "multipart/alternative") {
            // Nested archive: parse it with its own header, then move on to
            // the next boundary of the enclosing archive.
            RefPtr<MHTMLArchive> nestedArchive = parseArchiveWithHeader(partHeader.get());
            if (!nestedArchive) {
                WTF_LOG_ERROR("Failed to parse MHTML subframe.");
                return nullptr;
            }
            bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, partBoundary);
            ASSERT_UNUSED(endOfPartReached, endOfPartReached);
            if (nestedArchive->mainResource())
                addResourceToArchive(nestedArchive->mainResource(), archive.get());
            archive->addSubframeArchive(nestedArchive);
        } else {
            RefPtr<ArchiveResource> part = parseNextPart(*partHeader, partBoundary, documentBoundary, reachedEndOfArchive);
            if (!part) {
                WTF_LOG_ERROR("Failed to parse MHTML part.");
                return nullptr;
            }
            addResourceToArchive(part.get(), archive.get());
        }
    } while (!reachedEndOfArchive);

    return archive.release();
}
// Files |resource| either as a sub-resource or as the main resource of a
// frame.
//
// A resource counts as a frame document when its MIME type is a supported
// non-image type that is neither a JavaScript type nor "text/css"; everything
// else goes to the sub-resource list. The first frame document becomes the
// main resource of |archive|; each later one gets a fresh archive of its own.
// Either way the archive holding it is appended to the frame list.
void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive* archive)
{
    const AtomicString& mimeType = resource->mimeType();
    const bool isFrameDocument = MIMETypeRegistry::isSupportedNonImageMIMEType(mimeType)
        && !MIMETypeRegistry::isSupportedJavaScriptMIMEType(mimeType)
        && !(mimeType == "text/css");

    if (!isFrameDocument) {
        m_resources.append(resource);
        return;
    }

    RefPtr<MHTMLArchive> frame(archive);
    if (archive->mainResource())
        frame = MHTMLArchive::create();
    frame->setMainResource(resource);
    m_frames.append(frame);
}
// Reads the body of one part from the line reader, decodes it according to the
// content transfer encoding in |mimeHeader| and wraps it in an ArchiveResource.
//
// |endOfPartBoundary| / |endOfDocumentBoundary| are the boundary lines that
// terminate the part; both are empty for a single-part document, in which case
// the part extends to the end of the data. |endOfArchiveReached| is set to
// true when the part was terminated by the end-of-document boundary.
//
// An Unknown transfer encoding is treated as Binary. Binary parts are read as
// raw bytes up to the boundary and therefore require one.
//
// Returns null (after logging) when the terminating boundary is missing, when
// binary content is requested without a boundary, or when base64 decoding
// fails.
PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)
{
    ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

    // If no content transfer encoding is specified, default to binary.
    MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEncoding();
    if (contentTransferEncoding == MIMEHeader::Unknown)
        contentTransferEncoding = MIMEHeader::Binary;

    RefPtr<SharedBuffer> content = SharedBuffer::create();
    const bool checkBoundary = !endOfPartBoundary.isEmpty();
    bool endOfPartReached = false;
    if (contentTransferEncoding == MIMEHeader::Binary) {
        if (!checkBoundary) {
            WTF_LOG_ERROR("Binary contents requires end of part");
            return nullptr;
        }
        // Binary data is not line oriented: temporarily use the boundary
        // itself as the separator so the whole part comes back as one chunk.
        m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
        Vector<char> part;
        if (!m_lineReader.nextChunk(part)) {
            WTF_LOG_ERROR("Binary contents requires end of part");
            return nullptr;
        }
        content->append(part);
        m_lineReader.setSeparator("\r\n");

        // What follows the boundary tells the two kinds apart: "--" for the
        // end-of-document boundary, CRLF for an ordinary end of part.
        Vector<char> nextChars;
        if (m_lineReader.peek(nextChars, 2) != 2) {
            WTF_LOG_ERROR("Invalid seperator.");
            return nullptr;
        }
        endOfPartReached = true;
        ASSERT(nextChars.size() == 2);
        endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
        if (!endOfArchiveReached) {
            // Consume the rest of the boundary line; it must be empty.
            String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
            if (!line.isEmpty()) {
                WTF_LOG_ERROR("No CRLF at end of binary section.");
                return nullptr;
            }
        }
    } else {
        String line;
        while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
            endOfArchiveReached = (line == endOfDocumentBoundary);
            if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
                endOfPartReached = true;
                break;
            }
            // utf8() rather than ascii() so that special characters survive.
            // The number of bytes to copy must come from the UTF-8 buffer:
            // line.length() counts characters of the decoded string, which is
            // smaller than the byte count as soon as the line contains a
            // non-ASCII character, and would truncate the line.
            CString utf8Line = line.utf8();
            content->append(utf8Line.data(), utf8Line.length());
            if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {
                // The line reader strips the line ending; put it back for the
                // quoted-printable decoder.
                content->append("\r\n", 2);
            }
        }
    }
    if (!endOfPartReached && checkBoundary) {
        WTF_LOG_ERROR("No bounday found for MHTML part.");
        return nullptr;
    }

    Vector<char> data;
    switch (contentTransferEncoding) {
    case MIMEHeader::Base64:
        if (!base64Decode(content->data(), content->size(), data)) {
            WTF_LOG_ERROR("Invalid base64 content for MHTML part.");
            return nullptr;
        }
        break;
    case MIMEHeader::QuotedPrintable:
        quotedPrintableDecode(content->data(), content->size(), data);
        break;
    case MIMEHeader::EightBit:
    case MIMEHeader::SevenBit:
    case MIMEHeader::Binary:
        // No decoding needed.
        data.append(content->data(), content->size());
        break;
    default:
        WTF_LOG_ERROR("Invalid encoding for MHTML part.");
        return nullptr;
    }
    RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
    KURL location = KURL(KURL(), mimeHeader.contentLocation());
    return ArchiveResource::create(contentBuffer, location, AtomicString(mimeHeader.contentType()), AtomicString(mimeHeader.charset()), String());
}
// Number of frame archives collected so far.
size_t MHTMLParser::frameCount() const
{
    return m_frames.size();
}
// Returns the frame archive at |index| without transferring ownership.
// |index| is not range-checked here; callers are expected to stay below
// frameCount().
MHTMLArchive* MHTMLParser::frameAt(size_t index) const
{
    return m_frames[index].get();
}
// Number of sub-resources collected so far.
size_t MHTMLParser::subResourceCount() const
{
    return m_resources.size();
}
// Returns the sub-resource at |index| without transferring ownership.
// |index| is not range-checked here; callers are expected to stay below
// subResourceCount().
ArchiveResource* MHTMLParser::subResourceAt(size_t index) const
{
    return m_resources[index].get();
}
}