root/Source/platform/network/HTTPParsers.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. skipWhiteSpace
  2. skipToken
  3. skipEquals
  4. skipValue
  5. isValidHTTPHeaderValue
  6. isValidHTTPToken
  7. trimInputSample
  8. contentDispositionType
  9. parseHTTPRefresh
  10. parseDate
  11. filenameFromHTTPContentDisposition
  12. extractMIMETypeFromMediaType
  13. extractCharsetFromMediaType
  14. findCharsetInMediaType
  15. parseXSSProtectionHeader
  16. parseContentTypeOptionsHeader
  17. extractReasonPhraseFromHTTPStatusLine
  18. parseXFrameOptionsHeader
  19. parseRange
  20. parseHTTPRequestLine
  21. parseHTTPHeader
  22. parseHTTPRequestBody

/*
 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
 * Copyright (C) 2009 Google Inc. All rights reserved.
 * Copyright (C) 2011 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "platform/network/HTTPParsers.h"

#include "wtf/DateMath.h"
#include "wtf/text/CString.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/WTFString.h"
#include "wtf/unicode/CharacterNames.h"

using namespace WTF;

namespace WebCore {

// true if there is more to parse, after incrementing pos past whitespace.
// Note: Might return pos == str.length()
static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
{
    unsigned len = str.length();

    if (fromHttpEquivMeta) {
        while (pos < len && str[pos] <= ' ')
            ++pos;
    } else {
        while (pos < len && (str[pos] == '\t' || str[pos] == ' '))
            ++pos;
    }

    return pos < len;
}

// Returns true if the function can match the whole token (case insensitive)
// incrementing pos on match, otherwise leaving pos unchanged.
// Note: Might return pos == str.length()
static inline bool skipToken(const String& str, unsigned& pos, const char* token)
{
    unsigned len = str.length();
    unsigned current = pos;

    while (current < len && *token) {
        if (toASCIILower(str[current]) != *token++)
            return false;
        ++current;
    }

    if (*token)
        return false;

    pos = current;
    return true;
}

// True if the expected equals sign is seen and there is more to follow.
static inline bool skipEquals(const String& str, unsigned &pos)
{
    return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
}

// True if a value present, incrementing pos to next space or semicolon, if any.
// Note: might return pos == str.length().
static inline bool skipValue(const String& str, unsigned& pos)
{
    unsigned start = pos;
    unsigned len = str.length();
    while (pos < len) {
        if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
            break;
        ++pos;
    }
    return pos != start;
}

bool isValidHTTPHeaderValue(const String& name)
{
    // FIXME: This should really match name against
    // field-value in section 4.2 of RFC 2616.

    return name.containsOnlyLatin1() && !name.contains('\r') && !name.contains('\n');
}

// See RFC 2616, Section 2.2.
bool isValidHTTPToken(const String& characters)
{
    if (characters.isEmpty())
        return false;
    for (unsigned i = 0; i < characters.length(); ++i) {
        UChar c = characters[i];
        if (c <= 0x20 || c >= 0x7F
            || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
            || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
            || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
            || c == '{' || c == '}')
        return false;
    }
    return true;
}

static const size_t maxInputSampleSize = 128;
static String trimInputSample(const char* p, size_t length)
{
    if (length > maxInputSampleSize)
        return String(p, maxInputSampleSize) + horizontalEllipsis;
    return String(p, length);
}

ContentDispositionType contentDispositionType(const String& contentDisposition)
{
    if (contentDisposition.isEmpty())
        return ContentDispositionNone;

    Vector<String> parameters;
    contentDisposition.split(';', parameters);

    if (parameters.isEmpty())
        return ContentDispositionNone;

    String dispositionType = parameters[0];
    dispositionType.stripWhiteSpace();

    if (equalIgnoringCase(dispositionType, "inline"))
        return ContentDispositionInline;

    // Some broken sites just send bogus headers like
    //
    //   Content-Disposition: ; filename="file"
    //   Content-Disposition: filename="file"
    //   Content-Disposition: name="file"
    //
    // without a disposition token... screen those out.
    if (!isValidHTTPToken(dispositionType))
        return ContentDispositionNone;

    // We have a content-disposition of "attachment" or unknown.
    // RFC 2183, section 2.8 says that an unknown disposition
    // value should be treated as "attachment"
    return ContentDispositionAttachment;
}

bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
{
    unsigned len = refresh.length();
    unsigned pos = 0;

    if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
        return false;

    while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
        ++pos;

    if (pos == len) { // no URL
        url = String();
        bool ok;
        delay = refresh.stripWhiteSpace().toDouble(&ok);
        return ok;
    } else {
        bool ok;
        delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
        if (!ok)
            return false;

        ++pos;
        skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
        unsigned urlStartPos = pos;
        if (refresh.find("url", urlStartPos, false) == urlStartPos) {
            urlStartPos += 3;
            skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
            if (refresh[urlStartPos] == '=') {
                ++urlStartPos;
                skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
            } else {
                urlStartPos = pos; // e.g. "Refresh: 0; url.html"
            }
        }

        unsigned urlEndPos = len;

        if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
            UChar quotationMark = refresh[urlStartPos];
            urlStartPos++;
            while (urlEndPos > urlStartPos) {
                urlEndPos--;
                if (refresh[urlEndPos] == quotationMark)
                    break;
            }

            // https://bugs.webkit.org/show_bug.cgi?id=27868
            // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
            // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
            // after the opening quote instead.
            if (urlEndPos == urlStartPos)
                urlEndPos = len;
        }

        url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
        return true;
    }
}

double parseDate(const String& value)
{
    return parseDateFromNullTerminatedCharacters(value.utf8().data());
}

// FIXME: This function doesn't comply with RFC 6266.
// For example, this function doesn't handle the interaction between " and ;
// that arises from quoted-string, nor does this function properly unquote
// attribute values. Further this function appears to process parameter names
// in a case-sensitive manner. (There are likely other bugs as well.)
String filenameFromHTTPContentDisposition(const String& value)
{
    Vector<String> keyValuePairs;
    value.split(';', keyValuePairs);

    unsigned length = keyValuePairs.size();
    for (unsigned i = 0; i < length; i++) {
        size_t valueStartPos = keyValuePairs[i].find('=');
        if (valueStartPos == kNotFound)
            continue;

        String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();

        if (key.isEmpty() || key != "filename")
            continue;

        String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();

        // Remove quotes if there are any
        if (value[0] == '\"')
            value = value.substring(1, value.length() - 2);

        return value;
    }

    return String();
}

AtomicString extractMIMETypeFromMediaType(const AtomicString& mediaType)
{
    StringBuilder mimeType;
    unsigned length = mediaType.length();
    mimeType.reserveCapacity(length);
    for (unsigned i = 0; i < length; i++) {
        UChar c = mediaType[i];

        if (c == ';')
            break;

        // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
        // type header field, Content-Type. In such cases, the media type string passed here may contain
        // the multiple values separated by commas. For now, this code ignores text after the first comma,
        // which prevents it from simply failing to parse such types altogether. Later for better
        // compatibility we could consider using the first or last valid MIME type instead.
        // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
        if (c == ',')
            break;

        // FIXME: The following is not correct. RFC 2616 allows linear white space before and
        // after the MIME type, but not within the MIME type itself. And linear white space
        // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
        // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
        if (isSpaceOrNewline(c))
            continue;

        mimeType.append(c);
    }

    if (mimeType.length() == length)
        return mediaType;
    return mimeType.toAtomicString();
}

String extractCharsetFromMediaType(const String& mediaType)
{
    unsigned pos, len;
    findCharsetInMediaType(mediaType, pos, len);
    return mediaType.substring(pos, len);
}

void findCharsetInMediaType(const String& mediaType, unsigned& charsetPos, unsigned& charsetLen, unsigned start)
{
    charsetPos = start;
    charsetLen = 0;

    size_t pos = start;
    unsigned length = mediaType.length();

    while (pos < length) {
        pos = mediaType.find("charset", pos, false);
        if (pos == kNotFound || !pos) {
            charsetLen = 0;
            return;
        }

        // is what we found a beginning of a word?
        if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
            pos += 7;
            continue;
        }

        pos += 7;

        // skip whitespace
        while (pos != length && mediaType[pos] <= ' ')
            ++pos;

        if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
            continue;

        while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
            ++pos;

        // we don't handle spaces within quoted parameter values, because charset names cannot have any
        unsigned endpos = pos;
        while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
            ++endpos;

        charsetPos = pos;
        charsetLen = endpos - pos;
        return;
    }
}

ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
{
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, ("expected 0 or 1"));
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, ("expected semicolon"));
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, ("expected equals sign"));
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, ("invalid mode directive"));
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, ("invalid report directive"));
    DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, ("duplicate mode directive"));
    DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, ("duplicate report directive"));
    DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, ("unrecognized directive"));

    unsigned pos = 0;

    if (!skipWhiteSpace(header, pos, false))
        return ReflectedXSSUnset;

    if (header[pos] == '0')
        return AllowReflectedXSS;

    if (header[pos++] != '1') {
        failureReason = failureReasonInvalidToggle;
        return ReflectedXSSInvalid;
    }

    ReflectedXSSDisposition result = FilterReflectedXSS;
    bool modeDirectiveSeen = false;
    bool reportDirectiveSeen = false;

    while (1) {
        // At end of previous directive: consume whitespace, semicolon, and whitespace.
        if (!skipWhiteSpace(header, pos, false))
            return result;

        if (header[pos++] != ';') {
            failureReason = failureReasonInvalidSeparator;
            failurePosition = pos;
            return ReflectedXSSInvalid;
        }

        if (!skipWhiteSpace(header, pos, false))
            return result;

        // At start of next directive.
        if (skipToken(header, pos, "mode")) {
            if (modeDirectiveSeen) {
                failureReason = failureReasonDuplicateMode;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            modeDirectiveSeen = true;
            if (!skipEquals(header, pos)) {
                failureReason = failureReasonInvalidEquals;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            if (!skipToken(header, pos, "block")) {
                failureReason = failureReasonInvalidMode;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            result = BlockReflectedXSS;
        } else if (skipToken(header, pos, "report")) {
            if (reportDirectiveSeen) {
                failureReason = failureReasonDuplicateReport;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            reportDirectiveSeen = true;
            if (!skipEquals(header, pos)) {
                failureReason = failureReasonInvalidEquals;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            size_t startPos = pos;
            if (!skipValue(header, pos)) {
                failureReason = failureReasonInvalidReport;
                failurePosition = pos;
                return ReflectedXSSInvalid;
            }
            reportURL = header.substring(startPos, pos - startPos);
            failurePosition = startPos; // If later semantic check deems unacceptable.
        } else {
            failureReason = failureReasonInvalidDirective;
            failurePosition = pos;
            return ReflectedXSSInvalid;
        }
    }
}

ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
{
    if (header.stripWhiteSpace().lower() == "nosniff")
        return ContentTypeOptionsNosniff;
    return ContentTypeOptionsNone;
}

String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
{
    size_t spacePos = statusLine.find(' ');
    // Remove status code from the status line.
    spacePos = statusLine.find(' ', spacePos + 1);
    return statusLine.substring(spacePos + 1);
}

XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
{
    XFrameOptionsDisposition result = XFrameOptionsNone;

    if (header.isEmpty())
        return result;

    Vector<String> headers;
    header.split(',', headers);

    for (size_t i = 0; i < headers.size(); i++) {
        String currentHeader = headers[i].stripWhiteSpace();
        XFrameOptionsDisposition currentValue = XFrameOptionsNone;
        if (equalIgnoringCase(currentHeader, "deny"))
            currentValue = XFrameOptionsDeny;
        else if (equalIgnoringCase(currentHeader, "sameorigin"))
            currentValue = XFrameOptionsSameOrigin;
        else if (equalIgnoringCase(currentHeader, "allowall"))
            currentValue = XFrameOptionsAllowAll;
        else
            currentValue = XFrameOptionsInvalid;

        if (result == XFrameOptionsNone)
            result = currentValue;
        else if (result != currentValue)
            return XFrameOptionsConflict;
    }
    return result;
}

bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
{
    // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
    // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
    // We don't support multiple range requests.

    rangeOffset = rangeEnd = rangeSuffixLength = -1;

    // The "bytes" unit identifier should be present.
    static const char bytesStart[] = "bytes=";
    if (!range.startsWith(bytesStart, false))
        return false;
    String byteRange = range.substring(sizeof(bytesStart) - 1);

    // The '-' character needs to be present.
    int index = byteRange.find('-');
    if (index == -1)
        return false;

    // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
    // Example:
    //     -500
    if (!index) {
        String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
        bool ok;
        long long value = suffixLengthString.toInt64Strict(&ok);
        if (ok)
            rangeSuffixLength = value;
        return true;
    }

    // Otherwise, the first-byte-position and the last-byte-position are provied.
    // Examples:
    //     0-499
    //     500-
    String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
    bool ok;
    long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
    if (!ok)
        return false;

    String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
    long long lastBytePos = -1;
    if (!lastBytePosStr.isEmpty()) {
        lastBytePos = lastBytePosStr.toInt64Strict(&ok);
        if (!ok)
            return false;
    }

    if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
        return false;

    rangeOffset = firstBytePos;
    rangeEnd = lastBytePos;
    return true;
}

// HTTP/1.1 - RFC 2616
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
// Request-Line = Method SP Request-URI SP HTTP-Version CRLF
size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
{
    method = String();
    url = String();
    httpVersion = Unknown;

    const char* space1 = 0;
    const char* space2 = 0;
    const char* p;
    size_t consumedLength;

    for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
        if (*p == ' ') {
            if (!space1)
                space1 = p;
            else if (!space2)
                space2 = p;
        } else if (*p == '\n') {
            break;
        }
    }

    // Haven't finished header line.
    if (consumedLength == length) {
        failureReason = "Incomplete Request Line";
        return 0;
    }

    // RequestLine does not contain 3 parts.
    if (!space1 || !space2) {
        failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
        return 0;
    }

    // The line must end with "\r\n".
    const char* end = p + 1;
    if (*(end - 2) != '\r') {
        failureReason = "Request line does not end with CRLF";
        return 0;
    }

    // Request Method.
    method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.

    // Request URI.
    url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.

    // HTTP Version.
    String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
    if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
        httpVersion = Unknown;
    else if (httpVersionString[7] == '0')
        httpVersion = HTTP_1_0;
    else if (httpVersionString[7] == '1')
        httpVersion = HTTP_1_1;
    else
        httpVersion = Unknown;

    return end - data;
}

size_t parseHTTPHeader(const char* start, size_t length, String& failureReason, AtomicString& nameStr, AtomicString& valueStr)
{
    const char* p = start;
    const char* end = start + length;

    Vector<char> name;
    Vector<char> value;
    nameStr = nullAtom;
    valueStr = nullAtom;

    for (; p < end; p++) {
        switch (*p) {
        case '\r':
            if (name.isEmpty()) {
                if (p + 1 < end && *(p + 1) == '\n')
                    return (p + 2) - start;
                failureReason = "CR doesn't follow LF at " + trimInputSample(p, end - p);
                return 0;
            }
            failureReason = "Unexpected CR in name at " + trimInputSample(name.data(), name.size());
            return 0;
        case '\n':
            failureReason = "Unexpected LF in name at " + trimInputSample(name.data(), name.size());
            return 0;
        case ':':
            break;
        default:
            name.append(*p);
            continue;
        }
        if (*p == ':') {
            ++p;
            break;
        }
    }

    for (; p < end && *p == 0x20; p++) { }

    for (; p < end; p++) {
        switch (*p) {
        case '\r':
            break;
        case '\n':
            failureReason = "Unexpected LF in value at " + trimInputSample(value.data(), value.size());
            return 0;
        default:
            value.append(*p);
        }
        if (*p == '\r') {
            ++p;
            break;
        }
    }
    if (p >= end || *p != '\n') {
        failureReason = "CR doesn't follow LF after value at " + trimInputSample(p, end - p);
        return 0;
    }
    nameStr = AtomicString::fromUTF8(name.data(), name.size());
    valueStr = AtomicString::fromUTF8(value.data(), value.size());
    if (nameStr.isNull()) {
        failureReason = "Invalid UTF-8 sequence in header name";
        return 0;
    }
    if (valueStr.isNull()) {
        failureReason = "Invalid UTF-8 sequence in header value";
        return 0;
    }
    return p - start;
}

size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
{
    body.clear();
    body.append(data, length);

    return length;
}

}

/* [<][>][^][v][top][bottom][index][help] */