root/Source/wtf/text/StringUTF8Adaptor.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef StringUTF8Adaptor_h
#define StringUTF8Adaptor_h

#include "wtf/text/CString.h"
#include "wtf/text/TextEncoding.h"
#include "wtf/text/WTFString.h"

namespace WTF {

// This class lets you get UTF-8 data out of a String without mallocing a
// separate buffer to hold the data if the String happens to be 8 bit and
// contain only ASCII characters.
class StringUTF8Adaptor {
public:
    enum ShouldNormalize {
        DoNotNormalize,
        Normalize
    };

    explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize = DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables)
        : m_data(0)
        , m_length(0)
    {
        if (string.isEmpty())
            return;
        // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses UTF-8
        // when processing 8 bit strings. If |relative| is entirely ASCII, we luck out
        // and can avoid mallocing a new buffer to hold the UTF-8 data because UTF-8
        // and Latin-1 use the same code units for ASCII code points.
        if (string.is8Bit() && string.containsOnlyASCII()) {
            m_data = reinterpret_cast<const char*>(string.characters8());
            m_length = string.length();
        } else {
            if (normalize == Normalize)
                m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handling);
            else
                m_utf8Buffer = string.utf8();
            m_data = m_utf8Buffer.data();
            m_length = m_utf8Buffer.length();
        }
    }

    const char* data() const { return m_data; }
    size_t length() const { return m_length; }

private:
    CString m_utf8Buffer;
    const char* m_data;
    size_t m_length;
};

} // namespace WTF

using WTF::StringUTF8Adaptor;

#endif // StringUTF8Adaptor_h

/* [<][>][^][v][top][bottom][index][help] */