This source file includes the following definitions.
- UTF7Encoding
- m_backslashAsCurrencySymbol
- m_backslashAsCurrencySymbol
- decode
- encode
- normalizeAndEncode
- usesVisualOrdering
- backslashAsCurrencySymbol
- isNonByteBasedEncoding
- isUTF7Encoding
- closestByteBasedEquivalent
- encodingForFormSubmission
- ASCIIEncoding
- Latin1Encoding
- UTF16BigEndianEncoding
- UTF16LittleEndianEncoding
- UTF32BigEndianEncoding
- UTF32LittleEndianEncoding
- UTF8Encoding
- WindowsLatin1Encoding
#include "config.h"
#include "wtf/text/TextEncoding.h"
#include "wtf/text/TextEncodingRegistry.h"
#include <unicode/unorm.h>
#include "wtf/OwnPtr.h"
#include "wtf/StdLibExtras.h"
#include "wtf/text/CString.h"
#include "wtf/text/WTFString.h"
namespace WTF {
// File-local accessor for the TextEncoding built from the name "UTF-7".
// The object is a function-local static: it is constructed the first time
// this function runs and the same instance is returned on every call.
static const TextEncoding& UTF7Encoding()
{
    static TextEncoding utf7Instance("UTF-7");
    return utf7Instance;
}
// Builds a TextEncoding from the C-string encoding name |name|.
// m_name receives whatever atomicCanonicalTextEncodingName(name) returns;
// m_backslashAsCurrencySymbol is then computed by backslashAsCurrencySymbol(),
// which reads m_name.
// NOTE(review): this relies on m_name being declared before
// m_backslashAsCurrencySymbol in the class, so that it is initialized first --
// confirm against the header.
TextEncoding::TextEncoding(const char* name)
: m_name(atomicCanonicalTextEncodingName(name))
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
}
// Builds a TextEncoding from the encoding name held in the String |name|.
// m_name receives whatever atomicCanonicalTextEncodingName(name) returns;
// m_backslashAsCurrencySymbol is then computed by backslashAsCurrencySymbol(),
// which reads m_name.
// NOTE(review): this relies on m_name being declared before
// m_backslashAsCurrencySymbol in the class, so that it is initialized first --
// confirm against the header.
TextEncoding::TextEncoding(const String& name)
: m_name(atomicCanonicalTextEncodingName(name))
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
}
// Decodes |length| bytes starting at |data| using a codec created for this
// encoding and returns the codec's result.
//
// |stopOnError| and |sawError| are forwarded to the codec unchanged; the codec
// is always invoked with DataEOF as its flush argument.
// Returns a default-constructed String when this encoding has no name
// (m_name is null); in that case |sawError| is not touched.
String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
{
    // Without a name there is nothing to create a codec from.
    if (!m_name)
        return String();

    OwnPtr<TextCodec> codec = newTextCodec(*this);
    return codec->decode(data, length, DataEOF, stopOnError, sawError);
}
// Encodes |string| with a codec created for this encoding.
//
// |handling| is passed through to the codec untouched.
// Returns a default-constructed CString when this encoding has no name, and
// a CString built from "" when |string| is empty. Otherwise the codec is fed
// the string's 8-bit or 16-bit character buffer, whichever the string holds.
CString TextEncoding::encode(const String& string, UnencodableHandling handling) const
{
    if (!m_name)
        return CString();

    if (string.isEmpty())
        return "";

    OwnPtr<TextCodec> codec = newTextCodec(*this);

    // Pick the codec overload matching the string's internal representation.
    if (string.is8Bit())
        return codec->encode(string.characters8(), string.length(), handling);
    return codec->encode(string.characters16(), string.length(), handling);
}
// Encodes |string| with a codec created for this encoding, normalizing
// 16-bit text to Unicode NFC first when ICU's quick check does not report it
// as already normalized.
//
// |handling| is passed through to the codec untouched.
// Returns a default-constructed CString when this encoding has no name, and
// a CString built from "" when |string| is empty.
//
// If ICU reports a failure during normalization, the original (un-normalized)
// text is encoded instead of whatever is left in the scratch buffer.
CString TextEncoding::normalizeAndEncode(const String& string, UnencodableHandling handling) const
{
    if (!m_name)
        return CString();

    if (string.isEmpty())
        return "";

    // 8-bit strings are encoded as-is, with no normalization pass.
    // NOTE(review): presumably safe because 8-bit strings hold only
    // U+0000..U+00FF, which NFC leaves unchanged (UAX #15) -- confirm.
    if (string.is8Bit())
        return newTextCodec(*this)->encode(string.characters8(), string.length(), handling);

    const UChar* source = string.characters16();
    size_t length = string.length();

    // Declared outside the branch below because |source| may end up pointing
    // into this buffer, which must then stay alive until the final encode().
    Vector<UChar> normalizedCharacters;

    UErrorCode err = U_ZERO_ERROR;
    if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) {
        // First attempt: an output buffer the same size as the input.
        normalizedCharacters.grow(length);
        int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            // ICU returned the required length; retry with a buffer of
            // exactly that size.
            err = U_ZERO_ERROR;
            normalizedCharacters.resize(normalizedLength);
            normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
        }
        ASSERT(U_SUCCESS(err));

        // The ASSERT above vanishes in release builds. Only switch over to
        // the normalized buffer when ICU actually succeeded; on failure the
        // buffer contents and |normalizedLength| are not meaningful (ICU
        // returns 0 immediately when handed an already-failed error code),
        // so keep encoding the caller's original text rather than dropping
        // or corrupting it.
        if (U_SUCCESS(err)) {
            source = normalizedCharacters.data();
            length = normalizedLength;
        }
    }

    return newTextCodec(*this)->encode(source, length, handling);
}
// Reports whether this encoding's name equals the canonical name that
// atomicCanonicalTextEncodingName() yields for "ISO-8859-8".
//
// Always false while noExtendedTextEncodingNameUsed() is true; in that case
// the canonical name is not even looked up.
bool TextEncoding::usesVisualOrdering() const
{
    if (noExtendedTextEncodingNameUsed())
        return false;

    // Looked up once and cached for all later calls.
    static const char* const iso88598Name = atomicCanonicalTextEncodingName("ISO-8859-8");
    return m_name == iso88598Name;
}
// Returns the character to use in place of a backslash for this encoding:
// U+00A5 when shouldShowBackslashAsCurrencySymbolIn(m_name) is true,
// otherwise a plain backslash.
UChar TextEncoding::backslashAsCurrencySymbol() const
{
    if (shouldShowBackslashAsCurrencySymbolIn(m_name))
        return 0x00A5;
    return '\\';
}
// True when this encoding equals UTF-16LE or UTF-16BE, or -- only if
// noExtendedTextEncodingNameUsed() is false -- UTF-32BE or UTF-32LE.
//
// While noExtendedTextEncodingNameUsed() is true the UTF-32 accessors are
// never called.
bool TextEncoding::isNonByteBasedEncoding() const
{
    // Queried first, before any encoding accessor runs, to keep the call
    // order unchanged.
    const bool noExtendedNames = noExtendedTextEncodingNameUsed();

    if (*this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncoding())
        return true;

    if (noExtendedNames)
        return false;

    return *this == UTF32BigEndianEncoding()
        || *this == UTF32LittleEndianEncoding();
}
// True when this encoding equals the UTF-7 encoding.
// Always false while noExtendedTextEncodingNameUsed() is true; the UTF-7
// instance is not touched in that case.
bool TextEncoding::isUTF7Encoding() const
{
    return !noExtendedTextEncodingNameUsed() && *this == UTF7Encoding();
}
// Returns UTF8Encoding() when isNonByteBasedEncoding() is true for this
// encoding; otherwise returns a reference to this object itself.
const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
{
    return isNonByteBasedEncoding() ? UTF8Encoding() : *this;
}
// Returns the encoding to submit forms with: this object itself, unless it
// is a non-byte-based encoding or UTF-7, in which case UTF8Encoding() is
// returned instead.
const TextEncoding& TextEncoding::encodingForFormSubmission() const
{
    // Neither check trips: this encoding is used unchanged.
    if (!isNonByteBasedEncoding() && !isUTF7Encoding())
        return *this;
    return UTF8Encoding();
}
// Accessor for the shared TextEncoding built from the name "ASCII".
// The instance is a function-local static, created on the first call.
const TextEncoding& ASCIIEncoding()
{
    static TextEncoding asciiInstance("ASCII");
    return asciiInstance;
}
// Accessor for the shared TextEncoding built from the name "latin1".
// The instance is a function-local static, created on the first call.
const TextEncoding& Latin1Encoding()
{
    static TextEncoding latin1Instance("latin1");
    return latin1Instance;
}
// Accessor for the shared TextEncoding built from the name "UTF-16BE".
// The instance is a function-local static, created on the first call.
const TextEncoding& UTF16BigEndianEncoding()
{
    static TextEncoding utf16BEInstance("UTF-16BE");
    return utf16BEInstance;
}
// Accessor for the shared TextEncoding built from the name "UTF-16LE".
// The instance is a function-local static, created on the first call.
const TextEncoding& UTF16LittleEndianEncoding()
{
    static TextEncoding utf16LEInstance("UTF-16LE");
    return utf16LEInstance;
}
// Accessor for the shared TextEncoding built from the name "UTF-32BE".
// The instance is a function-local static, created on the first call.
const TextEncoding& UTF32BigEndianEncoding()
{
    static TextEncoding utf32BEInstance("UTF-32BE");
    return utf32BEInstance;
}
// Accessor for the shared TextEncoding built from the name "UTF-32LE".
// The instance is a function-local static, created on the first call.
const TextEncoding& UTF32LittleEndianEncoding()
{
    static TextEncoding utf32LEInstance("UTF-32LE");
    return utf32LEInstance;
}
// Accessor for the shared TextEncoding built from the name "UTF-8".
// The instance is a function-local static, created on the first call.
// Every call asserts that the instance reports itself as valid.
const TextEncoding& UTF8Encoding()
{
    static TextEncoding utf8Instance("UTF-8");
    ASSERT(utf8Instance.isValid());
    return utf8Instance;
}
// Accessor for the shared TextEncoding built from the name "WinLatin1".
// The instance is a function-local static, created on the first call.
const TextEncoding& WindowsLatin1Encoding()
{
    static TextEncoding windowsLatin1Instance("WinLatin1");
    return windowsLatin1Instance;
}
}