This source file includes the following definitions.
- encodingRegistryMutex
- checkExistingName
- checkExistingName
- isUndesiredAlias
- addToTextEncodingNameMap
- addToTextCodecMap
- pruneBlacklistedCodecs
- buildBaseTextCodecMaps
- addEncodingName
- buildQuirksSets
- isJapaneseEncoding
- shouldShowBackslashAsCurrencySymbolIn
- extendTextCodecMaps
- newTextCodec
- atomicCanonicalTextEncodingName
- atomicCanonicalTextEncodingName
- atomicCanonicalTextEncodingName
- noExtendedTextEncodingNameUsed
- dumpTextEncodingNameMap
#include "config.h"
#include "wtf/text/TextEncodingRegistry.h"
#include "wtf/ASCIICType.h"
#include "wtf/CurrentTime.h"
#include "wtf/HashMap.h"
#include "wtf/HashSet.h"
#include "wtf/MainThread.h"
#include "wtf/StdLibExtras.h"
#include "wtf/StringExtras.h"
#include "wtf/ThreadingPrimitives.h"
#include "wtf/text/CString.h"
#include "wtf/text/TextCodecICU.h"
#include "wtf/text/TextCodecLatin1.h"
#include "wtf/text/TextCodecUTF16.h"
#include "wtf/text/TextCodecUTF8.h"
#include "wtf/text/TextCodecUserDefined.h"
#include "wtf/text/TextEncoding.h"
namespace WTF {
// Longest encoding name, not counting the terminating NUL, that the registry
// handles: aliases are asserted to fit within it when they are registered, and
// counted-buffer lookups of longer names return 0.
const size_t maxEncodingNameLength = 63;
// Hash functions for C-string encoding names. Both equal() and hash() pass
// every character through toASCIILower(), so names differing only in ASCII
// letter case compare equal and hash to the same value.
struct TextEncodingNameHash {
// Returns true when the NUL-terminated strings |s1| and |s2| match after each
// character is passed through toASCIILower().
static bool equal(const char* s1, const char* s2)
{
char c1;
char c2;
do {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170051106
// NOTE(review): this variant is selected for exactly one MSVC build. It
// lowercases before storing instead of at comparison time; presumably a
// compiler-specific workaround -- confirm before removing or merging branches.
c1 = toASCIILower(*s1++);
c2 = toASCIILower(*s2++);
if (c1 != c2)
return false;
#else
c1 = *s1++;
c2 = *s2++;
if (toASCIILower(c1) != toASCIILower(c2))
return false;
#endif
// Stop as soon as either string reaches its terminator.
} while (c1 && c2);
// Equal only if both strings ended at the same position.
return !c1 && !c2;
}
// Hashes the NUL-terminated string |s|, lowercasing each character with
// toASCIILower() so the result is consistent with equal().
static unsigned hash(const char* s)
{
unsigned h = WTF::stringHashingStartValue;
for (;;) {
char c = *s++;
if (!c) {
// End of string: apply the final mixing steps and return.
h += (h << 3);
h ^= (h >> 11);
h += (h << 15);
return h;
}
// Fold the next lowercased character into the running hash.
h += toASCIILower(c);
h += (h << 10);
h ^= (h >> 6);
}
}
// NOTE(review): presumably tells the hash table not to call equal() on its
// empty/deleted sentinel keys, which equal() would otherwise dereference --
// confirm against the HashTraits documentation.
static const bool safeToCompareToEmptyOrDeleted = false;
};
struct TextCodecFactory {
NewTextCodecFunction function;
const void* additionalData;
TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
};
// Alias -> atomic canonical name. Keys are hashed and compared with
// TextEncodingNameHash, i.e. ignoring ASCII letter case.
typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
// Atomic canonical name -> codec factory. This map uses HashMap's default
// hashing for const char* keys (presumably pointer identity -- confirm), and
// entries are inserted under the atomic name taken from TextEncodingNameMap.
typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
// Returns the single mutex that newTextCodec(), the C-string overload of
// atomicCanonicalTextEncodingName() and dumpTextEncodingNameMap() lock while
// they touch the registry maps.
static Mutex& encodingRegistryMutex()
{
// NOTE(review): DEFINE_STATIC_LOCAL presumably creates the mutex on first use
// and never destroys it -- confirm against wtf/StdLibExtras.h.
DEFINE_STATIC_LOCAL(Mutex, mutex, ());
return mutex;
}
// Alias -> atomic name map; allocated by buildBaseTextCodecMaps().
static TextEncodingNameMap* textEncodingNameMap;
// Atomic name -> codec factory map; allocated by buildBaseTextCodecMaps().
static TextCodecMap* textCodecMap;
// Set to true once extendTextCodecMaps() has run.
static bool didExtendTextCodecMaps;
// Atomic names reported by isJapaneseEncoding(); null until buildQuirksSets() runs.
static HashSet<const char*>* japaneseEncodings;
// Atomic names reported by shouldShowBackslashAsCurrencySymbolIn(); null until
// buildQuirksSets() runs.
static HashSet<const char*>* nonBackslashEncodings;
// Encodings that pruneBlacklistedCodecs() removes from both maps. Each entry
// holds at most 5 characters plus the terminating NUL; widen the inner
// dimension before adding a longer name.
static const char textEncodingNameBlacklist[][6] = { "UTF-7" };
#if ERROR_DISABLED

// Error logging is compiled out, so the consistency check is a no-op.
static inline void checkExistingName(const char*, const char*) { }

#else

// Logs an error when |alias| is already registered under an atomic name other
// than |atomicName|. Does nothing when the alias is unknown or already maps to
// the very same atomic name pointer.
static void checkExistingName(const char* alias, const char* atomicName)
{
    const char* registeredName = textEncodingNameMap->get(alias);
    const bool conflicts = registeredName && registeredName != atomicName;
    if (!conflicts)
        return;

    // One remapping is silently tolerated: "ISO-8859-8-I", currently mapped to
    // itself, being re-registered against "iso-8859-8" (that last comparison
    // ignores case).
    const bool tolerated = !strcmp(alias, "ISO-8859-8-I")
        && !strcmp(registeredName, "ISO-8859-8-I")
        && !strcasecmp(atomicName, "iso-8859-8");
    if (tolerated)
        return;

    WTF_LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, registeredName, atomicName);
}

#endif
// Returns true for aliases that must not be registered: any alias containing
// a comma, and the exact alias "8859_1".
static bool isUndesiredAlias(const char* alias)
{
    // NOTE(review): comma-bearing aliases are presumably parameterised names
    // emitted by a codec back-end -- confirm the motivation with the codec owners.
    if (strchr(alias, ','))
        return true;

    return !strcmp(alias, "8859_1");
}
// Registration callback: records |alias| as another spelling of encoding
// |name|. Undesired aliases (see isUndesiredAlias()) are skipped.
static void addToTextEncodingNameMap(const char* alias, const char* name)
{
    ASSERT(strlen(alias) <= maxEncodingNameLength);
    if (isUndesiredAlias(alias))
        return;

    // An encoding must already be registered before it can gain aliases; the
    // only entry allowed to introduce a new name is the one mapping it to itself.
    const char* knownName = textEncodingNameMap->get(name);
    ASSERT(knownName || !strcmp(alias, name));

    // First registration of |name|: its own pointer becomes the atomic name.
    const char* canonicalName = knownName ? knownName : name;

    checkExistingName(alias, canonicalName);
    textEncodingNameMap->add(alias, canonicalName);
}
static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
{
const char* atomicName = textEncodingNameMap->get(name);
ASSERT(atomicName);
textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
}
static void pruneBlacklistedCodecs()
{
for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]);
if (!atomicName)
continue;
Vector<const char*> names;
TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
for (; it != end; ++it) {
if (it->value == atomicName)
names.append(it->key);
}
size_t length = names.size();
for (size_t j = 0; j < length; ++j)
textEncodingNameMap->remove(names[j]);
textCodecMap->remove(atomicName);
}
}
// Allocates the two registry maps and fills them with the Latin-1, UTF-8,
// UTF-16 and user-defined codecs. Asserts that it runs on the main thread and
// that neither map exists yet.
static void buildBaseTextCodecMaps()
{
ASSERT(isMainThread());
ASSERT(!textCodecMap);
ASSERT(!textEncodingNameMap);
textCodecMap = new TextCodecMap;
textEncodingNameMap = new TextEncodingNameMap;
// For each codec the names are registered before the factories, because
// addToTextCodecMap() looks the atomic name up in the name map and asserts
// that it is present.
TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
TextCodecLatin1::registerCodecs(addToTextCodecMap);
TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
TextCodecUTF8::registerCodecs(addToTextCodecMap);
TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
TextCodecUTF16::registerCodecs(addToTextCodecMap);
TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
TextCodecUserDefined::registerCodecs(addToTextCodecMap);
}
// Adds the atomic name of |name| to |set|. Names the registry does not know
// are skipped.
static void addEncodingName(HashSet<const char*>* set, const char* name)
{
    const char* canonicalName = textEncodingNameMap->get(name);
    if (!canonicalName)
        return;
    set->add(canonicalName);
}
static void buildQuirksSets()
{
ASSERT(!japaneseEncodings);
ASSERT(!nonBackslashEncodings);
japaneseEncodings = new HashSet<const char*>;
addEncodingName(japaneseEncodings, "EUC-JP");
addEncodingName(japaneseEncodings, "ISO-2022-JP");
addEncodingName(japaneseEncodings, "ISO-2022-JP-1");
addEncodingName(japaneseEncodings, "ISO-2022-JP-2");
addEncodingName(japaneseEncodings, "ISO-2022-JP-3");
addEncodingName(japaneseEncodings, "JIS_C6226-1978");
addEncodingName(japaneseEncodings, "JIS_X0201");
addEncodingName(japaneseEncodings, "JIS_X0208-1983");
addEncodingName(japaneseEncodings, "JIS_X0208-1990");
addEncodingName(japaneseEncodings, "JIS_X0212-1990");
addEncodingName(japaneseEncodings, "Shift_JIS");
addEncodingName(japaneseEncodings, "Shift_JIS_X0213-2000");
addEncodingName(japaneseEncodings, "cp932");
addEncodingName(japaneseEncodings, "x-mac-japanese");
nonBackslashEncodings = new HashSet<const char*>;
addEncodingName(nonBackslashEncodings, "x-mac-japanese");
addEncodingName(nonBackslashEncodings, "ISO-2022-JP");
addEncodingName(nonBackslashEncodings, "EUC-JP");
addEncodingName(nonBackslashEncodings, "Shift_JIS");
addEncodingName(nonBackslashEncodings, "Shift_JIS_X0213-2000");
}
// Returns true when |canonicalEncodingName| is in the Japanese-encoding quirk
// set. Returns false for a null name or when the set has not been built yet.
// The set stores atomic name pointers, so the argument is expected to be an
// atomic canonical name.
bool isJapaneseEncoding(const char* canonicalEncodingName)
{
    if (!canonicalEncodingName || !japaneseEncodings)
        return false;
    return japaneseEncodings->contains(canonicalEncodingName);
}
// Returns true when |canonicalEncodingName| is in the non-backslash quirk set.
// Returns false for a null name or when the set has not been built yet. The
// set stores atomic name pointers, so the argument is expected to be an atomic
// canonical name.
bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
{
    if (!canonicalEncodingName || !nonBackslashEncodings)
        return false;
    return nonBackslashEncodings->contains(canonicalEncodingName);
}
// Adds the ICU-provided encodings to the registry, then removes blacklisted
// ones and builds the quirk sets. The only caller in this file,
// atomicCanonicalTextEncodingName(), invokes it while holding the registry
// mutex.
static void extendTextCodecMaps()
{
// Names go in before factories: addToTextCodecMap() asserts that the atomic
// name is already present in the name map.
TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
TextCodecICU::registerCodecs(addToTextCodecMap);
// Prune after registration so blacklisted entries that were just added are
// removed as well.
pruneBlacklistedCodecs();
// The quirk sets only pick up names that resolve in the name map, so they are
// built last.
buildQuirksSets();
}
// Creates a codec for |encoding| by invoking the factory registered under
// encoding.name(). Holds the registry mutex for the duration of the call. The
// codec map must exist and must contain a factory for the encoding; both are
// asserted.
PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
{
    MutexLocker registryLock(encodingRegistryMutex());

    ASSERT(textCodecMap);
    TextCodecFactory codecFactory = textCodecMap->get(encoding.name());
    ASSERT(codecFactory.function);

    return codecFactory.function(encoding, codecFactory.additionalData);
}
// Resolves the NUL-terminated encoding label |name| to its atomic canonical
// name, or returns 0 when the label is null, empty or unknown.
//
// The base maps are built on first use, before the registry mutex is taken
// (buildBaseTextCodecMaps() asserts that it runs on the main thread). If the
// label is not found in the maps as they stand, the extended codecs are
// registered once and the lookup is retried.
const char* atomicCanonicalTextEncodingName(const char* name)
{
    const bool isEmptyLabel = !name || !*name;
    if (isEmptyLabel)
        return 0;

    if (!textEncodingNameMap)
        buildBaseTextCodecMaps();

    MutexLocker registryLock(encodingRegistryMutex());

    const char* canonicalName = textEncodingNameMap->get(name);
    if (!canonicalName && !didExtendTextCodecMaps) {
        // First miss: pull in the extended codecs and look again.
        extendTextCodecMaps();
        didExtendTextCodecMaps = true;
        canonicalName = textEncodingNameMap->get(name);
    }
    return canonicalName;
}
// Resolves an encoding label given as a counted buffer of |length| characters
// (not necessarily NUL-terminated) to its atomic canonical name.
//
// Returns 0 when the label is longer than maxEncodingNameLength, when it
// contains a character outside the ASCII range, or when the C-string overload
// does not recognise it.
template <typename CharacterType>
const char* atomicCanonicalTextEncodingName(const CharacterType* characters, size_t length)
{
    if (length > maxEncodingNameLength)
        return 0;

    char buffer[maxEncodingNameLength + 1];
    for (size_t i = 0; i < length; ++i) {
        const CharacterType c = characters[i];
        // Never narrow a non-ASCII character into the lookup buffer: truncating
        // a 16-bit code unit (e.g. U+0155 -> 'U') could turn an unrelated label
        // into a spelling of a registered ASCII name and produce a false match.
        if (static_cast<unsigned>(c) > 0x7F)
            return 0;
        buffer[i] = static_cast<char>(c);
    }
    buffer[length] = 0;
    return atomicCanonicalTextEncodingName(buffer);
}
// Resolves the encoding label held in |alias| to its atomic canonical name.
// Returns 0 for an empty string; otherwise dispatches to the counted-buffer
// overload matching the string's character width.
const char* atomicCanonicalTextEncodingName(const String& alias)
{
    const size_t aliasLength = alias.length();
    if (!aliasLength)
        return 0;

    return alias.is8Bit()
        ? atomicCanonicalTextEncodingName<LChar>(alias.characters8(), aliasLength)
        : atomicCanonicalTextEncodingName<UChar>(alias.characters16(), aliasLength);
}
// Returns true as long as the extended codec maps have not been registered.
// The flag is read without taking the registry mutex.
bool noExtendedTextEncodingNameUsed()
{
    if (didExtendTextCodecMaps)
        return false;
    return true;
}
#ifndef NDEBUG
// Debug helper: prints the number of registered aliases followed by every
// alias => atomic-name pair to stderr.
//
// The registry mutex is taken before the map is touched at all, so the
// reported size and the listed entries come from the same state. A registry
// that has not been built yet is reported as empty instead of being
// dereferenced.
void dumpTextEncodingNameMap()
{
    MutexLocker lock(encodingRegistryMutex());

    if (!textEncodingNameMap) {
        fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", 0u);
        return;
    }

    unsigned size = textEncodingNameMap->size();
    fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size);

    TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
    TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
    for (; it != end; ++it)
        fprintf(stderr, "'%s' => '%s'\n", it->key, it->value);
}
#endif
}