This source file includes the following definitions.
- InStateZero
- UTF8GenericProperty
- UTF8GenericPropertyBigOneByte
- UTF8GenericScan
#include "encodings/compact_lang_det/win/cld_utf8statetable.h"
#include "base/basictypes.h"
static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
const uint8* Tbl0 = &st->state_table[st->state0];
return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size);
}
// Looks up the table entry for the character at the front of *src and
// advances *src / *srclen past it.
//
//   st      property object supplying state_table, state0 and entry_shift.
//   src     in/out: pointer to the current read position.
//   srclen  in/out: number of bytes remaining at *src.
//
// Returns 0 without touching *src or *srclen when *srclen <= 0.
// Returns 0 and skips exactly one byte when the lead byte does not match a
// 1- to 4-byte lead pattern, or when fewer bytes remain than that pattern
// calls for. Otherwise returns the entry reached by walking the table with
// every byte of the character, and skips the whole character.
//
// Trailing bytes are not checked against the 10xxxxxx continuation pattern
// here; they are used directly as table indices.
uint8 UTF8GenericProperty(const UTF8PropObj* st,
                          const uint8** src,
                          int* srclen) {
  const int remaining = *srclen;
  if (remaining <= 0) {
    return 0;
  }

  const uint8* const bytes = *src;
  const unsigned char lead = bytes[0];

  // Character length implied by the lead byte; 0 means "no pattern matched".
  int char_len = 0;
  if ((lead & 0x80) == 0x00) {
    char_len = 1;
  } else if ((lead & 0xe0) == 0xc0) {
    char_len = 2;
  } else if ((lead & 0xf0) == 0xe0) {
    char_len = 3;
  } else if ((lead & 0xf8) == 0xf0) {
    char_len = 4;
  }

  // Unrecognized lead byte or truncated character: consume one byte only.
  if (char_len == 0 || char_len > remaining) {
    *src += 1;
    *srclen -= 1;
    return 0;
  }

  const uint8* const base = &st->state_table[st->state0];
  const int shift = st->entry_shift;

  // The lead byte indexes the first row; each following byte indexes the row
  // selected by the previous entry (row offset = entry << entry_shift).
  int entry = base[lead];
  for (int i = 1; i < char_len; ++i) {
    const uint8* const row = &base[entry << shift];
    entry = row[bytes[i]];
  }

  *src += char_len;
  *srclen -= char_len;
  return static_cast<uint8>(entry);
}
// Looks up the table entry for the character at the front of *src and
// advances *src / *srclen past it. Same contract as a plain row-by-row walk
// for 1- and 2-byte characters; 3- and 4-byte characters use a different
// indexing scheme for their last two bytes (see below).
//
//   st      property object supplying state_table, state0 and entry_shift.
//   src     in/out: pointer to the current read position.
//   srclen  in/out: number of bytes remaining at *src.
//
// Returns 0 without touching *src or *srclen when *srclen <= 0.
// Returns 0 and skips exactly one byte when the lead byte matches no 1- to
// 4-byte lead pattern or the character is truncated by *srclen.
//
// For the second-to-last byte of a 3- or 4-byte character:
//   * the row is selected with a shift of (entry_shift + 4), i.e. a stride
//     16 times the normal row size, and
//   * the entry is read as a SIGNED 8-bit value and applied as a row offset
//     relative to the current row rather than to the table base.
// NOTE(review): presumably this lets the final lookup reach more rows than
// an unsigned one-byte entry could address -- confirm against the table
// generator.
//
// Trailing bytes are not checked against the 10xxxxxx continuation pattern.
uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st,
                                    const uint8** src,
                                    int* srclen) {
  if (*srclen <= 0) {
    return 0;
  }

  const uint8* lsrc = *src;
  const uint8* Tbl_0 = &st->state_table[st->state0];
  const uint8* Tbl = Tbl_0;
  int e;
  const int eshift = st->entry_shift;
  // Size of one normal row. Signed relative entries are multiplied by this
  // instead of being left-shifted: shifting a negative signed value is
  // undefined behavior before C++20, multiplication yields the same offset.
  const int row_size = 1 << eshift;
  unsigned char c = lsrc[0];

  if (static_cast<signed char>(c) >= 0) {
    // One-byte (7-bit) character: single lookup in the first row.
    e = Tbl[c];
    *src += 1;
    *srclen -= 1;
  } else if (((c & 0xe0) == 0xc0) && (*srclen >= 2)) {
    // Two-byte character: ordinary row-by-row walk.
    e = Tbl[c];
    Tbl = &Tbl_0[e << eshift];
    e = Tbl[lsrc[1]];
    *src += 2;
    *srclen -= 2;
  } else if (((c & 0xf0) == 0xe0) && (*srclen >= 3)) {
    // Three-byte character: wide row for byte 1, signed relative hop for
    // byte 2.
    e = Tbl[c];
    Tbl = &Tbl_0[e << (eshift + 4)];
    e = (reinterpret_cast<const int8*>(Tbl))[lsrc[1]];
    Tbl = &Tbl[e * row_size];  // relative to current row; e may be negative
    e = Tbl[lsrc[2]];
    *src += 3;
    *srclen -= 3;
  } else if (((c & 0xf8) == 0xf0) && (*srclen >= 4)) {
    // Four-byte character: ordinary step for byte 1, then wide row for
    // byte 2 and signed relative hop for byte 3.
    e = Tbl[c];
    Tbl = &Tbl_0[e << eshift];
    e = Tbl[lsrc[1]];
    Tbl = &Tbl_0[e << (eshift + 4)];
    e = (reinterpret_cast<const int8*>(Tbl))[lsrc[2]];
    Tbl = &Tbl[e * row_size];  // relative to current row; e may be negative
    e = Tbl[lsrc[3]];
    *src += 4;
    *srclen -= 4;
  } else {
    // Unrecognized lead byte or truncated character: consume one byte only.
    e = 0;
    *src += 1;
    *srclen -= 1;
  }
  return e;
}
// Scans the bytes str[0..len) with the state table in st and reports how far
// the scan got.
//
//   st              scan object supplying state_table, state0, state0_size,
//                   entry_shift, fast_state, losub and hiadd.
//   str, len        input bytes.
//   bytes_consumed  out: offset of the position where scanning stopped.
//
// Returns:
//   kExitOK                 len <= 0, or all input was consumed and the scan
//                           ended inside the state-zero region.
//   kExitIllegalStructure   input ran out outside the state-zero region (a
//                           character was cut off); *bytes_consumed is backed
//                           up to the start of that character.
//   any other table value >= kExitIllegalStructure (except kExitDoAgain,
//   which restarts the scan from the current position); *bytes_consumed is
//   backed up to the byte, or to the start of the character, that produced it.
int UTF8GenericScan(const UTF8ScanObj* st,
                    const uint8* str,
                    const int len,
                    int* bytes_consumed) {
  *bytes_consumed = 0;
  // Nothing to scan. Checked before any pointer arithmetic so that a zero or
  // negative length never forms a pointer outside the buffer.
  if (len <= 0) return kExitOK;

  int eshift = st->entry_shift;
  const uint8* isrc = str;
  const uint8* src = isrc;
  const uint8* srclimit = isrc + len;
  // Upper bound for the 8-bytes-at-a-time loop. For inputs shorter than 7
  // bytes "srclimit - 7" would point before the buffer (undefined pointer
  // arithmetic), so clamp to the start; the fast loop is then simply skipped.
  const uint8* srclimit8 = (len < 7) ? isrc : srclimit - 7;
  const uint8* Tbl_0 = &st->state_table[st->state0];

DoAgain:
  int e = 0;
  uint8 c;

  // Fast path: examine 8 bytes per iteration.
  // NOTE(review): losub/hiadd are presumably chosen so that a set high bit
  // in any byte lane of (s - losub) | (s + hiadd) flags a byte that may need
  // the byte-at-a-time scan -- confirm against the table generator.
  const uint8* Tbl2 = &st->fast_state[0];
  uint32 losub = st->losub;
  uint32 hiadd = st->hiadd;
  while (src < srclimit8) {
    uint32 s0123 = UnalignedLoad32(src);
    uint32 s4567 = UnalignedLoad32(src + 4);
    src += 8;
    uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
                  (s4567 - losub) | (s4567 + hiadd);
    if ((temp & 0x80808080) != 0) {
      // Some byte was flagged. src already points past the group, hence the
      // negative indices. Consult fast_state per byte: a non-zero entry in
      // the first four bytes rewinds to the start of the group, one in the
      // last four rewinds to the start of the second half. If every entry is
      // zero the flag was a false alarm and the fast loop continues.
      int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
                  (Tbl2[src[-6]] | Tbl2[src[-5]]);
      if (e0123 != 0) {src -= 8; break;}
      e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
              (Tbl2[src[-2]] | Tbl2[src[-1]]);
      if (e0123 != 0) {src -= 4; break;}
    }
  }

  // Byte-at-a-time scan starting from the first row. Each entry below
  // kExitIllegalStructure selects the next row (offset = entry << eshift);
  // any entry at or above it is an exit code.
  const uint8* Tbl = Tbl_0;
  while (src < srclimit) {
    c = *src;
    e = Tbl[c];
    src++;
    if (e >= kExitIllegalStructure) {break;}
    Tbl = &Tbl_0[e << eshift];
  }

  if (e >= kExitIllegalStructure) {
    // Exit code from the table: un-consume the byte that produced it.
    src--;
    // If that byte arrived in the middle of a character, also back up to the
    // character's first byte by skipping 10xxxxxx bytes (never before isrc).
    if (!InStateZero(st, Tbl)) {
      do {src--;} while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
    }
  } else if (!InStateZero(st, Tbl)) {
    // Input exhausted in the middle of a character: report it as illegal and
    // back up to the start of the truncated character.
    e = kExitIllegalStructure;
    do {src--;} while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
  } else {
    // Input exhausted on a character boundary.
    e = kExitOK;
  }

  if (e == kExitDoAgain) {
    // The table asked for the scan to be restarted from the current position,
    // beginning again with the fast path.
    goto DoAgain;
  }

  *bytes_consumed = static_cast<int>(src - isrc);
  return e;
}