This source file includes the following definitions.
- tasks_finished_
- IsStringUtf8
- TestNumber
- TestRange
- TEST_F
- prefix_length_
- end
- prefix_length_
- Advance
- CheckRange
- CheckRangeByteAtATime
- CheckCombinations
- TEST
- TEST
- TEST
- TEST
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST_F
- TEST
- TEST
- TEST
- TEST
#include "base/i18n/streaming_utf8_validator.h"
#include <stdio.h>
#include <string.h>
#include <string>
#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"
#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
#include "base/basictypes.h"
#include "base/bind.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/synchronization/condition_variable.h"
#include "base/synchronization/lock.h"
#include "base/threading/sequenced_worker_pool.h"
#include "third_party/icu/source/common/unicode/utf8.h"
#endif
namespace base {
namespace {
// File-local aliases for the validator's State enumerators, so the tests below
// can name them without the StreamingUtf8Validator:: qualifier.
const StreamingUtf8Validator::State VALID_ENDPOINT =
StreamingUtf8Validator::VALID_ENDPOINT;
const StreamingUtf8Validator::State VALID_MIDPOINT =
StreamingUtf8Validator::VALID_MIDPOINT;
const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID;
#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
// Number of consecutive 32-bit values handed to a single worker task by the
// thorough test. TestEverything advances its start value by this amount per
// task until the unsigned counter wraps back to zero.
const uint32 kThoroughTestChunkSize = 1 << 24;
class StreamingUtf8ValidatorThoroughTest : public ::testing::Test {
protected:
StreamingUtf8ValidatorThoroughTest()
: all_done_(&lock_), tasks_dispatched_(0), tasks_finished_(0) {}
static bool IsStringUtf8(const char* src, int32 src_len) {
int32 char_index = 0;
while (char_index < src_len) {
int32 code_point;
U8_NEXT(src, char_index, src_len, code_point);
if (!base::IsValidCodepoint(code_point))
return false;
}
return true;
}
void TestNumber(uint32 n) const {
char test[sizeof n];
memcpy(test, &n, sizeof n);
StreamingUtf8Validator validator;
EXPECT_EQ(IsStringUtf8(test, sizeof n),
validator.AddBytes(test, sizeof n) == VALID_ENDPOINT)
<< "Difference of opinion for \""
<< base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X",
test[0] & 0xFF,
test[1] & 0xFF,
test[2] & 0xFF,
test[3] & 0xFF) << "\"";
}
public:
void TestRange(uint32 begin, uint32 size) {
for (uint32 i = 0; i < size; ++i) {
TestNumber(begin + i);
}
base::AutoLock al(lock_);
++tasks_finished_;
LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_
<< " tasks done\n";
if (tasks_finished_ >= tasks_dispatched_) {
all_done_.Signal();
}
}
protected:
base::Lock lock_;
base::ConditionVariable all_done_;
int tasks_dispatched_;
int tasks_finished_;
};
// Posts one TestRange() task per chunk of kThoroughTestChunkSize values to a
// worker pool, covering start values from 0 until the unsigned counter wraps,
// then blocks until every posted task has finished.
TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) {
  scoped_refptr<base::SequencedWorkerPool> pool =
      new base::SequencedWorkerPool(32, "TestEverything");
  // The lock is taken before any task is posted and is still held when the
  // wait loop below starts; TestRange() takes the same lock before touching
  // the counters.
  base::AutoLock al(lock_);
  uint32 chunk_begin = 0;
  for (;;) {
    pool->PostWorkerTask(
        FROM_HERE,
        base::Bind(&StreamingUtf8ValidatorThoroughTest::TestRange,
                   base::Unretained(this),
                   chunk_begin,
                   kThoroughTestChunkSize));
    ++tasks_dispatched_;
    chunk_begin += kThoroughTestChunkSize;
    // Wrapping back to zero means the last chunk has been posted.
    if (chunk_begin == 0)
      break;
  }
  while (tasks_finished_ < tasks_dispatched_) {
    all_done_.Wait();
  }
}
#endif
// Complete, well-formed UTF-8 sequences, each encoding one code point.
const char* const valid[] = {
    "\r",                // U+000D
    "\n",                // U+000A
    "a",                 // U+0061
    "\xc2\x81",          // U+0081, two bytes
    "\xe1\x80\xbf",      // U+103F, three bytes
    "\xf1\x80\xa0\xbf",  // U+4083F, four bytes
    "\xef\xbb\xbf",      // U+FEFF
};
// One-past-the-end pointer for |valid|, so that (valid, valid_end) can be
// passed to the range-checking helpers as an iterator pair.
const char* const* const valid_end = valid + arraysize(valid);
// Byte sequences that the validator is expected to reject. Every entry is
// expected to yield INVALID, whether fed in one chunk or one byte at a time.
const char* const invalid[] = {
    // Lead bytes 0xC0 and 0xC1 could only start overlong two-byte encodings.
    "\xc0", "\xc1",
    // Lead bytes that would encode values above U+10FFFF.
    "\xf5", "\xf6", "\xf7",
    // Five- and six-byte lead bytes, plus 0xFE and 0xFF.
    "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
    // Surrogate range (U+D800 and U+DFFF at the ends).
    // NOTE(review): the middle entry has 0x0a as its second byte, which is not
    // a continuation byte, so it is malformed rather than a surrogate; it may
    // have been meant as ED A0 8F - confirm before changing.
    "\xed\xa0\x80", "\xed\x0a\x8f", "\xed\xbf\xbf",
    // Overlong two-byte encodings. Each entry needs its own trailing comma:
    // adjacent string literals are concatenated by the compiler, which would
    // silently merge two cases into a single four-byte entry.
    "\xc0\x80",  // U+0000
    "\xc1\x80",  // U+0040
    "\xc1\x81",  // U+0041
    // Overlong three-byte encodings.
    "\xe0\x80\x80",  // U+0000
    "\xe0\x82\x80",  // U+0080
    "\xe0\x9f\xbf",  // U+07FF
    // Overlong four-byte encodings.
    "\xf0\x80\x80\x8D",  // U+000D
    "\xf0\x80\x82\x91",  // U+0091
    "\xf0\x80\xa0\x80",  // U+0800
    "\xf0\x8f\xbb\xbf",  // U+FEFF
    // Overlong five- and six-byte forms.
    "\xf8\x80\x80\x80\xbf",
    "\xfc\x80\x80\x80\xa0\xa5",
    // First value past the Unicode range (U+110000).
    "\xf4\x90\x80\x80",
    // Five- and six-byte sequences.
    "\xf8\xa0\xbf\x80\xbf",
    "\xfc\x9c\xbf\x80\xbf\x80",
    // UTF-16 byte order marks.
    "\xfe\xff", "\xff\xfe",
};
// One-past-the-end pointer for |invalid|, so that (invalid, invalid_end) can
// be passed to the range-checking helpers as an iterator pair.
const char* const* const invalid_end = invalid + arraysize(invalid);
// Forward iterator over the non-empty proper prefixes of the entries of
// |valid|: for an entry of length n it yields the prefixes of length 1..n-1,
// so single-byte entries contribute nothing. Dereferencing yields a
// base::StringPiece that views the prefix inside |valid|.
class PartialIterator {
 public:
  // Begin iterator: starts just before the first prefix and steps onto it.
  PartialIterator() : index_(0), prefix_length_(0) { Advance(); }

  // End iterator: the state Advance() reaches after the last entry.
  static PartialIterator end() {
    const size_t past_last_entry = arraysize(valid);
    return PartialIterator(past_last_entry, 1);
  }

  PartialIterator& operator++() {
    Advance();
    return *this;
  }

  base::StringPiece operator*() const {
    const char* const whole_sequence = valid[index_];
    return base::StringPiece(whole_sequence, prefix_length_);
  }

  bool operator==(const PartialIterator& rhs) const {
    if (index_ != rhs.index_)
      return false;
    return prefix_length_ == rhs.prefix_length_;
  }

  bool operator!=(const PartialIterator& rhs) const { return !(*this == rhs); }

 private:
  // Used only to build the end() iterator.
  PartialIterator(size_t index, size_t prefix_length)
      : index_(index), prefix_length_(prefix_length) {}

  // Grows the current prefix by one byte; whenever the prefix has become the
  // complete entry, moves on to the one-byte prefix of the next entry. Stops
  // at the end() state once every entry has been consumed.
  void Advance() {
    const size_t entry_count = arraysize(valid);
    if (index_ < entry_count && prefix_length_ < strlen(valid[index_])) {
      ++prefix_length_;
    }
    for (;;) {
      if (index_ >= entry_count)
        break;
      if (prefix_length_ != strlen(valid[index_]))
        break;
      ++index_;
      prefix_length_ = 1;
    }
  }

  size_t index_;          // Position of the current entry within |valid|.
  size_t prefix_length_;  // Number of leading bytes of that entry to expose.
};
class StreamingUtf8ValidatorSingleSequenceTest : public ::testing::Test {
protected:
template <typename Iterator>
void CheckRange(Iterator begin,
Iterator end,
StreamingUtf8Validator::State expected) {
for (Iterator it = begin; it != end; ++it) {
StreamingUtf8Validator validator;
base::StringPiece sequence = *it;
EXPECT_EQ(expected,
validator.AddBytes(sequence.data(), sequence.size()))
<< "Failed for \"" << sequence << "\"";
}
}
template <typename Iterator>
void CheckRangeByteAtATime(Iterator begin,
Iterator end,
StreamingUtf8Validator::State expected) {
for (Iterator it = begin; it != end; ++it) {
StreamingUtf8Validator validator;
base::StringPiece sequence = *it;
StreamingUtf8Validator::State state = VALID_ENDPOINT;
for (base::StringPiece::const_iterator cit = sequence.begin();
cit != sequence.end();
++cit) {
state = validator.AddBytes(&*cit, 1);
}
EXPECT_EQ(expected, state) << "Failed for \"" << sequence << "\"";
}
}
};
// Fixture whose helper checks the validator state after two sequences are fed
// back to back.
class StreamingUtf8ValidatorDoubleSequenceTest : public ::testing::Test {
 protected:
  // For every pairing of a sequence from [begin1, end1) with one from
  // [begin2, end2), feeds the first and then the second to the validator and
  // expects the state returned by the second call to equal |expected|. One
  // validator is shared by all pairings and Reset() between them.
  template <typename Iterator1, typename Iterator2>
  void CheckCombinations(Iterator1 begin1,
                         Iterator1 end1,
                         Iterator2 begin2,
                         Iterator2 end2,
                         StreamingUtf8Validator::State expected) {
    StreamingUtf8Validator validator;
    Iterator1 first = begin1;
    while (first != end1) {
      const base::StringPiece c1 = *first;
      Iterator2 second = begin2;
      while (second != end2) {
        const base::StringPiece c2 = *second;
        validator.AddBytes(c1.data(), c1.size());
        const StreamingUtf8Validator::State result =
            validator.AddBytes(c2.data(), c2.size());
        EXPECT_EQ(expected, result) << "Failed for \"" << c1 << c2 << "\"";
        validator.Reset();
        ++second;
      }
      ++first;
    }
  }
};
// Adding zero bytes to a fresh validator is expected to report VALID_ENDPOINT.
TEST(StreamingUtf8ValidatorTest, NothingIsValid) {
  const char* const empty_input = "";
  StreamingUtf8Validator validator;
  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(empty_input, 0));
}
// A single NUL byte is expected to be accepted as a complete sequence.
TEST(StreamingUtf8ValidatorTest, NulIsValid) {
  const char nul_byte = '\0';
  StreamingUtf8Validator validator;
  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(&nul_byte, 1));
}
// A plain ASCII string fed in one chunk is expected to end at VALID_ENDPOINT.
TEST(StreamingUtf8ValidatorTest, HelloWorld) {
  const char* const greeting = "Hello, World!";
  const size_t greeting_length = strlen(greeting);
  StreamingUtf8Validator validator;
  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(greeting, greeting_length));
}
// Checks that an INVALID verdict persists across further input and that
// Reset() returns the validator to a state where valid input is accepted.
TEST(StreamingUtf8ValidatorTest, ResetWorks) {
StreamingUtf8Validator validator;
// Push the validator into the INVALID state with a bad lead byte.
EXPECT_EQ(INVALID, validator.AddBytes("\xC0", 1));
// Input that is valid on its own is still expected to report INVALID.
EXPECT_EQ(INVALID, validator.AddBytes("a", 1));
validator.Reset();
// After Reset() the same byte is expected to be accepted.
EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes("a", 1));
}
// Each entry of |valid|, fed in one chunk, is expected to give VALID_ENDPOINT.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Valid) {
CheckRange(valid, valid_end, VALID_ENDPOINT);
}
// Each proper prefix produced by PartialIterator, fed in one chunk, is
// expected to give VALID_MIDPOINT.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Partial) {
CheckRange(PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);
}
// Each entry of |invalid|, fed in one chunk, is expected to give INVALID.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Invalid) {
CheckRange(invalid, invalid_end, INVALID);
}
// Each entry of |valid|, fed one byte at a time, is expected to end at
// VALID_ENDPOINT.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, ValidByByte) {
CheckRangeByteAtATime(valid, valid_end, VALID_ENDPOINT);
}
// Each proper prefix produced by PartialIterator, fed one byte at a time, is
// expected to end at VALID_MIDPOINT.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, PartialByByte) {
CheckRangeByteAtATime(
PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);
}
// Each entry of |invalid|, fed one byte at a time, is expected to end at
// INVALID.
TEST_F(StreamingUtf8ValidatorSingleSequenceTest, InvalidByByte) {
CheckRangeByteAtATime(invalid, invalid_end, INVALID);
}
// A complete sequence followed by another complete sequence is expected to
// end at VALID_ENDPOINT.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusValidIsValid) {
CheckCombinations(valid, valid_end, valid, valid_end, VALID_ENDPOINT);
}
// A complete sequence followed by a proper prefix is expected to end at
// VALID_MIDPOINT.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusPartialIsPartial) {
CheckCombinations(valid,
valid_end,
PartialIterator(),
PartialIterator::end(),
VALID_MIDPOINT);
}
// A proper prefix followed by a complete sequence (which restarts a character
// before the previous one finished) is expected to give INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusValidIsInvalid) {
CheckCombinations(
PartialIterator(), PartialIterator::end(), valid, valid_end, INVALID);
}
// A proper prefix followed by another proper prefix is expected to give
// INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusPartialIsInvalid) {
CheckCombinations(PartialIterator(),
PartialIterator::end(),
PartialIterator(),
PartialIterator::end(),
INVALID);
}
// A complete sequence followed by an invalid one is expected to give INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusInvalidIsInvalid) {
CheckCombinations(valid, valid_end, invalid, invalid_end, INVALID);
}
// An invalid sequence followed by a complete one is expected to remain
// INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusValidIsInvalid) {
CheckCombinations(invalid, invalid_end, valid, valid_end, INVALID);
}
// Two invalid sequences in a row are expected to give INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusInvalidIsInvalid) {
CheckCombinations(invalid, invalid_end, invalid, invalid_end, INVALID);
}
// An invalid sequence followed by a proper prefix is expected to remain
// INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusPartialIsInvalid) {
CheckCombinations(
invalid, invalid_end, PartialIterator(), PartialIterator::end(), INVALID);
}
// A proper prefix followed by an invalid sequence is expected to give INVALID.
TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusInvalidIsInvalid) {
CheckCombinations(
PartialIterator(), PartialIterator::end(), invalid, invalid_end, INVALID);
}
// The one-shot Validate() helper is expected to accept an empty string.
TEST(StreamingUtf8ValidatorValidateTest, EmptyIsValid) {
EXPECT_TRUE(StreamingUtf8Validator::Validate(std::string()));
}
// Validate() is expected to accept a complete two-byte sequence (U+0081).
TEST(StreamingUtf8ValidatorValidateTest, SimpleValidCase) {
EXPECT_TRUE(StreamingUtf8Validator::Validate("\xc2\x81"));
}
// Validate() is expected to reject the overlong two-byte encoding of U+0000.
TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) {
EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc0\x80"));
}
// Validate() is expected to reject a string that ends after a two-byte lead
// byte with no continuation byte following it.
TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) {
EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc2"));
}
}
}