This source file includes the following definitions.
- FindAddress
- FindAddress
#include "content/common/android/address_parser.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "content/common/android/address_parser_internal.h"
namespace {
// File-local tuning constants for the address search below. Word indices are
// counted from the house number, which is always word 0 of a candidate.

// A state name is only searched for at word indices strictly greater than
// this value.
const size_t kMinAddressWords = 3;
// The per-candidate word scan runs while the word index is
// <= kMaxAddressWords + 1.
const size_t kMaxAddressWords = 12;
// A candidate is abandoned once its line count (starting at 1 and bumped for
// every newline delimiter seen) exceeds this value.
const size_t kMaxAddressLines = 5;
// A word longer than this many UTF-16 code units aborts the current candidate
// and also prevents resuming from a later house number inside it.
const size_t kMaxAddressNameWordLength = 25;
// A location name is only accepted at word indices <= this value.
const size_t kMaxLocationNameDistance = 4;
// Zero-terminated list of characters that both separate words and count as a
// line break when tallying address lines.
const base::char16 kNewlineDelimiters[] = {
'\n',
',',
'*',
// U+2022 BULLET.
0x2022,
// Terminator, so the array can be converted to a base::string16.
0,
};
}
namespace content {
namespace address_parser {
using namespace internal;
// Convenience overload: searches |text| for an address and, when one is found,
// stores the matching substring in |address| and returns true. Returns false
// without touching |address| when the iterator-based overload finds nothing.
bool FindAddress(const base::string16& text, base::string16* address) {
  size_t match_begin;
  size_t match_end;
  const bool found =
      FindAddress(text.begin(), text.end(), &match_begin, &match_end);
  if (!found)
    return false;

  // Clamp to an empty match if the reported range is inverted.
  size_t match_length = 0;
  if (match_end >= match_begin)
    match_length = match_end - match_begin;

  address->assign(text.substr(match_begin, match_length));
  return true;
}
// Scans [begin, end) for an address candidate.
//
// A candidate starts at a house number reported by HouseNumberParser. The
// words that follow are tokenized one at a time and must contain a word that
// IsValidLocationName() accepts, followed later by a state recognized by
// FindStateStartingInWord() and a word that IsZipValid() accepts for that
// state.
//
// On success returns true and writes, as offsets relative to |begin|, the
// start of the house number to |start_pos| and the end of the zip word to
// |end_pos|. Returns false if no house number is found, if the tokenizer runs
// out of input while a candidate is being examined, or if the input is
// exhausted without a match; the output parameters are not written then.
bool FindAddress(const base::string16::const_iterator& begin,
                 const base::string16::const_iterator& end,
                 size_t* start_pos,
                 size_t* end_pos) {
  HouseNumberParser house_number_parser;

  // |newline_delimiters| are the delimiters that count as a line break;
  // |delimiters| additionally contains all whitespace and is what actually
  // splits the input into words.
  const base::string16 newline_delimiters = kNewlineDelimiters;
  const base::string16 delimiters = base::kWhitespaceUTF16 + newline_delimiters;

  // Each iteration examines one candidate starting at the next house number
  // at or after |it|.
  for (base::string16::const_iterator it = begin; it != end; ) {
    Word house_number;
    if (!house_number_parser.Parse(it, end, &house_number))
      return false;

    // Delimiters are returned as tokens so that line breaks can be counted.
    String16Tokenizer tokenizer(house_number.end, end, delimiters);
    tokenizer.set_options(String16Tokenizer::RETURN_DELIMS);

    // words[0] is always the house number of the current candidate.
    WordList words;
    words.push_back(house_number);

    bool found_location_name = false;
    // Whether a failed candidate may resume from a later house number in it.
    bool continue_on_house_number = true;
    // True while every word seen so far has parsed as a house number.
    bool consecutive_house_numbers = true;
    // Index of the first later word that parsed as a house number (0 = none).
    size_t next_house_number_word = 0;
    size_t num_lines = 1;

    // Index 0 is the house number, so scanning starts at word 1.
    size_t next_word = 1;
    for (; next_word <= kMaxAddressWords + 1; ++next_word) {
      // Pull a new word from the tokenizer unless a previous look-ahead has
      // already stored it in |words|.
      if (next_word == words.size()) {
        do {
          if (!tokenizer.GetNext())
            return false;

          // Every newline-class delimiter token adds one address line.
          if (tokenizer.token_is_delim() && newline_delimiters.find(
              *tokenizer.token_begin()) != base::string16::npos) {
            ++num_lines;
          }
        } while (tokenizer.token_is_delim());

        if (num_lines > kMaxAddressLines)
          break;

        words.push_back(Word(tokenizer.token_begin(), tokenizer.token_end()));
      }

      // Hold the current word by value. FindStateStartingInWord() below is
      // handed |words| by non-const pointer and may append to it; a reference
      // into |words| could be invalidated by the time the "et al" check reads
      // the word again.
      const Word current_word = words[next_word];
      DCHECK_GT(std::distance(current_word.begin, current_word.end), 0);
      size_t current_word_length = std::distance(
          current_word.begin, current_word.end);

      // An over-long word ends this candidate, and no later house number
      // inside it is retried either.
      if (current_word_length > kMaxAddressNameWordLength) {
        continue_on_house_number = false;
        break;
      }

      // Check whether the current word is itself a house number.
      if (house_number_parser.Parse(current_word.begin, current_word.end,
                                    NULL)) {
        if (consecutive_house_numbers) {
          // Only house numbers so far and a line break has been crossed:
          // abandon this candidate and restart from this word.
          if (num_lines > 1) {
            next_house_number_word = next_word;
            break;
          }
        }

        // Remember the first later house number as the resume point in case
        // this candidate fails.
        if (next_house_number_word == 0) {
          next_house_number_word = next_word;
          continue;
        }
      } else {
        consecutive_house_numbers = false;
      }

      // A location name only counts when it is close to the house number.
      if (next_word <= kMaxLocationNameDistance &&
          IsValidLocationName(current_word)) {
        found_location_name = true;
        continue;
      }

      // A state is only looked for once enough words follow the house number.
      if (next_word > kMinAddressWords) {
        size_t state_first_word = next_word;
        size_t state_last_word, state_index;
        if (FindStateStartingInWord(&words, state_first_word, &state_last_word,
                                    &tokenizer, &state_index)) {
          // A state without a preceding location name is not an address.
          if (!found_location_name)
            break;

          // Reject the two-word sequence "et al" (compared case-insensitively)
          // -- presumably because "al" would otherwise match as a state.
          if (current_word_length == 2 && words.size() > 2) {
            const Word& previous_word = words[state_first_word - 1];
            if (previous_word.end - previous_word.begin == 2 &&
                LowerCaseEqualsASCII(previous_word.begin, previous_word.end,
                                     "et") &&
                LowerCaseEqualsASCII(current_word.begin, current_word.end,
                                     "al"))
              break;
          }

          // The zip candidate is the word right after the state; fetch it from
          // the tokenizer if it has not been read yet.
          size_t zip_word = state_last_word + 1;
          if (zip_word == words.size()) {
            do {
              if (!tokenizer.GetNext())
                return false;
            } while (tokenizer.token_is_delim());
            words.push_back(Word(tokenizer.token_begin(),
                                 tokenizer.token_end()));
          }

          // On an invalid zip, continue scanning from the word after the
          // state's last word (the loop increment advances past it).
          next_word = state_last_word;
          if (!IsZipValid(words[zip_word], state_index))
            continue;

          *start_pos = words[0].begin - begin;
          *end_pos = words[zip_word].end - begin;
          return true;
        }
      }
    }

    // Candidate failed: resume from the remembered house number if allowed,
    // otherwise skip past the last word that was examined.
    if (continue_on_house_number && next_house_number_word > 0) {
      it = words[next_house_number_word].begin;
    } else {
      DCHECK(!words.empty());
      // |next_word| can be one past the last stored word when the inner loop
      // stopped before storing it.
      next_word = std::min(next_word, words.size() - 1);
      it = words[next_word].end;
    }
  }

  return false;
}
}
}