/* [<][>][^][v][top][bottom][index][help] */
/*
 * DEFINITIONS
 * This source file includes the following definitions.
 * - mbfl_filt_conv_html_enc
 * - mbfl_filt_conv_html_enc_flush
 * - mbfl_filt_conv_html_dec_ctor
 * - mbfl_filt_conv_html_dec_dtor
 * - mbfl_filt_conv_html_dec
 * - mbfl_filt_conv_html_dec_flush
 */
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Marcus Boerger <helly@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include "mbfilter.h"
#include "mbfilter_htmlent.h"
#include "html_entities.h"
/*
 * Classification table indexed by character code 0..255, consulted by
 * mbfl_filt_conv_html_enc().
 *
 *   0 - 0x00..0x7F except the three characters below
 *   2 - '&' (0x26), '<' (0x3C) and '>' (0x3E)
 *   1 - 0x80..0xFF
 *
 * The encoder only tests for the value 1: codes marked 1 are replaced by
 * an entity, codes marked 0 or 2 are copied to the output unchanged.
 */
static const int htmlentitifieds[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
/* NULL-terminated list of alternative names referenced by mbfl_encoding_html_ent. */
static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};
/*
 * Encoding descriptor for "HTML-ENTITIES".
 *
 * The initializer is positional; the layout of mbfl_encoding is declared
 * outside this file.
 * NOTE(review): the fields are presumably, in order, the encoding id, the
 * canonical name, the MIME name, the alias list, a multibyte length table
 * (none here) and the encoding type flags -- confirm against the
 * mbfl_encoding declaration.
 */
const mbfl_encoding mbfl_encoding_html_ent = {
mbfl_no_encoding_html_ent,
"HTML-ENTITIES",
"HTML-ENTITIES",
(const char *(*)[])&mbfl_encoding_html_ent_aliases,
NULL,
MBFL_ENCTYPE_HTML_ENT
};
/*
 * Conversion table entry wiring the encoder of this file
 * (mbfl_filt_conv_html_enc / mbfl_filt_conv_html_enc_flush) together with
 * the common constructor and destructor.
 * NOTE(review): the positional fields are presumably (from, to, ctor, dtor,
 * filter function, flush function), i.e. wchar => HTML-ENTITIES -- confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_html = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_html_ent,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_html_enc,
mbfl_filt_conv_html_enc_flush
};
/*
 * Conversion table entry wiring the decoder of this file
 * (mbfl_filt_conv_html_dec and its dedicated ctor / dtor / flush).
 * NOTE(review): the positional fields are presumably (from, to, ctor, dtor,
 * filter function, flush function), i.e. HTML-ENTITIES => wchar -- confirm
 * against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_html_wchar = {
mbfl_no_encoding_html_ent,
mbfl_no_encoding_wchar,
mbfl_filt_conv_html_dec_ctor,
mbfl_filt_conv_html_dec_dtor,
mbfl_filt_conv_html_dec,
mbfl_filt_conv_html_dec_flush };
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions returning int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* any => HTML
*/
/*
 * Encode one character as HTML.
 *
 * Codes 0..255 whose htmlentitifieds[] entry is not 1 are written to the
 * output unchanged. Every other code is written as "&name;" when
 * mbfl_html_entity_list has an entry with that code, otherwise as the
 * decimal numeric reference "&#NNN;" (c is printed as an unsigned value).
 *
 * c      - character code to encode
 * filter - conversion filter; output goes through filter->output_function
 *          with filter->data
 *
 * Returns c, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
{
	const mbfl_html_entity_entry *entry;
	const char *name;
	unsigned int value;
	char digits[32];
	int ndigits = 0;

	/* characters that need no escaping are copied through as-is */
	if ((unsigned int)c < (unsigned int)(sizeof(htmlentitifieds) / sizeof(htmlentitifieds[0]))
		&& htmlentitifieds[c] != 1) {
		CK((*filter->output_function)(c, filter->data));
		return c;
	}

	CK((*filter->output_function)('&', filter->data));

	/* walk the entity list until the code matches or the sentinel is hit */
	entry = &mbfl_html_entity_list[0];
	while (entry->name != NULL && entry->code != c) {
		entry++;
	}

	if (entry->name != NULL) {
		/* named entity */
		for (name = entry->name; *name != '\0'; name++) {
			CK((*filter->output_function)((int)*name, filter->data));
		}
	} else {
		/* numeric entity: collect the decimal digits least significant
		 * first, then emit them in reverse */
		CK((*filter->output_function)('#', filter->data));
		value = (unsigned int)c;
		do {
			digits[ndigits++] = (char)('0' + value % 10);
			value /= 10;
		} while (value != 0);
		while (ndigits > 0) {
			CK((*filter->output_function)(digits[--ndigits], filter->data));
		}
	}

	CK((*filter->output_function)(';', filter->data));
	return c;
}
/*
 * Flush handler of the HTML encoder.
 *
 * The encoder keeps no pending output, so this only clears the filter's
 * opaque pointer and status. Always returns 0.
 */
int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
{
	filter->opaque = NULL;
	filter->status = 0;

	return 0;
}
/*
* HTML => any
*/
/*
 * Size used for the decoder's pending-entity buffer; the constructor
 * allocates html_enc_buffer_size+1 bytes.
 */
#define html_enc_buffer_size 16
/*
 * Characters the decoder accepts while collecting an entity after '&':
 * '#', the decimal digits and the ASCII letters.
 */
static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
/*
 * Constructor of the HTML decoder.
 *
 * Allocates the pending-entity buffer (html_enc_buffer_size+1 bytes) into
 * filter->opaque and marks the filter as holding no pending characters.
 * The allocation result is stored without being checked.
 */
void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
{
	filter->opaque = mbfl_malloc(html_enc_buffer_size+1);
	filter->status = 0;
}
/*
 * Destructor of the HTML decoder.
 *
 * Releases the buffer held in filter->opaque (if any), then leaves the
 * filter with a NULL opaque pointer and status 0.
 */
void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
{
	void *pending = (void*)filter->opaque;

	/* detach the buffer from the filter before releasing it */
	filter->opaque = NULL;
	filter->status = 0;

	if (pending) {
		mbfl_free(pending);
	}
}
/*
 * Parse the body of a numeric character reference (the text between "&#"
 * and ";").
 *
 * digits - the characters following "&#"
 * len    - number of characters in digits
 *
 * Accepts decimal digits, or 'x'/'X' followed by hexadecimal digits.
 * Returns the code point (0..0x10FFFF), or -1 when there are no digits,
 * a character is not a valid digit for the base, or the value exceeds
 * 0x10FFFF. The range check runs after every digit, so the accumulator
 * can never overflow an int.
 */
static int mbfl_html_dec_parse_numeric(const char *digits, int len)
{
	int base = 10;
	int value = 0;
	int i = 0;

	if (len > 0 && (digits[0] == 'x' || digits[0] == 'X')) {
		base = 16;
		i = 1;
	}
	if (i >= len) {
		/* "&#;" or "&#x;": nothing to decode */
		return -1;
	}
	for (; i < len; i++) {
		int ch = digits[i];
		int digit;

		if (ch >= '0' && ch <= '9') {
			digit = ch - '0';
		} else if (base == 16 && ch >= 'a' && ch <= 'f') {
			digit = ch - 'a' + 10;
		} else if (base == 16 && ch >= 'A' && ch <= 'F') {
			digit = ch - 'A' + 10;
		} else {
			return -1;
		}
		value = value * base + digit;
		if (value > 0x10FFFF) {
			return -1;
		}
	}
	return value;
}

/*
 * Decode HTML entities, one input character per call.
 *
 * State kept in the filter:
 *   filter->status - number of characters currently buffered; 0 means the
 *                    decoder is not inside an entity
 *   filter->opaque - char buffer allocated by the decoder constructor
 *                    (html_enc_buffer_size+1 bytes)
 *
 * Behaviour:
 *   - outside an entity, '&' starts buffering and any other character is
 *     written through unchanged;
 *   - ';' ends the entity: "&#NNN;" / "&#xHHH;" are decoded numerically,
 *     anything else is looked up by name in mbfl_html_entity_list. An
 *     entity that cannot be decoded is written out verbatim, including
 *     the ';';
 *   - a character that cannot be part of an entity (or a '#' anywhere but
 *     directly after '&') aborts the entity: the buffered text is written
 *     out verbatim, followed by the character itself, except that '&'
 *     immediately starts a new entity;
 *   - when the buffer fills up, the buffered text is written out verbatim.
 *
 * Returns c, or -1 when writing to the output fails.
 */
int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
{
	int ent = 0;
	int decoded;
	int rc;
	const mbfl_html_entity_entry *entity;
	char *buffer = (char*)filter->opaque;

	if (!filter->status) {
		if (c == '&') {
			filter->status = 1;
			buffer[0] = '&';
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		return c;
	}

	if (c == ';') {
		buffer[filter->status] = 0;
		if (buffer[1] == '#') {
			/* numeric entity: every digit is validated, so malformed
			 * references are passed through instead of producing
			 * an arbitrary character */
			ent = mbfl_html_dec_parse_numeric(buffer + 2, filter->status - 2);
			decoded = (ent >= 0);
		} else {
			/* named entity */
			for (entity = &mbfl_html_entity_list[0]; entity->name; entity++) {
				if (!strcmp(buffer + 1, entity->name)) {
					ent = entity->code;
					break;
				}
			}
			/* a code of 0 counts as "not found" */
			decoded = (ent != 0);
		}
		if (decoded) {
			/* leave the entity state before writing, so a failing
			 * output function cannot leave stale buffered text */
			filter->status = 0;
			CK((*filter->output_function)(ent, filter->data));
		} else {
			/* failure: emit the original text, ';' included */
			buffer[filter->status++] = ';';
			buffer[filter->status] = 0;
			rc = mbfl_filt_conv_html_dec_flush(filter);
			filter->status = 0;
			CK(rc);
		}
		return c;
	}

	/*
	 * strchr() converts its argument to char and also matches the
	 * terminating NUL, so restrict the lookup to 1..0x7F first; otherwise
	 * 0 and wide values aliasing an ASCII letter would be accepted.
	 */
	if (c <= 0 || c > 0x7f || !strchr(html_entity_chars, c) ||
		(c == '#' && filter->status > 1)) {
		/* illegal character: give up on the entity collected so far */
		buffer[filter->status] = 0;
		rc = mbfl_filt_conv_html_dec_flush(filter);
		filter->status = 0;
		CK(rc);
		if (c == '&') {
			/* the offending character itself opens a new entity */
			filter->status = 1;
			buffer[0] = '&';
		} else {
			/* written directly rather than via the char buffer, so the
			 * value is neither truncated nor sign-extended */
			CK((*filter->output_function)(c, filter->data));
		}
		return c;
	}

	/* add character */
	buffer[filter->status++] = (char)c;
	if (filter->status + 1 >= html_enc_buffer_size) {
		/* end of buffer: too long to be an entity, emit it verbatim */
		buffer[filter->status] = 0;
		rc = mbfl_filt_conv_html_dec_flush(filter);
		filter->status = 0;
		CK(rc);
	}
	return c;
}
/*
 * Flush handler of the HTML decoder: write out, unchanged, whatever
 * partial entity text is still buffered.
 *
 * filter->status holds the number of buffered characters and
 * filter->opaque the buffer itself. The buffer is kept (not freed); only
 * the fill level is reset.
 *
 * Returns 0, or -1 when the output function reports a negative value.
 */
int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
{
	/* read the bytes as unsigned so that values >= 0x80 are not
	 * sign-extended into negative ints where plain char is signed */
	const unsigned char *pending = (const unsigned char*)filter->opaque;
	int remaining = filter->status;

	/* reset the fill level first: if the output function fails midway,
	 * the filter must not keep a stale count that would make later calls
	 * re-emit the fragment or write past the buffer */
	filter->status = 0;

	/* flush fragments */
	while (remaining-- > 0) {
		CK((*filter->output_function)(*pending++, filter->data));
	}
	return 0;
}