root/mbstring/libmbfl/filters/mbfilter_htmlent.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. mbfl_filt_conv_html_enc
  2. mbfl_filt_conv_html_enc_flush
  3. mbfl_filt_conv_html_dec_ctor
  4. mbfl_filt_conv_html_dec_dtor
  5. mbfl_filt_conv_html_dec
  6. mbfl_filt_conv_html_dec_flush

/*
 * "streamable kanji code filter and converter"
 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
 *
 * LICENSE NOTICES
 *
 * This file is part of "streamable kanji code filter and converter",
 * which is distributed under the terms of GNU Lesser General Public 
 * License (version 2) as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with "streamable kanji code filter and converter";
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA  02111-1307  USA
 *
 * The author of this part: Marcus Boerger <helly@php.net>
 *
 */
/*
 * The source code included in this files was separated from mbfilter.c
 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
 * 
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

#include "mbfilter.h"
#include "mbfilter_htmlent.h"
#include "html_entities.h"

static const int htmlentitifieds[256] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};

const mbfl_encoding mbfl_encoding_html_ent = {
        mbfl_no_encoding_html_ent,
        "HTML-ENTITIES",
        "HTML-ENTITIES",
        (const char *(*)[])&mbfl_encoding_html_ent_aliases,
        NULL,
        MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
};

const struct mbfl_convert_vtbl vtbl_wchar_html = {
        mbfl_no_encoding_wchar,
        mbfl_no_encoding_html_ent,
        mbfl_filt_conv_common_ctor,
        mbfl_filt_conv_common_dtor,
        mbfl_filt_conv_html_enc,
        mbfl_filt_conv_html_enc_flush
};

const struct mbfl_convert_vtbl vtbl_html_wchar = {
        mbfl_no_encoding_html_ent,
        mbfl_no_encoding_wchar,
        mbfl_filt_conv_html_dec_ctor,
        mbfl_filt_conv_html_dec_dtor,
        mbfl_filt_conv_html_dec,
        mbfl_filt_conv_html_dec_flush };


#define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)

/*
 * any => HTML
 */
int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
{
        int tmp[64];
        int i;
        unsigned int uc;
        const mbfl_html_entity_entry *e;

        if (c < sizeof(htmlentitifieds) / sizeof(htmlentitifieds[0]) &&
                                htmlentitifieds[c] != 1) {
                CK((*filter->output_function)(c, filter->data));
        } else {
                CK((*filter->output_function)('&', filter->data));
                for (i = 0; (e = &mbfl_html_entity_list[i])->name != NULL; i++) {
                        if (c == e->code) {
                                char *p;
                                
                                for (p = e->name; *p != '\0'; p++) {
                                        CK((*filter->output_function)((int)*p, filter->data));
                                }
                                goto last;
                        }
                }

                {
                        int *p = tmp + sizeof(tmp) / sizeof(tmp[0]);

                        CK((*filter->output_function)('#', filter->data));

                        uc = (unsigned int)c;

                        *(--p) = '\0';
                        do {
                                *(--p) = "0123456789"[uc % 10];
                                uc /= 10;
                        } while (uc);

                        for (; *p != '\0'; p++) {
                                CK((*filter->output_function)(*p, filter->data));
                        }
                }
        last:
                CK((*filter->output_function)(';', filter->data));
        }
        return c;
}

int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
{
        filter->status = 0;
        filter->opaque = NULL;

        if (filter->flush_function != NULL) {
                (*filter->flush_function)(filter->data);
        }

        return 0;
}

/*
 * HTML => any
 */
#define html_enc_buffer_size    16
static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
{
        filter->status = 0;
        filter->opaque = mbfl_malloc(html_enc_buffer_size+1);
}
        
void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
{
        filter->status = 0;
        if (filter->opaque)
        {
                mbfl_free((void*)filter->opaque);
        }
        filter->opaque = NULL;
}

int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
{
        int  pos, ent = 0;
        mbfl_html_entity_entry *entity;
        char *buffer = (char*)filter->opaque;

        if (!filter->status) {
                if (c == '&' ) {
                        filter->status = 1;
                        buffer[0] = '&';
                } else {
                        CK((*filter->output_function)(c, filter->data));
                }
        } else {
                if (c == ';') {
                        if (buffer[1]=='#') {
                                if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
                                        if (filter->status > 3) {
                                                /* numeric entity */
                                                for (pos=3; pos<filter->status; pos++) {
                                                        int v =  buffer[pos];
                                                        if (v >= '0' && v <= '9') {
                                                                v = v - '0';
                                                        } else if (v >= 'A' && v <= 'F') {
                                                                v = v - 'A' + 10;
                                                        } else if (v >= 'a' && v <= 'f') {
                                                                v = v - 'a' + 10;
                                                        } else {
                                                                ent = -1;
                                                                break;
                                                        }
                                                        ent = ent * 16 + v;
                                                }
                                        } else {
                                                ent = -1;
                                        }
                                } else {
                                        /* numeric entity */
                                        if (filter->status > 2) {
                                                for (pos=2; pos<filter->status; pos++) {
                                                        int v = buffer[pos];
                                                        if (v >= '0' && v <= '9') {
                                                                v = v - '0';
                                                        } else {
                                                                ent = -1;
                                                                break;
                                                        }
                                                        ent = ent*10 + v;
                                                }
                                        } else {
                                                ent = -1;
                                        }
                                }
                                if (ent >= 0 && ent < 0x110000) {
                                        CK((*filter->output_function)(ent, filter->data));
                                } else {
                                        for (pos = 0; pos < filter->status; pos++) {
                                                CK((*filter->output_function)(buffer[pos], filter->data));
                                        }
                                        CK((*filter->output_function)(c, filter->data));
                                }
                                filter->status = 0;
                                /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
                        } else {
                                /* named entity */
                                buffer[filter->status] = 0;
                                entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
                                while (entity->name) {
                                        if (!strcmp(buffer+1, entity->name))    {
                                                ent = entity->code;
                                                break;
                                        }
                                        entity++;
                                }
                                if (ent) {
                                        /* decoded */
                                        CK((*filter->output_function)(ent, filter->data));
                                        filter->status = 0;
                                        /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE,"mbstring decoded '%s'=%d", buffer, ent);*/
                                } else { 
                                        /* failure */
                                        buffer[filter->status++] = ';';
                                        buffer[filter->status] = 0;
                                        /* php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring cannot decode '%s'", buffer); */
                                        mbfl_filt_conv_html_dec_flush(filter);
                                }
                        }
                } else {
                        /* add character */
                        buffer[filter->status++] = c;
                        /* add character and check */
                        if (!strchr(html_entity_chars, c) || filter->status+1==html_enc_buffer_size || (c=='#' && filter->status>2))
                        {
                                /* illegal character or end of buffer */
                                if (c=='&')
                                        filter->status--;
                                buffer[filter->status] = 0;
                                /* php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring cannot decode '%s'", buffer)l */
                                mbfl_filt_conv_html_dec_flush(filter);
                                if (c=='&')
                                {
                                        buffer[filter->status++] = '&';
                                }
                        }
                }
        }
        return c;
}

int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
{
        int status, pos = 0;
        unsigned char *buffer;
        int err = 0;

        buffer = (unsigned char*)filter->opaque;
        status = filter->status;
        filter->status = 0;

        /* flush fragments */
        while (status--) {
                int e = (*filter->output_function)(buffer[pos++], filter->data);
                if (e != 0)
                        err = e;
        }

        if (filter->flush_function != NULL) {
                (*filter->flush_function)(filter->data);
        }

        return err;
}



/* [<][>][^][v][top][bottom][index][help] */