root/mbstring/libmbfl/mbfl/mbfilter.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_buffer_converter_new
  2. mbfl_buffer_converter_new2
  3. mbfl_buffer_converter_delete
  4. mbfl_buffer_converter_reset
  5. mbfl_buffer_converter_illegal_mode
  6. mbfl_buffer_converter_illegal_substchar
  7. mbfl_buffer_converter_strncat
  8. mbfl_buffer_converter_feed
  9. mbfl_buffer_converter_feed2
  10. mbfl_buffer_converter_flush
  11. mbfl_buffer_converter_getbuffer
  12. mbfl_buffer_converter_result
  13. mbfl_buffer_converter_feed_result
  14. mbfl_buffer_illegalchars
  15. mbfl_encoding_detector_new
  16. mbfl_encoding_detector_new2
  17. mbfl_encoding_detector_delete
  18. mbfl_encoding_detector_feed
  19. mbfl_encoding_detector_judge2
  20. mbfl_encoding_detector_judge
  21. mbfl_convert_encoding
  22. mbfl_identify_encoding
  23. mbfl_identify_encoding2
  24. filter_count_output
  25. mbfl_strlen
  26. collector_strpos
  27. mbfl_oddlen
  28. mbfl_strpos
  29. mbfl_substr_count
  30. collector_substr
  31. mbfl_substr
  32. mbfl_strcut
  33. is_fullwidth
  34. filter_count_width
  35. mbfl_strwidth
  36. collector_strimwidth
  37. mbfl_strimwidth
  38. mbfl_ja_jp_hantozen
  39. mime_header_encoder_block_collector
  40. mime_header_encoder_collector
  41. mime_header_encoder_result
  42. mime_header_encoder_new
  43. mime_header_encoder_delete
  44. mime_header_encoder_feed
  45. mbfl_mime_header_encode
  46. mime_header_decoder_collector
  47. mime_header_decoder_result
  48. mime_header_decoder_new
  49. mime_header_decoder_delete
  50. mime_header_decoder_feed
  51. mbfl_mime_header_decode
  52. collector_encode_htmlnumericentity
  53. collector_decode_htmlnumericentity
  54. collector_encode_hex_htmlnumericentity
  55. mbfl_filt_decode_htmlnumericentity_flush
  56. mbfl_html_numeric_entity

/*
 * charset=UTF-8
 * vim600: encoding=utf-8
 */

/*
 * "streamable kanji code filter and converter"
 *
 * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
 *
 * This software is released under the GNU Lesser General Public License.
 * (Version 2.1, February 1999)
 * Please read the following detail of the licence (in japanese).
 *
 * ◆使用許諾条件◆
 *
 * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
 * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
 * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
 * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
 * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
 * することはできません。
 *
 * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
 * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
 * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
 * による許諾を得る必要があります。
 *
 * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
 * ます。「GNU Lesser General Public License」とは、これまでLibrary General
 * Public Licenseと呼ばれていたものです。
 *     http://www.gnu.org/ --- GNUウェブサイト
 *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
 * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
 *
 * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
 * はありません。
 *
 * ◆保証内容◆
 *
 * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
 * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
 * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
 * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
 * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
 * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
 * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
 * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
 * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
 * 契約・規定に優先します。
 *
 * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
 *
 * 〒102-0073
 * 東京都千代田区九段北1-13-5日本地所第一ビル4F
 * 株式会社ハッピーサイズ
 * Phone: 03-3512-3655, Fax: 03-3512-3656
 * Email: sales@happysize.co.jp
 * Web: http://happysize.com/
 *
 * ◆著者◆
 *
 * 金本 茂 <sgk@happysize.co.jp>
 *
 * ◆履歴◆
 *
 * 1998/11/10 sgk implementation in C++
 * 1999/4/25  sgk Cで書きなおし。
 * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
 * 1999/6/??      Unicodeサポート。
 * 1999/6/22  sgk ライセンスをLGPLに変更。
 *
 */

/* 
 * Unicode support
 *
 * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
 * All rights reserved.
 *
 */


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stddef.h>

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif

#include "mbfilter.h"
#include "mbfl_filter_output.h"
#include "mbfilter_pass.h"
#include "filters/mbfilter_tl_jisx0201_jisx0208.h"

#include "eaw_table.h"

/* hex character table "0123456789ABCDEF" */
static char mbfl_hexchar_table[] = {
        0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
};



/*
 * encoding filter
 */
/* Evaluate `statement`; when its value is negative, return -1 from the enclosing function. */
#define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)


/*
 *  buffering converter
 */
/*
 * Create a buffering converter from two encoding numbers.
 *
 * Each number is resolved with mbfl_no2encoding(); a number that does not
 * resolve is replaced by &mbfl_encoding_pass before the request is handed
 * to mbfl_buffer_converter_new2(), whose result is returned as is.
 */
mbfl_buffer_converter *
mbfl_buffer_converter_new(
    enum mbfl_no_encoding from,
    enum mbfl_no_encoding to,
    int buf_initsz)
{
        const mbfl_encoding *src_enc = mbfl_no2encoding(from);
        const mbfl_encoding *dst_enc = mbfl_no2encoding(to);

        if (src_enc == NULL) {
                src_enc = &mbfl_encoding_pass;
        }
        if (dst_enc == NULL) {
                dst_enc = &mbfl_encoding_pass;
        }

        return mbfl_buffer_converter_new2(src_enc, dst_enc, buf_initsz);
}

/*
 * Create a buffering converter between two encodings.
 *
 * If a direct conversion table exists for the (from, to) pair, a single
 * filter (filter1) writes straight into the converter's memory device.
 * Otherwise two filters are chained: filter1 converts `from` to wchar and
 * feeds filter2, which converts wchar to `to` and writes into the device.
 *
 * from, to    encodings to convert between; both are dereferenced here.
 * buf_initsz  initial size passed to mbfl_memory_device_init(); a quarter
 *             of it is passed as the third argument.
 *
 * Returns the new converter, or NULL if allocation or filter creation
 * fails. On failure nothing allocated by this function is left behind.
 */
mbfl_buffer_converter *
mbfl_buffer_converter_new2(
        const mbfl_encoding *from,
        const mbfl_encoding *to,
    int buf_initsz)
{
        mbfl_buffer_converter *convd;

        /* allocate */
        convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
        if (convd == NULL) {
                return NULL;
        }

        /* initialize */
        convd->from = from;
        convd->to = to;

        /* create convert filter */
        convd->filter1 = NULL;
        convd->filter2 = NULL;
        if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
                convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
        } else {
                convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
                if (convd->filter2 != NULL) {
                        convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
                                        mbfl_no_encoding_wchar,
                                        (int (*)(int, void*))convd->filter2->filter_function,
                                        (int (*)(void*))convd->filter2->filter_flush,
                                        convd->filter2);
                        if (convd->filter1 == NULL) {
                                mbfl_convert_filter_delete(convd->filter2);
                                convd->filter2 = NULL;
                        }
                }
        }
        if (convd->filter1 == NULL) {
                /*
                 * No usable filter chain. Any filter2 was already deleted
                 * above, so only the converter itself is left to release;
                 * returning without freeing it would leak it.
                 */
                mbfl_free((void*)convd);
                return NULL;
        }

        mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);

        return convd;
}


/*
 * Destroy a buffering converter: delete whichever of its filters exist,
 * clear its memory device and free the converter itself.
 * A NULL converter is ignored.
 */
void
mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
{
        if (convd == NULL) {
                return;
        }

        if (convd->filter1 != NULL) {
                mbfl_convert_filter_delete(convd->filter1);
        }
        if (convd->filter2 != NULL) {
                mbfl_convert_filter_delete(convd->filter2);
        }

        mbfl_memory_device_clear(&convd->device);
        mbfl_free((void*)convd);
}

/*
 * Reset the converter's output memory device via
 * mbfl_memory_device_reset(). The filters are not touched.
 * A NULL converter is ignored, matching the other converter entry points.
 */
void
mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
{
        if (convd == NULL) {
                return;
        }
        mbfl_memory_device_reset(&convd->device);
}

/*
 * Set the illegal_mode field on the converter's output-side filter:
 * filter2 when it exists, otherwise filter1.
 *
 * Returns 0 when the converter has neither filter, 1 otherwise
 * (including when convd itself is NULL, in which case nothing is set).
 */
int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
{
        mbfl_convert_filter *target;

        if (convd == NULL) {
                return 1;
        }

        target = (convd->filter2 != NULL) ? convd->filter2 : convd->filter1;
        if (target == NULL) {
                return 0;
        }
        target->illegal_mode = mode;

        return 1;
}

/*
 * Set the illegal_substchar field on the converter's output-side filter:
 * filter2 when it exists, otherwise filter1.
 *
 * Returns 0 when the converter has neither filter, 1 otherwise
 * (including when convd itself is NULL, in which case nothing is set).
 */
int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
{
        mbfl_convert_filter *target;

        if (convd == NULL) {
                return 1;
        }

        target = (convd->filter2 != NULL) ? convd->filter2 : convd->filter1;
        if (target == NULL) {
                return 0;
        }
        target->illegal_substchar = substchar;

        return 1;
}

/*
 * Push up to n bytes starting at p through the converter's first filter.
 *
 * Feeding stops early when the filter function returns a negative value.
 * Returns the number of bytes that were not successfully fed: 0 when all
 * were accepted, and n unchanged when convd, p or filter1 is NULL.
 */
int
mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
{
        mbfl_convert_filter *filter;
        int (*feed)(int c, mbfl_convert_filter *filter);

        if (convd == NULL || p == NULL) {
                return n;
        }
        filter = convd->filter1;
        if (filter == NULL) {
                return n;
        }

        feed = filter->filter_function;
        for (; n > 0; n--) {
                if ((*feed)(*p++, filter) < 0) {
                        /* the rejected byte is not counted as consumed */
                        break;
                }
        }

        return n;
}

/*
 * Feed a whole string into the converter without reporting the stop
 * position: a thin wrapper that calls mbfl_buffer_converter_feed2()
 * with a NULL location pointer and returns its result.
 */
int
mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
{
        int status;

        status = mbfl_buffer_converter_feed2(convd, string, NULL);

        return status;
}

/*
 * Feed every byte of `string` through the converter's first filter.
 *
 * The output device is asked to grow before feeding, based on the
 * current write position and the input length.
 *
 * loc  optional; when non-NULL it receives the offset, relative to
 *      string->val, just past the last byte handed to the filter
 *      (the rejected byte is included when the filter fails).
 *
 * Returns 0 on success, -1 when convd or string is NULL or when the
 * filter function returns a negative value.
 */
int
mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
{
        int remaining;
        int status = 0;
        unsigned char *cursor;
        mbfl_convert_filter *filter;
        int (*feed)(int c, mbfl_convert_filter *filter);

        if (convd == NULL || string == NULL) {
                return -1;
        }
        mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);

        cursor = string->val;
        filter = convd->filter1;
        if (filter != NULL) {
                feed = filter->filter_function;
                for (remaining = string->len; remaining > 0; remaining--) {
                        if ((*feed)(*cursor++, filter) < 0) {
                                status = -1;
                                break;
                        }
                }
        }

        if (loc != NULL) {
                *loc = cursor - string->val;
        }
        return status;
}


/*
 * Flush the converter's filters, filter1 first and then filter2,
 * skipping whichever is absent.
 *
 * Returns 0, or -1 when convd is NULL.
 */
int
mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
{
        if (convd != NULL) {
                if (convd->filter1 != NULL) {
                        mbfl_convert_filter_flush(convd->filter1);
                }
                if (convd->filter2 != NULL) {
                        mbfl_convert_filter_flush(convd->filter2);
                }
                return 0;
        }

        return -1;
}

/*
 * Expose the converter's current output buffer through `result`.
 *
 * result->val is set to the device's own buffer pointer (no copy is
 * made here), result->len to the device's write position and
 * result->no_encoding to the target encoding number.
 *
 * Returns result, or NULL when convd, result or the device buffer is NULL.
 */
mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
{
        if (convd == NULL || result == NULL || convd->device.buffer == NULL) {
                return NULL;
        }

        result->no_encoding = convd->to->no_encoding;
        result->val = convd->device.buffer;
        result->len = convd->device.pos;

        return result;
}

/*
 * Store the target encoding number in `result` and hand the converter's
 * device to mbfl_memory_device_result(), returning whatever it returns.
 *
 * Returns NULL without touching anything when convd or result is NULL.
 */
mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
{
        if (convd != NULL && result != NULL) {
                result->no_encoding = convd->to->no_encoding;
                return mbfl_memory_device_result(&convd->device, result);
        }

        return NULL;
}

/*
 * One-shot conversion helper: feed `string`, flush both filters, then
 * collect the output into `result` via mbfl_memory_device_result().
 *
 * The return value of the feed step is not inspected. Returns NULL when
 * any argument is NULL, otherwise the value from
 * mbfl_memory_device_result().
 */
mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
                                  mbfl_string *result)
{
        if (convd == NULL || string == NULL || result == NULL) {
                return NULL;
        }

        mbfl_buffer_converter_feed(convd, string);
        /* flushes filter1, then filter2, when present */
        mbfl_buffer_converter_flush(convd);

        result->no_encoding = convd->to->no_encoding;
        return mbfl_memory_device_result(&convd->device, result);
}

/*
 * Return the sum of the num_illegalchar counters of the converter's
 * filters (filter1 and filter2, whichever exist); 0 when convd is NULL.
 */
int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
        int total;

        if (convd == NULL) {
                return 0;
        }

        total = (convd->filter1 != NULL) ? convd->filter1->num_illegalchar : 0;
        if (convd->filter2 != NULL) {
                total += convd->filter2->num_illegalchar;
        }

        return total;
}

/*
 * encoding detector
 */
/*
 * Create an encoding detector from a list of encoding numbers.
 *
 * One identify filter is created per list entry with
 * mbfl_identify_filter_new(); entries for which that returns NULL are
 * skipped, so filter_list_size may be smaller than elistsz.
 *
 * Returns the detector, or NULL when elist is NULL, elistsz is not
 * positive, or an allocation fails.
 */
mbfl_encoding_detector *
mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
{
        mbfl_encoding_detector *detector;
        mbfl_identify_filter *candidate;
        int idx, created;

        if (elist == NULL || elistsz <= 0) {
                return NULL;
        }

        detector = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
        if (detector == NULL) {
                return NULL;
        }
        detector->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
        if (detector->filter_list == NULL) {
                mbfl_free(detector);
                return NULL;
        }

        /* pack the successfully created filters at the front of the list */
        created = 0;
        for (idx = 0; idx < elistsz; idx++) {
                candidate = mbfl_identify_filter_new(elist[idx]);
                if (candidate == NULL) {
                        continue;
                }
                detector->filter_list[created] = candidate;
                created++;
        }
        detector->filter_list_size = created;
        detector->strict = strict;

        return detector;
}

/*
 * Create an encoding detector from a list of encoding descriptors.
 *
 * One identify filter is created per list entry with
 * mbfl_identify_filter_new2(); entries for which that returns NULL are
 * skipped, so filter_list_size may be smaller than elistsz.
 *
 * Returns the detector, or NULL when elist is NULL, elistsz is not
 * positive, or an allocation fails.
 */
mbfl_encoding_detector *
mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
{
        mbfl_encoding_detector *detector;
        mbfl_identify_filter *candidate;
        int idx, created;

        if (elist == NULL || elistsz <= 0) {
                return NULL;
        }

        detector = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
        if (detector == NULL) {
                return NULL;
        }
        detector->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
        if (detector->filter_list == NULL) {
                mbfl_free(detector);
                return NULL;
        }

        /* pack the successfully created filters at the front of the list */
        created = 0;
        for (idx = 0; idx < elistsz; idx++) {
                candidate = mbfl_identify_filter_new2(elist[idx]);
                if (candidate == NULL) {
                        continue;
                }
                detector->filter_list[created] = candidate;
                created++;
        }
        detector->filter_list_size = created;
        detector->strict = strict;

        return detector;
}


/*
 * Destroy an encoding detector: delete its identify filters from the
 * last one to the first, free the filter list, then free the detector.
 * A NULL detector is ignored.
 */
void
mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
{
        int idx;

        if (identd == NULL) {
                return;
        }

        if (identd->filter_list != NULL) {
                for (idx = identd->filter_list_size - 1; idx >= 0; idx--) {
                        mbfl_identify_filter_delete(identd->filter_list[idx]);
                }
                mbfl_free((void *)identd->filter_list);
        }
        mbfl_free((void *)identd);
}

/*
 * Feed the bytes of `string` to every identify filter whose flag is
 * still clear, counting how many filters raise their flag during this
 * call.
 *
 * Returns 1 as soon as that count reaches (number of filters - 1),
 * leaving the rest of the input unread; returns 0 when the input is
 * exhausted first or when identd, string or string->val is NULL.
 */
int
mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
{
        int idx, remaining, total, rejected;
        unsigned char *cursor;
        mbfl_identify_filter *candidate;

        if (identd == NULL || string == NULL || string->val == NULL) {
                return 0;
        }

        total = identd->filter_list_size;
        rejected = 0;
        cursor = string->val;
        for (remaining = string->len; remaining > 0; remaining--, cursor++) {
                for (idx = 0; idx < total; idx++) {
                        candidate = identd->filter_list[idx];
                        if (candidate->flag) {
                                /* already flagged; no longer fed */
                                continue;
                        }
                        (*candidate->filter_function)(*cursor, candidate);
                        if (candidate->flag) {
                                rejected++;
                        }
                }
                if (rejected >= total - 1) {
                        return 1;
                }
        }

        return 0;
}

/*
 * Pick the detected encoding from the detector's filters.
 *
 * First pass: consider filters whose flag is clear and, in strict mode,
 * whose status is zero. Fallback pass (only if the first found nothing):
 * consider any filter whose flag is clear.
 *
 * Both passes walk the list from the end to the start and keep
 * overwriting the choice, so the qualifying filter with the lowest
 * index wins. Returns NULL when identd is NULL or nothing qualifies.
 */
const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
{
        mbfl_identify_filter *candidate;
        const mbfl_encoding *winner = NULL;
        int idx;

        if (identd == NULL) {
                return NULL;
        }

        /* judge */
        for (idx = identd->filter_list_size - 1; idx >= 0; idx--) {
                candidate = identd->filter_list[idx];
                if (candidate->flag) {
                        continue;
                }
                if (identd->strict && candidate->status) {
                        continue;
                }
                winner = candidate->encoding;
        }

        /* fallback judge */
        if (winner == NULL) {
                for (idx = identd->filter_list_size - 1; idx >= 0; idx--) {
                        candidate = identd->filter_list[idx];
                        if (!candidate->flag) {
                                winner = candidate->encoding;
                        }
                }
        }

        return winner;
}

/*
 * Same decision as mbfl_encoding_detector_judge2(), reported as an
 * encoding number; mbfl_no_encoding_invalid when nothing was chosen.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
        const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd);

        if (encoding == NULL) {
                return mbfl_no_encoding_invalid;
        }
        return encoding->no_encoding;
}


/*
 * encoding converter
 */
/*
 * Convert `string` to the encoding numbered `toenc`, collecting the
 * output into `result`.
 *
 * If a direct conversion table exists for the pair, one filter writes
 * straight into a local memory device. Otherwise two filters are
 * chained through wchar, and the wchar -> toenc filter is configured
 * to substitute '?' (0x3f) for illegal characters.
 *
 * Returns the value of mbfl_memory_device_result(), or NULL when toenc
 * does not resolve, an argument is NULL, or no filter can be created.
 */
mbfl_string *
mbfl_convert_encoding(
    mbfl_string *string,
    mbfl_string *result,
    enum mbfl_no_encoding toenc)
{
        int remaining;
        unsigned char *cursor;
        mbfl_memory_device device;
        mbfl_convert_filter *head = NULL;       /* receives the input bytes */
        mbfl_convert_filter *tail = NULL;       /* wchar -> toenc stage, if chained */

        /* initialize */
        if (mbfl_no2encoding(toenc) == NULL || string == NULL || result == NULL) {
                return NULL;
        }

        if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
                head = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
        } else {
                tail = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
                if (tail != NULL) {
                        head = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))tail->filter_function, NULL, tail);
                        if (head == NULL) {
                                mbfl_convert_filter_delete(tail);
                        }
                }
        }
        if (head == NULL) {
                return NULL;
        }

        if (tail != NULL) {
                tail->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
                tail->illegal_substchar = 0x3f;         /* '?' */
        }

        mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);

        /* feed data; stop at the first byte the filter rejects */
        cursor = string->val;
        if (cursor != NULL) {
                for (remaining = string->len; remaining > 0; remaining--) {
                        if ((*head->filter_function)(*cursor++, head) < 0) {
                                break;
                        }
                }
        }

        mbfl_convert_filter_flush(head);
        mbfl_convert_filter_delete(head);
        if (tail != NULL) {
                mbfl_convert_filter_flush(tail);
                mbfl_convert_filter_delete(tail);
        }

        return mbfl_memory_device_result(&device, result);
}


/*
 * identify encoding
 */
/*
 * Guess the encoding of `string` from a list of candidate encoding
 * numbers.
 *
 * A temporary array of identify filters is initialised from elist
 * (entries whose init call returns non-zero are skipped). Each input
 * byte is given to every filter whose flag is still clear. In
 * non-strict mode feeding stops once the number of flagged filters
 * reaches (candidates - 1); in strict mode the whole input is read.
 *
 * The result is the encoding of the first filter whose flag is clear
 * and, in strict mode, whose status is zero; NULL when none qualifies
 * or the filter array cannot be allocated.
 */
const mbfl_encoding *
mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
{
        int idx, remaining, total, rejected;
        unsigned char *cursor;
        mbfl_identify_filter *filters, *candidate;
        const mbfl_encoding *winner = NULL;

        /* filters is an array of mbfl_identify_filter instances */
        filters = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
        if (filters == NULL) {
                return NULL;
        }

        total = 0;
        if (elist != NULL) {
                for (idx = 0; idx < elistsz; idx++) {
                        if (mbfl_identify_filter_init(&filters[total], elist[idx]) == 0) {
                                total++;
                        }
                }
        }

        /* feed data */
        remaining = string->len;
        cursor = string->val;
        if (cursor != NULL) {
                rejected = 0;
                for (; remaining > 0; remaining--, cursor++) {
                        for (idx = 0; idx < total; idx++) {
                                candidate = &filters[idx];
                                if (candidate->flag) {
                                        continue;
                                }
                                (*candidate->filter_function)(*cursor, candidate);
                                if (candidate->flag) {
                                        rejected++;
                                }
                        }
                        if (!strict && rejected >= total - 1) {
                                break;
                        }
                }
        }

        /* judge */
        for (idx = 0; idx < total; idx++) {
                candidate = &filters[idx];
                if (candidate->flag || (strict && candidate->status)) {
                        continue;
                }
                winner = candidate->encoding;
                break;
        }

        /* fall-back judge (applies the same acceptance test as the pass above) */
        if (winner == NULL) {
                for (idx = 0; idx < total; idx++) {
                        candidate = &filters[idx];
                        if (!candidate->flag && (!strict || !candidate->status)) {
                                winner = candidate->encoding;
                                break;
                        }
                }
        }

        /* cleanup: the filters are torn down in reverse order of initialisation */
        for (idx = total - 1; idx >= 0; idx--) {
                mbfl_identify_filter_cleanup(&filters[idx]);
        }
        mbfl_free((void *)filters);

        return winner;
}

/*
 * Guess the encoding of `string` from a list of candidate encoding
 * descriptors.
 *
 * A temporary array of identify filters is initialised from elist
 * (entries whose init call returns non-zero are skipped). Each input
 * byte is given to every filter whose flag is still clear. In
 * non-strict mode feeding stops once the number of flagged filters
 * reaches (candidates - 1); in strict mode the whole input is read.
 *
 * The result is the encoding of the first filter whose flag is clear
 * and, in strict mode, whose status is zero; NULL when none qualifies
 * or the filter array cannot be allocated.
 */
const mbfl_encoding *
mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
{
        int idx, remaining, total, rejected;
        unsigned char *cursor;
        mbfl_identify_filter *filters, *candidate;
        const mbfl_encoding *winner = NULL;

        /* filters is an array of mbfl_identify_filter instances */
        filters = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
        if (filters == NULL) {
                return NULL;
        }

        total = 0;
        if (elist != NULL) {
                for (idx = 0; idx < elistsz; idx++) {
                        if (mbfl_identify_filter_init2(&filters[total], elist[idx]) == 0) {
                                total++;
                        }
                }
        }

        /* feed data */
        remaining = string->len;
        cursor = string->val;
        if (cursor != NULL) {
                rejected = 0;
                for (; remaining > 0; remaining--, cursor++) {
                        for (idx = 0; idx < total; idx++) {
                                candidate = &filters[idx];
                                if (candidate->flag) {
                                        continue;
                                }
                                (*candidate->filter_function)(*cursor, candidate);
                                if (candidate->flag) {
                                        rejected++;
                                }
                        }
                        if (!strict && rejected >= total - 1) {
                                break;
                        }
                }
        }

        /* judge */
        for (idx = 0; idx < total; idx++) {
                candidate = &filters[idx];
                if (candidate->flag || (strict && candidate->status)) {
                        continue;
                }
                winner = candidate->encoding;
                break;
        }

        /* fall-back judge (applies the same acceptance test as the pass above) */
        if (winner == NULL) {
                for (idx = 0; idx < total; idx++) {
                        candidate = &filters[idx];
                        if (!candidate->flag && (!strict || !candidate->status)) {
                                winner = candidate->encoding;
                                break;
                        }
                }
        }

        /* cleanup: the filters are torn down in reverse order of initialisation */
        for (idx = total - 1; idx >= 0; idx--) {
                mbfl_identify_filter_cleanup(&filters[idx]);
        }
        mbfl_free((void *)filters);

        return winner;
}

/*
 *  strlen
 */
/*
 * Output callback that only counts: `data` points to an int counter,
 * which is incremented once per call. The character c is returned
 * unchanged.
 */
static int
filter_count_output(int c, void *data)
{
        int *counter = (int *)data;

        *counter += 1;

        return c;
}

/*
 * Count the characters in `string` according to its encoding.
 *
 *  - single-byte encodings: the byte length;
 *  - 2-byte / 4-byte wide encodings: byte length divided by 2 / 4;
 *  - encodings with an mblen_table: walk the bytes, advancing by the
 *    table entry for each lead byte;
 *  - anything else: convert to wchar through a filter and count the
 *    characters it emits.
 *
 * Returns the character count, or -1 when string is NULL, its encoding
 * number does not resolve, or the counting filter cannot be created.
 */
int
mbfl_strlen(mbfl_string *string)
{
        int len, n, m, k;
        unsigned char *p;
        const unsigned char *mbtab;
        const mbfl_encoding *encoding;

        /* string must be validated before string->no_encoding is read */
        if (string == NULL) {
                return -1;
        }
        encoding = mbfl_no2encoding(string->no_encoding);
        if (encoding == NULL) {
                return -1;
        }

        len = 0;
        if (encoding->flag & MBFL_ENCTYPE_SBCS) {
                len = string->len;
        } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
                len = string->len/2;
        } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
                len = string->len/4;
        } else if (encoding->mblen_table != NULL) {
                mbtab = encoding->mblen_table;
                n = 0;
                p = string->val;
                k = string->len;
                /* count: one character per lead byte, skipping its trail bytes */
                if (p != NULL) {
                        while (n < k) {
                                m = mbtab[*p];
                                n += m;
                                p += m;
                                len++;
                        }
                }
        } else {
                /* wchar filter: filter_count_output bumps len for each emitted character */
                mbfl_convert_filter *filter = mbfl_convert_filter_new(
                  string->no_encoding,
                  mbfl_no_encoding_wchar,
                  filter_count_output, 0, &len);
                if (filter == NULL) {
                        return -1;
                }
                /* count */
                n = string->len;
                p = string->val;
                if (p != NULL) {
                        while (n > 0) {
                                (*filter->filter_function)(*p++, filter);
                                n--;
                        }
                }
                mbfl_convert_filter_delete(filter);
        }

        return len;
}

 
/*
 *  strpos
 */
/* State carried between successive calls of collector_strpos(). */
struct collector_strpos_data {
        mbfl_convert_filter *next_filter;       /* not referenced by collector_strpos(); NOTE(review): presumably set up and used by the caller -- confirm */
        mbfl_wchar_device needle;               /* needle; its buffer elements are compared with each incoming character */
        int needle_len;                         /* needle_pos reaching this value signals a complete match */
        int start;                              /* comparison only happens once output >= start */
        int output;                             /* number of characters received so far (incremented on every call) */
        int found_pos;                          /* value of output at which the current candidate match began */
        int needle_pos;                         /* index of the next needle element to compare */
        int matched_pos;                        /* found_pos of the most recent complete match */
};

/*
 * Per-character callback that searches the incoming character stream for
 * the needle described by `data` (a struct collector_strpos_data).
 *
 * Characters received before pc->start are counted but not compared.
 * From then on each character is compared with the needle element at
 * needle_pos. When the last element matches, matched_pos is set to the
 * position where that match began and the search continues, so a later
 * complete match overwrites matched_pos.
 *
 * Always increments pc->output and returns c unchanged.
 */
static int
collector_strpos(int c, void* data)
{
        int *p, *h, *m, n;
        struct collector_strpos_data *pc = (struct collector_strpos_data*)data;

        if (pc->output >= pc->start) {
                if (c == (int)pc->needle.buffer[pc->needle_pos]) {
                        if (pc->needle_pos == 0) {
                                pc->found_pos = pc->output;                     /* found position */
                        }
                        pc->needle_pos++;                                               /* needle pointer */
                        if (pc->needle_pos >= pc->needle_len) {
                                pc->matched_pos = pc->found_pos;        /* matched position */
                                /* step back one element and reuse the mismatch handling below to keep searching */
                                pc->needle_pos--;
                                goto retry;
                        }
                } else if (pc->needle_pos != 0) {
retry:
                        /*
                         * A partial match is in progress. Compare the needle with
                         * itself, shifted by one more element on each round (h),
                         * advancing found_pos and shrinking needle_pos as the
                         * shift grows.
                         * NOTE(review): this looks like a search for a shorter
                         * prefix of the needle that the already-consumed input
                         * still matches -- confirm.
                         */
                        h = (int *)pc->needle.buffer;
                        h++;
                        for (;;) {
                                pc->found_pos++;
                                p = h;
                                m = (int *)pc->needle.buffer;
                                n = pc->needle_pos - 1;
                                while (n > 0 && *p == *m) {
                                        n--;
                                        p++;
                                        m++;
                                }
                                if (n <= 0) {
                                        /* the shifted comparison ran to completion: keep needle_pos only if c equals the element m now points at */
                                        if (*m != c) {
                                                pc->needle_pos = 0;
                                        }
                                        break;
                                } else {
                                        /* mismatch inside the shifted comparison: try the next larger shift */
                                        h++;
                                        pc->needle_pos--;
                                }
                        }
                }
        }

        pc->output++;
        return c;
}

/*
 *      oddlen
 */
/*
 * Reports how far the byte length of a string is from a whole number of
 * characters in its encoding.
 *
 * string - string to inspect (its no_encoding selects the rule)
 *
 * Returns:
 *   -1  if string is NULL or its encoding is unknown
 *    0  for single-byte encodings, and for encodings that have neither a
 *       fixed width nor a byte-length table
 *   string->len % 2 / string->len % 4 for 2-byte / 4-byte fixed-width
 *       encodings
 *   for encodings with a byte-length table: the number of bytes by which
 *       the last character, as announced by its lead byte, extends past
 *       string->len (0 when the string ends on a character boundary)
 */
int
mbfl_oddlen(mbfl_string *string)
{
        int len, n, m, k;
        unsigned char *p;
        const unsigned char *mbtab;
        const mbfl_encoding *encoding;

        if (string == NULL) {
                return -1;
        }
        encoding = mbfl_no2encoding(string->no_encoding);
        if (encoding == NULL) {
                return -1;
        }

        /* The fixed-width branches below need the real byte length; taking
         * the remainder of a constant 0 would always report 0. */
        len = string->len;
        if (encoding->flag & MBFL_ENCTYPE_SBCS) {
                return 0;
        } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
                return len % 2;
        } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
                return len % 4;
        } else if (encoding->mblen_table != NULL) {
                mbtab = encoding->mblen_table;
                n = 0;
                p = string->val;
                k = string->len;
                /* walk character by character using the lead-byte length table */
                if (p != NULL) {
                        while (n < k) {
                                m = mbtab[*p];
                                n += m;
                                p += m;
                        }
                }
                /* n stops at or beyond k; the excess is the overrun of the last character */
                return n-k;
        } else {
                /* no cheap way to tell for this kind of encoding */
                return 0;
        }
        /* NOT REACHED */
}

/*
 * Finds the character position of needle inside haystack.
 *
 * Both strings are converted to UTF-8 when they are not already in that
 * encoding, and the search itself runs on the UTF-8 bytes.
 *
 * haystack - string to search in
 * needle   - string to search for
 * offset   - forward search: number of characters to skip from the start.
 *            reverse search: a non-negative value is the number of leading
 *            characters excluded from the search; a negative value whose
 *            magnitude exceeds the needle's character count pulls the end
 *            of the searched region back by the difference.
 * reverse  - zero: scan from the front; non-zero: scan from the back
 *
 * Returns the character index of the match (counted from the start of the
 * haystack), or a negative code:
 *    -1  not found
 *    -4  conversion to UTF-8 failed
 *    -8  NULL argument, empty needle, or UTF-8 length table unavailable
 *   -16  offset lies outside the haystack
 */
int
mbfl_strpos(
    mbfl_string *haystack,
    mbfl_string *needle,
    int offset,
    int reverse)
{
        int result;
        mbfl_string _haystack_u8, _needle_u8;
        /* Both start out NULL: the cleanup code at "out" compares them with the
         * addresses of the temporaries, and it can be reached before needle_u8
         * has been assigned (haystack conversion failure). */
        const mbfl_string *haystack_u8 = NULL, *needle_u8 = NULL;
        const unsigned char *u8_tbl;

        if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
                return -8;
        }

        {
                const mbfl_encoding *u8_enc;
                u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
                if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
                        return -8;
                }
                u8_tbl = u8_enc->mblen_table;
        }

        if (haystack->no_encoding != mbfl_no_encoding_utf8) {
                mbfl_string_init(&_haystack_u8);
                haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
                if (haystack_u8 == NULL) {
                        result = -4;
                        goto out;
                }
        } else {
                haystack_u8 = haystack;
        }

        if (needle->no_encoding != mbfl_no_encoding_utf8) {
                mbfl_string_init(&_needle_u8);
                needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
                if (needle_u8 == NULL) {
                        result = -4;
                        goto out;
                }
        } else {
                needle_u8 = needle;
        }

        if (needle_u8->len < 1) {
                result = -8;
                goto out;
        }

        result = -1;
        if (haystack_u8->len < needle_u8->len) {
                goto out;
        }

        if (!reverse) {
                /* per-byte skip table, indexed by byte value */
                unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
                unsigned int needle_u8_len = needle_u8->len;
                unsigned int i;
                const unsigned char *p, *q, *e;
                const unsigned char *haystack_u8_val = haystack_u8->val,
                                    *needle_u8_val = needle_u8->val;
                for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
                        jtbl[i] = needle_u8_len + 1;
                }
                for (i = 0; i < needle_u8_len - 1; ++i) {
                        jtbl[needle_u8_val[i]] = needle_u8_len - i;
                }
                e = haystack_u8_val + haystack_u8->len;
                p = haystack_u8_val;
                /* skip "offset" characters using the UTF-8 lead-byte length table */
                while (--offset >= 0) {
                        if (p >= e) {
                                result = -16;
                                goto out;
                        }
                        p += u8_tbl[*p];
                }
                /* p marks the END of the current comparison window */
                p += needle_u8_len;
                if (p > e) {
                        goto out;
                }
                while (p <= e) {
                        const unsigned char *pv = p;
                        q = needle_u8_val + needle_u8_len;
                        /* compare the window with the needle, last byte first */
                        for (;;) {
                                if (q == needle_u8_val) {
                                        /* match: p is at its first byte. Count the bytes
                                         * before it that are not UTF-8 continuation bytes
                                         * to obtain the character index. */
                                        result = 0;
                                        while (p > haystack_u8_val) {
                                                unsigned char c = *--p;
                                                if (c < 0x80) {
                                                        ++result;
                                                } else if ((c & 0xc0) != 0x80) {
                                                        ++result;
                                                }
                                        }
                                        goto out;
                                }
                                if (*--q != *--p) {
                                        break;
                                }
                        }
                        /* shift by the table entry of the mismatching byte, but
                         * always move the window forward by at least one byte */
                        p += jtbl[*p];
                        if (p <= pv) {
                                p = pv + 1;
                        }
                }
        } else {
                /* per-byte skip table, indexed by byte value */
                unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
                unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
                unsigned int i;
                const unsigned char *p, *e, *q, *qe;
                const unsigned char *haystack_u8_val = haystack_u8->val,
                                    *needle_u8_val = needle_u8->val;
                for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
                        jtbl[i] = needle_u8_len;
                }
                /* fill the table and, in the same pass, count the needle's
                 * characters (bytes that are not continuation bytes) */
                for (i = needle_u8_len - 1; i > 0; --i) {
                        unsigned char c = needle_u8_val[i];
                        jtbl[c] = i;
                        if (c < 0x80) {
                                ++needle_len;
                        } else if ((c & 0xc0) != 0x80) {
                                ++needle_len;
                        }
                }
                {
                        /* byte 0 is not covered by the loop above */
                        unsigned char c = needle_u8_val[0];
                        if (c < 0x80) {
                                ++needle_len;
                        } else if ((c & 0xc0) != 0x80) {
                                ++needle_len;
                        }
                }
                /* e: lower bound of the searched region, p: its upper bound */
                e = haystack_u8_val;
                p = e + haystack_u8->len;
                qe = needle_u8_val + needle_u8_len;
                if (offset < 0) {
                        if (-offset > needle_len) {
                                /* pull the upper bound back by (-offset - needle_len) characters */
                                offset += needle_len;
                                while (offset < 0) {
                                        unsigned char c;
                                        if (p <= e) {
                                                result = -16;
                                                goto out;
                                        }
                                        c = *(--p);
                                        if (c < 0x80) {
                                                ++offset;
                                        } else if ((c & 0xc0) != 0x80) {
                                                ++offset;
                                        }
                                }
                        }
                } else {
                        /* advance the lower bound by "offset" characters */
                        const unsigned char *ee = haystack_u8_val + haystack_u8->len;
                        while (--offset >= 0) {
                                if (e >= ee) {
                                        result = -16;
                                        goto out;
                                }
                                e += u8_tbl[*e];
                        }
                }
                if (p < e + needle_u8_len) {
                        goto out;
                }
                /* p now marks the START of the current comparison window */
                p -= needle_u8_len;
                while (p >= e) {
                        const unsigned char *pv = p;
                        q = needle_u8_val;
                        /* compare the window with the needle, first byte first */
                        for (;;) {
                                if (q == qe) {
                                        /* match: rewind p to the first matched byte, then
                                         * count the non-continuation bytes before it */
                                        result = 0;
                                        p -= needle_u8_len;
                                        while (p > haystack_u8_val) {
                                                unsigned char c = *--p;
                                                if (c < 0x80) {
                                                        ++result;
                                                } else if ((c & 0xc0) != 0x80) {
                                                        ++result;
                                                }
                                        }
                                        goto out;
                                }
                                if (*q != *p) {
                                        break;
                                }
                                ++p, ++q;
                        }
                        /* shift by the table entry of the mismatching byte, but
                         * always move the window backward by at least one byte */
                        p -= jtbl[*p];
                        if (p >= pv) {
                                p = pv - 1;
                        }
                }
        }
out:
        /* release only the temporaries that conversion actually produced */
        if (haystack_u8 == &_haystack_u8) {
                mbfl_string_clear(&_haystack_u8);
        }
        if (needle_u8 == &_needle_u8) {
                mbfl_string_clear(&_needle_u8);
        }
        return result;
}

/*
 *  substr_count
 */

/*
 * Counts how many times needle occurs in haystack.
 *
 * The needle is first converted to wide characters; the haystack is then
 * streamed through a converter whose output callback is collector_strpos.
 * After every input byte the collector state is polled, and each reported
 * match bumps the counter and restarts needle matching from scratch.
 *
 * Returns the number of occurrences, or
 *   -8  when haystack or needle is NULL
 *   -4  when a converter cannot be created or the needle buffer is missing
 *   -2  when the converted needle is empty
 */
int
mbfl_substr_count(
    mbfl_string *haystack,
    mbfl_string *needle
   )
{
        struct collector_strpos_data state;
        mbfl_convert_filter *conv;
        unsigned char *src;
        int remaining;
        int count = 0;

        if (haystack == NULL || needle == NULL) {
                return -8;
        }

        /* step 1: needle -> wchar buffer */
        mbfl_wchar_device_init(&state.needle);
        conv = mbfl_convert_filter_new(
          needle->no_encoding,
          mbfl_no_encoding_wchar,
          mbfl_wchar_device_output, 0, &state.needle);
        if (conv == NULL) {
                return -4;
        }
        src = needle->val;
        if (src != NULL) {
                for (remaining = needle->len; remaining > 0; remaining--) {
                        if ((*conv->filter_function)(*src++, conv) < 0) {
                                break;
                        }
                }
        }
        mbfl_convert_filter_flush(conv);
        mbfl_convert_filter_delete(conv);

        state.needle_len = state.needle.pos;
        if (state.needle.buffer == NULL) {
                return -4;
        }
        if (state.needle_len <= 0) {
                mbfl_wchar_device_clear(&state.needle);
                return -2;
        }

        /* step 2: haystack -> wchar, matched on the fly by collector_strpos */
        conv = mbfl_convert_filter_new(
          haystack->no_encoding,
          mbfl_no_encoding_wchar,
          collector_strpos, 0, &state);
        if (conv == NULL) {
                mbfl_wchar_device_clear(&state.needle);
                return -4;
        }
        state.start = 0;
        state.output = 0;
        state.needle_pos = 0;
        state.found_pos = 0;
        state.matched_pos = -1;

        src = haystack->val;
        if (src != NULL) {
                for (remaining = haystack->len; remaining > 0; remaining--) {
                        if ((*conv->filter_function)(*src++, conv) < 0) {
                                state.matched_pos = -4;
                                break;
                        }
                        if (state.matched_pos >= 0) {
                                /* a complete match was reported: count it and re-arm */
                                ++count;
                                state.matched_pos = -1;
                                state.needle_pos = 0;
                        }
                }
        }

        mbfl_convert_filter_flush(conv);
        mbfl_convert_filter_delete(conv);
        mbfl_wchar_device_clear(&state.needle);

        return count;
}

/*
 *  substr
 */
/* State shared between mbfl_substr and its collector_substr callback. */
struct collector_substr_data {
        mbfl_convert_filter *next_filter;       /* filter that receives the characters inside the range */
        int start;                              /* index of the first character to forward */
        int stop;                               /* index at which the collector stops (exclusive) */
        int output;                             /* number of characters the collector has accepted so far */
};

/*
 * Output callback used by mbfl_substr: forwards the characters whose index
 * lies in [start, stop) to the next filter.
 *
 * Returns -1 once the stop index has been reached (the character is then
 * neither forwarded nor counted); otherwise returns c.
 */
static int
collector_substr(int c, void* data)
{
        struct collector_substr_data *state = data;

        if (state->output < state->stop) {
                if (state->start <= state->output) {
                        (*state->next_filter->filter_function)(c, state->next_filter);
                }
                state->output++;
                return c;
        }

        return -1;
}

/*
 * Extracts a substring measured in characters.
 *
 * string - source string
 * result - receives the substring; it is re-initialised here and inherits
 *          the language and encoding of the source
 * from   - index of the first character to copy
 * length - number of characters to copy
 *
 * Encodings that are single-byte, fixed 2/4-byte, or that provide a
 * byte-length table are handled by computing byte offsets and copying the
 * bytes directly. Every other encoding is streamed through a pair of
 * conversion filters (to wide characters and back) with collector_substr
 * selecting the wanted range in between.
 *
 * Returns result on success, or NULL when an argument is NULL, the
 * encoding is unknown, a filter cannot be created or memory cannot be
 * allocated.
 */
mbfl_string *
mbfl_substr(
    mbfl_string *string,
    mbfl_string *result,
    int from,
    int length)
{
        const mbfl_encoding *encoding;
        int n, m, k, len, start, end;
        unsigned char *p, *w;
        const unsigned char *mbtab;

        /* the arguments must be checked before string is dereferenced */
        if (string == NULL || result == NULL) {
                return NULL;
        }
        encoding = mbfl_no2encoding(string->no_encoding);
        if (encoding == NULL) {
                return NULL;
        }
        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = string->no_encoding;

        if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
           encoding->mblen_table != NULL) {
                len = string->len;
                /* single-byte default: character offsets are byte offsets */
                start = from;
                end = from + length;
                if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
                        start *= 2;
                        end = start + length*2;
                } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
                        start *= 4;
                        end = start + length*4;
                } else if (encoding->mblen_table != NULL) {
                        mbtab = encoding->mblen_table;
                        start = 0;
                        end = 0;
                        n = 0;
                        k = 0;
                        p = string->val;
                        if (p != NULL) {
                                /* search start position */
                                while (k <= from) {
                                        start = n;
                                        if (n >= len) {
                                                break;
                                        }
                                        m = mbtab[*p];
                                        n += m;
                                        p += m;
                                        k++;
                                }
                                /* detect end position */
                                k = 0;
                                end = start;
                                while (k < length) {
                                        end = n;
                                        if (n >= len) {
                                                break;
                                        }
                                        m = mbtab[*p];
                                        n += m;
                                        p += m;
                                        k++;
                                }
                        }
                }

                /* clamp the byte range into [0, len] and make it non-negative */
                if (start > len) {
                        start = len;
                }
                if (start < 0) {
                        start = 0;
                }
                if (end > len) {
                        end = len;
                }
                if (end < 0) {
                        end = 0;
                }
                if (start > end) {
                        start = end;
                }

                /* allocate memory and copy */
                n = end - start;
                result->len = 0;
                result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
                if (w != NULL) {
                        p = string->val;
                        if (p != NULL) {
                                p += start;
                                result->len = n;
                                while (n > 0) {
                                        *w++ = *p++;
                                        n--;
                                }
                        }
                        /* four zero bytes terminate the copy */
                        *w++ = '\0';
                        *w++ = '\0';
                        *w++ = '\0';
                        *w = '\0';
                } else {
                        result = NULL;
                }
        } else {
                mbfl_memory_device device;
                struct collector_substr_data pc;
                mbfl_convert_filter *decoder;
                mbfl_convert_filter *encoder;

                /* output code filter: wchar -> original encoding, writing to device */
                decoder = mbfl_convert_filter_new(
                    mbfl_no_encoding_wchar,
                    string->no_encoding,
                    mbfl_memory_device_output, 0, &device);
                /* wchar filter: original encoding -> wchar, feeding the collector */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_substr, 0, &pc);
                if (decoder == NULL || encoder == NULL) {
                        mbfl_convert_filter_delete(encoder);
                        mbfl_convert_filter_delete(decoder);
                        return NULL;
                }
                /* The device is set up only after both filters exist, so the
                 * early return above leaves nothing behind that would need
                 * releasing (mbfl_strcut uses the same order). */
                mbfl_memory_device_init(&device, length + 1, 0);
                pc.next_filter = decoder;
                pc.start = from;
                pc.stop = from + length;
                pc.output = 0;

                /* feed data; the collector returns a negative value past the range */
                p = string->val;
                n = string->len;
                if (p != NULL) {
                        while (n > 0) {
                                if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                        break;
                                }
                                n--;
                        }
                }

                mbfl_convert_filter_flush(encoder);
                mbfl_convert_filter_flush(decoder);
                result = mbfl_memory_device_result(&device, result);
                mbfl_convert_filter_delete(encoder);
                mbfl_convert_filter_delete(decoder);
        }

        return result;
}

/*
 *  strcut
 */
/*
 * Cuts a piece out of a string by BYTE offsets (from and length are added
 * directly to string->val).
 *
 * string - source string
 * result - receives the cut; it is re-initialised here and inherits the
 *          language and encoding of the source
 * from   - byte offset where the cut starts (clamped to string->len)
 * length - maximum number of bytes in the result
 *
 * For fixed-width encodings the offsets are rounded down to a multiple of
 * the unit size; for encodings with a byte-length table the start and end
 * are moved back to the boundaries found by walking the table from the
 * beginning of the string. Other encodings are streamed through a pair of
 * conversion filters while the output size is watched.
 *
 * Returns result, or NULL on invalid arguments (NULL pointers, negative
 * from or length), unknown encoding, or allocation / filter creation
 * failure.
 */
mbfl_string *
mbfl_strcut(
    mbfl_string *string,
    mbfl_string *result,
    int from,
    int length)
{
        const mbfl_encoding *encoding;
        mbfl_memory_device device;

        /* validate the parameters */
        if (string == NULL || string->val == NULL || result == NULL) {
                return NULL;
        }

        if (from < 0 || length < 0) {
                return NULL;
        }

        if (from >= string->len) {
                from = string->len;
        }

        encoding = mbfl_no2encoding(string->no_encoding);
        if (encoding == NULL) {
                return NULL;
        }

        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = string->no_encoding;

        if ((encoding->flag & (MBFL_ENCTYPE_SBCS
                                | MBFL_ENCTYPE_WCS2BE
                                | MBFL_ENCTYPE_WCS2LE
                                | MBFL_ENCTYPE_WCS4BE
                                | MBFL_ENCTYPE_WCS4LE))
                        || encoding->mblen_table != NULL) {
                const unsigned char *start = NULL;
                const unsigned char *end = NULL;
                unsigned char *w;
                unsigned int sz;

                if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
                        /* round the start down to a multiple of 2 bytes */
                        from &= -2;

                        if (from + length >= string->len) {
                                length = string->len - from;
                        }

                        start = string->val + from;
                        end   = start + (length & -2);
                } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
                        /* round the start down to a multiple of 4 bytes */
                        from &= -4;

                        if (from + length >= string->len) {
                                length = string->len - from;
                        }

                        start = string->val + from;
                        end   = start + (length & -4);
                } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
                        if (from + length >= string->len) {
                                length = string->len - from;
                        }

                        start = string->val + from;
                        end = start + length;
                } else if (encoding->mblen_table != NULL) {
                        const unsigned char *mbtab = encoding->mblen_table;
                        const unsigned char *p, *q;
                        int m;

                        /* search start position */
                        for (m = 0, p = string->val, q = p + from;
                                        p < q; p += (m = mbtab[*p]));

                        /* overshot the requested offset: step back one character */
                        if (p > q) {
                                p -= m;
                        }

                        start = p;

                        /* search end position */
                        if ((start - string->val) + length >= (int)string->len) {
                                end = string->val + string->len;
                        } else {
                                for (q = p + length; p < q; p += (m = mbtab[*p]));

                                /* overshot the byte budget: drop the last character */
                                if (p > q) {
                                        p -= m;
                                }
                                end = p;
                        }
                } else {
                        /* never reached */
                        return NULL;
                }

                /* allocate memory and copy string */
                sz = end - start;
                if ((w = (unsigned char*)mbfl_calloc(sz + 8,
                                sizeof(unsigned char))) == NULL) {
                        return NULL;
                }

                memcpy(w, start, sz);
                w[sz] = '\0';
                w[sz + 1] = '\0';
                w[sz + 2] = '\0';
                w[sz + 3] = '\0';

                result->val = w;
                result->len = sz;
        } else {
                /* Note the naming: "encoder" converts the string's encoding to
                 * wchar, "decoder" converts wchar back to the string's encoding. */
                mbfl_convert_filter *encoder     = NULL;
                mbfl_convert_filter *decoder     = NULL;
                const unsigned char *p, *q, *r;
                /* snapshots of both filters, the input pointer and the device
                 * position: bk is the snapshot that gets restored when the output
                 * grows beyond length, _bk is a working snapshot */
                struct {
                        mbfl_convert_filter encoder;
                        mbfl_convert_filter decoder;
                        const unsigned char *p;
                        int pos;
                } bk, _bk;

                /* output code filter */
                if (!(decoder = mbfl_convert_filter_new(
                                mbfl_no_encoding_wchar,
                                string->no_encoding,
                                mbfl_memory_device_output, 0, &device))) {
                        return NULL;
                }

                /* wchar filter */
                if (!(encoder = mbfl_convert_filter_new(
                                string->no_encoding,
                                mbfl_no_encoding_wchar,
                                mbfl_filter_output_null,
                                NULL, NULL))) {
                        mbfl_convert_filter_delete(decoder);
                        return NULL;
                }

                mbfl_memory_device_init(&device, length + 8, 0);

                p = string->val;

                /* search start position */
                /* (the encoder output is still discarded here, so these bytes
                 * only advance the encoder's state) */
                for (q = string->val + from; p < q; p++) {
                        (*encoder->filter_function)(*p, encoder);
                }

                /* switch the drain direction */
                encoder->output_function = (int(*)(int,void *))decoder->filter_function;
                encoder->flush_function = (int(*)(void *))decoder->filter_flush;
                encoder->data = decoder;

                q = string->val + string->len;

                /* save the encoder, decoder state and the pointer */
                mbfl_convert_filter_copy(decoder, &_bk.decoder);
                mbfl_convert_filter_copy(encoder, &_bk.encoder);
                _bk.p = p;
                _bk.pos = device.pos;

                /* never ask for more bytes than remain in the input */
                if (length > q - p) {
                        length = q - p;
                }

                if (length >= 20) {
                        /* output a little shorter than "length" */
                        /* XXX: the constant "20" was determined purely on the heuristics. */
                        for (r = p + length - 20; p < r; p++) {
                                (*encoder->filter_function)(*p, encoder);
                        }

                        /* if the offset of the resulting string exceeds the length,
                         * then restore the state */
                        if (device.pos > length) {
                                p = _bk.p;
                                device.pos = _bk.pos;
                                decoder->filter_dtor(decoder);
                                encoder->filter_dtor(encoder);
                                mbfl_convert_filter_copy(&_bk.decoder, decoder);
                                mbfl_convert_filter_copy(&_bk.encoder, encoder);
                                bk = _bk;
                        } else {
                                /* save the encoder, decoder state and the pointer */
                                mbfl_convert_filter_copy(decoder, &bk.decoder);
                                mbfl_convert_filter_copy(encoder, &bk.encoder);
                                bk.p = p;
                                bk.pos = device.pos;

                                /* flush the stream */
                                (*encoder->filter_flush)(encoder);

                                /* if the offset of the resulting string exceeds the length,
                                 * then restore the state */
                                if (device.pos > length) {
                                        bk.decoder.filter_dtor(&bk.decoder);
                                        bk.encoder.filter_dtor(&bk.encoder);

                                        p = _bk.p;
                                        device.pos = _bk.pos;
                                        decoder->filter_dtor(decoder);
                                        encoder->filter_dtor(encoder);
                                        mbfl_convert_filter_copy(&_bk.decoder, decoder);
                                        mbfl_convert_filter_copy(&_bk.encoder, encoder);
                                        bk = _bk;
                                } else {
                                        /* the bulk output fits even after flushing: discard
                                         * the initial snapshot and continue from the state
                                         * saved just before the flush */
                                        _bk.decoder.filter_dtor(&_bk.decoder);
                                        _bk.encoder.filter_dtor(&_bk.encoder);

                                        p = bk.p;
                                        device.pos = bk.pos;
                                        decoder->filter_dtor(decoder);
                                        encoder->filter_dtor(encoder);
                                        mbfl_convert_filter_copy(&bk.decoder, decoder);
                                        mbfl_convert_filter_copy(&bk.encoder, encoder);
                                }
                        }
                } else {
                        bk = _bk;
                }

                /* detect end position */
                /* (feed one byte at a time; after each byte check the output size
                 * both before and after a trial flush, and roll back to bk as
                 * soon as it would exceed length) */
                while (p < q) {
                        (*encoder->filter_function)(*p, encoder);

                        if (device.pos > length) {
                                /* restore filter */
                                p = bk.p;
                                device.pos = bk.pos;
                                decoder->filter_dtor(decoder);
                                encoder->filter_dtor(encoder);
                                mbfl_convert_filter_copy(&bk.decoder, decoder);
                                mbfl_convert_filter_copy(&bk.encoder, encoder);
                                break;
                        }

                        p++;

                        /* backup current state */
                        mbfl_convert_filter_copy(decoder, &_bk.decoder);
                        mbfl_convert_filter_copy(encoder, &_bk.encoder);
                        _bk.pos = device.pos;
                        _bk.p = p;

                        (*encoder->filter_flush)(encoder);

                        if (device.pos > length) {
                                _bk.decoder.filter_dtor(&_bk.decoder);
                                _bk.encoder.filter_dtor(&_bk.encoder);

                                /* restore filter */
                                p = bk.p;
                                device.pos = bk.pos;
                                decoder->filter_dtor(decoder);
                                encoder->filter_dtor(encoder);
                                mbfl_convert_filter_copy(&bk.decoder, decoder);
                                mbfl_convert_filter_copy(&bk.encoder, encoder);
                                break;
                        }

                        /* the byte fits: undo the trial flush and promote the
                         * working snapshot to be the new restore point */
                        bk.decoder.filter_dtor(&bk.decoder);
                        bk.encoder.filter_dtor(&bk.encoder);

                        p = _bk.p;
                        device.pos = _bk.pos;
                        decoder->filter_dtor(decoder);
                        encoder->filter_dtor(encoder);
                        mbfl_convert_filter_copy(&_bk.decoder, decoder);
                        mbfl_convert_filter_copy(&_bk.encoder, encoder);

                        bk = _bk;
                }

                (*encoder->filter_flush)(encoder);

                bk.decoder.filter_dtor(&bk.decoder);
                bk.encoder.filter_dtor(&bk.encoder);

                result = mbfl_memory_device_result(&device, result);

                mbfl_convert_filter_delete(encoder);
                mbfl_convert_filter_delete(decoder);
        }

        return result;
}


/*
 *  strwidth
 */
/*
 * Tells whether code point c falls inside one of the [begin, end] ranges
 * listed in mbfl_eaw_table.
 *
 * Returns 1 when it does, 0 otherwise.
 */
static int is_fullwidth(int c)
{
        int idx = 0;

        /* shortcut: values below the first entry's begin are rejected at once */
        if (c < mbfl_eaw_table[0].begin) {
                return 0;
        }

        /* linear scan over every range in the table */
        while (idx < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0])) {
                if (c >= mbfl_eaw_table[idx].begin && c <= mbfl_eaw_table[idx].end) {
                        return 1;
                }
                idx++;
        }

        return 0;
}

/*
 * Output callback that accumulates display width into the int that data
 * points to: 2 columns for a character is_fullwidth() accepts, 1 otherwise.
 *
 * Returns c unchanged.
 */
static int
filter_count_width(int c, void* data)
{
        int *total = (int *)data;

        if (is_fullwidth(c)) {
                *total += 2;
        } else {
                *total += 1;
        }

        return c;
}

/*
 * Computes the display width of a string: the string is converted to wide
 * characters and every character contributes 2 columns when is_fullwidth()
 * accepts it and 1 column otherwise.
 *
 * string - string to measure
 *
 * Returns the accumulated width (0 for an empty string or a NULL value
 * buffer), or -1 when string is NULL or the conversion filter cannot be
 * created.
 */
int
mbfl_strwidth(mbfl_string *string)
{
        int len, n;
        unsigned char *p;
        mbfl_convert_filter *filter;

        /* guard against a NULL argument before it is dereferenced */
        if (string == NULL) {
                return -1;
        }

        len = 0;
        if (string->len > 0 && string->val != NULL) {
                /* wchar filter: every decoded character is handed to filter_count_width */
                filter = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    filter_count_width, 0, &len);
                if (filter == NULL) {
                        /* nothing was created, so there is nothing to delete */
                        return -1;
                }

                /* feed data */
                p = string->val;
                n = string->len;
                while (n > 0) {
                        (*filter->filter_function)(*p++, filter);
                        n--;
                }

                mbfl_convert_filter_flush(filter);
                mbfl_convert_filter_delete(filter);
        }

        return len;
}


/*
 *  strimwidth
 */
/* Shared state between mbfl_strimwidth() and its collector_strimwidth() callback. */
struct collector_strimwidth_data {
        mbfl_convert_filter *decoder;           /* wchar -> string encoding, writes into `device` */
        mbfl_convert_filter *decoder_backup;    /* holds the decoder state saved at the first overflow */
        mbfl_memory_device device;              /* output buffer */
        int from;                               /* characters with index below this are not output */
        int width;                              /* width limit currently in effect */
        int outwidth;                           /* width accumulated so far */
        int outchar;                            /* number of characters seen so far */
        int status;                             /* 0: within limit, incremented per overflowing character, 10: pass-through */
        int endpos;                             /* device.pos recorded at the first overflow */
};

/*
 * Output callback of the wchar filter used by mbfl_strimwidth().
 *
 * In status 10 every character is forwarded to the decoder untouched.
 * Otherwise characters with index below pc->from are only counted; later
 * ones add 2 or 1 to pc->outwidth and are forwarded to the decoder.  When
 * the limit is exceeded for the first time, the current output position and
 * the decoder state are recorded so the caller can rewind to that point.
 *
 * Returns c, or -1 for a character that pushed the width over the limit.
 */
static int
collector_strimwidth(int c, void* data)
{
        struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
        int ret = c;

        if (pc->status == 10) {
                (*pc->decoder->filter_function)(c, pc->decoder);
                return ret;
        }

        if (pc->outchar >= pc->from) {
                pc->outwidth += (is_fullwidth(c) ? 2: 1);

                if (pc->outwidth > pc->width) {
                        if (pc->status == 0) {
                                /* first overflow: remember where the output may be cut */
                                pc->endpos = pc->device.pos;
                                /* presumably copies decoder state into decoder_backup -- verify argument order */
                                mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
                        }
                        pc->status++;
                        ret = -1;
                }

                /* the character is forwarded whether or not it overflowed */
                (*pc->decoder->filter_function)(c, pc->decoder);
        }
        pc->outchar++;

        return ret;
}

/*
 * Trim `string` to at most `width` display columns, starting at character
 * index `from`, and append `marker` when something had to be cut.
 *
 * Procedure:
 *   1. Feed the string with the limit set to width minus the marker width.
 *   2. If that overflowed and the marker has a width, widen the limit to
 *      the full `width` and feed the remainder.  If the remainder fits
 *      (status stays 1) the whole string is kept and no marker is added.
 *      Otherwise the output is rewound to the first overflow position and
 *      the marker is appended.
 *   3. If it overflowed and there is no usable marker, the output is simply
 *      rewound to the first overflow position.
 *
 * `marker` may be NULL.  A marker whose width cannot be computed
 * (mbfl_strwidth() returns a negative value) is treated as having no width,
 * so the limit is never widened beyond `width`.
 *
 * Returns `result` on success, NULL when `string` or `result` is NULL or a
 * filter cannot be created.  On filter-creation failure the output device
 * is released before returning.
 */
mbfl_string *
mbfl_strimwidth(
    mbfl_string *string,
    mbfl_string *marker,
    mbfl_string *result,
    int from,
    int width)
{
        struct collector_strimwidth_data pc;
        mbfl_convert_filter *encoder;
        int n, mkwidth;
        unsigned char *p;

        if (string == NULL || result == NULL) {
                return NULL;
        }
        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = string->no_encoding;
        mbfl_memory_device_init(&pc.device, width, 0);

        /* output code filter */
        pc.decoder = mbfl_convert_filter_new(
            mbfl_no_encoding_wchar,
            string->no_encoding,
            mbfl_memory_device_output, 0, &pc.device);
        pc.decoder_backup = mbfl_convert_filter_new(
            mbfl_no_encoding_wchar,
            string->no_encoding,
            mbfl_memory_device_output, 0, &pc.device);
        /* wchar filter */
        encoder = mbfl_convert_filter_new(
            string->no_encoding,
            mbfl_no_encoding_wchar,
            collector_strimwidth, 0, &pc);
        if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
                mbfl_convert_filter_delete(encoder);
                mbfl_convert_filter_delete(pc.decoder);
                mbfl_convert_filter_delete(pc.decoder_backup);
                /* the device was initialised above and is not handed to result on this path */
                mbfl_memory_device_clear(&pc.device);
                return NULL;
        }
        mkwidth = 0;
        if (marker) {
                mkwidth = mbfl_strwidth(marker);
                if (mkwidth < 0) {
                        /* width unknown: do not let the error code enlarge the limit */
                        mkwidth = 0;
                }
        }
        pc.from = from;
        pc.width = width - mkwidth;
        pc.outwidth = 0;
        pc.outchar = 0;
        pc.status = 0;
        pc.endpos = 0;

        /* feed data */
        p = string->val;
        n = string->len;
        if (p != NULL) {
                /* pass 1: stop at the first character that exceeds width - mkwidth */
                while (n > 0) {
                        n--;
                        if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                break;
                        }
                }
                mbfl_convert_filter_flush(encoder);
                if (pc.status != 0 && mkwidth > 0) {
                        /* pass 2: check whether the rest fits into the full width */
                        pc.width += mkwidth;
                        while (n > 0) {
                                if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                        break;
                                }
                                n--;
                        }
                        mbfl_convert_filter_flush(encoder);
                        if (pc.status != 1) {
                                /* overflowed again: rewind to the cut point and append the marker */
                                pc.status = 10;
                                pc.device.pos = pc.endpos;
                                mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
                                mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
                                p = marker->val;
                                n = marker->len;
                                while (n > 0) {
                                        if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                                break;
                                        }
                                        n--;
                                }
                                mbfl_convert_filter_flush(encoder);
                        }
                } else if (pc.status != 0) {
                        /* overflowed with no marker to add: rewind to the cut point */
                        pc.device.pos = pc.endpos;
                        mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
                }
                mbfl_convert_filter_flush(pc.decoder);
        }
        result = mbfl_memory_device_result(&pc.device, result);
        mbfl_convert_filter_delete(encoder);
        mbfl_convert_filter_delete(pc.decoder);
        mbfl_convert_filter_delete(pc.decoder_backup);

        return result;
}

/*
 * Run `string` through the vtbl_tl_jisx0201_jisx0208 translation filter,
 * configured with `mode`, and store the converted text in `result`.
 *
 * Filter chain: string encoding -> wchar (encoder) -> translation filter
 * (tl_filter) -> wchar -> string encoding (decoder) -> memory device.
 *
 * Returns `result` on success.  Returns NULL when an argument is NULL, the
 * encoding is unknown, or any filter/parameter allocation fails; on those
 * failure paths the memory device is released, since its buffer is never
 * handed over to `result`.
 */
mbfl_string *
mbfl_ja_jp_hantozen(
    mbfl_string *string,
    mbfl_string *result,
    int mode)
{
        int n;
        unsigned char *p;
        const mbfl_encoding *encoding;
        mbfl_memory_device device;
        mbfl_string *ret = NULL;
        mbfl_convert_filter *decoder = NULL;
        mbfl_convert_filter *encoder = NULL;
        mbfl_convert_filter *tl_filter = NULL;
        mbfl_convert_filter *next_filter = NULL;
        mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;

        /* validate parameters */
        if (string == NULL || result == NULL) {
                return NULL;
        }

        encoding = mbfl_no2encoding(string->no_encoding);
        if (encoding == NULL) {
                return NULL;
        }

        mbfl_memory_device_init(&device, string->len, 0);
        mbfl_string_init(result);

        result->no_language = string->no_language;
        result->no_encoding = string->no_encoding;

        /* last stage: wchar -> string encoding, writing into the device */
        decoder = mbfl_convert_filter_new(
                mbfl_no_encoding_wchar,
                string->no_encoding,
                mbfl_memory_device_output, 0, &device);
        if (decoder == NULL) {
                goto out;
        }
        next_filter = decoder;

        param =
                (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
        if (param == NULL) {
                goto out;
        }

        param->mode = mode;

        /* middle stage: the translation filter, feeding the decoder */
        tl_filter = mbfl_convert_filter_new2(
                &vtbl_tl_jisx0201_jisx0208,
                (int(*)(int, void*))next_filter->filter_function,
                (int(*)(void*))next_filter->filter_flush,
                next_filter);
        if (tl_filter == NULL) {
                /* param is not yet owned by a filter, so free it here */
                mbfl_free(param);
                goto out;
        }

        /* from here on param is released together with tl_filter */
        tl_filter->opaque = param;
        next_filter = tl_filter;

        /* first stage: string encoding -> wchar, feeding the translation filter */
        encoder = mbfl_convert_filter_new(
                string->no_encoding,
                mbfl_no_encoding_wchar,
                (int(*)(int, void*))next_filter->filter_function,
                (int(*)(void*))next_filter->filter_flush,
                next_filter);
        if (encoder == NULL) {
                goto out;
        }

        /* feed data */
        p = string->val;
        n = string->len;
        if (p != NULL) {
                while (n > 0) {
                        if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                break;
                        }
                        n--;
                }
        }

        mbfl_convert_filter_flush(encoder);
        ret = mbfl_memory_device_result(&device, result);
out:
        if (ret == NULL) {
                /* no result was produced: drop the device buffer instead of leaking it */
                mbfl_memory_device_clear(&device);
        }

        if (tl_filter != NULL) {
                if (tl_filter->opaque != NULL) {
                        mbfl_free(tl_filter->opaque);
                }
                mbfl_convert_filter_delete(tl_filter);
        }

        if (decoder != NULL) {
                mbfl_convert_filter_delete(decoder);
        }

        if (encoder != NULL) {
                mbfl_convert_filter_delete(encoder);
        }

        return ret;
}


/*
 *  MIME header encode
 */
/* State of one MIME header encoder, created by mime_header_encoder_new(). */
struct mime_header_encoder_data {
        mbfl_convert_filter *conv1_filter;              /* input encoding -> wchar, outputs to mime_header_encoder_collector */
        mbfl_convert_filter *block_filter;              /* wchar -> wchar, outputs to mime_header_encoder_block_collector */
        mbfl_convert_filter *conv2_filter;              /* wchar -> output charset, piped into encod_filter */
        mbfl_convert_filter *conv2_filter_backup;       /* holds a saved copy of conv2_filter state during a trial encode */
        mbfl_convert_filter *encod_filter;              /* output charset -> transfer encoding, writes into outdev */
        mbfl_convert_filter *encod_filter_backup;       /* holds a saved copy of encod_filter state during a trial encode */
        mbfl_memory_device outdev;                      /* encoded header being built */
        mbfl_memory_device tmpdev;                      /* characters buffered until the collector decides how to emit them */
        int status1;                                    /* collector state: 0, 1 (ordinary characters buffered) or 11 (inside encoded word) */
        int status2;                                    /* block collector state: 1 once the encoded-word prefix has been written */
        int prevpos;                                    /* outdev.pos saved before a trial encode */
        int linehead;                                   /* outdev.pos at which the current output line starts */
        int firstindent;                                /* extra columns counted on the first line; set to 0 after a fold */
        int encnamelen;                                 /* length of encname */
        int lwsplen;                                    /* length of lwsp */
        char encname[128];                              /* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
        char lwsp[16];                                  /* sequence inserted when folding a line */
};

static int
mime_header_encoder_block_collector(int c, void *data)
{
        int n;
        struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;

        switch (pe->status2) {
        case 1: /* encoded word */
                pe->prevpos = pe->outdev.pos;
                mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
                mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
                (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
                (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
                (*pe->encod_filter->filter_flush)(pe->encod_filter);
                n = pe->outdev.pos - pe->linehead + pe->firstindent;
                pe->outdev.pos = pe->prevpos;
                mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
                mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
                if (n >= 74) {
                        (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
                        (*pe->encod_filter->filter_flush)(pe->encod_filter);
                        mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
                        mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
                        pe->linehead = pe->outdev.pos;
                        pe->firstindent = 0;
                        mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
                        c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
                } else {
                        c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
                }
                break;

        default:
                mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
                c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
                pe->status2 = 1;
                break;
        }

        return c;
}

/*
 * Output callback of conv1_filter: decides, character by character, whether
 * text can be copied to the header as-is or has to go into an encoded word.
 *
 * status1 == 11: an encoded word is open; every character is forwarded to
 *                block_filter.
 * otherwise:     characters that need no encoding are buffered in tmpdev.
 *                A space that ends a buffered run shorter than 74 bytes
 *                moves the run to outdev (folding first when the line would
 *                exceed 74).  Any other character that needs encoding
 *                opens an encoded word: the buffered run and the character
 *                are pushed through block_filter and status1 becomes 11.
 *
 * Always returns c.
 */
static int
mime_header_encoder_collector(int c, void *data)
{
        /*
         * 1 = the byte is not buffered as ordinary text; 0 = ordinary character.
         * Besides control and 8-bit bytes, the table marks SPACE (0x20),
         * '=' (0x3d), '?' (0x3f), '_' (0x5f) and DEL (0x7f).
         * The table is read-only.
         */
        static const int qp_table[256] = {
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 */
                1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
                0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x30 */
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
        };

        int n;
        struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;

        switch (pe->status1) {
        case 11:        /* encoded word */
                (*pe->block_filter->filter_function)(c, pe->block_filter);
                break;

        default:        /* ASCII */
                if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
                        mbfl_memory_device_output(c, &pe->tmpdev);
                        pe->status1 = 1;
                } else if (pe->status1 == 0 && c == 0x20) {     /* repeat SPACE */
                        mbfl_memory_device_output(c, &pe->tmpdev);
                } else {
                        if (pe->tmpdev.pos < 74 && c == 0x20) {
                                /* a space ended a plain run: copy the run to the output */
                                n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
                                if (n > 74) {
                                        mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);         /* LWSP */
                                        pe->linehead = pe->outdev.pos;
                                        pe->firstindent = 0;
                                } else if (pe->outdev.pos > 0) {
                                        mbfl_memory_device_output(0x20, &pe->outdev);
                                }
                                mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
                                mbfl_memory_device_reset(&pe->tmpdev);
                                pe->status1 = 0;
                        } else {
                                /* start an encoded word; fold first if the prefix would not fit */
                                n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
                                if (n > 60)  {
                                        mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);         /* LWSP */
                                        pe->linehead = pe->outdev.pos;
                                        pe->firstindent = 0;
                                } else if (pe->outdev.pos > 0)  {
                                        mbfl_memory_device_output(0x20, &pe->outdev);
                                }
                                /* the buffered run becomes part of the encoded word */
                                mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
                                mbfl_memory_device_reset(&pe->tmpdev);
                                (*pe->block_filter->filter_function)(c, pe->block_filter);
                                pe->status1 = 11;
                        }
                }
                break;
        }

        return c;
}

/*
 * Finish encoding and hand the accumulated header over to `result`.
 *
 * If an encoded word is open (status1 >= 10) the converters are flushed and
 * the word is closed with "?=".  Otherwise any text still buffered in
 * tmpdev is appended to the output, preceded by the folding sequence when
 * the line would exceed 74 columns, or by a single space when it would not.
 * The line length includes firstindent, matching the computation in
 * mime_header_encoder_collector().
 *
 * The buffer positions and both state variables are reset afterwards.
 * Returns whatever mbfl_memory_device_result() returns for outdev.
 */
mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
{
        if (pe->status1 >= 10) {
                (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
                (*pe->encod_filter->filter_flush)(pe->encod_filter);
                mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);         /* ?= */
        } else if (pe->tmpdev.pos > 0) {
                if (pe->outdev.pos > 0) {
                        /* firstindent still counts while no fold has happened yet */
                        if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent) > 74) {
                                mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
                        } else {
                                mbfl_memory_device_output(0x20, &pe->outdev);
                        }
                }
                mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
        }
        mbfl_memory_device_reset(&pe->tmpdev);
        pe->prevpos = 0;
        pe->linehead = 0;
        pe->status1 = 0;
        pe->status2 = 0;

        return mbfl_memory_device_result(&pe->outdev, result);
}

struct mime_header_encoder_data*
mime_header_encoder_new(
    enum mbfl_no_encoding incode,
    enum mbfl_no_encoding outcode,
    enum mbfl_no_encoding transenc)
{
        int n;
        const char *s;
        const mbfl_encoding *outencoding;
        struct mime_header_encoder_data *pe;

        /* get output encoding and check MIME charset name */
        outencoding = mbfl_no2encoding(outcode);
        if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
                return NULL;
        }

        pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
        if (pe == NULL) {
                return NULL;
        }

        mbfl_memory_device_init(&pe->outdev, 0, 0);
        mbfl_memory_device_init(&pe->tmpdev, 0, 0);
        pe->prevpos = 0;
        pe->linehead = 0;
        pe->firstindent = 0;
        pe->status1 = 0;
        pe->status2 = 0;

        /* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
        n = 0;
        pe->encname[n++] = 0x3d;
        pe->encname[n++] = 0x3f;
        s = outencoding->mime_name;
        while (*s) {
                pe->encname[n++] = *s++;
        }
        pe->encname[n++] = 0x3f;
        if (transenc == mbfl_no_encoding_qprint) {
                pe->encname[n++] = 0x51;
        } else {
                pe->encname[n++] = 0x42;
                transenc = mbfl_no_encoding_base64;
        }
        pe->encname[n++] = 0x3f;
        pe->encname[n] = '\0';
        pe->encnamelen = n;

        n = 0;
        pe->lwsp[n++] = 0x0d;
        pe->lwsp[n++] = 0x0a;
        pe->lwsp[n++] = 0x20;
        pe->lwsp[n] = '\0';
        pe->lwsplen = n;

        /* transfer encode filter */
        pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
        pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));

        /* Output code filter */
        pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
        pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);

        /* encoded block filter */
        pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);

        /* Input code filter */
        pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);

        if (pe->encod_filter == NULL ||
            pe->encod_filter_backup == NULL ||
            pe->conv2_filter == NULL ||
            pe->conv2_filter_backup == NULL ||
            pe->conv1_filter == NULL) {
                mime_header_encoder_delete(pe);
                return NULL;
        }

        if (transenc == mbfl_no_encoding_qprint) {
                pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
                pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
        } else {
                pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
                pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
        }

        return pe;
}

void
mime_header_encoder_delete(struct mime_header_encoder_data *pe)
{
        if (pe) {
                mbfl_convert_filter_delete(pe->conv1_filter);
                mbfl_convert_filter_delete(pe->block_filter);
                mbfl_convert_filter_delete(pe->conv2_filter);
                mbfl_convert_filter_delete(pe->conv2_filter_backup);
                mbfl_convert_filter_delete(pe->encod_filter);
                mbfl_convert_filter_delete(pe->encod_filter_backup);
                mbfl_memory_device_clear(&pe->outdev);
                mbfl_memory_device_clear(&pe->tmpdev);
                mbfl_free((void*)pe);
        }
}

int
mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
{
        return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
}

/*
 * Encode `string` as a MIME header value and store it in `result`.
 *
 *   outcode  - charset used inside the encoded words
 *   encoding - transfer encoding passed to mime_header_encoder_new()
 *   linefeed - optional folding sequence; at most 8 bytes are used and a
 *              space is appended.  NULL keeps the encoder's default.
 *   indent   - columns already used on the first line; only values in the
 *              range 1..73 are applied.
 *
 * Returns `result` on success, NULL when `string` or `result` is NULL or
 * the encoder cannot be created.  A string without a buffer yields an
 * empty encoding.
 */
mbfl_string *
mbfl_mime_header_encode(
    mbfl_string *string,
    mbfl_string *result,
    enum mbfl_no_encoding outcode,
    enum mbfl_no_encoding encoding,
    const char *linefeed,
    int indent)
{
        int n;
        unsigned char *p;
        struct mime_header_encoder_data *pe;

        if (string == NULL || result == NULL) {
                return NULL;
        }

        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = mbfl_no_encoding_ascii;

        pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
        if (pe == NULL) {
                return NULL;
        }

        if (linefeed != NULL) {
                /* lwsp holds 16 bytes: up to 8 from linefeed, one space, one NUL */
                n = 0;
                while (*linefeed && n < 8) {
                        pe->lwsp[n++] = *linefeed++;
                }
                pe->lwsp[n++] = 0x20;
                pe->lwsp[n] = '\0';
                pe->lwsplen = n;
        }
        if (indent > 0 && indent < 74) {
                pe->firstindent = indent;
        }

        n = string->len;
        p = string->val;
        if (p != NULL) {
                while (n > 0) {
                        (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
                        n--;
                }
        }

        result = mime_header_encoder_result(pe, result);
        mime_header_encoder_delete(pe);

        return result;
}


/*
 *  MIME header decode
 */
/* State of one MIME header decoder, created by mime_header_decoder_new(). */
struct mime_header_decoder_data {
        mbfl_convert_filter *deco_filter;       /* transfer encoding -> 8bit, piped into conv1_filter */
        mbfl_convert_filter *conv1_filter;      /* current input charset -> wchar, piped into conv2_filter */
        mbfl_convert_filter *conv2_filter;      /* wchar -> output encoding, writes into outdev */
        mbfl_memory_device outdev;              /* decoded output */
        mbfl_memory_device tmpdev;              /* bytes held back while a possible encoded word is examined */
        int cspos;                              /* offset in tmpdev where the charset name starts */
        int status;                             /* state of mime_header_decoder_collector() */
        enum mbfl_no_encoding encoding;         /* transfer encoding of the current encoded word */
        enum mbfl_no_encoding incode;           /* charset of the current encoded word */
        enum mbfl_no_encoding outcode;          /* encoding of the decoded output */
};

/*
 * State machine that decodes a MIME header one byte at a time.
 *
 * Bytes that may turn out to be the start of an encoded word
 * ("=?charset?X?") are held in tmpdev.  When the candidate proves invalid
 * the held bytes are passed on to conv1_filter as ordinary text; when it
 * proves valid they are discarded and the payload is routed through
 * deco_filter.
 *
 * States (pd->status):
 *   0 (default)  ordinary text
 *   1            '=' seen
 *   2            reading the charset name
 *   3            expecting the transfer-encoding letter
 *   4            expecting the '?' that precedes the payload
 *   5            inside the encoded payload
 *   6            '?' seen inside the payload, may be the closing "?="
 *   7            just after an encoded word
 *   8            line break seen after an encoded word
 *   9            line break seen in ordinary text
 *
 * Always returns c.
 */
static int
mime_header_decoder_collector(int c, void* data)
{
        const mbfl_encoding *encoding;
        struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;

        switch (pd->status) {
        case 1:
                if (c == 0x3f) {                /* ? */
                        /* "=?" seen: the charset name starts at the next byte */
                        mbfl_memory_device_output(c, &pd->tmpdev);
                        pd->cspos = pd->tmpdev.pos;
                        pd->status = 2;
                } else {
                        /* not an encoded word: release what was held back */
                        mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                        mbfl_memory_device_reset(&pd->tmpdev);
                        if (c == 0x3d) {                /* = */
                                /* another '=': hold it and stay in this state */
                                mbfl_memory_device_output(c, &pd->tmpdev);
                        } else if (c == 0x0d || c == 0x0a) {    /* CR or LF */
                                pd->status = 9;
                        } else {
                                (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
                                pd->status = 0;
                        }
                }
                break;
        case 2:         /* store charset string */
                if (c == 0x3f) {                /* ? */
                        /* identify charset */
                        /* terminate the name temporarily so it can be looked up */
                        mbfl_memory_device_output('\0', &pd->tmpdev);
                        encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
                        if (encoding != NULL) {
                                pd->incode = encoding->no_encoding;
                                pd->status = 3;
                        }
                        /* drop the terminator again and keep the '?' (an unknown name leaves the state at 2) */
                        mbfl_memory_device_unput(&pd->tmpdev);
                        mbfl_memory_device_output(c, &pd->tmpdev);
                } else {
                        mbfl_memory_device_output(c, &pd->tmpdev);
                        if (pd->tmpdev.pos > 100) {             /* too long charset string */
                                pd->status = 0;
                        } else if (c == 0x0d || c == 0x0a) {    /* CR or LF */
                                mbfl_memory_device_unput(&pd->tmpdev);
                                pd->status = 9;
                        }
                        if (pd->status != 2) {
                                /* candidate abandoned: emit the held bytes as text */
                                mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                                mbfl_memory_device_reset(&pd->tmpdev);
                        }
                }
                break;
        case 3:         /* identify encoding */
                mbfl_memory_device_output(c, &pd->tmpdev);
                if (c == 0x42 || c == 0x62) {           /* 'B' or 'b' */
                        pd->encoding = mbfl_no_encoding_base64;
                        pd->status = 4;
                } else if (c == 0x51 || c == 0x71) {    /* 'Q' or 'q' */
                        pd->encoding = mbfl_no_encoding_qprint;
                        pd->status = 4;
                } else {
                        if (c == 0x0d || c == 0x0a) {   /* CR or LF */
                                mbfl_memory_device_unput(&pd->tmpdev);
                                pd->status = 9;
                        } else {
                                pd->status = 0;
                        }
                        /* unknown encoding letter: emit the held bytes as text */
                        mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                        mbfl_memory_device_reset(&pd->tmpdev);
                }
                break;
        case 4:         /* reset filter */
                mbfl_memory_device_output(c, &pd->tmpdev);
                if (c == 0x3f) {                /* ? */
                        /* charset convert filter */
                        mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
                        /* decode filter */
                        mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
                        pd->status = 5;
                } else {
                        if (c == 0x0d || c == 0x0a) {   /* CR or LF */
                                mbfl_memory_device_unput(&pd->tmpdev);
                                pd->status = 9;
                        } else {
                                pd->status = 0;
                        }
                        mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                }
                /* on success this discards the held "=?charset?X?" prefix */
                mbfl_memory_device_reset(&pd->tmpdev);
                break;
        case 5:         /* encoded block */
                if (c == 0x3f) {                /* ? */
                        pd->status = 6;
                } else {
                        (*pd->deco_filter->filter_function)(c, pd->deco_filter);
                }
                break;
        case 6:         /* check end position */
                if (c == 0x3d) {                /* = */
                        /* flush and reset filter */
                        (*pd->deco_filter->filter_flush)(pd->deco_filter);
                        (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
                        mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
                        pd->status = 7;
                } else {
                        /* the '?' was payload after all: pass it to the decoder */
                        (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
                        if (c != 0x3f) {                /* ? */
                                (*pd->deco_filter->filter_function)(c, pd->deco_filter);
                                pd->status = 5;
                        }
                        /* a second '?' keeps the state at 6 so it is checked in turn */
                }
                break;
        case 7:         /* after encoded block */
                if (c == 0x0d || c == 0x0a) {   /* CR LF */
                        pd->status = 8;
                } else {
                        /* hold the byte; spaces and tabs stay held and the state is unchanged */
                        mbfl_memory_device_output(c, &pd->tmpdev);
                        if (c == 0x3d) {                /* = */
                                pd->status = 1;
                        } else if (c != 0x20 && c != 0x09) {            /* not space */
                                mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                                mbfl_memory_device_reset(&pd->tmpdev);
                                pd->status = 0;
                        }
                }
                break;
        case 8:         /* folding */
        case 9:         /* folding */
                /* CR, LF, SPACE and TAB are skipped while folding */
                if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
                        if (c == 0x3d) {                /* = */
                                if (pd->status == 8) {
                                        /* after an encoded word: hold the space together with the '=' */
                                        mbfl_memory_device_output(0x20, &pd->tmpdev);   /* SPACE */
                                } else {
                                        /* after ordinary text: output the space immediately */
                                        (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
                                }
                                mbfl_memory_device_output(c, &pd->tmpdev);
                                pd->status = 1;
                        } else {
                                /* the fold is replaced by a single space before the next character */
                                mbfl_memory_device_output(0x20, &pd->tmpdev);
                                mbfl_memory_device_output(c, &pd->tmpdev);
                                mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
                                mbfl_memory_device_reset(&pd->tmpdev);
                                pd->status = 0;
                        }
                }
                break;
        default:                /* non encoded block */
                if (c == 0x0d || c == 0x0a) {   /* CR LF */
                        pd->status = 9;
                } else if (c == 0x3d) {         /* = */
                        /* possible start of an encoded word: hold it back */
                        mbfl_memory_device_output(c, &pd->tmpdev);
                        pd->status = 1;
                } else {
                        (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
                }
                break;
        }

        return c;
}

/*
 * Finish decoding and hand the decoded text over to `result`.
 *
 * Inside an encoded payload (states 5 and 6) the decode and charset filters
 * are flushed.  In states 1-4 and 7-9 the bytes still held in tmpdev are
 * passed on to conv1_filter.  The output filter is then flushed, tmpdev is
 * reset and the state returns to 0.
 *
 * Returns whatever mbfl_memory_device_result() returns for outdev.
 */
mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
{
        const int state = pd->status;

        if (state == 5 || state == 6) {
                (*pd->deco_filter->filter_flush)(pd->deco_filter);
                (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
        } else if ((state >= 1 && state <= 4) || (state >= 7 && state <= 9)) {
                mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
        }

        (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
        mbfl_memory_device_reset(&pd->tmpdev);
        pd->status = 0;

        return mbfl_memory_device_result(&pd->outdev, result);
}

/*
 * Allocate and set up a MIME header decoder that produces `outcode`.
 *
 * The filter chain is built back to front because every filter needs the
 * next stage as its output target:
 *   deco_filter -> conv1_filter -> conv2_filter -> outdev
 *
 * Returns the new decoder, or NULL when the allocation or the creation of
 * any filter fails (everything created so far is released in that case).
 */
struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)
{
        struct mime_header_decoder_data *dec;

        dec = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(*dec));
        if (!dec) {
                return NULL;
        }

        /* plain state: no transfer encoding seen yet, input treated as ASCII */
        dec->status = 0;
        dec->cspos = 0;
        dec->outcode = outcode;
        dec->incode = mbfl_no_encoding_ascii;
        dec->encoding = mbfl_no_encoding_pass;
        mbfl_memory_device_init(&dec->outdev, 0, 0);
        mbfl_memory_device_init(&dec->tmpdev, 0, 0);

        /* wchar -> output encoding, writing into outdev */
        dec->conv2_filter = mbfl_convert_filter_new(
            mbfl_no_encoding_wchar, dec->outcode,
            mbfl_memory_device_output, 0, &dec->outdev);
        /* input charset -> wchar, piped into conv2 */
        dec->conv1_filter = mbfl_convert_filter_new(
            dec->incode, mbfl_no_encoding_wchar,
            mbfl_filter_output_pipe, 0, dec->conv2_filter);
        /* transfer decoding -> raw bytes, piped into conv1 */
        dec->deco_filter = mbfl_convert_filter_new(
            dec->encoding, mbfl_no_encoding_8bit,
            mbfl_filter_output_pipe, 0, dec->conv1_filter);

        if (!dec->conv2_filter || !dec->conv1_filter || !dec->deco_filter) {
                mime_header_decoder_delete(dec);
                return NULL;
        }

        return dec;
}

void
mime_header_decoder_delete(struct mime_header_decoder_data *pd)
{
        if (pd) {
                mbfl_convert_filter_delete(pd->conv2_filter);
                mbfl_convert_filter_delete(pd->conv1_filter);
                mbfl_convert_filter_delete(pd->deco_filter);
                mbfl_memory_device_clear(&pd->outdev);
                mbfl_memory_device_clear(&pd->tmpdev);
                mbfl_free((void*)pd);
        }
}

/*
 * Feed one input byte `c` to the decoder `pd`.
 * Forwards to mime_header_decoder_collector() and returns its result.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
        int rv;

        rv = mime_header_decoder_collector(c, pd);

        return rv;
}

/*
 * Decode a MIME header value in one call.
 *
 * string  - input bytes (val/len) and the language to copy into the result
 * result  - initialised here; receives the decoded text in `outcode`
 * outcode - encoding of the decoded output
 *
 * Returns the value produced by mime_header_decoder_result(), or NULL when
 * the decoder cannot be created.
 */
mbfl_string *
mbfl_mime_header_decode(
    mbfl_string *string,
    mbfl_string *result,
    enum mbfl_no_encoding outcode)
{
        struct mime_header_decoder_data *decoder;
        unsigned char *cursor;
        int remaining;

        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = outcode;

        decoder = mime_header_decoder_new(outcode);
        if (decoder == NULL) {
                return NULL;
        }

        /* push every input byte through the decoder state machine */
        cursor = string->val;
        for (remaining = string->len; remaining > 0; remaining--) {
                mime_header_decoder_collector(*cursor++, decoder);
        }

        result = mime_header_decoder_result(decoder, result);
        mime_header_decoder_delete(decoder);

        return result;
}



/*
 *  convert HTML numeric entity
 */
/* State shared by the HTML numeric entity encode/decode collectors. */
struct collector_htmlnumericentity_data {
        mbfl_convert_filter *decoder;   /* next filter; every output character is passed to it */
        int status;     /* decode parser state: 0 = plain text, 1 = after '&', 2 = after "&#",
                         * 3 = reading decimal digits, 4 = after "&#x", 5 = reading hex digits */
        int cache;      /* numeric value accumulated from the digits read so far */
        int digit;      /* number of digits accumulated into cache */
        int *convmap;   /* conversion map: mapsize entries of four ints each, used as
                         * [0] range start, [1] range end, [2] offset, [3] mask */
        int mapsize;    /* number of four-int entries in convmap */
};

/*
 * Output callback that writes characters covered by the conversion map as
 * decimal numeric references ("&#NNN;") and passes every other character on
 * unchanged.
 *
 * c    - character to process
 * data - struct collector_htmlnumericentity_data describing the map and the
 *        filter that receives the output
 *
 * The first map entry whose range contains c and whose computed value
 * ((c + offset) & mask) is non-negative is used.  Always returns c.
 */
static int
collector_encode_htmlnumericentity(int c, void *data)
{
        struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
        int *entry;
        int idx, count, value, divisor, digit, started;

        count = pc->mapsize;
        for (idx = 0; idx < count; idx++) {
                entry = &(pc->convmap[idx*4]);
                if (c < entry[0] || c > entry[1]) {
                        continue;       /* outside this entry's range */
                }
                value = (c + entry[2]) & entry[3];
                if (value < 0) {
                        continue;       /* not encodable through this entry */
                }

                (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */

                /* at most eight decimal digits are written, without leading zeros */
                value %= 100000000;
                started = 0;
                for (divisor = 10000000; divisor > 0; divisor /= 10) {
                        digit = value / divisor;
                        if (digit != 0 || started) {
                                started = 1;
                                value %= divisor;
                                (*pc->decoder->filter_function)(mbfl_hexchar_table[digit], pc->decoder);
                        }
                }
                if (!started) {
                        /* the value is zero: write a single '0' */
                        (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
                }

                (*pc->decoder->filter_function)(0x3b, pc->decoder);     /* ';' */
                return c;
        }

        /* no map entry applied: pass the character through */
        (*pc->decoder->filter_function)(c, pc->decoder);

        return c;
}

static int
collector_decode_htmlnumericentity(int c, void *data)
{
        struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
        int f, n, s, r, d, size, *mapelm;

        switch (pc->status) {
        case 1:
                if (c == 0x23) {        /* '#' */
                        pc->status = 2;
                } else {
                        pc->status = 0;
                        (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        case 2:
                if (c == 0x78) {        /* 'x' */
                        pc->status = 4;
                } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
                        pc->cache = c - 0x30;
                        pc->status = 3;
                        pc->digit = 1;
                } else {
                        pc->status = 0;
                        (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                        (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        case 3:
                s = 0;
                f = 0;
                if (c >= 0x30 && c <= 0x39) {   /* '0' - '9' */
                        if (pc->digit > 9) {
                                pc->status = 0;
                                s = pc->cache;
                                f = 1;
                        } else {
                                s = pc->cache*10 + c - 0x30;
                                pc->cache = s;
                                pc->digit++;
                        }
                } else {
                        pc->status = 0;
                        s = pc->cache;
                        f = 1;
                        n = 0;
                        size = pc->mapsize;
                        while (n < size) {
                                mapelm = &(pc->convmap[n*4]);
                                d = s - mapelm[2];
                                if (d >= mapelm[0] && d <= mapelm[1]) {
                                        f = 0;
                                        (*pc->decoder->filter_function)(d, pc->decoder);
                                        if (c != 0x3b) {        /* ';' */
                                                (*pc->decoder->filter_function)(c, pc->decoder);
                                        }
                                        break;
                                }
                                n++;
                        }
                }
                if (f) {
                        (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                        (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                        r = 1;
                        n = pc->digit;
                        while (n > 0) {
                                r *= 10;
                                n--;
                        }
                        s %= r;
                        r /= 10;
                        while (r > 0) {
                                d = s/r;
                                s %= r;
                                r /= 10;
                                (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
                        }
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        case 4:
                if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
                        pc->cache = c - 0x30;
                        pc->status = 5;
                        pc->digit = 1;
                } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
                        pc->cache = c - 0x41 + 10;
                        pc->status = 5;
                        pc->digit = 1;
                } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
                        pc->cache = c - 0x61 + 10;
                        pc->status = 5;
                        pc->digit = 1;
                } else {
                        pc->status = 0;
                        (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                        (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                        (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        case 5:
                s = 0;
                f = 0;
                if ((c >= 0x30 && c <= 0x39) ||
                        (c >= 0x41 && c <= 0x46) ||
                        (c >= 0x61 && c <= 0x66)) {     /* '0' - '9' or 'a' - 'f'  */
                        if (pc->digit > 9) {
                                pc->status = 0;
                                s = pc->cache;
                                f = 1;
                        } else {
                                if (c >= 0x30 && c <= 0x39) {
                                        s = pc->cache*16 + (c - 0x30);
                                } else if (c >= 0x41 && c <= 0x46)  {
                                        s = pc->cache*16 + (c - 0x41 + 10);
                                } else {
                                        s = pc->cache*16 + (c - 0x61 + 10);
                                }
                                pc->cache = s;
                                pc->digit++;
                        }
                } else {
                        pc->status = 0;
                        s = pc->cache;
                        f = 1;
                        n = 0;
                        size = pc->mapsize;
                        while (n < size) {
                                mapelm = &(pc->convmap[n*4]);
                                d = s - mapelm[2];
                                if (d >= mapelm[0] && d <= mapelm[1]) {
                                        f = 0;
                                        (*pc->decoder->filter_function)(d, pc->decoder);
                                        if (c != 0x3b) {        /* ';' */
                                                (*pc->decoder->filter_function)(c, pc->decoder);
                                        }
                                        break;
                                }
                                n++;
                        }
                }
                if (f) {
                        (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                        (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                        (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
                        r = 1;
                        n = pc->digit;
                        while (n > 0) {
                                r *= 16;
                                n--;
                        }
                        s %= r;
                        r /= 16;
                        while (r > 0) {
                                d = s/r;
                                s %= r;
                                r /= 16;
                                (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
                        }
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        default:
                if (c == 0x26) {        /* '&' */
                        pc->status = 1;
                } else {
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
        }

        return c;
}

/*
 * Output callback that writes characters covered by the conversion map as
 * hexadecimal numeric references ("&#xHHH;") and passes every other
 * character on unchanged.
 *
 * c    - character to process
 * data - struct collector_htmlnumericentity_data describing the map and the
 *        filter that receives the output
 *
 * The first map entry whose range contains c and whose computed value
 * ((c + offset) & mask) is non-negative is used.  Always returns c.
 */
static int
collector_encode_hex_htmlnumericentity(int c, void *data)
{
        struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
        int *entry;
        int idx, count, value, divisor, digit, started;

        count = pc->mapsize;
        for (idx = 0; idx < count; idx++) {
                entry = &(pc->convmap[idx*4]);
                if (c < entry[0] || c > entry[1]) {
                        continue;       /* outside this entry's range */
                }
                value = (c + entry[2]) & entry[3];
                if (value < 0) {
                        continue;       /* not encodable through this entry */
                }

                (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */
                (*pc->decoder->filter_function)(0x78, pc->decoder);     /* 'x' */

                /* at most six hex digits are written, without leading zeros */
                value %= 0x1000000;
                started = 0;
                for (divisor = 0x100000; divisor > 0; divisor /= 16) {
                        digit = value / divisor;
                        if (digit != 0 || started) {
                                started = 1;
                                value %= divisor;
                                (*pc->decoder->filter_function)(mbfl_hexchar_table[digit], pc->decoder);
                        }
                }
                if (!started) {
                        /* the value is zero: write a single '0' */
                        (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
                }

                (*pc->decoder->filter_function)(0x3b, pc->decoder);     /* ';' */
                return c;
        }

        /* no map entry applied: pass the character through */
        (*pc->decoder->filter_function)(c, pc->decoder);

        return c;
}

/*
 * Flush handler for the numeric entity decoder: writes out a reference that
 * was still being parsed when the input ended, then resets the parser.
 *
 * filter - despite its declared type, this pointer is used as the
 *          struct collector_htmlnumericentity_data of the decoder; it is
 *          registered together with that structure as the callback data.
 *          NOTE(review): presumably the convert filter invokes the flush
 *          callback with its data pointer - confirm in mbfl_convert.
 *
 * What is written depends on how far parsing got (pc->status):
 *   1 "&"    2 "&#"    3 "&#" + decimal digits    4 "&#x"    5 "&#x" + hex digits
 *
 * Exactly pc->digit digits of pc->cache are written, most significant first.
 * They are produced by repeated division rather than via base^digit, which
 * does not fit in an int for long digit runs (for hex it wrapped to zero and
 * led to a division by zero).
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
        struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
        int digits[16];
        unsigned int rest, base;
        int count, i;

        base = 0;       /* stays 0 when no digits have to be written */
        switch (pc->status) {
        case 1: /* '&' */
                (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                break;
        case 2: /* '#' */
                (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                break;
        case 3: /* '0'-'9' */
                (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                base = 10;
                break;
        case 4: /* 'x' */
                (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
                break;
        case 5: /* '0'-'9','a'-'f' */
                (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
                (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
                (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
                base = 16;
                break;
        default:
                break;
        }

        if (base != 0) {
                rest = (unsigned int)pc->cache;
                count = pc->digit;
                if (count > (int)(sizeof(digits)/sizeof(digits[0]))) {
                        count = (int)(sizeof(digits)/sizeof(digits[0]));
                }
                /* split off the digits from the least significant end ... */
                for (i = count - 1; i >= 0; i--) {
                        digits[i] = (int)(rest % base);
                        rest /= base;
                }
                /* ... and write them most significant first */
                for (i = 0; i < count; i++) {
                        (*pc->decoder->filter_function)(mbfl_hexchar_table[digits[i]], pc->decoder);
                }
        }

        pc->status = 0;
        pc->cache = 0;
        pc->digit = 0;

        return 0;
}


/*
 * Encode characters as HTML numeric references, or decode such references,
 * according to a conversion map.
 *
 * string  - input text; its encoding and language are copied to the result
 * result  - initialised here; receives the converted text
 * convmap - conversion map, four ints per entry
 * mapsize - number of entries in convmap
 * type    - 0: encode as decimal references ("&#NNN;")
 *           2: encode as hexadecimal references ("&#xHHH;")
 *           any other value: decode decimal and hexadecimal references
 *
 * Returns the value of mbfl_memory_device_result() for the collected output,
 * or NULL when string or result is NULL or a filter cannot be created.
 */
mbfl_string *
mbfl_html_numeric_entity(
    mbfl_string *string,
    mbfl_string *result,
    int *convmap,
    int mapsize,
    int type)
{
        struct collector_htmlnumericentity_data pc;
        mbfl_memory_device device;
        mbfl_convert_filter *encoder;
        int n;
        unsigned char *p;

        if (string == NULL || result == NULL) {
                return NULL;
        }
        mbfl_string_init(result);
        result->no_language = string->no_language;
        result->no_encoding = string->no_encoding;
        mbfl_memory_device_init(&device, string->len, 0);

        /* output code filter */
        pc.decoder = mbfl_convert_filter_new(
            mbfl_no_encoding_wchar,
            string->no_encoding,
            mbfl_memory_device_output, 0, &device);
        /* wchar filter */
        if (type == 0) { /* decimal output */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_encode_htmlnumericentity, 0, &pc);
        } else if (type == 2) { /* hex output */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_encode_hex_htmlnumericentity, 0, &pc);
        } else { /* type == 1: decimal/hex input */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_decode_htmlnumericentity,
                        (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
        }
        if (pc.decoder == NULL || encoder == NULL) {
                mbfl_convert_filter_delete(encoder);
                mbfl_convert_filter_delete(pc.decoder);
                /* release the buffer set up for the output device above */
                mbfl_memory_device_clear(&device);
                return NULL;
        }
        pc.status = 0;
        pc.cache = 0;
        pc.digit = 0;
        pc.convmap = convmap;
        pc.mapsize = mapsize;

        /* feed data */
        p = string->val;
        n = string->len;
        if (p != NULL) {
                while (n > 0) {
                        /* stop feeding as soon as the filter reports an error */
                        if ((*encoder->filter_function)(*p++, encoder) < 0) {
                                break;
                        }
                        n--;
                }
        }
        /* flush front to back so pending output reaches the device */
        mbfl_convert_filter_flush(encoder);
        mbfl_convert_filter_flush(pc.decoder);
        result = mbfl_memory_device_result(&device, result);
        mbfl_convert_filter_delete(encoder);
        mbfl_convert_filter_delete(pc.decoder);

        return result;
}

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 */

/* [<][>][^][v][top][bottom][index][help] */