root/lib/devices/ocr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ocr_setparameter
  2. ocr_startpage
  3. ocr_startclip
  4. ocr_endclip
  5. ocr_stroke
  6. ocr_fill
  7. ocr_fillbitmap
  8. ocr_fillgradient
  9. ocr_addfont
  10. ocr_drawchar
  11. ocr_drawlink
  12. ocr_result_write
  13. ocr_result_save
  14. ocr_result_get
  15. ocr_result_destroy
  16. ocr_endpage
  17. ocr_finish
  18. gfxdevice_ocr_init

/* ocr.c

   Part of the swftools package.

   Copyright (c) 2007 Matthias Kramm <kramm@quiss.org> 
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <unistd.h>
#include <memory.h>
#include "../types.h"
#include "../mem.h"
#include "../gfxdevice.h"
#include "../gfxtools.h"
#include "render.h"

#include "../gocr/pnm.h"
#include "../gocr/pgm2asc.h"

typedef struct _textpage {
    char*text;
    int textpos;
    struct _textpage*next;
} textpage_t;

typedef struct _internal {
    gfxdevice_t*render;
    int pages;
    
    textpage_t*first_page;
    textpage_t*current_page;
} internal_t;

int ocr_setparameter(gfxdevice_t*dev, const char*key, const char*value)
{
    internal_t*i = (internal_t*)dev->internal;
    return i->render->setparameter(i->render,key,value);
}

void ocr_startpage(gfxdevice_t*dev, int width, int height) 
{ 
    internal_t*i = (internal_t*)dev->internal;
    if(i->render) {
        fprintf(stderr, "Call endpage() before calling startpage()\n");
        return;
    }
    i->render = malloc(sizeof(gfxdevice_t));
    gfxdevice_render_init(i->render);
    i->render->startpage(i->render,width,height); 
    i->pages++;
}
/* passthrough */
void ocr_startclip(gfxdevice_t*dev, gfxline_t*line) { ((internal_t*)dev->internal)->render->startclip(((internal_t*)dev->internal)->render,line); }
void ocr_endclip(gfxdevice_t*dev) { ((internal_t*)dev->internal)->render->endclip(((internal_t*)dev->internal)->render); }
void ocr_stroke(gfxdevice_t*dev, gfxline_t*line, gfxcoord_t width, gfxcolor_t*color, gfx_capType cap_style, gfx_joinType joint_style, gfxcoord_t miterLimit) { ((internal_t*)dev->internal)->render->stroke(((internal_t*)dev->internal)->render, line, width, color, cap_style, joint_style, miterLimit); }
void ocr_fill(gfxdevice_t*dev, gfxline_t*line, gfxcolor_t*color) { ((internal_t*)dev->internal)->render->fill(((internal_t*)dev->internal)->render, line, color); }
void ocr_fillbitmap(gfxdevice_t*dev, gfxline_t*line, gfximage_t*img, gfxmatrix_t*matrix, gfxcxform_t*cxform) { ((internal_t*)dev->internal)->render->fillbitmap(((internal_t*)dev->internal)->render, line, img, matrix, cxform); }
void ocr_fillgradient(gfxdevice_t*dev, gfxline_t*line, gfxgradient_t*gradient, gfxgradienttype_t type, gfxmatrix_t*matrix) { ((internal_t*)dev->internal)->render->fillgradient(((internal_t*)dev->internal)->render, line, gradient, type, matrix); }
void ocr_addfont(gfxdevice_t*dev, gfxfont_t*font) { ((internal_t*)dev->internal)->render->addfont(((internal_t*)dev->internal)->render, font); }
void ocr_drawchar(gfxdevice_t*dev, gfxfont_t*font, int glyphnr, gfxcolor_t*color, gfxmatrix_t*matrix) { ((internal_t*)dev->internal)->render->drawchar(((internal_t*)dev->internal)->render, font, glyphnr, color, matrix); }
void ocr_drawlink(gfxdevice_t*dev, gfxline_t*line, const char*action) { ((internal_t*)dev->internal)->render->drawlink(((internal_t*)dev->internal)->render, line, action); }

void ocr_result_write(gfxresult_t*r, int filedesc)
{
    textpage_t*i= (textpage_t*)r->internal;
}
int ocr_result_save(gfxresult_t*r, const char*filename)
{
    textpage_t*i= (textpage_t*)r->internal;
    if(!i) {
        return 0; // no pages drawn
    }
    FILE*fi = fopen(filename, "wb");
    if(!fi)
        return 0;
    while(i) {
        fwrite(i->text, i->textpos, 1, fi);
        i = i->next;
    }
    fclose(fi);
    return 1;
}

void*ocr_result_get(gfxresult_t*r, const char*name)
{
    textpage_t*i= (textpage_t*)r->internal;
    if(!strcmp(name,"text")) {
        textpage_t*j = i;
        int len = 0;
        while(j) {
            len += i->textpos;
            j = j->next;
        }
        char*text = (char*)malloc(len);
        int pos = 0;
        j = i;
        while(j) {
            memcpy(&text[pos], i->text, i->textpos);
            pos += i->textpos;
            j = j->next;
        }
        text[pos] = 0;
        return text;
    } else if(!strncmp(name,"page",4)) {
        int pagenr = atoi(&name[4]);
        if(pagenr<0)
            pagenr=0;
        while(pagenr>0) {
            i = i->next;
            if(!i)
                return 0;
            pagenr++;
        }
        i->text[i->textpos] = 0;
        return strdup(i->text);
    }
    return 0;
}
void ocr_result_destroy(gfxresult_t*r)
{
    textpage_t*i= (textpage_t*)r->internal;
    int t;
    r->internal = 0;
    while(i) {
        textpage_t*next = i->next;
        free(i->text);i->text = 0;
        free(i);
        i = next;
    }
    free(r);
}

job_t*JOB;

void ocr_endpage(gfxdevice_t*dev) 
{ 
    internal_t*i = (internal_t*)dev->internal;
    i->render->endpage(i->render); 

    gfxdevice_t*out = i->render;
    gfxresult_t* r = out->finish(out);
    free(i->render);i->render = 0;

    gfximage_t*img = (gfximage_t*)r->get(r, "page");

    job_t job;
    JOB = &job;
    
    job_init(&job);
    job.cfg.out_format=UTF8;

    job.src.fname = "<none>";
    job.src.p.p = malloc(img->width*img->height);
    job.src.p.bpp = 1;
    job.src.p.x = img->width;
    job.src.p.y = img->height;
    int size=img->width*img->height;
    int t;
    for(t=0;t<size;t++) {
        job.src.p.p[t] = (img->data[t].r+img->data[t].g+img->data[t].b)/3;
    }

    pgm2asc(&job);

    int linecounter;
    const char *line = 0;
    int len = 0;
    linecounter = 0;
    line = getTextLine(linecounter++);
    while (line) {
      len += strlen(line)+1;
      line = getTextLine(linecounter++);
    }

    textpage_t*page = malloc(sizeof(textpage_t));
    page->next = 0;
    page->text = malloc(len+1);
    page->textpos = 0;
    if(!i->first_page) {
        i->first_page = i->current_page = page;
    } else {
        i->current_page->next = page;
        i->current_page = page;
    }

    linecounter = 0;
    line = getTextLine(linecounter++);
    while (line) {
      int l = strlen(line);
      memcpy(&page->text[page->textpos], line, l);
      page->textpos += l;
      page->text[page->textpos++] = '\n';

      line = getTextLine(linecounter++);
    }
    page->text[page->textpos++] = 0;

    free_textlines();

    job_free(&job);JOB=0;
    
    r->destroy(r);
}

gfxresult_t* ocr_finish(gfxdevice_t*dev)
{
    internal_t*i = (internal_t*)dev->internal;
    
    gfxresult_t*r = (gfxresult_t*)rfx_calloc(sizeof(gfxresult_t));
    
    r->internal = i->first_page;
    r->write = ocr_result_write;
    r->save = ocr_result_save;
    r->get = ocr_result_get;
    r->destroy = ocr_result_destroy;

    free(dev->internal); dev->internal = 0; i = 0;

    return r;
}

void gfxdevice_ocr_init(gfxdevice_t*dev, gfxdevice_t*out)
{
    internal_t*i = (internal_t*)rfx_calloc(sizeof(internal_t));
    memset(dev, 0, sizeof(gfxdevice_t));

    dev->name = "ocr";

    dev->internal = i;

    dev->setparameter = ocr_setparameter;
    dev->startpage = ocr_startpage;
    dev->startclip = ocr_startclip;
    dev->endclip = ocr_endclip;
    dev->stroke = ocr_stroke;
    dev->fill = ocr_fill;
    dev->fillbitmap = ocr_fillbitmap;
    dev->fillgradient = ocr_fillgradient;
    dev->addfont = ocr_addfont;
    dev->drawchar = ocr_drawchar;
    dev->drawlink = ocr_drawlink;
    dev->endpage = ocr_endpage;
    dev->finish = ocr_finish;

    i->pages = 0;
}


/* [<][>][^][v][top][bottom][index][help] */