/* Ocradlib - Optical Character Recognition library Copyright (C) 2009-2019 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library. If not, see . */ #include #include #include #include #include #include #include "ocradlib.h" #include "common.h" #include "rectangle.h" #include "ucs.h" #include "track.h" #include "bitmap.h" #include "blob.h" #include "character.h" #include "page_image.h" #include "textline.h" #include "textblock.h" #include "textpage.h" struct OCRAD_Descriptor { Page_image * page_image; Textpage * textpage; OCRAD_Errno ocr_errno; Control control; std::string text; OCRAD_Descriptor() : page_image( 0 ), textpage( 0 ), ocr_errno( OCRAD_ok ) { control.outfile = 0; } }; bool verify_descriptor( OCRAD_Descriptor * const ocrdes, const bool result = false ) { if( !ocrdes ) return false; if( !ocrdes->page_image || ( result && !ocrdes->textpage ) ) { ocrdes->ocr_errno = OCRAD_sequence_error; return false; } return true; } const char * OCRAD_version() { return OCRAD_version_string; } OCRAD_Descriptor * OCRAD_open() { verbosity = -1; // keep library silent OCRAD_Descriptor * const ocrdes = new( std::nothrow ) OCRAD_Descriptor; if( !ocrdes ) return 0; return ocrdes; } int OCRAD_close( OCRAD_Descriptor * const ocrdes ) { if( !ocrdes ) return -1; if( ocrdes->textpage ) delete ocrdes->textpage; if( ocrdes->page_image ) delete ocrdes->page_image; delete ocrdes; return 0; } OCRAD_Errno OCRAD_get_errno( OCRAD_Descriptor * const ocrdes ) { if( !ocrdes ) return OCRAD_bad_argument; return ocrdes->ocr_errno; } int OCRAD_set_image( OCRAD_Descriptor * const ocrdes, const OCRAD_Pixmap * const image, const bool invert ) { if( !ocrdes ) return -1; if( !image || image->height < 3 || image->width < 3 || INT_MAX / image->width < image->height || ( image->mode != OCRAD_bitmap && image->mode != OCRAD_greymap && image->mode != OCRAD_colormap ) ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } try { Page_image * const page_image = new Page_image( *image, invert ); if( ocrdes->textpage ) { delete ocrdes->textpage; ocrdes->textpage = 0; } if( ocrdes->page_image ) delete ocrdes->page_image; ocrdes->page_image = page_image; } catch( std::bad_alloc & ) { ocrdes->ocr_errno = OCRAD_mem_error; return -1; } return 0; } int OCRAD_set_image_from_file( OCRAD_Descriptor * const ocrdes, const char * const filename, const bool invert ) { if( !ocrdes ) return -1; FILE * infile = 0; if( filename && filename[0] ) { if( std::strcmp( filename, "-" ) == 0 ) infile = stdin; else infile = std::fopen( filename, "rb" ); } if( !infile ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } int retval = 0; try { Page_image * const page_image = new Page_image( infile, invert ); if( ocrdes->textpage ) { delete ocrdes->textpage; ocrdes->textpage = 0; } if( ocrdes->page_image ) delete ocrdes->page_image; ocrdes->page_image = page_image; } catch( std::bad_alloc & ) { ocrdes->ocr_errno = OCRAD_mem_error; retval = -1; } catch( ... ) { ocrdes->ocr_errno = OCRAD_bad_argument; retval = -1; } std::fclose( infile ); return retval; } int OCRAD_set_utf8_format( OCRAD_Descriptor * const ocrdes, const bool utf8 ) { if( !verify_descriptor( ocrdes ) ) return -1; ocrdes->control.utf8 = utf8; return 0; } int OCRAD_set_threshold( OCRAD_Descriptor * const ocrdes, const int threshold ) { if( !verify_descriptor( ocrdes ) ) return -1; if( threshold < -1 || threshold > 255 ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } ocrdes->page_image->threshold( threshold ); return 0; } int OCRAD_scale( OCRAD_Descriptor * const ocrdes, const int value ) { if( !verify_descriptor( ocrdes ) ) return -1; int retval = 0; try { if( !ocrdes->page_image->change_scale( value ) ) retval = -1; } catch( ... ) { retval = -1; } if( retval < 0 ) ocrdes->ocr_errno = OCRAD_bad_argument; return retval; } int OCRAD_recognize( OCRAD_Descriptor * const ocrdes, const bool layout ) { if( !verify_descriptor( ocrdes ) ) return -1; Textpage * const textpage = new( std::nothrow ) Textpage( *ocrdes->page_image, "", ocrdes->control, layout ); if( !textpage ) { ocrdes->ocr_errno = OCRAD_mem_error; return -1; } if( ocrdes->textpage ) delete ocrdes->textpage; ocrdes->textpage = textpage; return 0; } int OCRAD_result_blocks( OCRAD_Descriptor * const ocrdes ) { if( !verify_descriptor( ocrdes, true ) ) return -1; return ocrdes->textpage->textblocks(); } int OCRAD_result_lines( OCRAD_Descriptor * const ocrdes, const int blocknum ) { if( !verify_descriptor( ocrdes, true ) ) return -1; if( blocknum < 0 || blocknum >= ocrdes->textpage->textblocks() ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } return ocrdes->textpage->textblock( blocknum ).textlines(); } int OCRAD_result_chars_total( OCRAD_Descriptor * const ocrdes ) { if( !verify_descriptor( ocrdes, true ) ) return -1; int c = 0; for( int b = 0; b < ocrdes->textpage->textblocks(); ++b ) for( int i = 0; i < ocrdes->textpage->textblock( b ).textlines(); ++i ) c += ocrdes->textpage->textblock( b ).textline( i ).characters(); return c; } int OCRAD_result_chars_block( OCRAD_Descriptor * const ocrdes, const int blocknum ) { if( !verify_descriptor( ocrdes, true ) ) return -1; if( blocknum < 0 || blocknum >= ocrdes->textpage->textblocks() ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } int c = 0; for( int i = 0; i < ocrdes->textpage->textblock( blocknum ).textlines(); ++i ) c += ocrdes->textpage->textblock( blocknum ).textline( i ).characters(); return c; } int OCRAD_result_chars_line( OCRAD_Descriptor * const ocrdes, const int blocknum, const int linenum ) { if( !verify_descriptor( ocrdes, true ) ) return -1; if( blocknum < 0 || blocknum >= ocrdes->textpage->textblocks() || linenum < 0 || linenum >= ocrdes->textpage->textblock( blocknum ).textlines() ) { ocrdes->ocr_errno = OCRAD_bad_argument; return -1; } return ocrdes->textpage->textblock( blocknum ).textline( linenum ).characters(); } const char * OCRAD_result_line( OCRAD_Descriptor * const ocrdes, const int blocknum, const int linenum ) { if( !verify_descriptor( ocrdes, true ) ) return 0; if( blocknum < 0 || blocknum >= ocrdes->textpage->textblocks() || linenum < 0 || linenum >= ocrdes->textpage->textblock( blocknum ).textlines() ) { ocrdes->ocr_errno = OCRAD_bad_argument; return 0; } const Textline & textline = ocrdes->textpage->textblock( blocknum ).textline( linenum ); ocrdes->text.clear(); if( !ocrdes->control.utf8 ) for( int i = 0; i < textline.characters(); ++i ) ocrdes->text += textline.character( i ).byte_result(); else for( int i = 0; i < textline.characters(); ++i ) ocrdes->text += textline.character( i ).utf8_result(); ocrdes->text += '\n'; return ocrdes->text.c_str(); } int OCRAD_result_first_character( OCRAD_Descriptor * const ocrdes ) { if( !verify_descriptor( ocrdes, true ) ) return -1; int ch = 0; if( ocrdes->textpage->textblocks() > 0 && ocrdes->textpage->textblock( 0 ).textlines() > 0 ) { const Character & character = ocrdes->textpage->textblock( 0 ).textline( 0 ).character( 0 ); if( character.guesses() ) { if( !ocrdes->control.utf8 ) ch = UCS::map_to_byte( character.guess( 0 ).code ); else ch = character.guess( 0 ).code; } } return ch; }