/* GNU Ocrad - Optical Character Recognition program Copyright (C) 2003-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include "common.h" #include "rectangle.h" #include "segment.h" #include "ucs.h" #include "user_filter.h" #include "bitmap.h" #include "blob.h" #include "character.h" #include "profile.h" #include "feats.h" Character::Character( const Character & c ) : Rectangle( c ), blobpv( c.blobpv ), gv( c.gv ) { for( unsigned i = 0; i < blobpv.size(); ++i ) blobpv[i] = new Blob( *c.blobpv[i] ); } Character & Character::operator=( const Character & c ) { if( this != &c ) { Rectangle::operator=( c ); for( unsigned i = 0; i < blobpv.size(); ++i ) delete blobpv[i]; blobpv = c.blobpv; for( unsigned i = 0; i < blobpv.size(); ++i ) blobpv[i] = new Blob( *c.blobpv[i] ); gv = c.gv; } return *this; } Character::~Character() { for( unsigned i = 0; i < blobpv.size(); ++i ) delete blobpv[i]; } // Returns the filled area of the main blobs only (no recursive) // int Character::area() const { int a = 0; for( int i = 0; i < blobs(); ++i ) a += blobpv[i]->area(); return a; } const Blob & Character::blob( const int i ) const { if( i < 0 || i >= blobs() ) Ocrad::internal_error( "const blob, index out of bounds" ); return *blobpv[i]; } Blob & Character::blob( const int i ) { if( i < 0 || i >= blobs() ) Ocrad::internal_error( "blob, index out of bounds" ); return *blobpv[i]; } Blob & Character::main_blob() { int imax = 0; for( int i = 1; i < blobs(); ++i ) if( blobpv[i]->size() > blobpv[imax]->size() ) imax = i; return *blobpv[imax]; } void Character::shift_blobp( Blob * const p ) { add_rectangle( *p ); int i = blobs() - 1; for( ; i >= 0; --i ) { Blob & bi = *blobpv[i]; if( p->vcenter() > bi.vcenter() ) break; if( p->vcenter() == bi.vcenter() && p->hcenter() >= bi.hcenter() ) break; } blobpv.insert( blobpv.begin() + ( i + 1 ), p ); } void Character::insert_guess( const int i, const int code, const int value ) { if( i < 0 || i > guesses() ) Ocrad::internal_error( "insert_guess, index out of bounds" ); gv.insert( gv.begin() + i, Guess( code, value ) ); } void Character::delete_guess( const int i ) { if( i < 0 || i >= guesses() ) Ocrad::internal_error( "delete_guess, index out of bounds" ); gv.erase( gv.begin() + i ); } bool Character::set_merged_guess( const int code1, const int right1, const int code2, const int blob_index ) { if( blob_index < 0 || blob_index >= blobs() ) return false; const Blob & b = *blobpv[blob_index]; if( b.left() <= right1 && right1 < b.right() ) { only_guess( -(blob_index + 1), left() ); add_guess( code1, right1 ); add_guess( code2, right() ); return true; } return false; } void Character::swap_guesses( const int i, const int j ) { if( i < 0 || i >= guesses() || j < 0 || j >= guesses() ) Ocrad::internal_error( "swap_guesses, index out of bounds" ); const int code = gv[i].code; gv[i].code = gv[j].code; gv[j].code = code; } const Character::Guess & Character::guess( const int i ) const { if( i < 0 || i >= guesses() ) Ocrad::internal_error( "guess, index out of bounds" ); return gv[i]; } bool Character::maybe( const int code ) const { for( int i = 0; i < guesses(); ++i ) if( code == gv[i].code ) return true; return false; } /* bool Character::maybe_digit() const { for( int i = 0; i < guesses(); ++i ) if( UCS::isdigit( gv[i].code ) ) return true; return false; } bool Character::maybe_letter() const { for( int i = 0; i < guesses(); ++i ) if( UCS::isalpha( gv[i].code ) ) return true; return false; } */ void Character::join( Character & c ) { for( int i = 0; i < c.blobs(); ++i ) shift_blobp( c.blobpv[i] ); c.blobpv.clear(); } unsigned char Character::byte_result() const { if( guesses() ) { const unsigned char ch = UCS::map_to_byte( gv[0].code ); if( ch ) return ch; } return '_'; } const char * Character::utf8_result() const { if( guesses() ) { const char * s = UCS::ucs_to_utf8( gv[0].code ); if( *s ) return s; } return "_"; } void Character::print( const Control & control ) const { if( guesses() ) { if( !control.utf8 ) { unsigned char ch = UCS::map_to_byte( gv[0].code ); if( ch ) std::putc( ch, control.outfile ); } else if( gv[0].code ) std::fputs( UCS::ucs_to_utf8( gv[0].code ), control.outfile ); } else std::putc( '_', control.outfile ); } void Character::dprint( const Control & control, const Rectangle & charbox, const bool graph, const bool recursive ) const { if( graph || recursive ) std::fprintf( control.outfile, "%d guesses ", guesses() ); for( int i = 0; i < guesses(); ++i ) { if( gv[i].code == '\t' ) std::fprintf( control.outfile, "guess '\\t', confidence %d ", gv[i].value ); else if( !control.utf8 || !gv[i].code ) { unsigned char ch = UCS::map_to_byte( gv[i].code ); if( ch ) std::fprintf( control.outfile, "guess '%c', confidence %d ", ch, gv[i].value ); } else std::fprintf( control.outfile, "guess '%s', confidence %d ", UCS::ucs_to_utf8( gv[i].code ), gv[i].value ); if( !graph && !recursive ) break; } std::fputc( '\n', control.outfile ); if( graph ) { std::fprintf( control.outfile, "left = %d, top = %d, right = %d, bottom = %d\n", left(), top(), right(), bottom() ); std::fprintf( control.outfile, "width = %d, height = %d, hcenter = %d, vcenter = %d, black area = %d%%\n", width(), height(), hcenter(), vcenter(), ( area() * 100 ) / size() ); if( blobs() >= 1 && blobs() <= 3 ) { const Blob & b = blob( blobs() - 1 ); Features f( b ); std::fprintf( control.outfile, "hbars = %d, vbars = %d\n", f.hbars(), f.vbars() ); } std::fputc( '\n', control.outfile ); const int minrow = std::min( top(), charbox.top() ); const int maxrow = std::max( bottom(), charbox.bottom() ); for( int row = minrow; row <= maxrow; ++row ) { bool istop = ( row == top() ); bool isvc = ( row == vcenter() ); bool isbot = ( row == bottom() ); bool iscbtop = ( row == charbox.top() ); bool iscbvc = ( row == charbox.vcenter() ); bool iscbbot = ( row == charbox.bottom() ); bool ish1top = false, ish1bot = false, ish2top = false, ish2bot = false; if( blobs() == 1 && blobpv[0]->holes() ) { const Blob & b = *blobpv[0]; ish1top = ( row == b.hole(0).top() ); ish1bot = ( row == b.hole(0).bottom() ); if( b.holes() > 1 ) { ish2top = ( row == b.hole(1).top() ); ish2bot = ( row == b.hole(1).bottom() ); } } for( int col = left(); col <= right(); ++col ) { char ch = ( isvc && col == hcenter() ) ? '+' : '.'; for( int i = 0; i < blobs(); ++i ) { int id = blobpv[i]->id( row, col ); if( id != 0 ) { if( id > 0 ) ch = (ch == '+') ? 'C' : 'O'; else ch = (ch == '+') ? '=' : '-'; break; } } std::fprintf( control.outfile, " %c", ch ); } if( istop ) std::fprintf( control.outfile, " top(%d)", row ); if( isvc ) std::fprintf( control.outfile, " vcenter(%d)", row ); if( isbot ) std::fprintf( control.outfile, " bottom(%d)", row ); if( iscbtop ) std::fprintf( control.outfile, " box.top(%d)", row ); if( iscbvc ) std::fprintf( control.outfile, " box.vcenter(%d)", row ); if( iscbbot ) std::fprintf( control.outfile, " box.bottom(%d)", row ); if( ish1top ) std::fprintf( control.outfile, " h1.top(%d)", row ); if( ish1bot ) std::fprintf( control.outfile, " h1.bottom(%d)", row ); if( ish2top ) std::fprintf( control.outfile, " h2.top(%d)", row ); if( ish2bot ) std::fprintf( control.outfile, " h2.bottom(%d)", row ); std::fputc( '\n', control.outfile ); } std::fputs( "\n\n", control.outfile ); } } void Character::xprint( const Control & control ) const { std::fprintf( control.exportfile, "%3d %3d %2d %2d; %d", left(), top(), width(), height(), guesses() ); for( int i = 0; i < guesses(); ++i ) if( !control.utf8 || !gv[i].code ) { unsigned char ch = UCS::map_to_byte( gv[i].code ); if( !ch ) ch = '_'; std::fprintf( control.exportfile, ", '%c'%d", ch, gv[i].value ); } else std::fprintf( control.exportfile, ", '%s'%d", UCS::ucs_to_utf8( gv[i].code ), gv[i].value ); std::fputc( '\n', control.exportfile ); } void Character::apply_filter( const Filter::Type filter ) { if( !guesses() ) return; const int code = gv[0].code; bool remove = false; switch( filter ) { case Filter::letters_only: remove = true; // fall through case Filter::letters: if( !UCS::isalpha( code ) && !UCS::isspace( code ) ) { for( int i = 1; i < guesses(); ++i ) if( UCS::isalpha( gv[i].code ) ) { swap_guesses( 0, i ); break; } if( gv[0].code == '+' && 2 * height() > 3 * width() ) { gv[0].code = 't'; break; } if( !UCS::isalpha( gv[0].code ) ) gv[0].code = UCS::to_nearest_letter( gv[0].code ); if( remove && !UCS::isalpha( gv[0].code ) ) clear_guesses(); } break; case Filter::numbers_only: remove = true; // fall through case Filter::numbers: if( !UCS::isdigit( code ) && !UCS::isspace( code ) ) { for( int i = 1; i < guesses(); ++i ) if( UCS::isdigit( gv[i].code ) ) { swap_guesses( 0, i ); break; } if( !UCS::isdigit( gv[0].code ) ) gv[0].code = UCS::to_nearest_digit( gv[0].code ); if( remove && !UCS::isdigit( gv[0].code ) ) clear_guesses(); } break; case Filter::same_height: break; // handled at line level case Filter::text_block: break; // handled at block level case Filter::upper_num_mark: // fall through case Filter::upper_num_only: remove = true; // fall through case Filter::upper_num: if( !UCS::isupper( code ) && !UCS::isdigit( code ) && !UCS::isspace( code ) ) { for( int i = 1; i < guesses(); ++i ) if( UCS::isupper( gv[i].code ) || UCS::isdigit( gv[i].code ) ) { swap_guesses( 0, i ); break; } if( !UCS::isupper( gv[0].code ) && !UCS::isdigit( gv[0].code ) ) gv[0].code = UCS::to_nearest_upper_num( gv[0].code ); if( remove && !UCS::isupper( gv[0].code ) && !UCS::isdigit( gv[0].code ) ) clear_guesses(); } break; case Filter::user: break; // handled by apply_user_filter } } void Character::apply_user_filter( const User_filter & user_filter ) { if( !guesses() || UCS::isspace( gv[0].code ) ) return; int new_code = user_filter.get_new_code( gv[0].code ); if( new_code >= 0 ) gv[0].code = new_code; else // disabled { for( int i = 1; i < guesses(); ++i ) { new_code = user_filter.get_new_code( gv[i].code ); if( new_code >= 0 ) { swap_guesses( 0, i ); gv[0].code = new_code; break; } } if( new_code < 0 ) clear_guesses(); } }