/* GNU Ocrad - Optical Character Recognition program Copyright (C) 2003-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include "common.h" #include "rectangle.h" #include "segment.h" #include "ucs.h" #include "bitmap.h" #include "blob.h" #include "profile.h" #include "feats.h" Features::Features( const Blob & b_ ) : b( b_ ), hbar_initialized( false ), vbar_initialized( false ), lp( b, Profile::left ), tp( b, Profile::top ), rp( b, Profile::right ), bp( b, Profile::bottom ), hp( b, Profile::height ), wp( b, Profile::width ) {} void Features::row_scan_init() const { int l = -1; // begin of segment. -1 means no segment row_scan.resize( b.height() ); for( int row = b.top(); row <= b.bottom(); ++row ) for( int col = b.left(); col <= b.right(); ++col ) { bool black = b.get_bit( row, col ); if( l < 0 && black ) l = col; // begin of segment if( l >= 0 && ( !black || col == b.right() ) ) // end of segment { row_scan[row-b.top()].push_back( Csegment( l, col - !black ) ); l = -1; } } } void Features::col_scan_init() const { int t = -1; // begin of segment. -1 means no segment col_scan.resize( b.width() ); for( int col = b.left(); col <= b.right(); ++col ) for( int row = b.top(); row <= b.bottom(); ++row ) { bool black = b.get_bit( row, col ); if( t < 0 && black ) t = row; // begin of segment if( t >= 0 && ( !black || row == b.bottom() ) ) // end of segment { col_scan[col-b.left()].push_back( Csegment( t, row - !black ) ); t = -1; } } } int Features::hbars() const { if( !hbar_initialized ) { hbar_initialized = true; if( row_scan.empty() ) row_scan_init(); std::vector< Csegment > segv; segv.reserve( b.height() ); for( unsigned i = 0; i < row_scan.size(); ++i ) { if( row_scan[i].size() == 1 ) { segv.push_back( row_scan[i][0] ); continue; } int maxsize = 0, jmax = -1; for( unsigned j = 0; j < row_scan[i].size(); ++j ) { const int size = row_scan[i][j].size(); if( maxsize < size ) { maxsize = size; jmax = j; } } if( jmax >= 0 ) segv.push_back( row_scan[i][jmax] ); else segv.push_back( Csegment() ); } const int limit = ( wp.max() + 1 ) / 2; int state = 0, begin = 0, l = 0, r = 0; for( int i = 0; i < b.height(); ++i ) { Csegment & seg = segv[i]; switch( state ) { case 0: if( seg.size() <= limit ) break; state = 1; begin = i; l = seg.left; r = seg.right; if( i < b.height() - 1 ) break; case 1: if( seg.size() > limit && ( i <= begin || seg.overlaps( segv[i-1] ) ) ) { if( seg.left < l ) l = seg.left; if( seg.right > r ) r = seg.right; if( i < b.height() - 1 ) break; } state = 0; int end = ( seg.size() <= limit ) ? i - 1 : i; const int width = r - l + 1; while( begin <= end && 3 * segv[begin].size() < 2 * width ) ++begin; while( begin <= end && 3 * segv[end].size() < 2 * width ) --end; const int height = end - begin + 1; if( height < 1 || height > width ) break; const int margin = std::max( height, ( b.height() / 10 ) + 1 ); if( begin >= margin ) { bool good = false; for( int j = margin; j > 0; --j ) if( 3 * segv[begin-j].size() <= 2 * width ) { good = true; break; } if( !good ) break; } if( end + margin < b.height() ) { bool good = false; for( int j = margin; j > 0; --j ) if( 3 * segv[end+j].size() <= 2 * width ) { good = true; break; } if( !good ) break; } hbar_.push_back( Rectangle( l, begin+b.top(), r, end+b.top() ) ); break; } } while( hbar_.size() > 3 ) // remove noise hbars { int wmin = hbar_[0].width(); for( unsigned i = 1; i < hbar_.size(); ++i ) if( hbar_[i].width() < wmin ) wmin = hbar_[i].width(); for( int i = hbar_.size() - 1; i >= 0; --i ) if( hbar_[i].width() == wmin ) hbar_.erase( hbar_.begin() + i ); } } return hbar_.size(); } int Features::vbars() const // FIXME small gaps not detected { if( !vbar_initialized ) { vbar_initialized = true; int state = 0, begin = 0, limit = b.height(); limit -= ( b.height() < 40 ) ? 3 : b.height() / 10; for( int col = b.left(); col <= b.right(); ++col ) { int c = 0, c2 = 0, count = 0; for( int row = b.top() + 1; row < b.bottom(); ++row ) { if( b.get_bit( row, col ) ) { ++c; if( row < b.bottom() - 1 ) continue; } else if( ( col > b.left() && b.get_bit( row, col - 1 ) ) || ( col < b.right() && b.get_bit( row, col + 1 ) ) ) { ++c; ++c2; if( row < b.bottom() - 1 ) continue; } if( c > count ) { count = c; } c = 0; } if( ( count - c2 ) * 3 < limit * 2 ) count = 0; switch( state ) { case 0: if( count >= limit ) { state = 3; begin = col; } else if( count * 4 >= limit * 3 ) { state = 2; begin = col; } else if( count * 3 >= limit * 2 ) { state = 1; begin = col; } break; case 1: if( count >= limit ) state = 3; else if( count * 4 >= limit * 3 ) state = 2; else if( count * 3 < limit * 2 ) state = 0; else begin = col; break; case 2: if( count >= limit ) state = 3; else if( count * 3 < limit * 2 ) state = 0; else if( count * 4 < limit * 3 ) state = 1; break; case 3: if( count * 3 < limit * 2 || col == b.right() ) { int end = ( count * 3 < limit * 2 ) ? col - 1 : col; vbar_.push_back( Rectangle( begin, b.top(), end, b.bottom() ) ); state = 0; } } } } return vbar_.size(); } Csegment Features::v_segment( const int row, const int col ) const { const int segments = segments_in_col( col ); for( int i = 0; i < segments; ++i ) if( col_scan[col-b.left()][i].includes( row ) ) return col_scan[col-b.left()][i]; return Csegment(); } int Features::test_misc( const Rectangle & charbox ) const { if( bp.minima() == 1 ) { if( hbars() == 1 && hbar(0).top() <= b.top() + ( b.height() / 10 ) && 4 * hbar(0).height() <= b.height() && 5 * hbar(0).width() >= 4 * b.width() && rp[hbar(0).bottom()-b.top()+2] - rp[hbar(0).bottom()-b.top()] < b.width() / 4 && rp.increasing( hbar(0).vcenter() - b.top() + 1 ) ) return '7'; if( b.height() > b.width() && rp.increasing() && !tp.decreasing() && b.seek_left( b.vcenter(), b.hcenter() ) <= b.left() ) return '7'; } if( tp.minima( b.height() / 4 ) == 1 && bp.minima( b.height() / 4 ) == 1 ) { if( b.height() > 2 * b.width() && rp.increasing() && tp.decreasing() && lp.iscpit( 25 ) ) return '1'; if( hbars() == 1 || ( hbars() == 2 && hbar(1).bottom() >= b.bottom() - 1 && 3 * hbar(0).width() > 4 * hbar(1).width() ) ) if( 3 * hbar(0).height() < b.height() && hbar(0).top() <= b.top() + 1 ) { int i = lp.pos( 40 ); if( 3 * wp[i] < b.width() && 5 * lp[i] > b.width() && 5 * rp[i] > b.width() ) return 'T'; } if( 3 * b.height() > 4 * b.width() && vbars() == 1 && vbar(0).width() >= 2 ) { const int lg = vbar(0).left() - b.left(); const int rg = b.right() - vbar(0).right(); if( 2 * lg < b.width() && 2 * rg < b.width() && Ocrad::similar( lg, rg, 40 ) && 4 * bp[bp.pos(25)] > 3 * b.height() && 4 * tp[tp.pos(75)] > 3 * b.height() ) return 'l'; } if( 5 * b.height() >= 4 * charbox.height() && b.height() > wp.max() && 3 * wp[wp.pos(50)] < b.width() ) { if( hbars() == 1 && hbar(0).bottom() >= b.bottom() - 1 && hbar(0).top() > b.vpos( 75 ) && Ocrad::similar( lp[lp.pos(50)], rp[rp.pos(50)], 20, 2 ) ) return 'l'; if( hbars() == 2 && hbar(0).bottom() < b.vpos( 25 ) && hbar(1).top() > b.vpos( 75 ) && hbar(1).bottom() >= b.bottom() - 1 /*&& 3 * hbar(0).width() < 4 * hbar(1).width()*/ ) { if( hbar(0).right() <= hbar(1).hcenter() ) return 0; if( 3 * hbar(0).width() <= 2 * hbar(1).width() || b.height() >= 3 * wp.max() ) return 'l'; return 'I'; } } if( ( hbars() == 2 || hbars() == 3 ) && hbar(0).top() <= b.top() + 1 && hbar(1).includes_vcenter( b ) && 3 * hbar(0).width() > 4 * hbar(1).width() && ( hbars() == 2 || ( hbar(2).bottom() >= b.bottom() - 1 && 3 * hbar(0).width() > 4 * hbar(2).width() ) ) ) return 'F'; if( b.height() > 3 * wp.max() ) { if( rp.istip() && lp.ispit() ) { if( lp.istpit() ) return '{'; else return '('; } if( lp.istip() && rp.ispit() ) { if( rp.istpit() ) return '}'; else return ')'; } if( b.width() > 2 * wp.max() && rp.isconvex() ) return ')'; } if( b.height() > 2 * b.width() && 5 * b.height() >= 4 * charbox.height() && lp.max() + rp.max() < b.width() ) { if( 5 * rp[rp.pos(50)] > 2 * b.width() ) { const int row = b.seek_top( b.vpos( 75 ), b.hpos( 75 ) ); if( ( b.top() < charbox.top() || b.bottom() <= charbox.bottom() + ( b.height() / 5 ) ) && row <= b.top() ) return 'L'; if( row > b.top() && b.seek_bottom( b.vpos( 75 ), b.hpos( 75 ) ) < b.bottom() ) return '['; } return '|'; } } return 0; }