/* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include "common.h"
#include "rectangle.h"
#include "segment.h"
#include "ucs.h"
#include "bitmap.h"
#include "blob.h"
#include "profile.h"
#include "feats.h"
// Looks for three black sections in column hcenter() +/- noise, then tests
// whether the upper and lower white gaps between them are open to the right
// or to the left.
//
// The values returned by this function are '2', '3', '5', 'E', 'F', 'f',
// 's', 'z', UCS::CCCEDI, UCS::SCCEDI, '*' and 0 (nothing recognized).
// 'charset' is only consulted to decide whether the UCS::CCCEDI /
// UCS::SCCEDI codes may be returned (iso_8859_15 or iso_8859_9 enabled).
//
// NOTE(review): lp, rp, tp, bp and wp are profile members declared outside
// this view; presumably the left, right, top, bottom and width profiles of
// blob 'b' -- confirm in the class declaration.
//
int Features::test_235Esz( const Charset & charset ) const
{
const int csize = 3;
// Offsets (in units of 'noise') of the column used to scan the upper gap.
const int ucoff[csize] = { 0, -1, +1 };
// For upper offset number i, entries [csize*i .. csize*i+2] are the offsets
// tried, in order, for the column used to scan the lower gap.
const int lcoff[3*csize] = { 0, -1, +1, -1, 0, +1, +1, 0, -1 };
// Reject blobs narrower than 9 pixels, taller than 3 times their width, or
// whose bottom profile has more than one minimum at threshold height / 2.
if( b.width() < 9 || b.height() > 3 * b.width() ||
bp.minima( b.height() / 2 ) > 1 ) return 0;
const int noise = ( std::min( b.height(), b.width() ) / 15 ) + 1;
// (lrow1,lcol1): first non-black pixel found below the first black section.
// (urow2,ucol2): last row of the upper gap (row above the second section).
// (lrow2,lcol2): first non-black pixel found below the second section.
// (urow3,ucol3): last row of the lower gap (row above the third section).
int lrow1 = 0, urow2 = 0, lrow2 = 0, urow3 = 0;
int lcol1 = 0, ucol2 = 0, lcol2 = 0, ucol3 = 0;
bool done = false;
for( int i = 0; i < csize && !done; ++i )
{
const int ucol = b.hcenter() + ( noise * ucoff[i] );
// Start at the row given by the top profile for this column.
int row = b.top() + tp[ucol-b.left()];
// Skip the first black section.
while( ++row < b.bottom() && b.get_bit( row, ucol ) ) ;
// The first section must end at or above vpos( 30 ); else try next column.
if( row <= b.vpos( 30 ) ) { lrow1 = row; lcol1 = ucol; } else continue;
// Skip the upper gap.
while( ++row < b.bottom() && !b.get_bit( row, ucol ) ) ;
if( row < b.bottom() )
{
urow2 = row - 1; ucol2 = ucol;
for( int j = 0; j < csize && !done; ++j )
{
// Restart from the first row of the second black section.
row = urow2 + 1;
const int lcol = b.hcenter() + ( noise * lcoff[(csize*i)+j] );
if( ucol != lcol )
{
// The lower column is only usable if every pixel of this row from lcol
// up to (not including) ucol is black.
const int d = ( ucol > lcol ) ? +1 : -1;
int c = lcol; while( c != ucol && b.get_bit( row, c ) ) c += d;
if( c != ucol ) continue;
}
// Skip the second black section in the lower column.
while( ++row < b.bottom() && b.get_bit( row, lcol ) ) ;
if( row < b.bottom() ) { lrow2 = row; lcol2 = lcol; } else continue;
// Skip the lower gap; here the scan is allowed to reach b.bottom() itself.
while( ++row <= b.bottom() && !b.get_bit( row, lcol ) ) ;
// The third section must start below vpos( 70 ).
if( row <= b.bottom() && row > b.vpos( 70 ) )
{ urow3 = row - 1; ucol3 = lcol; done = true; }
}
}
}
if( !done ) return 0;
const bool bopen = b.escape_bottom( urow3, ucol3 );
const bool topen = b.escape_top( lrow1, lcol1 );
const bool tbopen = bopen && topen;
// Code returned by three of the branches below when tbopen is true:
// '*' if the pixel at the center of the blob is set, else 0.
const int ascode = ( b.get_bit( b.vcenter(), b.hcenter() ) ) ? '*' : 0;
// Lower gap open to the left.
if( b.escape_left( lrow2, lcol2 ) )
{
// Upper gap also open to the left.
if( b.escape_left( urow2, ucol2 ) )
{
if( tbopen ) return ascode;
// NOTE(review): the height() <= 3 * width() test below is always true
// here; taller blobs already returned 0 at the top of the function.
if( !bopen && !topen && b.height() <= 3 * b.width() )
{
const int lm = lp.minima(), rm = rp.minima();
if( ( lm == 3 || lm == 2 ) &&
( rm == 2 || ( rm == 1 && rp.iminimum() < rp.pos( 80 ) ) ) )
return '3';
}
}
// Upper gap open to the right, lower gap open to the left.
else if( b.escape_right( urow2, ucol2 ) )
{
if( tbopen ) return ascode;
if( rp[lrow1 + 1 - b.top()] >= lcol1 - b.left() &&
( lp[lrow2 + 1 - b.top()] < lcol2 - b.left() ||
lp[urow3 - 1 - b.top()] < ucol3 - b.left() ) )
{
// A jump in the left profile from less than width / 5 to more than
// width / 2 between consecutive rows in the 40%..70% band gives '5'.
for( int i = lp.pos( 40 ); i <= lp.pos( 70 ); ++i )
if( 5 * lp[i] < b.width() && 2 * lp[i+1] > b.width() ) return '5';
// Otherwise return '5' if at least two of the three tests below hold.
int c = 0, hdiff;
if( !b.top_hook( &hdiff ) || 5 * hdiff >= 4 * b.height() ) ++c;
if( 2 * lp[lrow2 - b.top()] < lcol2 - b.left() ) ++c;
if( !tp.isconvex() || ( !tp.ispit() && bp.ispit() ) ) ++c;
if( c >= 2 ) return '5';
}
// The cedilla codes are only returned for the two charsets tested here.
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
if( urow2 > b.vpos( 55 ) &&
b.seek_right( urow2 - 1, ucol2 ) < b.right() )
{ if( urow2 > b.vpos( 63 ) ) return UCS::CCCEDI;
else return UCS::SCCEDI; }
return 's';
}
}
// Lower gap open to the right (and not to the left).
else if( b.escape_right( lrow2, lcol2 ) )
{
// Upper gap also open to the right.
if( b.escape_right( urow2, ucol2 ) )
{
if( tbopen ) return ascode;
if( bp.minima( b.height() / 5 ) == 1 )
{
if( 8 * lp[((lrow2+urow3)/2)-b.top()] >= b.width() &&
b.escape_top( ( lrow1 + urow2 ) / 2, b.left() ) &&
!b.escape_top( ( lrow2 + urow3 ) / 2, b.left() ) ) return 'f';
// The escape_bottom() call below has the same arguments as the call that
// produced 'bopen' above.
if( rp.minima( b.width() / 8 ) < 3 && b.escape_bottom( urow3, ucol3 ) )
{
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
if( 2 * lp[lp.pos(95)] > rp[rp.pos(95)] )
{ if( urow2 > b.vpos( 63 ) ) return UCS::CCCEDI;
else return UCS::SCCEDI; }
return 'F';
}
// 'E' requires the recorded rows to be strictly ordered, with the second
// black section more than one row tall and spanning vcenter().
else if( lrow1 < urow2 && urow2 + 2 < lrow2 && lrow2 < urow3 &&
urow2 <= b.vcenter() && lrow2 >= b.vcenter() ) return 'E';
}
}
// Upper gap open to the left, lower gap open to the right.
else if( b.escape_left( urow2, ucol2 ) )
{
if( !tbopen && ( 2 * lp[lp.pos(50)] ) + 2 >= b.width() &&
( tp.isconvex() || ( (tp.ispit() || tp.isrtip()) && !bp.ispit() ) ) )
return '2';
if( 2 * b.height() <= 5 * wp.max() && bp[bp.pos(75)] <= b.height() / 10 &&
Ocrad::similar( wp.max( 0, wp.pos(30) ), wp.max( wp.pos(70) ), 20 ) )
return 'z';
}
}
return 0;
}
// Tests for characters built mainly from one vertical bar plus horizontal
// bars or tips. The values returned by this function are 'J', 'v', 'L',
// 'T', '+', 'E', 'F', UCS::NOT, 'z', 'I', 'l', '{', '(', '}', ')', ']',
// '1' and 0 (nothing recognized).
// 'charset' is only consulted before returning UCS::NOT; 'charbox' is only
// used to require that the blob reaches below charbox.vpos( 90 ) for one of
// the 'J' tests.
//
// NOTE(review): vbars()/vbar(i) and hbars()/hbar(i) are defined outside
// this view; presumably the vertical and horizontal bars detected in the
// blob -- confirm in the class declaration.
//
int Features::test_EFIJLlT( const Charset & charset, const Rectangle & charbox ) const
{
// Top and bottom profiles must each have exactly one minimum at threshold
// height / 4.
if( tp.minima( b.height() / 4 ) != 1 || bp.minima( b.height() / 4 ) != 1 )
return 0;
const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
// Step 1: hook-based detection of 'J', 'L' and broken 'v'.
{
// maybe_j selects which side is probed: column hpos( 25 ) when the left
// profile at mid height (plus noise) reaches half the width, else hpos( 75 ).
const bool maybe_j = ( 2 * ( lp[lp.pos(50)] + noise ) >= b.width() );
const int col = b.hpos( maybe_j ? 25 : 75 );
int row = b.seek_top( b.vcenter(), col );
if( row <= b.top() || ( row < b.vpos( 25 ) && b.escape_top( row, col ) ) )
{
int hdiff;
if( b.bottom_hook( &hdiff ) )
{
if( maybe_j && hdiff > b.height() / 2 &&
rp.increasing( rp.pos( 80 ), 1 ) && !rp.decreasing() ) return 'J';
if( !maybe_j && -hdiff > b.height() / 2 )
{
if( 5 * lp[lp.pos(80)] >= 2 * b.width() ) return 'v'; // broken 'v'
if( col > b.hcenter() ) return 'L';
}
}
}
}
// Rows within vnoise of the top / bottom edge count as touching that edge.
const int vnoise = ( b.height() / 30 ) + 1;
const int topmax = b.top() + vnoise;
const int botmin = b.bottom() - vnoise;
// Step 2: one vertical bar, at most half the blob width, centered in the
// blob: 'T' or '+'.
if( vbars() == 1 && vbar(0).width() >= 2 && 2 * vbar(0).width() <= b.width() )
{
if( std::abs( vbar(0).hcenter() - b.hcenter() ) <= noise &&
std::abs( (vbar(0).left() - b.left()) - (b.right() - vbar(0).right()) ) <= 2 * noise )
{
if( hbars() == 1 && 4 * hbar(0).height() <= b.height() )
{
// Single thin horizontal bar near the top: 'T'.
if( ( hbar(0).top() <= topmax || hbar(0).bottom() < b.vpos( 15 ) ) &&
hbar(0).width() >= wp[wp.pos(75)] + wp[wp.pos(80)] &&
4 * lp[lp.pos(50)] >= b.width() )
return 'T';
// Single thin, full width horizontal bar at mid height: '+'.
if( std::abs( hbar(0).vcenter() - b.vcenter() ) <= vnoise &&
hbar(0).width() >= b.width() &&
Ocrad::similar( b.height(), b.width(), 50 ) ) return '+';
}
// Thin bars at top and bottom, the top one wider than 4/3 of the bottom
// one: 'T'.
if( hbars() == 2 &&
hbar(0).top() <= topmax && 4 * hbar(0).height() <= b.height() &&
hbar(1).bottom() >= botmin && 4 * hbar(1).height() <= b.height() &&
3 * hbar(0).width() > 4 * hbar(1).width() )
return 'T';
}
}
// Step 3: one vertical bar lying in the left half ('E', 'F', 'L') or
// towards the right ('J', UCS::NOT).
if( vbars() == 1 && vbar(0).width() >= 2 )
{
if( 2 * vbar(0).width() <= b.width() && vbar(0).right() <= b.hcenter() )
{
if( ( hbars() == 2 || hbars() == 3 ) && hbar(0).top() <= topmax &&
hbar(0).width() + 1 >= hbar(1).width() &&
2 * hbar(1).width() >= 3 * vbar(0).width() &&
vbar(0).h_overlaps( hbar(1) ) )
{
// Three horizontal bars, first and last of similar width: 'E'.
if( hbars() == 3 &&
Ocrad::similar( hbar(0).width(), hbar(2).width(), 10, 2 ) &&
10 * hbar(2).width() >= 9 * hbar(1).width() &&
hbar(0).left() <= hbar(1).left() + 1 )
return 'E';
// hbar(2) is only read when hbars() != 2, i.e. when hbars() == 3.
if( ( hbars() == 2 || hbar(0).width() > hbar(2).width() ) &&
( hbar(1).includes_vcenter( b ) ||
( 3 * hbar(1).width() > 2 * hbar(0).width() &&
10 * lp[vnoise] < b.width() && hbar(1).top() > b.vpos( 30 ) &&
hbar(1).bottom() < b.vpos( 60 ) ) ) )
return 'F';
}
if( hbars() == 2 && hbar(1).bottom() >= botmin &&
b.height() > b.width() && hbar(1).width() > hbar(0).width() &&
std::abs( vbar(0).hcenter() - hbar(0).hcenter() ) <= 1 &&
rp.iminimum() > rp.pos( 70 ) )
return 'L';
// Single horizontal bar about as wide as the blob: at the bottom it is
// an 'L', at the top it is an 'F'.
if( hbars() == 1 && Ocrad::similar( hbar(0).width(), b.width(), 10 ) &&
vbar(0).left() <= b.hpos( 30 ) )
{
if( hbar(0).bottom() >= botmin &&
b.escape_top( b.vcenter(), b.hpos( 75 ) ) )
return 'L';
if( hbar(0).top() <= topmax && 2 * wp[wp.pos(50)] >= b.width() &&
4 * wp[wp.pos(75)] < b.width() &&
b.escape_right( b.vpos( 25 ), b.hcenter() ) )
return 'F';
}
}
// Vertical bar starting right of hpos( 33 ) with a single horizontal bar.
if( 3 * vbar(0).width() < 2 * b.width() && vbar(0).left() > b.hpos( 33 ) &&
hbars() == 1 )
{
if( vbar(0).right() >= b.hpos( 90 ) && hbar(0).bottom() >= botmin &&
hbar(0).left() == b.left() &&
b.bottom() > charbox.vpos( 90 ) &&
b.escape_top( b.vcenter(), b.hpos( 25 ) ) )
{ if( b.height() > b.width() ) return 'J'; else return 0; }
if( hbar(0).top() <= topmax && hbar(0).width() + 1 >= b.width() &&
b.width() > b.height() )
{
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) ) return UCS::NOT;
return 0;
}
}
}
// Step 4: one vertical bar and single-minimum top and bottom profiles:
// 'z', 'I', 'l', brackets, braces, parentheses and '1'.
if( vbars() == 1 && vbar(0).width() >= 2 &&
tp.minima() == 1 && bp.minima() == 1 )
{
// Blob taller than 4/3 of its width with similar margins on both sides of
// the bar.
if( 3 * b.height() > 4 * b.width() &&
Ocrad::similar( vbar(0).left() - b.left(),
b.right() - vbar(0).right(), 30, 2 * noise ) )
{
if( b.height() <= 3 * wp.max() && rp.istip() && lp.istip() )
{
if( b.height() <= 3 * b.width() &&
lp[lp.pos(40)] > lp[lp.pos(60)] + noise &&
rp[rp.pos(60)] > rp[rp.pos(40)] + noise ) return 'z';
return 'I';
}
if( rp.isflats() &&
( lp.istip() || lp.isflats() ||
( lp.isctip() && lp.minima() == 2 &&
lp.iminimum() < lp.pos( 30 ) && lp.iminimum(1) > lp.pos( 80 ) ) ) )
return 'l';
if( b.height() > 3 * wp.max() )
{
if( rp.istip() && lp.ispit() && Ocrad::similar( lp.iminimum(), lp.pos( 50 ), 10 ) )
{ if( lp.istpit() ) return '{'; else return '('; }
if( lp.istip() && rp.ispit() && Ocrad::similar( rp.iminimum(), rp.pos( 50 ), 10 ) )
{ if( rp.istpit() ) return '}'; else return ')'; }
if( rp.isflats() && 2 * vbar(0).size() >= b.area() ) return 'l';
}
// Scan the columns left of the bar, from the bar towards the left edge,
// for one that passes both tests below: 'l'.
if( 2 * b.height() > 3 * b.width() && lp.minima() <= 2 )
if( rp.isflats() || rp.minima() == 1 )
if( vbar(0).right() >= b.hpos( 70 ) ||
b.escape_top( b.vpos( 75 ), std::min( b.right(), vbar(0).right() + 1 ) ) )
for( int i = vbar(0).left() - 1; i > b.left(); --i )
if( b.seek_bottom( b.vpos( 75 ), i ) < b.bottom() &&
bp[i-b.left()] <= noise ) return 'l';
}
// Vertical bar touching (within one column) the right edge of the blob.
if( vbar(0).right() >= b.right() - 1 )
{
if( lp.istip() && b.height() > 2 * b.width() )
{
if( 2 * vbar(0).width() <= wp.max() &&
lp[lp.pos(50)] >= b.width() / 2 ) return ']';
if( b.height() >= 3 * b.width() ) return 'l';
}
if( 2 * b.height() >= 3 * b.width() &&
vbar(0).height() >= 3 * vbar(0).width() &&
lp.istpit() && lp.minima() == 1 )
{ const int i = lp.iminimum();
if( i > lp.pos( 10 ) && i < lp.pos( 40 ) ) return '1'; }
}
}
// Step 5: '+' recognized from a single full width horizontal bar at mid
// height, independently of vertical bars.
if( hbars() == 1 && hbar(0).width() >= b.width() &&
std::abs( hbar(0).vcenter() - b.vcenter() ) <= vnoise &&
Ocrad::similar( b.height(), b.width(), 50 ) &&
tp.isupit() && bp.isupit() )
return '+';
return 0;
}
// Tests for 'c' and for the parentheses '(' and ')'.
// Returns the recognized character code, or 0 if nothing matched.
//
int Features::test_c() const
  {
  const int mid_row = b.vcenter();
  const int mid_col = b.hcenter();
  const bool tall = ( b.height() > 2 * b.width() );

  if( lp.isconvex() || lp.ispit() )
    {
    // Limits found by seeking up and down from the center of the blob.
    const int gap_top = b.seek_top( mid_row, mid_col );
    const int gap_bottom = b.seek_bottom( mid_row, mid_col );

    // Tall blob whose widest row is at most 2/3 of the blob width: it is
    // either '(' or nothing recognizable by this test.
    if( tall && 3 * wp.max() <= 2 * b.width() )
      return lp.isconvex() ? '(' : 0;

    const bool inside = ( gap_top > b.top() && gap_bottom < b.bottom() );
    if( inside && rp.isctip() )
      {
      const bool shaped =
        bp.ispit() || tp.ispit() || ( bp.isltip() && tp.isltip() );
      // The center must be open to the right for a 'c'.
      if( shaped && b.escape_right( mid_row, mid_col ) ) return 'c';
      }
    }

  if( tall && rp.isconvex() )
    {
    const int gap_top = b.seek_top( mid_row, mid_col );
    const int gap_bottom = b.seek_bottom( mid_row, mid_col );

    if( 3 * wp.max() <= 2 * b.width() ) return ')';
    // Otherwise the left profile must reach half the width at both limits.
    if( 2 * lp[gap_top-b.top()] >= b.width() &&
        2 * lp[gap_bottom-b.top()] >= b.width() ) return ')';
    }
  return 0;
  }
// Tests for 'f', 'r', 't' and related shapes. The values returned by this
// function are 'l', 't', 'f', 'r', 'x', 'T' and 0 (nothing recognized).
// 'charbox' is only used to compare the blob height against
// charbox.height() in one of the 'r' tests.
//
int Features::test_frst( const Rectangle & charbox ) const
{
// Bottom profile must have one minimum at thresholds height / 4 and
// height / 2; top profile must have one minimum at threshold height / 2.
if( bp.minima( b.height() / 4 ) != 1 || tp.minima( b.height() / 2 ) != 1 ||
bp.minima( b.height() / 2 ) != 1 ) return 0;
const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
// True when the top profile does not have exactly one minimum at the finer
// threshold height / 4.
const bool maybe_slanted_r = ( tp.minima( b.height() / 4 ) != 1 );
// Cleared below when a top hook is found without returning.
bool maybe_t = true;
if( !maybe_slanted_r )
{
// b_hdiff stays 0 unless bottom_hook() writes it; it is read again in the
// top hook tests below.
int b_hdiff = 0, t_hdiff = 0;
if( b.bottom_hook( &b_hdiff ) )
{
if( -2 * b_hdiff > b.height() )
{
if( b.height() >= 3 * wp.max() && !lp.ispit() &&
( hbars() == 0 || hbar(0).bottom() < b.vpos( 20 ) ) ) return 'l';
if( 2 * wp[wp.pos(6)] < b.width() && hbars() >= 1 && hbars() <= 2 &&
hbar(0).top() >= b.vpos( 15 ) && hbar(0).bottom() < b.vcenter() &&
Ocrad::similar( hbar(0).width(), wp.max(), 10 ) ) return 't';
}
}
if( b.top_hook( &t_hdiff ) )
{
if( 3 * t_hdiff > 2 * b.height() && b.height() > 2 * wp.max() &&
tp.iminimum() > tp.pos( 50 ) && bp.iminimum() <= bp.pos( 50 ) &&
( !b_hdiff || rp.increasing( rp.pos( 50 ) ) ) )
return 'f';
if( 2 * b_hdiff > b.height() && 2 * t_hdiff > b.height() )
return 0; // recognized 's' or SCCEDI
maybe_t = false;
}
}
// Right and bottom profiles both deeper than half the blob at their
// midpoints, with a tip shaped top profile: 'r'.
if( 2 * rp[rp.pos(50)] > b.width() &&
2 * bp[bp.pos(50)] > b.height() && tp.isctip() ) return 'r';
// Everything below needs exactly one vertical bar at least 2 pixels wide.
if( maybe_slanted_r || vbars() != 1 || vbar(0).width() < 2 ) return 0;
if( vbar(0).hcenter() <= b.hcenter() )
{
// Column two pixels to the right of the position given by the right
// profile at mid height.
const int col = b.right() - rp[rp.pos(50)] + 2;
if( col < b.right() )
{
const int row = b.seek_bottom( b.vcenter(), col );
if( row >= b.bottom() || b.escape_bottom( row - 1, col ) )
{
if( rp.minima() == 3 )
{ if( rp.minima( b.width() / 8 ) < 3 ) return 'f'; else return 0; }
if( Ocrad::similar( b.height(), b.width(), 40 ) )
{
if( tp.minima( b.height() / 8 ) == 2 &&
bp.minima( b.height() / 8 ) == 2 ) return 'x';
int row2 = b.vpos( 75 );
int col2 = b.seek_right( row2, b.hcenter(), false ) + 1;
if( b.seek_right( row2, col2 ) >= b.right() )
{
if( lp.isconvex() && ( col > b.hpos( 60 ) || row < b.bottom() ) )
return 0;
// One thin horizontal bar at the top (optionally with a narrower bar
// at the bottom): 'T'.
if( ( hbars() == 1 ||
( hbars() == 2 && hbar(1).bottom() >= b.bottom() - 1 &&
2 * hbar(0).width() > 3 * hbar(1).width() ) ) &&
hbar(0).top() <= b.top() + 1 &&
4 * hbar(0).height() <= b.height() &&
4 * lp[lp.pos(50)] >= b.width() )
return 'T';
if( 3 * rp[rp.pos(50)] > b.width() ) return 'r';
return 0;
}
}
}
// Three segments in the rows at 15% and 85% of the height: 'x'.
if( Ocrad::similar( b.height(), b.width(), 40 ) &&
segments_in_row( b.vpos( 15 ) ) == 3 &&
segments_in_row( b.vpos( 85 ) ) == 3 &&
b.seek_right( row - 1, col ) < b.right() && lp.isctip() )
return 'x';
}
// Blob taller than 4/3 of its width with the bar not touching the left
// edge.
if( 3 * b.height() > 4 * b.width() && vbar(0).left() > b.left() &&
rp.minima() <= 2 )
{
const int col = b.right() - std::max( 0, rp[rp.pos(50)] - 1 );
// Not open downwards from mid height at 'col': 't' or nothing.
if( !b.escape_bottom( b.vcenter(), col ) )
{
if( 3 * wp[wp.pos(6)] < 2 * b.width() && tp.ispit() &&
lp.iminimum() < lp.pos( 40 ) ) return 't';
else return 0;
}
else if( 2 * wp.max() > b.width() )
{
// Right profile minimum in the top 20%: 'r' or 'f'.
if( rp.iminimum() < rp.pos( 20 ) )
{
if( rp.increasing( rp.pos( 20 ) ) || bp.increasing() ||
tp.minima( noise ) == 2 ||
( rp.minima() == 1 && ( b.height() < charbox.height() || tp.iminimum() > tp.pos( 50 ) ) ) )
{ if( b.height() <= 3 * wp.max() ) return 'r'; else return 0; }
else if( 3 * b.height() >= 5 * b.width() && !rp.istip() ) return 'f';
}
else
{
if( maybe_t && !rp.isconvex() && bp.minima( b.height() / 3 ) == 1 )
return 't';
else return 0;
}
}
}
// seek_bottom() from mid height at column hpos( 60 ) + 1 reaches the
// bottom of the blob: 'f' or 'r'.
if( b.seek_bottom( b.vcenter(), b.hpos( 60 ) + 1 ) >= b.bottom() )
{ if( rp.minima() == 2 ) return 'f'; else return 'r'; }
if( vbar(0).right() <= b.hcenter() && hbars() == 1 &&
hbar(0).bottom() >= b.bottom() - 1 && lp.istip() && rp.istip() &&
!b.escape_top( b.vcenter(), b.hpos( 75 ) ) )
return 'r';
}
return 0;
}
// Tests for 'G'. Returns 'G' or 0.
//
int Features::test_G() const
  {
  if( !lp.isconvex() && !lp.ispit() ) return 0;

  // Largest value of the right profile between 30% and 60% of its length,
  // scanning from the 60% position towards the 30% position.
  int depth = 0, depth_pos = 0;
  for( int i = rp.pos( 60 ); i >= rp.pos( 30 ); --i )
    if( rp[i] > depth ) { depth = rp[i]; depth_pos = i; }
  if( depth == 0 ) return 0;

  // Column given by that profile value; it must lie strictly between the
  // left edge and the horizontal center.
  const int edge_col = b.right() - depth + 1;
  if( edge_col <= b.left() || edge_col >= b.hcenter() ) return 0;

  const int col = ( edge_col + b.hcenter() ) / 2;
  const int row = b.seek_bottom( depth_pos + b.top(), col );
  if( row >= b.bottom() ) return 0;
  if( !b.escape_right( row, col ) ) return 0;
  if( b.escape_bottom( row, b.hcenter() ) ) return 0;

  const int noise = std::max( 2, b.height() / 20 );

  // Going up from 'row', find the first row where seek_right() from the
  // center reaches the right edge, then the first row above it where it
  // does not.
  int lrow = row - 1;
  while( lrow > b.top() && b.seek_right( lrow, b.hcenter() ) < b.right() )
    --lrow;
  int urow = lrow - 1;
  while( urow > b.top() && b.seek_right( urow, b.hcenter() ) >= b.right() )
    --urow;

  const int lower = lrow + noise;
  if( lower >= row || urow <= b.top() ) return 0;
  const int upper = urow - std::min( noise, ( urow - b.top() ) / 2 );

  const int uwidth =
    b.seek_left( upper, b.right() ) - b.seek_right( upper, b.hcenter() );
  const int lwidth =
    b.seek_left( lower, b.right() ) - b.seek_right( lower, b.hcenter() );

  // 'lrow' is the lower row before 'noise' was added to it.
  if( lrow <= b.vcenter() || lwidth > uwidth + noise ) return 'G';
  return 0;
  }
// Common feature: U-shaped top of character.
// The values returned by this function are 'v', 'u', 'y', 'Y', 'M', 'w',
// 'H', 'N', 'K', '~' and 0 (nothing recognized).
// 'charbox' is only used in one of the 'y' tests, to check whether
// b.vpos( 90 ) reaches charbox.bottom().
//
int Features::test_HKMNUuvwYy( const Rectangle & charbox ) const
{
// The top profile must have two minima at thresholds height / 5 and
// height / 4, at most three at height / 2, and satisfy isctip().
if( tp.minima( b.height() / 5 ) == 2 && tp.minima( b.height() / 4 ) == 2 &&
tp.minima( b.height() / 2 ) <= 3 && tp.isctip() )
{
const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
const int m5 = bp.minima( b.height() / 5 );
// Case 1: bottom profile with one minimum (or two, the first one near the
// middle): 'v', 'u', 'y', 'Y'. This case always returns.
if( 2 * b.height() >= b.width() && b.height() >= 10 &&
( m5 == 1 ||
( m5 == 2 && Ocrad::similar( bp.iminimum(), bp.pos( 50 ), 10 ) ) ) )
{
// Index into the width profile just below the range of the top profile,
// limited to the 90% position.
const int stem = std::min( tp.range() + ( b.height() / 10 ), wp.pos(90) );
const bool maybe_Y = ( 5 * tp.range() <= 3 * b.height() ||
( stem <= wp.pos(75) && 5 * wp[stem] <= b.width() ) );
// Smallest left profile value from position 0 up to the 90% position
// (presumably -- the range semantics of min() are defined elsewhere).
const int lg = lp.min( lp.pos( 90 ) );
if( lg > 1 && bp.isvpit() && tp.minima( b.height() / 2 ) == 2 &&
lp[lp.pos(75)] <= lg &&
( !maybe_Y || 3 * wp[stem] > b.width() || wp[stem] > wp[wp.pos(90)] + 1 ) )
return 'v';
int hdiff;
if( b.bottom_hook( &hdiff ) )
{
// Small hook: 'v' or 'u'.
if( std::abs( hdiff ) <= b.height() / 8 )
{
if( segments_in_row( b.vpos( 30 ) ) >= 3 ) return 'v';
if( bp.isconvex() )
// Width shrinking by more than 10% from 30% to 50% and again from 50%
// to 70% of the height gives 'v', else 'u'.
{ if( 9 * wp[wp.pos(30)] > 10 * wp[wp.pos(50)] &&
9 * wp[wp.pos(50)] > 10 * wp[wp.pos(70)] ) return 'v';
else return 'u'; }
}
// Large positive hook: 'y' or nothing.
if( hdiff > b.height() / 2 )
{ if( bp.minima( b.height() / 2 ) == 1 ) return 'y'; else return 0; }
}
const int rg = rp.min( rp.pos( 90 ) );
const int lg2 = lp.max( lp.pos( 70 ), lp.pos( 90 ) );
const int rg2 = rp.max( rp.pos( 70 ), rp.pos( 90 ) );
// lc is a weighted mean of lg and ( lp.limit() - rg ); lc2 is the mean of
// lg2 and ( lp.limit() - rg2 ).
const int lc = ( lg + ( 2 * ( lp.limit() - rg ) ) ) / 3;
const int lc2 = ( lg2 + lp.limit() - rg2 ) / 2;
if( bp.ispit() && maybe_Y )
{
// row2: first row with two segments; row1: first row below it with one
// segment, then moved down by a quarter of the width at that row.
int row2 = b.top();
while( row2 < b.bottom() && segments_in_row( row2 ) != 2 ) ++row2;
int row1 = row2 + 1;
while( row1 < b.bottom() && segments_in_row( row1 ) != 1 ) ++row1;
if( row1 < b.bottom() ) row1 += wp[row1-b.top()] / 4;
// The width test below also keeps the divisor used for row0 positive.
if( row1 < b.bottom() && wp[row1-b.top()] < b.width() )
{
const int w1 = wp[row1-b.top()];
int row0 = w1 * ( row1 - row2 ) / ( b.width() - w1 ) + row1;
if( row0 < b.bottom() && 2 * wp[wp.pos(70)] < b.width() &&
( Ocrad::similar( lg, rg, 20 ) ||
( lg > 1 && lg < rg && lc >= lc2 && !rp.increasing() ) ) )
return 'Y';
}
}
if( b.escape_top( b.vpos( 60 ), b.hcenter() ) && !lp.istip() &&
( 4 * b.height() >= 3 * b.width() ||
segments_in_col( b.hpos( 75 ) ) <= 2 ) ) return 'u';
if( lg < rg + 1 && !lp.increasing( lp.pos( 50 ) ) &&
( 2 * lg < rg || b.vpos( 90 ) >= charbox.bottom() ) &&
( tp.minima( b.height()/2 ) == 1 || lp.imaximum() > b.height()/2 ) )
return 'y';
if( lg > 1 && bp.ispit() && tp.minima( b.height() / 3 ) == 2 )
return 'v';
if( lg <= 1 && 2 * ( b.width() - rg - lg ) < b.width() &&
rp.increasing() && tp.minima( b.height() / 2 ) == 2 ) return 'v';
return 0;
}
// Case 2: bottom profile with two minima and isctip(): 'M', 'w', 'H', 'u',
// 'N', 'K'. This case always returns.
if( 2 * b.height() >= b.width() && b.height() >= 9 &&
bp.minima() == 2 && bp.isctip() )
{
const int th = std::max( b.height() / 4, bp[bp.pos(50)] + noise );
if( bp.minima( th ) == 3 ) return 'M';
// Left and right profile values at mid height.
const int lg = lp[lp.pos(50)];
const int rg = rp[rp.pos(50)];
if( Ocrad::similar( lg, rg, 80, 2 ) &&
4 * lg < b.width() && 4 * rg < b.width() )
{
if( lg > 1 && rg > 1 && lp.increasing() && rp.increasing() &&
5 * tp[tp.pos(50)] > b.height() )
return 'w';
// A single thin horizontal bar between 30% and 60% of the height,
// spanning more than 90% of the blob width at its row: 'H' or nothing.
if( hbars() == 1 && 5 * ( hbar(0).height() - 1 ) < b.height() &&
hbar(0).top() >= b.vpos( 30 ) && hbar(0).bottom() <= b.vpos( 60 ) &&
10 * hbar(0).width() > 9 * wp[hbar(0).vcenter()-b.top()] &&
Ocrad::similar( v_segment( hbar(0).vcenter(), hbar(0).hcenter() ).size(),
hbar(0).height(), 30, 2 ) )
{
if( 9 * hbar(0).width() <= 10 * wp[wp.pos(50)] ) return 'H';
return 0;
}
// Four segments in the row at 60% or 70% of the height: 'M' or 'w'
// depending on the top profile value at its midpoint.
if( segments_in_row( b.vpos( 60 ) ) == 4 ||
segments_in_row( b.vpos( 70 ) ) == 4 )
{
if( 2 * tp[tp.pos(50)] > b.height() ) return 'M';
return 'w';
}
if( ( vbars() <= 2 || ( vbars() == 3 && b.height() >= b.width() ) ) &&
tp.minima( b.height() / 2 ) <= 2 &&
tp.minima( ( 2 * b.height() ) / 5 ) <= 2 && !lp.istpit() &&
4 * std::abs( rp[rp.pos(20)] - rp[rp.pos(80)] ) <= b.width() )
{
const int row = b.top() + tp[tp.pos(50)];
if( row > b.vcenter() )
{
// Look for a bottom hook in a bitmap built from 'b' and the rectangle
// spanning from b.left() to b.hcenter(): 'u'.
Rectangle r( b.left(), b.top(), b.hcenter(), b.bottom() );
Bitmap bm( b, r );
int hdiff;
if( bm.bottom_hook( &hdiff ) && -2 * hdiff > bm.height() ) return 'u';
}
if( row > b.vpos( 10 ) || vbars() >= 2 ) return 'N';
}
return 0;
}
if( 3 * lg < 2 * rg && lg < b.width() / 4 && rg > b.width() / 4 &&
rp.isctip() && tp.minima( b.height() / 8 ) == 2 ) return 'K';
return 0;
}
// Case 3: blob more than 2.5 times wider than tall: '~'.
if( bp.minima() <= 2 && 2 * b.width() > 5 * b.height() ) return '~';
// Case 4: three bottom minima and no horizontal bar in the top 20%: 'M'.
if( bp.minima() == 3 &&
( hbars() == 0 || ( hbars() == 1 && hbar(0).top() >= b.vpos( 20 ) ) ) )
return 'M';
}
return 0;
}
// Looks for the nearest frontier in column hcenter(), then tests if
// gap is open downwards (except for 'x').
// The values returned by this function are 'x', '^', 'w', 'k', 'h', 'n'
// and 0 (nothing recognized).
// 'charbox' is only used in the '^' test, to check whether the blob lies
// above charbox.vcenter().
//
int Features::test_hknwx( const Rectangle & charbox ) const
{
const int m8 = tp.minima( b.height() / 8 );
// Two top minima, one bottom minimum, and both side profiles either
// isctip() or isconcave(): 'x'.
if( m8 == 2 && bp.minima( b.height() / 2 ) == 1 &&
( ( lp.isctip() && rp.isctip() ) ||
( lp.isconcave() && rp.isconcave() ) ) ) return 'x';
if( b.width() >= b.height() && tp.ispit() &&
( b.bottom() < charbox.vcenter() || ( lp.decreasing() && rp.decreasing() ) ) )
return '^';
// Find the largest bottom profile value between 40% and 60% of the width
// and turn it into absolute ( row, col ) coordinates.
int col = 0, row = 0;
for( int i = bp.pos( 40 ); i <= bp.pos( 60 ); ++i )
if( bp[i] > row ) { row = bp[i]; col = i; }
row = b.bottom() - row + 1; col += b.left();
// This also rejects the case where no positive profile value was found
// ( row == b.bottom() + 1 ).
if( row > b.vpos( 90 ) || row <= b.top() ) return 0;
// FIXME follow gap up
{ int c = col; col = b.seek_right( row, col ); if( col > c ) --col;
row = b.seek_top( row, col ); }
const int urow = b.seek_top( row - 1, col, false );
if( urow > b.vpos( 20 ) || 3 * tp[tp.pos(60)] > b.height() )
{
const int m5 = tp.minima( b.height() / 5 );
if( m5 == 3 && segments_in_row( b.vcenter() ) == 2 &&
segments_in_row( b.vpos( 80 ) ) == 3 ) return 0; // merged 'IX'
if( ( m5 == 2 || m5 == 3 ) && tp.minima() >= 2 &&
rp[rp.pos(25)] <= b.width() / 4 &&
( !lp.istpit() || rp.minima() == 1 ) ) return 'w';
// Single top minimum at both thresholds: 'k' if the right profile
// satisfies isctip( 66 ), else 'h'.
if( m5 == 1 && m8 == 1 && 4 * tp.max( tp.pos(40), tp.pos(60) ) < 3 * b.height() )
{ if( rp.isctip( 66 ) ) return 'k'; else return 'h'; }
return 0;
}
// NOTE(review): 'urow' is compared against b.vcenter() and b.vpos() as an
// absolute row, yet it is passed below as the threshold of bp.minima(),
// where other calls in this file pass values derived from b.height() --
// confirm this is intended.
if( Ocrad::similar( b.height(), b.width(), 40 ) && row > b.vcenter() &&
urow < b.vcenter() && tp.minima( b.height() / 5 ) == 2 &&
bp.minima( urow + 1 ) == 3 )
return 'w';
if( urow <= b.vpos( 20 ) && tp.minima( b.height() / 4 ) == 1 &&
Ocrad::similar( b.height(), b.width(), 40 ) &&
( 8 * ( rp[rp.pos(50)] - 1 ) <= b.width() ||
tp[tp.pos(99)] > b.height() / 2 ) )
return 'n';
return 0;
}
// Looks for four black sections in column hcenter() +/- 1, then tests if
// upper gap is open to the right and lower gaps are open to the left.
// Returns UCS::SSCEDI on success, 0 otherwise.
//
// Only the first column that shows exactly four black sections, with the
// second one starting above vpos( 50 ) and the fourth one starting at or
// below vpos( 70 ), is tested for open gaps.
//
int Features::test_s_cedilla() const
  {
  // Keep the scanned columns inside the blob: for blobs narrower than
  // 3 pixels hcenter() - 1 or hcenter() + 1 falls outside
  // [left(), right()].
  // NOTE(review): assumes get_bit() requires coordinates inside the blob's
  // rectangle -- confirm against its definition.
  const int first_col = std::max( b.left(), b.hcenter() - 1 );
  const int last_col = std::min( b.right(), b.hcenter() + 1 );

  for( int col = first_col; col <= last_col; ++col )
    {
    // urowN is the row just above the first pixel of black section N.
    int urow2 = 0, urow3 = 0, urow4 = 0;
    int black_sections = 0;
    bool prev_black = false;
    for( int row = b.top(); row <= b.bottom(); ++row )
      {
      const bool black = b.get_bit( row, col );
      if( black && !prev_black )		// start of a new black section
        {
        ++black_sections;
        if( black_sections == 2 ) urow2 = row - 1;
        else if( black_sections == 3 ) urow3 = row - 1;
        else if( black_sections == 4 ) urow4 = row - 1;
        }
      prev_black = black;
      }
    if( black_sections == 4 && urow2 < b.vpos( 50 ) && urow4 >= b.vpos( 70 ) )
      {
      // Candidate column found; no other column is tried after it.
      if( b.escape_right( urow2, col ) && b.escape_left( urow3, col ) &&
          b.escape_left( urow4, col ) )
        return UCS::SSCEDI;
      return 0;
      }
    }
  return 0;
  }
// Returns true if the blob passes the comma tests below: no holes, taller
// than wide but at most 3 times taller, and (for blobs at least 3x3) a
// mostly filled upper part with more black pixels in the second row than
// in the row above the last one.
//
bool Features::test_comma() const
  {
  if( b.holes() ) return false;
  const int w = b.width();
  const int h = b.height();
  if( h <= w || h > 3 * w ) return false;

  // Blobs smaller than 3x3 pass without further tests.
  if( w < 3 || h < 3 ) return true;

  // Count black pixels in the top 'w' rows (a w x w square).
  int upper_area = 0;
  const int row_end = b.top() + w;
  for( int row = b.top(); row < row_end; ++row )
    for( int col = b.left(); col <= b.right(); ++col )
      upper_area += ( b.get_bit( row, col ) ? 1 : 0 );
  const int side = w - 2;
  if( upper_area < side * side ) return false;

  // Compare the second row with the row above the last one.
  int upper_count = 0, lower_count = 0;
  const int upper_row = b.top() + 1;
  const int lower_row = b.bottom() - 1;
  for( int col = b.left(); col <= b.right(); ++col )
    {
    if( b.get_bit( upper_row, col ) ) ++upper_count;
    if( b.get_bit( lower_row, col ) ) ++lower_count;
    }
  return upper_count > lower_count;
  }
// Quick tests tried in order: test_solid() first, then comma, grave accent
// and apostrophe tests based on the position of the blob relative to the
// vertical center of 'charbox'.
// Returns the recognized character code, or 0 if nothing matched.
//
int Features::test_easy( const Rectangle & charbox ) const
  {
  if( const int code = test_solid( charbox ) ) return code;

  const int middle = charbox.vcenter();
  const bool below_middle = ( b.top() >= middle );

  if( below_middle && test_comma() ) return ',';

  // Blob at or above the middle, taller than wide, one bottom minimum.
  if( b.bottom() <= middle && b.height() > b.width() && bp.minima() == 1 )
    {
    const bool is_grave = ( tp.iminimum() < tp.pos( 50 ) &&
                            bp.iminimum() > bp.pos( 50 ) );
    return is_grave ? '`' : '\'';
    }

  // Blob more than 1.5 times taller than its widest row.
  if( 2 * b.height() > 3 * wp.max() && below_middle && bp.minima() == 1 )
    return ',';
  return 0;
  }
// Recognizes single line, non-rectangular characters without holes.
// '/<>C[\^`c
// The values returned by this function are 'C', '[', 'c', '^', '/', 'l',
// ',', '\'', '\\', '`', '<', '>' and 0 (nothing recognized).
// 'charbox' is used to place the blob relative to charbox.vcenter() and,
// in one of the '\\' tests, to compare heights.
//
int Features::test_line( const Rectangle & charbox ) const
{
// Rows within vnoise of the top / bottom edge count as touching that edge.
const int vnoise = ( b.height() / 30 ) + 1;
const int topmax = b.top() + vnoise;
const int botmin = b.bottom() - vnoise;
// True if the only vertical bar is at least 2 pixels wide and starts
// within the leftmost 10% (plus one column) of the blob.
const bool vbar_left = ( vbars() == 1 && vbar(0).width() >= 2 &&
vbar(0).left() <= b.hpos( 10 ) + 1 );
// Step 1: 'C', '[' and 'c'.
if( tp.minima() == 1 && bp.minima() == 1 && rp.istip() )
{
if( vbar_left && b.height() > 2 * b.width() &&
2 * rp[rp.pos(50)] > b.width() )
{
// Both rows found by seeking up and down from the center must stop
// short of the right edge when followed to the right.
int row = b.seek_top( b.vcenter(), b.hcenter() );
int col = b.seek_right( row, b.hcenter() );
if( col < b.right() )
{
row = b.seek_bottom( b.vcenter(), b.hcenter() );
col = b.seek_right( row, b.hcenter() );
if( col < b.right() ) return 'C';
}
}
// Thin horizontal bars touching the top and the bottom.
if( hbars() == 2 &&
hbar(0).top() <= topmax && 4 * hbar(0).height() <= b.height() &&
hbar(1).bottom() >= botmin && 4 * hbar(1).height() <= b.height() )
{ if( vbar_left && b.height() > 2 * b.width() ) return '[';
if( vbar_left || lp.ispit() ) return 'c'; }
}
// slope1 and slope2 are written by the straight() calls below.
int slope1, slope2;
if( tp.minima() != 1 ) return 0;
// Step 2: blobs whose left and right profiles are both straight. This
// step always returns.
if( lp.minima() == 1 && rp.minima() == 1 && 2 * b.height() >= b.width() &&
lp.straight( &slope1 ) && rp.straight( &slope2 ) )
{
if( slope1 < 0 && slope2 < 0 && bp.minima() == 2 ) return '^';
if( bp.minima() != 1 ) return 0;
// Left slope negative, right slope positive: '/', 'l', ',' or '\''
// depending on the position relative to charbox.vcenter().
if( slope1 < 0 && slope2 > 0 )
{
if( b.v_includes( charbox.vcenter() ) )
{
if( 10 * b.area() < 3 * b.size() ) return '/';
if( b.height() > 2 * b.width() ) return 'l';
return 0;
}
if( b.top() >= charbox.vcenter() ) return ',';
return '\'';
}
// Left slope positive, right slope negative: '\\' or '`'.
if( slope1 > 0 && slope2 < 0 )
{
if( b.bottom() > charbox.vcenter() )
{
if( ( 3 * b.width() > b.height() && b.height() > charbox.height() ) ||
2 * b.width() >= b.height() ) return '\\';
else return 0;
}
return '`';
}
return 0;
}
// Step 3: blobs whose top and bottom profiles are both straight.
if( bp.minima() == 1 && 2 * b.width() >= b.height() &&
tp.straight( &slope1 ) && bp.straight( &slope2 ) )
{
if( lp.minima() == 1 && rp.minima() == 1 )
{
if( slope1 < 0 && slope2 > 0 )
{
if( b.v_includes( charbox.vcenter() ) ) return '/';
if( b.top() >= charbox.vcenter() ) return ',';
return '\'';
}
if( slope1 > 0 && slope2 < 0 )
{
if( b.bottom() > charbox.vcenter() ) return '\\';
return '`';
}
}
// The width / height test below repeats the one that guards this step.
else if( 2 * b.width() >= b.height() )
{
if( slope1 < 0 && slope2 < 0 && lp.minima() == 1 && rp.minima() == 2 )
return '<';
if( slope1 > 0 && slope2 > 0 && lp.minima() == 2 && rp.minima() == 1 )
return '>';
}
}
return 0;
}
int Features::test_solid( const Rectangle & charbox ) const
{
if( b.holes() ) return 0;
if( b.height() >= 5 && b.width() >= 5 )
{
if( 2 * b.height() > b.width() && ( tp.minima() != 1 || bp.minima() != 1 ) )
return 0;
if( b.height() < 2 * b.width() && ( lp.minima() != 1 || rp.minima() != 1 ) )
return 0;
}
int inner_area, inner_size, porosity = 0;
if( b.width() >= 3 && b.height() >= 3 )
{
const int vnoise = ( b.height() / 100 ) + 1;
inner_size = ( b.width() - 2 ) * ( b.height() - 2 );
inner_area = 0;
for( int row = b.top() + vnoise; row <= b.bottom() - vnoise; ++row )
{
int holes = 0; // FIXME
for( int col = b.left() + 1; col < b.right(); ++col )
{ if( b.get_bit( row, col ) ) ++inner_area; else ++holes; }
if( 5 * holes >= b.width() ) porosity += ( 5 * holes ) / b.width();
}
if( inner_area * 100 < inner_size * 70 ) return 0;
}
else { inner_size = 0; inner_area = b.area(); }
if( Ocrad::similar( b.height(), wp.max(), 20, 2 ) )
{
const int n = std::min( b.height(), b.width() );
if( n >= 6 )
{
int d = 0;
for( int i = 0; i < n; ++i )
{
if( b.get_bit( b.top() + i, b.left() + i ) ) ++d;
if( b.get_bit( b.top() + i, b.right() - i ) ) --d;
}
if( 2 * std::abs( d ) >= n - 1 ) return 0;
}
if( ( !porosity && inner_area * 100 >= inner_size * 75 ) ||
( b.width() >= 7 && b.height() >= 7 &&
( 100 * b.area_octagon() >= 95 * b.size_octagon() ||
100 * b.area_octagon() >= 95 * b.area() ) ) ) return '.';
return 0;
}
if( porosity > 1 || inner_area * 100 < inner_size * 85 ||
( porosity && inner_area * 100 < inner_size * 95 ) ) return 0;
if( b.width() > b.height() )
{
if( b.top() > charbox.vpos( 90 ) ||
( charbox.bottom() - b.bottom() < b.top() - charbox.vcenter() &&
b.width() >= 5 * b.height() ) ) return '_';
return '-';
}
if( b.height() > b.width() )
{
if( b.top() > charbox.vcenter() ) return ',';
if( b.bottom() <= charbox.vcenter() ) return '\'';
return '|';
}
return 0;
}