/* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include "common.h"
#include "rectangle.h"
#include "segment.h"
#include "ucs.h"
#include "bitmap.h"
#include "blob.h"
#include "character.h"
#include "profile.h"
#include "feats.h"
// First attempt at recognition without relying on context.
//
void Character::recognize1( const Charset & charset, const Rectangle & charbox )
{
if( blobs() == 1 )
{
const Blob & b = blob( 0 );
if( b.holes() == 0 ) recognize110( charset, charbox );
else if( b.holes() == 1 ) recognize111( charset, charbox );
else if( b.holes() == 2 ) recognize112( charbox );
}
else if( blobs() == 2 ) recognize12( charset, charbox );
else if( blobs() == 3 ) recognize13( charset, charbox );
}
// Recognizes 1 blob characters without holes.
// 12357CEFGHIJKLMNSTUVWXYZcfhklmnrstuvwxyz
// '()*+,-./<>@[\]^_`{|}~¬
//
void Character::recognize110( const Charset & charset, const Rectangle & charbox )
{
const Blob & b = blob( 0 );
Features f( b );
int code = f.test_easy( charbox );
if( code )
{
if( code == '.' && b.width() > b.height() && b.v_includes( charbox.vcenter() ) )
{ add_guess( code, 1 ); add_guess( '-', 0 ); return; }
add_guess( code, 0 ); return;
}
if( b.height() < 5 || ( b.height() < 8 && b.width() < 6 ) ||
b.height() > 10 * b.width() || 5 * b.height() < b.width() ) return;
code = f.test_EFIJLlT( charset, charbox );
if( code ) { add_guess( code, 0 ); return; }
code = f.test_frst( charbox ); if( code ) { add_guess( code, 0 ); return; }
code = f.test_G(); if( code ) { add_guess( code, 0 ); return; }
code = f.test_c(); if( code ) { add_guess( code, 0 ); return; }
if( charset.enabled( Charset::iso_8859_9 ) )
{ code = f.test_s_cedilla(); if( code ) { add_guess( code, 0 ); return; } }
code = f.test_235Esz( charset ); if( code ) { add_guess( code, 0 ); return; }
code = f.test_HKMNUuvwYy( charbox );
if( code == 'u' && f.lp.istpit() ) // Looks for merged 'tr'
{
int col = b.seek_left( b.vcenter(), b.right() );
if( col < b.hpos( 90 ) && !b.escape_top( b.vcenter(), col ) )
{
col = b.seek_left( b.vcenter(), col - 1, false );
while( --col > b.hpos( 40 ) &&
( b.seek_top( b.vcenter(), col ) > b.top() ||
f.hp[col-b.left()] > b.height() / 10 ) ) ;
if( col > b.hpos( 40 ) && col < b.right() &&
set_merged_guess( 't', col, 'r', 0 ) ) return;
}
}
if( code == 'N' && b.width() > b.height() && b.top() >= charbox.top() &&
4 * f.tp[f.tp.pos(50)] < b.height() )
{ // Looks for merged 'rv'
const int col = f.hp.iminimum();
if( col >= f.hp.pos( 40 ) && col < f.hp.pos( 50 ) &&
set_merged_guess( 'r', b.left() + col, 'v', 0 ) ) return;
}
if( code ) { add_guess( code, 0 ); return; }
const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
if( f.bp.minima() <= 2 &&
( f.bp.minima( b.height() / 8 + noise ) == 2 ||
( b.height() >= 16 && f.bp.minima( b.height() / 8 ) == 2 ) ) )
{
code = f.test_hknwx( charbox );
if( code == 'n' ) // Looks for '"' or merged 'rt' or 'fl'
{
if( b.bottom() <= charbox.vcenter() ) { add_guess( '"', 0 ); return; }
if( b.width() > b.height() && 10 * f.lp[f.lp.pos(10)] < b.width() &&
!f.rp.increasing( f.rp.pos( 75 ) ) )
{
const int rgap = f.rp[f.rp.pos(50)];
if( 10 * rgap > b.width() && !b.escape_top( b.vcenter(), b.right() ) )
return; // leave 'rr', 'TT', 'rz', 'FT' etc, for next pass
}
if( 2 * f.lp[f.lp.pos(10)] > b.width() && !f.rp.increasing( f.rp.pos( 75 ) ) )
{
const int col = b.seek_left( b.vcenter(), b.right() );
if( col <= b.hpos( 95 ) && !b.escape_top( b.vcenter(), col ) &&
set_merged_guess( 'r', b.hcenter(), 't', 0 ) ) return;
}
if( f.rp.minima() == 1 && !f.rp.increasing( f.rp.pos( 75 ) ) )
{
int dmax = 0; bool bar = false;
for( int row = b.vpos( 60 ); row > b.vpos( 25 ); --row )
{
int d = b.hcenter() - b.seek_left( row, b.hcenter() );
if( d > dmax ) dmax = d;
else if( 2 * d < dmax && dmax > 2 ) bar = true;
if( bar && Ocrad::similar( d, dmax, 25 ) )
{
int col, limit = b.seek_right( b.vcenter(), b.hcenter() );
for( col = b.hcenter(); col <= limit; ++col )
if( b.seek_bottom( b.vcenter(), col ) < b.bottom() ) break;
if( col > b.left() && col < b.right() &&
set_merged_guess( 'f', col - 1, 'l', 0 ) ) return;
}
}
}
}
else if( code == 'h' ) // Looks for merged 'rf' or 'fi'
{
if( 2 * f.lp[f.lp.pos(10)] > b.width() )
{
if( f.rp[f.rp.pos(70)] >= 2 &&
b.seek_top( b.vpos( 70 ), b.right() ) > b.top() )
{
int col = 0, hmin = f.hp.range() + 1;
for( int i = b.hpos( 40 ); i <= b.hpos( 60 ); ++i )
if( f.hp[i-b.left()] < hmin )
{ hmin = f.hp[i-b.left()]; col = i; }
if( col > b.left() && col < b.right() )
set_merged_guess( 'r', col - 1, 'f', 0 );
}
return;
}
if( f.rp.isctip( 30 ) )
{ set_merged_guess( 'f', b.hcenter(), 'i', 0 ); return; }
}
else if( code == 'k' ) // Looks for merged 'rt'
{
if( 2 * f.lp[f.lp.pos(10)] > b.width() &&
!f.rp.increasing( f.rp.pos( 75 ) ) &&
set_merged_guess( 'r', b.hcenter(), 't', 0 ) ) return;
}
if( code ) { add_guess( code, 0 ); return; }
}
if( f.bp.minima() == 3 )
{
if( f.bp.minima( b.height() / 2 ) == 1 && f.tp.minima() == 3 &&
f.lp.minima() == 2 && f.rp.minima() == 2 )
{ add_guess( '*', 0 ); return; }
if( b.id( b.vcenter(), b.hcenter() ) == 0 &&
b.id( b.vcenter() - 1, b.hcenter() ) == 0 &&
b.id( b.vcenter() + 1, b.hcenter() ) == 0 &&
b.seek_left( b.vcenter(), b.hcenter() ) <= b.hpos( 25 ) )
{ // Found merged 'rn'
int row = b.vpos( 95 );
int col = b.seek_right( row, b.left() );
col = b.seek_right( row, col + 1, false );
col = b.seek_right( row, col + 1 );
if( col > b.left() && col < b.right() &&
set_merged_guess( 'r', col, 'n', 0 ) ) return;
}
if( f.tp.minima( b.height() / 3 ) == 1 ) add_guess( 'm', 0 );
return;
}
if( f.bp.minima() == 4 && f.tp.minima( b.height() / 3 ) == 1 )
{ // Found merged 'rm'
int row = b.vpos( 95 );
int col = b.seek_right( row, b.left() );
col = b.seek_right( row, col + 1, false );
col = b.seek_right( row, col + 1 );
if( col > b.left() && col < b.right() &&
set_merged_guess( 'r', col, 'm', 0 ) ) return;
}
if( f.tp.minima( b.height() / 4 ) == 3 )
{
if( f.segments_in_row( b.vcenter() ) == 2 &&
f.segments_in_row( b.vpos( 80 ) ) == 3 ) return; // merged 'IX'
int hdiff;
if( !b.bottom_hook( &hdiff ) &&
( f.segments_in_row( b.vcenter() ) < 4 ||
!b.escape_top( b.vcenter(), b.hcenter() ) ) )
add_guess( 'w', 0 );
return;
}
code = f.test_line( charbox );
if( code ) { add_guess( code, 0 ); return; }
code = f.test_misc( charbox );
if( code ) { add_guess( code, 0 ); return; }
}
// Recognizes 1 blob characters with 1 hole.
// 0469ADOPQRabdegopq#
//
void Character::recognize111( const Charset & charset, const Rectangle & charbox )
{
const Blob & b = blob( 0 );
const Bitmap & h = b.hole( 0 );
if( !h.is_hcentred_in( b ) ) return;
Features f( b );
int top_delta = h.top() - b.top(), bottom_delta = b.bottom() - h.bottom();
if( std::abs( top_delta - bottom_delta ) <= std::max( 2, h.height() / 4 ) ||
Ocrad::similar( top_delta, bottom_delta, 40, 2 ) )
{ // hole is vertically centred
int code = f.test_4ADQao( charset, charbox );
if( code )
{
if( code == 'Q' && Ocrad::similar( top_delta, bottom_delta, 40, 2 ) )
add_guess( 'a', 1 );
add_guess( code, 0 );
}
return;
}
if( top_delta < bottom_delta ) // hole is high
{
int code = f.test_49ARegpq( charbox );
if( code ) add_guess( code, 0 );
return;
}
if( top_delta > bottom_delta ) // hole is low
{
int code = f.test_6abd( charset );
if( code )
{
add_guess( code, 0 );
if( code == UCS::SOACUTE )
{
int row = h.top() - ( b.bottom() - h.bottom() ) - 1;
if( row <= b.top() || row + 1 >= h.top() ) return;
Blob & b1 = const_cast< Blob & >( b );
Blob b2( b );
b1.bottom( row ); b2.top( row + 1 );
blobpv.push_back( new Blob( b2 ) );
}
}
}
}
// Recognizes 1 blob characters with 2 holes.
// 8BQg$&
//
void Character::recognize112( const Rectangle & charbox )
{
const Blob & b = blob( 0 );
const Bitmap & h1 = b.hole( 0 ); // upper hole
const Bitmap & h2 = b.hole( 1 ); // lower hole
Profile lp( b, Profile::left );
Profile tp( b, Profile::top );
Profile rp( b, Profile::right );
Profile bp( b, Profile::bottom );
// Check for 'm' or 'w' with merged serifs
if( 10 * std::abs( h2.vcenter() - h1.vcenter() ) <= b.height() &&
h1.is_vcentred_in( b ) && h2.is_vcentred_in( b ) )
{
if( ( b.bottom() - h1.bottom() <= h1.top() - b.top() ) &&
( b.bottom() - h2.bottom() <= h2.top() - b.top() ) && bp.isflats() )
{ add_guess( 'm', 0 ); return; }
if( 5 * std::abs( h1.bottom() - b.vcenter() ) <= b.height() &&
5 * std::abs( h2.bottom() - b.vcenter() ) <= b.height() &&
tp.isflats() && bp.minima() == 2 )
{ add_guess( 'w', 0 ); return; }
return;
}
if( !h1.is_hcentred_in( b ) ) return;
if( !h2.is_hcentred_in( b ) ) return;
if( h1.left() > b.hcenter() && h2.left() > b.hcenter() ) return;
if( h1.right() < b.hpos( 40 ) && h2.right() < b.hpos( 40 ) ) return;
if( h1.top() > b.vcenter() || h2.bottom() < b.vcenter() ) return;
const int a1 = h1.area();
const int a2 = h2.area();
{
const int w = b.right() - std::min( b.hcenter(),
std::min( h1.hcenter(), h2.hcenter() ) );
for( int i = h1.bottom() - b.top() + 1; i < h2.top() - b.top(); ++i )
if( rp[i] > w ) { add_guess( 'g', 2 ); return; }
}
if( Ocrad::similar( a1, a2, 50 ) ) // I don't like this
{
if( h1.bottom() > b.vcenter() && h2.top() < b.vcenter() &&
h1.h_overlaps( h2 ) && !h1.h_includes( h2 ) )
{ add_guess( '0', 0 ); return; }
if( h1.bottom() <= h2.top() )
{
int hdiff;
if( b.bottom_hook( &hdiff ) && hdiff > b.height() / 2 )
if( b.top_hook( &hdiff ) && hdiff > b.height() / 2 )
{ add_guess( 's', 0 ); return; }
if( lp.isflats() && ( lp.istip() || ( lp.isflat() && b.test_BD() ) ) )
{ add_guess( 'B', 0 ); return; }
int col1 = h1.seek_left( h1.bottom(), h1.right() + 1 ) - 1;
int col2 = h2.seek_right( h2.top(), h2.left() - 1 ) + 1;
if( col1 <= col2 )
{
if( lp.isconvex() || lp.ispit() ) add_guess( 'e', 1 );
else if( !rp.isctip() && tp.minima() == 1 ) add_guess( 'a', 1 );
if( bp.istpit() ) { add_guess( '$', 0 ); return; }
}
if( b.hcenter() > h1.hcenter() && b.hcenter() > h2.hcenter() &&
( b.hcenter() >= h1.right() || b.hcenter() >= h2.right() ) )
{ add_guess( '&', 0 ); return; }
for( int row = h1.bottom() + 1; row < h2.top(); ++row )
if( !b.get_bit( row, hcenter() ) )
{ add_guess( 'g', 0 ); return; }
if( charbox.bottom() > h2.vcenter() &&
( bp.isconvex() || ( bp.ispit() && tp.ispit() ) ) )
{
if( b.top() >= charbox.top() && b.height() <= charbox.height() )
{
if( ( lp.ispit() || lp.isconvex() ) &&
( !rp.ispit() || h2.right() > h1.right() ) )
add_guess( 'e', 1 );
else if( b.right() - rp[rp.pos(50)] > h1.right() && !rp.isctip() )
add_guess( 'a', 1 );
}
if( h1.bottom() > b.vcenter() && h1.top() > b.vpos( 30 ) )
add_guess( UCS::SEACUTE, 0 );
else add_guess( '8', 0 );
return;
}
if( lp.minima() == 2 && rp.minima() == 1 )
{
if( charbox.vcenter() < h1.bottom() && charbox.bottom() < h2.bottom() )
add_guess( 'g', 0 );
else add_guess( 'a', 0 );
return;
}
if( charbox.vcenter() > h1.top() &&
( charbox.vcenter() < h1.bottom() || charbox.bottom() < h2.vcenter() ) )
add_guess( 'g', 2 );
add_guess( 'B', 1 ); add_guess( 'a', 0 );
return;
}
}
if( a1 > a2 && h1.h_overlaps( h2 ) )
{
if( !h1.v_overlaps( h2 ) )
{
if( h2.left() > b.hcenter() && h2.bottom() < b.bottom() - h1.height() )
add_guess( '9', 0 );
else add_guess( 'g', 0 );
return;
}
if( h1.h_includes( h2 ) ) { add_guess( 'Q', 0 ); return; }
return;
}
if( a1 < a2 && tp.minima() == 1 )
{
if( h1.h_overlaps( h2 ) )
{
if( rp.minima() == 1 )
{
if( 2 * h1.height() > h2.height() && 2 * h1.width() > h2.width() &&
3 * h2.width() >= b.width() && !lp.isctip() )
{ if( lp.ispit() && lp.isconvex() ) add_guess( '6', 0 );
else add_guess( 'B', 0 ); }
else if( h2.right() < b.hcenter() ) add_guess( '&', 0 );
else add_guess( 'a', 0 );
return;
}
if( !h1.v_overlaps( h2 ) &&
h1.right() < b.hcenter() && h1.top() > b.top() + h1.height() )
{ add_guess( '6', 0 ); return; }
}
if( h1.bottom() < h2.top() ) { add_guess( '&', 0 ); return; }
}
}