/* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include "common.h"
#include "rectangle.h"
#include "segment.h"
#include "ucs.h"
#include "bitmap.h"
#include "blob.h"
#include "character.h"
#include "profile.h"
#include "feats.h"
// Recognizes 2 blob characters.
// ijÑñ!%:;=?|¡ª±º¿ÁÉÍÓÚÀÈÌÒÙÂÊÎÔÛáéíóúàèìòùâêîôûÅå
//
// Examines blob( 0 ) (the upper blob) and blob( 1 ) (the lower blob) and
// runs an ordered cascade of shape heuristics. The first heuristic that
// matches records its result through add_guess() or set_merged_guess() and
// returns; if none matches the function returns without adding a guess.
// The order of the tests matters: later tests rely on earlier ones having
// already returned.
//
// 'charset' gates the guesses outside ASCII (the iso_8859_15 / iso_8859_9
// checks) and triggers the ASCII-only fallback for accented letters.
// 'charbox' is forwarded to Features::test_solid() and recognize1(), and
// its vcenter(), height() and bottom() are used as position references.
//
// NOTE(review): Ocrad::similar( a, b, n ) is assumed to mean "a and b
// differ by no more than roughly n percent" -- confirm in common.h.
// NOTE(review): Features::test_solid() is used here as returning a
// character code ('.', '-', '_', '\'', '|') or 0 when the blob is not
// recognized as a solid shape -- confirm in feats.h.
//
void Character::recognize12( const Charset & charset, const Rectangle & charbox )
{
const Blob & b1 = blob( 0 ); // upper blob
const Blob & b2 = blob( 1 ); // lower blob
int a1 = b1.area();
int a2 = b2.area();
Features f1( b1 );
Features f2( b2 );
// Case 1: both blobs have nearly the same area (tolerance 10) -> '=' or ':'.
// This case always returns, with or without a guess.
if( Ocrad::similar( a1, a2, 10 ) )
{
// Neither blob has holes and each one's area is more than half of its
// size().
// NOTE(review): size() is presumably the bounding box area -- confirm.
if( !b1.holes() && !b2.holes() &&
2 * a1 > b1.size() && 2 * a2 > b2.size() )
{
// Character wider than tall, or roughly square: two stacked bars.
if( width() > height() || Ocrad::similar( width(), height(), 40 ) )
{ add_guess( '=', 0 ); return; }
// Otherwise accept ':' only if each blob is about as wide as it is tall.
if( Ocrad::similar( b1.width(), b1.height(), 20, 2 ) &&
Ocrad::similar( b2.width(), b2.height(), 20, 2 ) )
add_guess( ':', 0 );
return;
}
return;
}
// Case 2: areas similar under the looser tolerance 60. Unlike case 1,
// this block falls through to the tests below when nothing matches.
if( Ocrad::similar( a1, a2, 60 ) )
{
// Upper blob tests as a dot: ':' if the lower one is a dot too, ';' if the
// lower one is taller and starts below the vertical center of charbox.
if( f1.test_solid( charbox ) == '.' )
{
if( f2.test_solid( charbox ) == '.' )
{ add_guess( ':', 0 ); return; }
if( b2.height() > b1.height() && b2.top() > charbox.vcenter() )
{ add_guess( ';', 0 ); return; }
}
// Plus-minus sign: lower blob tests as '-' or '_'. Only tried when one of
// the two Latin charsets is enabled.
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
{
int code = f2.test_solid( charbox );
if( code == '-' || code == '_' )
{ add_guess( UCS::PLUSMIN, 0 ); return; }
}
// Each blob includes the other's horizontal center and both have holes:
// guessed as a 'g' that came out as two blobs.
if( b1.includes_hcenter( b2 ) && b2.includes_hcenter( b1 ) )
{
if( b1.holes() && b2.holes() ) { add_guess( 'g', 0 ); return; }
}
if( b1.hcenter() < b2.hcenter() ) // Looks for merged 'fi'
{
// Geometry required: lower blob taller than wide and starting to the
// right of the upper blob's center, upper blob spanning the lower blob's
// center and more than 5/4 of its height, and both extents measured from
// b1.top() being similar (tolerance 10).
if( b2.height() > b2.width() && b1.hcenter() < b2.left() &&
b1.includes_hcenter( b2 ) && 4 * b1.height() > 5 * b2.height() &&
Ocrad::similar( b1.bottom()-b1.top(), b2.bottom()-b1.top(), 10 ) )
{
// Recognize a copy of the lower blob on its own; if it may be an 'l' or
// a '|', report the pair as 'f' followed by 'i'.
// NOTE(review): b2.left() - 1 is presumably the column where the merged
// character is split -- confirm in set_merged_guess().
Character c2( new Blob( b2 ) ); c2.recognize1( charset, charbox );
if( ( c2.maybe('l') || c2.maybe('|') ) &&
set_merged_guess( 'f', b2.left() - 1, 'i', 0 ) ) return;
}
}
}
// Percent sign: larger upper blob to the left of the lower one, upper blob
// more than 1.5 times as tall, exactly one hole in each blob, and a
// roughly square lower blob.
if( a1 > a2 && b1.hcenter() < b2.hcenter() &&
2 * b1.height() > 3 * b2.height() &&
b1.holes() == 1 && b2.holes() == 1 &&
Ocrad::similar( b2.width(), b2.height(), 50 ) )
{ add_guess( '%', 0 ); return; }
// Case 3: the upper blob is the smaller one -> dot, accent or ring over a
// body (i, j, inverted '!' / '?', accented letters). Always returns.
if( a1 < a2 )
{
{
// Classify the upper blob as a dot ('.') or an accent ('\'').
int code = f1.test_solid( charbox ); //FIXME all this
// A '-' that is taller than half its width, an apostrophe and a '|' are
// all treated as a dot.
if( code == '-' && 2 * b1.height() > b1.width() ) code = '.';
else if( code == '\'' || code == '|' ) code = '.';
// test_solid() gave no code: for a hole-free upper blob that is less than
// half as tall as the lower blob and not wider than it, decide by
// density: area of at least 70% of width * height is a dot, otherwise an
// accent.
if( !code && !b1.holes() &&
2 * b1.height() < b2.height() && b1.width() <= b2.width() )
{
if( 10 * a1 >= 7 * b1.height() * b1.width() ) code = '.';
else code = '\'';
}
if( !b2.holes() && ( code == '.' || code == '\'' ) )
{
// Looks for merged 'ri' or 'rí'
// Requires two minima in the lower blob's bottom profile, the mark fully
// above the lower blob and to the right of its horizontal center.
if( f2.bp.minima( b2.height() / 4 ) == 2 &&
b2.top() > b1.bottom() && b2.hcenter() < b1.left() )
{
Character c2( new Blob( b2 ) ); c2.recognize1( charset, charbox );
if( c2.maybe('n') )
{
// NOTE(review): the enclosing test already requires
// b2.hcenter() < b1.left(), so only the b1.right() > b2.right() half of
// this condition can be true here.
if( code == '.' && ( b1.left() < b2.hcenter() || b1.right() > b2.right() ) )
{ add_guess( 'n', 0 ); return; } // FIXME remove dot
// Walk columns from the lower blob's center up to 'limit' and stop at
// the first one where seek_bottom() from the vertical center returns a
// row above b2.bottom(). 'col' is then used as the split column.
// NOTE(review): exact seek_right() / seek_bottom() semantics are not
// visible here -- confirm in bitmap.h.
int col, limit = b2.seek_right( b2.vcenter(), b2.hcenter() );
for( col = b2.hcenter(); col <= limit; ++col )
if( b2.seek_bottom( b2.vcenter(), col ) < b2.bottom() ) break;
// Only split when the column lies strictly inside the lower blob.
if( b2.left() < col && col < b2.right() )
{
// With iso_8859_9 enabled and a tip in the right profile, report 'T'
// followed by UCS::CIDOT; otherwise 'r' followed by 'i' (dot) or
// UCS::SIACUTE (accent).
if( charset.enabled( Charset::iso_8859_9 ) && f2.rp.istip() )
set_merged_guess( 'T', col - 1, UCS::CIDOT, 1 );
else
{
const int code2 = ( ( code == '.' ) ? 'i' : (int)UCS::SIACUTE );
set_merged_guess( 'r', col - 1, code2, 1 );
}
return;
}
}
}
// Dot over a single stem: candidates are 'i', 'j', UCS::IQUEST,
// UCS::IEXCLAM and UCS::CIDOT. Requires one minimum in the bottom
// profile, the dot not below the top of the body, and at most two minima
// in the right profile.
if( code == '.' && f2.bp.minima( b2.height() / 4 ) == 1 &&
b1.bottom() <= b2.top() && f2.rp.minima( b2.width() / 2 ) <= 2 )
{
// A bottom hook whose size 'hdiff' is at least half the body height
// decides by its sign.
// NOTE(review): the meaning of the sign of hdiff is not visible here --
// confirm in bottom_hook().
int hdiff;
if( b2.bottom_hook( &hdiff ) && std::abs( hdiff ) >= b2.height() / 2 )
{
if( hdiff > 0 && f2.rp.increasing( f2.rp.pos( 80 ) ) )
{ add_guess( 'j', 0 ); return; }
if( hdiff < 0 )
{
// The charset test guards only the nested 'if' below (UCS::IQUEST);
// the 'i' guess after it is unconditional for hdiff < 0.
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
if( -4 * hdiff <= 3 * b2.height() &&
f2.wp.max() > 2 * f1.wp.max() && f2.lp.minima() == 1 &&
2 * f2.bp[0] < b2.height() )
{ add_guess( UCS::IQUEST, 0 ); return; }
add_guess( 'i', 0 ); return;
}
}
// No decisive hook: continue only if the top profile has one minimum.
// This branch always returns with a guess.
if( f2.tp.minima() == 1 )
{
// 'j' is preferred over 'i' when the body is taller than charbox and its
// 80% vertical position lies below the bottom of charbox.
const bool maybe_j = ( b2.height() > charbox.height() &&
b2.vpos( 80 ) > charbox.bottom() );
// Dot and body have similar maximum widths (tolerance 20).
if( Ocrad::similar( f1.wp.max(), f2.wp.max(), 20 ) )
{
// As above, the charset test guards only the nested 'if'
// (UCS::IEXCLAM); the 'i' / 'j' guess after it is unconditional.
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
if( !f2.lp.isctip() && f2.wp.max() >= f1.wp.max() &&
( 3 * f2.wp[f2.wp.pos(10)] < 2 * f1.wp.max() ||
( b1.left() <= b2.left() && b2.vpos( 80 ) > charbox.bottom() ) ) )
{ add_guess( UCS::IEXCLAM, 0 ); return; }
if( maybe_j ) add_guess( 'j', 0 ); else add_guess( 'i', 0 );
return;
}
// Body clearly wider than the dot (more than 4/3), with additional
// conditions on the left part of the body and on the right profile: 'j'.
if( 3 * f2.wp.max() > 4 * f1.wp.max() &&
b2.seek_bottom( b2.vcenter(), b2.hpos( 10 ) ) < b2.bottom() &&
f2.rp.increasing( f2.rp.pos( 75 ) ) &&
( b1.left() >= b2.hcenter() ||
b2.seek_top( b2.vcenter(), b2.hpos( 10 ) ) <= b2.top() ) )
{ add_guess( 'j', 0 ); return; }
// UCS::CIDOT is only considered when iso_8859_9 is enabled.
if( charset.enabled( Charset::iso_8859_9 ) && f2.rp.istip() )
{ add_guess( UCS::CIDOT, 0 ); return; }
if( maybe_j ) add_guess( 'j', 0 ); else add_guess( 'i', 0 );
return;
}
}
}
}
// Accent or ring over a letter. Entered when the upper blob has no holes
// and either ends above the lower blob's vertical center or has less than
// half its area, or when it has exactly one hole and sits above the lower
// blob with a gap smaller than its own height.
if( ( !b1.holes() && ( b1.bottom() < b2.vcenter() || 2 * a1 < a2 ) ) ||
( b1.holes() == 1 && b1.bottom() < b2.top() &&
b2.top() - b1.bottom() < b1.height() ) )
{
// Recognize a copy of the lower blob alone and start from its first guess.
Character c( new Blob( b2 ) ); c.recognize1( charset, charbox );
if( c.guesses() )
{
int code = c.guess( 0 ).code;
// Upper blob with one hole is treated as a ring: only 'a' / 'A' are
// accepted (mapped to UCS::SARING / UCS::CARING), anything else is
// dropped.
if( b1.holes() == 1 )
{
if( code == 'a' ) code = UCS::SARING;
else if( code == 'A' ) code = UCS::CARING;
else code = 0;
}
// A 'u' whose upper blob is at most 1/5 of the body width in both
// dimensions: give up without a guess.
else if( code == 'u' &&
5 * b1.width() <= b2.width() && 5 * b1.height() <= b2.width() )
return;
else if( b1.bottom() < b2.vcenter() )
{
// Choose the accent marker passed to UCS::compose(): '\'' by default,
// ':' for a wide mark over a vowel, '^' when the bottom profile has two
// minima or a tip, '`' for a mark of at least 5x5 whose profiles slope
// the other way.
// NOTE(review): these markers presumably denote acute, diaeresis,
// circumflex and grave -- confirm in UCS::compose().
int atype = '\'';
if( UCS::isvowel( code ) && 2 * b1.width() > 3 * b1.height() &&
!f1.tp.iscpit() && f1.hp.iscpit() ) atype = ':';
else if( f1.bp.minima() == 2 || f1.bp.istip() ) atype = '^';
else if( std::min( b1.height(), b1.width() ) >= 5 &&
( f1.rp.decreasing() || f1.tp.increasing() ) &&
( f1.bp.decreasing() || f1.lp.increasing() ) ) atype = '`';
code = UCS::compose( code, atype );
}
// ASCII-only charset: replace a composed code by plain 'i' when its base
// letter is 'i', otherwise by the lower blob's own first guess.
if( code != c.guess( 0 ).code && charset.only( Charset::ascii ) )
{
if( UCS::base_letter( code ) == 'i' ) code = 'i';
else code = c.guess( 0 ).code;
}
if( code ) add_guess( code, 0 );
}
}
return;
}
// Case 4: reached only when a1 >= a2 (the a1 < a2 block above always
// returns). Upper blob entirely above the lower one: '!', '?' or the
// ordinal indicators.
if( b1.bottom() <= b2.top() )
{
const int code = f2.test_solid( charbox );
// Hole-free upper blob over a dot, or over any solid shape that is roughly
// as tall as it is wide.
if( !b1.holes() && ( code == '.' ||
( code && Ocrad::similar( b2.height(), b2.width(), 50 ) ) ) )
{
// Similar widths and no centered tip in the upper blob's left profile:
// '!'. Otherwise '?' if its bottom profile has a single minimum.
if( Ocrad::similar( b1.width(), b2.width(), 50 ) && !f1.lp.isctip() )
{ add_guess( '!', 0 ); return; }
if( f1.bp.minima() == 1 ) add_guess( '?', 0 );
return;
}
// Ordinal indicators: a one-hole upper blob over a '-' or '_' line, only
// when one of the two Latin charsets is enabled.
if( code == '-' || code == '_' )
if( charset.enabled( Charset::iso_8859_15 ) ||
charset.enabled( Charset::iso_8859_9 ) )
{
if( b1.holes() == 1 )
{
const Bitmap & h = b1.hole( 0 );
// The line must be at least as wide as the hole and closer to the upper
// blob than the hole is tall.
if( b2.width() >= h.width() && b2.top() - b1.bottom() < h.height() )
{
// Hole with similar left and right margins inside the upper blob:
// UCS::MASCORD; otherwise UCS::FEMIORD.
if( Ocrad::similar( h.left() - b1.left(), b1.right() - h.right(), 40 ) )
{ add_guess( UCS::MASCORD, 0 ); return; }
add_guess( UCS::FEMIORD, 0 ); return;
}
}
}
}
}