// Init from working directory of svn repository.
#include "system.h"
#include "Point.h"
// Global Variables
Point NOPNT(-1,-1); // Used for default entries
/* Global variables used to fine tune OCR. These can be adjusted
without recompiling by setting them in link_vars.tcl */
int NoiseTolerance = 1; // Minimum number of pixels in a line of text
int MinLineSize = 5; // Minimum number of rows in a line of text
int MinVertSeparation = 0; // Minimum number of rows between lines of text
int MinHorizSeparation = 1; // Minimum number of cols between characters
int ConfidenceThreshold = 150; // Minimum confidence for some operations
int JoinTolerance = 6; // Max number of pixels joining fused chars.
/* Number of properties in property vector for Components */
int numProperties = 30;
/* Grid size for gray scale analysis */
int NumHorizDiv = 5;
int NumVertDiv = 5;
// The next four are used in character grouping set in Page::extractComponents
/* Character groups (example characters for each group):
Group 0 - amo
Group 1 - Descenders yjp
Group 2 - Ascenders JPK
Group 3 - Both descenders and Ascenders ()
Group 4 - floaters * - `
*/
unsigned int NumCharGroups=5;
int MaxVertSize = 50; // Max vert pixels in char (used for baseline)
int BaseLineTolerance = 10; // How far in 1/x of line size from base is okay
int TopLineTolerance = 10; // How far in 1/x of line size from top is okay
// For the two tolerances above: 20 = 5%, 10 = 10%
int MinComponentSize = 16; // Minimum number of pixels in smallest character
uchar CharBitsSet[256]; // Table of number of bits set in each value 0-255
// Filled in by initCharBitsSet().
// Used for determining gray scale and pixel counts
/** Some globals set in learn() or readLearnedChars(). These are just starting
values **/
// NOTE(review): the max starts low and the mins start high, presumably so the
// first learned value replaces them - confirm in learn()/readLearnedChars().
double MaxHWRatio = 0.0;
double MinHWRatio = 1000;
int MinWidth = 1000; // Min component width in learned set
Component * LearnedChars; // Learned character averages /** NOT USED **/
Components * LearnedGroups=NULL; // Learned character list array by group type
/*** Some values for TCL/TK interface. These variables can be
set in the file link_vars.tcl without recompiling ***/
int ENABLE_USER_INTERFACE = 0;
int VERY_LOW_CONFIDENCE = 150;
int LOW_CONFIDENCE = 200;
int DISPLAY_LINE_BOUNDARIES = 0;
int DISPLAY_BOUNDING_BOXES = 0; // boxes around components
int SPELLCHECK = 0;
int DISPLAY_IMAGE = 1;
int DESKEW_METHOD = BITMAP_DESKEW; // BITMAP_DESKEW is defined outside this file
double ZONING_SCALE_FACTOR = .50;
double SCALE_FACTOR = 0.5;
TclMode mode = REGULAR; // TclMode / REGULAR are defined outside this file
/* Fills the CharBitsSet lookup table: after the call, entry v holds the
   number of 1-bits in the value v, for every v in 0..255. */
void initCharBitsSet()
{
    for (int value = 0; value < 256; ++value)
    {
        int bitsOn = 0;

        // Probe each of the eight bit positions with a single-bit mask
        for (int mask = 1; mask < 256; mask <<= 1)
        {
            if (value & mask)
                ++bitsOn;
        }

        CharBitsSet[value] = bitsOn;
    }
}
/* Returns a newly malloc'ed copy of w in which every occurrence of
   $ [ ] \ { } ( ) ;  is preceded by a backslash.

   w      - NUL-terminated input string (not modified).
   return - the escaped string; the caller owns it and must free() it.
            NULL is returned if the allocation fails.

   NOTE(review): the double quote character is NOT escaped, although an
   earlier comment on this function listed it - confirm whether callers
   need it. */
char* backslashify(char* w)
{
    size_t length = strlen(w);

    // Worst case: every character gets a backslash in front of it
    // (2 bytes each), plus one byte for the terminating '\0'.
    char* new_word = (char*)malloc(length * 2 + 1);
    if (new_word == NULL)
        return NULL;

    size_t new_word_pos = 0;
    for (size_t i = 0; i < length; i++)
    {
        switch (w[i])
        {
        case '$':
        case '[':
        case ']':
        case '\\':
        case '{':
        case '}':
        case '(':
        case ')':
        case ';':
            // Special character: emit the escaping backslash first
            new_word[new_word_pos++] = '\\';
            break;
        default:
            break;
        }
        new_word[new_word_pos++] = w[i];
    }
    new_word[new_word_pos] = '\0';
    return new_word;
}
/* Replaces each of the first `size` elements of buf with its bitwise
   complement.  Does nothing when size <= 0. */
void invertBitsInBuffer(uchar * buf, int size)
{
    uchar * cursor = buf;
    for (int remaining = size; remaining > 0; --remaining)
    {
        *cursor = ~(*cursor);
        ++cursor;
    }
}
/* Sets each of the first `size` elements of buf to zero.
   Does nothing when size <= 0. */
void clearBitsInBuffer(uchar * buf, int size)
{
    uchar * cursor = buf;
    for (int remaining = size; remaining > 0; --remaining)
    {
        *cursor = 0;
        ++cursor;
    }
}
/* Returns the number of 1-bits in c, looked up in the CharBitsSet table.
   The table is filled on first use: the entry for 'f' is nonzero once
   initCharBitsSet() has run, so a zero there is treated as
   "not initialised yet". */
short int countBitsSet(uchar c)
{
    if (CharBitsSet['f'] == 0)
    {
        initCharBitsSet();
    }
    return CharBitsSet[c];
}
/* Counts the set bits (black pixels) in ar between bit positions start and
   end, inclusive.  Bit 0 of each element is its most significant bit.
   Assumes uchar is an 8-bit unsigned type - TODO confirm. */
int pixelsBetween(uchar * ar, int start, int end)
{
    const int firstByte = start / 8;
    const int lastByte = end / 8;
    const int leadSkip = start % 8;     // bits in firstByte that precede start
    const int tailKeep = end % 8 + 1;   // bits in lastByte up to and including end

    // Shifting left and storing back into a uchar drops the bits before start
    uchar head = ar[firstByte] << leadSkip;

    if (firstByte == lastByte)
    {
        // Range lies within one element: also push out the bits after end
        return countBitsSet(head >> ((8 - tailKeep) + leadSkip));
    }

    // Partial first element
    int total = countBitsSet(head);

    // Whole elements strictly between the first and the last
    for (int idx = firstByte + 1; idx < lastByte; ++idx)
    {
        total += countBitsSet(ar[idx]);
    }

    // Partial last element: keep only its leading tailKeep bits
    uchar tail = ar[lastByte] >> (8 - tailKeep);
    total += countBitsSet(tail);

    return total;
}
/* Sets every bit of ar from bit position start to bit position end,
   inclusive.  Bit 0 of each element is its most significant bit.
   Bits outside the range are left unchanged.
   Assumes uchar is an 8-bit unsigned type - TODO confirm. */
void setRange(uchar ar[], int start, int end)
{
    int startCharNum = start / 8;
    int endCharNum = end / 8;
    int startOffset = start % 8;       // first bit of range in first char
    int endOffset = end % 8 + 1;       // first bit after end in last char

    // Masks are built as unsigned values and limited to 8 bits, so nothing
    // depends on the signedness of plain char.
    unsigned int headMask = 0xFFu >> startOffset;                 // bits startOffset..7
    unsigned int tailMask = (0xFFu << (8 - endOffset)) & 0xFFu;   // bits 0..endOffset-1

    if (startCharNum == endCharNum)
    {
        // start and end fall in the same char: set only the overlap
        ar[startCharNum] |= (headMask & tailMask);
        return;
    }

    // Partial first char, whole chars in between, partial last char
    ar[startCharNum] |= headMask;
    for (int i = startCharNum + 1; i < endCharNum; i++)
    {
        ar[i] = 255;
    }
    ar[endCharNum] |= tailMask;
}