--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/system.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,242 @@
+#include "system.h"
+#include "Point.h"
+
+
+// Global Variables
+Point NOPNT(-1,-1); // Sentinel point (-1,-1) used for default entries
+/* Global variables used to fine-tune OCR. These can be adjusted
+ without recompiling by setting them in link_vars.tcl */
+int NoiseTolerance = 1; // Minimum number of pixels in a line of text
+int MinLineSize = 5; // Minimum number of rows in a line of text
+int MinVertSeparation = 0; // Minimum number of rows between lines of text
+int MinHorizSeparation = 1; // Minimum number of columns between characters
+int ConfidenceThreshold = 150; // Minimum confidence for some operations
+int JoinTolerance = 6; // Max number of pixels joining fused characters
+
+
+
+/* Number of properties in the property vector for Components */
+int numProperties = 30;
+
+/* Grid size for gray scale analysis */
+int NumHorizDiv = 5;
+int NumVertDiv = 5;
+
+// The next four are used in character grouping set in Page::extractComponents
+/* Character groups, with example members of each:
+ Group 0 - amo
+ Group 1 - Descenders yjp
+ Group 2 - Ascenders JPK
+ Group 3 - Both descenders and ascenders ()
+ Group 4 - Floaters * - ` */
+unsigned int NumCharGroups=5;
+int MaxVertSize = 50; // Max vertical pixels in a char (used for baseline)
+int BaseLineTolerance = 10; // Allowed distance from the base, as 1/x of the line size
+int TopLineTolerance = 10; // Allowed distance from the top, as 1/x of the line size
+ // x = 20 means 5%, x = 10 means 10%
+int MinComponentSize = 16; // Minimum number of pixels in smallest character
+
+uchar CharBitsSet[256]; // Number of bits set in each byte value 0-255; filled by initCharBitsSet()
+ // Used for determining gray scale and pixel counts
+
+/** Some globals set in learn() or readLearnedChars(). The values given here
+ are only starting values. **/
+
+double MaxHWRatio = 0.0; // presumably the largest height/width ratio in the learned set -- TODO confirm in learn()
+double MinHWRatio = 1000; // presumably the smallest height/width ratio in the learned set -- TODO confirm in learn()
+int MinWidth = 1000; // Min component width in learned set
+
+
+
+
+Component * LearnedChars; // Learned character averages /** NOT USED **/
+Components * LearnedGroups=NULL; // Learned character list array by group type
+
+
+/*** Some values for the TCL/TK interface. These variables can be
+ set in the file link_vars.tcl without recompiling ***/
+
+int ENABLE_USER_INTERFACE = 0; // NOTE(review): the 0/1 settings here look like off/on switches -- confirm
+int VERY_LOW_CONFIDENCE = 150; // lower of the two confidence cut-offs
+int LOW_CONFIDENCE = 200; // higher of the two confidence cut-offs
+int DISPLAY_LINE_BOUNDARIES = 0;
+int DISPLAY_BOUNDING_BOXES = 0; // boxes around components
+int SPELLCHECK = 0;
+int DISPLAY_IMAGE = 1;
+int DESKEW_METHOD = BITMAP_DESKEW; // BITMAP_DESKEW is declared outside this file (presumably system.h)
+double ZONING_SCALE_FACTOR = .50;
+double SCALE_FACTOR = 0.5;
+
+TclMode mode = REGULAR; // TclMode and REGULAR are declared outside this file (presumably system.h)
+
+void initCharBitsSet()
+// Fills CharBitsSet so that CharBitsSet[v] holds the number of 1 bits in byte value v
+{
+  for (int value = 0; value < 256; value++)
+  {
+    int ones = 0;
+    // Consume the value from the low end one bit at a time until it is empty.
+    for (int rest = value; rest != 0; rest >>= 1)
+      ones += rest & 1;
+    CharBitsSet[value] = ones;
+  }
+}
+
+char* backslashify(char* w)
+/* Returns a newly malloc'ed, NUL-terminated copy of w in which each of
+   the characters  $ [ ] \ { } ( ) ;  is preceded by a backslash.
+   The caller owns the returned buffer and must free() it.  Returns NULL
+   if w is NULL or if the allocation fails.
+   NOTE(review): the double quote is NOT escaped by this function;
+   confirm whether callers need it before extending the character
+   set. */
+{
+  static const char special[] = "$[]\\{}();";
+  if (w == NULL)
+    return NULL;
+  size_t length = strlen(w);
+  // Worst case every character is escaped and doubles in size, and the
+  // terminator needs one more byte.  length*2 alone is one byte short
+  // in that case, and is a zero-byte request for the empty string.
+  char* new_word = (char*)malloc(length * 2 + 1);
+  if (new_word == NULL)
+    return NULL;
+  size_t new_word_pos = 0;
+  for (size_t i = 0; i < length; i++)
+  {
+    // i < length means w[i] is never '\0', so strchr cannot match the
+    // terminator of the special-character list.
+    if (strchr(special, w[i]) != NULL)
+      new_word[new_word_pos++] = '\\';
+    new_word[new_word_pos++] = w[i];
+  }
+  new_word[new_word_pos] = '\0';
+  return new_word;
+}
+
+void invertBitsInBuffer(uchar * buf, int size)
+{
+  // Complement each of the first `size` elements of buf in place; a non-positive size does nothing.
+  for (int remaining = size; remaining > 0; --remaining, ++buf)
+    *buf = ~*buf;
+}
+
+void clearBitsInBuffer(uchar * buf, int size)
+{
+  // Zero the first `size` elements of buf; a non-positive size does nothing.
+  int idx = 0;
+  while (idx < size)
+    buf[idx++] = 0;
+}
+
+short int countBitsSet(uchar c)
+// Returns the number of 1 bits in c, looked up in the CharBitsSet table.
+{
+  // The table is filled on first use.  Entry 'f' is nonzero once filled
+  // (in ASCII that code has bits set), so zero means "not built yet".
+  if (CharBitsSet['f'] == 0)
+  {
+    initCharBitsSet();
+  }
+  return CharBitsSet[c];
+}
+
+int pixelsBetween(uchar * ar, int start, int end)
+{
+ // Returns the number of set (black) bits in ar from bit position start through bit position end, inclusive
+ int startCharNum = start / 8;
+ int endCharNum = end / 8 ;
+ int pixCount=0, startOffset, endOffset;
+ uchar nextChar;
+
+ startOffset = start - startCharNum*8; // first bit of range in first char
+ endOffset = end- endCharNum*8 + 1 ; // first bit after end in last char
+
+ // Count the bytes that lie strictly between the first and last byte of the range
+ for (int i = startCharNum + 1; i < endCharNum; i++)
+ {
+ nextChar = ar[i];
+ pixCount += countBitsSet(nextChar);
+ }
+ // Now add in the end pieces.
+ // Shift the starting byte left so the bits before start fall off the top.
+ // NOTE(review): relies on the assignment to uchar discarding the shifted-out bits, i.e. uchar being an 8-bit unsigned type -- confirm in system.h
+ nextChar = ar[startCharNum] << startOffset;
+ if (startCharNum != endCharNum )
+ {
+ pixCount += countBitsSet(nextChar);
+ // Get our part of the ending character:
+ // shift right so only its first endOffset bits remain (drops the low bits)
+ nextChar = ar[endCharNum] >> (8 - endOffset);
+ pixCount += countBitsSet(nextChar);
+ }
+ else
+ {
+ // Start and end share one byte: shift the left-aligned byte back right so only bits start..end remain
+ int shift = (8-endOffset)+startOffset;
+ pixCount += countBitsSet(nextChar >> shift);
+ }
+ return pixCount;
+
+}
+
+
+void setRange(uchar ar[], int start, int end)
+// Sets every bit of the bit array ar from position start through position
+// end, inclusive.  Positions count from the most significant bit of ar[0].
+// No validation is done: the code assumes 0 <= start <= end and that ar
+// is long enough to hold bit end.
+{
+  const int firstByte = start / 8;
+  const int lastByte = end / 8;
+  const int firstBit = start % 8;       // offset of start within firstByte
+  const int pastLastBit = end % 8 + 1;  // one past end within lastByte
+
+  // The masks live in uchar rather than plain char: narrowing 255 into a
+  // signed char is implementation-defined.
+  const uchar headMask = (uchar)(0xFF >> firstBit);           // start .. last bit of byte
+  const uchar tailMask = (uchar)(0xFF << (8 - pastLastBit));  // first bit of byte .. end
+
+  if (firstByte == lastByte)
+  {
+    // Whole range sits in one byte: keep only bits both masks cover.
+    ar[firstByte] |= (uchar)(headMask & tailMask);
+    return;
+  }
+
+  // Range spans several bytes: partial first byte, whole middle bytes,
+  // partial last byte.
+  ar[firstByte] |= headMask;
+  for (int i = firstByte + 1; i < lastByte; i++)
+    ar[i] = 0xFF;
+  ar[lastByte] |= tailMask;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+