--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Page.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,131 @@
+/* Page.h
+ The Page class is the primary class used by the OCR system.
+ It holds two data representations of a page of text: an RLEMap
+ and a BitMap.
+
+*/
+
+#ifndef _PAGE_H
+#define _PAGE_H
+#include "system.h"
+#include "Component.h"
+#include "RLEMap.h"
+#include "BitMap.h"
+#include "LineMarker.h"
+#include "Word.h"
+#include "tcl_interface.h"
+
+class Page {
+public:
+ // Friends (main and testocr may access private members), Constructor, Destructor
+ friend int main(int argc, char** argv);
+ friend void testocr(int argc, char ** argv);
+ Page();
+ ~Page();
+ // NOTE(review): holds raw pointers but declares no copy ctor/assignment -- confirm Page is never copied.
+
+ // Reads from 2-level TIFF files.
+ // Calls the BitMap function readMap, then converts to RLE.
+
+ MapStatus readMap(char * filename); // Calls BitMap::readMap
+
+ Angle skewAngle(); // returns skew estimate
+ int deskew(int deskew_method); /* 1 for RLE, 0 for bitmap rotation */
+
+ MapStatus setLines();
+ /* Sets fnumLines to the # of text lines
+ and dimensions, and sets flineinfo for the start and end row
+ of each line. */
+ void display_line_boundaries();
+ /* Highlights the space between lines of text in Tcl/Tk */
+
+ MapStatus extractComponents();
+ /* Extract Component information for each line of text.
+ Does connected component analysis, then projects up and
+ down to catch circumflexes. A component list is created
+ for each line of text. Assumes the image has already been deskewed
+ using deskew and that setLines has been run to determine
+ text line boundaries. ***/
+
+ MapStatus recognize();
+ /* Recognize the whole page. Run after extractComponents.
+ learn() or readLearnedGroups() must also be run
+ before this function. **/
+
+ MapStatus recognize(int linenum); // just one line
+ /* Recognize characters.
+ Perform Character Recognition on a line of components.
+ Uses the global variable LearnedGroups for comparison.
+ ***/
+
+ MapStatus extractWords();
+ /* Find the start and end of words using avgSpacing and
+ add them to the word list fWordList */
+
+ void spellcheck();
+ /* Spellcheck the list of words (sets the misspelled field
+ in each word) */
+
+ int send_words_to_tcl();
+ /* Send words to the user interface */
+ int writeWordPos(char * filename);
+ /* Write upper left point coordinates, confidence and translation to
+ file */
+ int writeAscii(char * filename);
+ /** Write words out to an ASCII file **/
+
+
+ void printComponents();
+ /* Prints out a little bitmap for each bad component in the list.
+ Uses ConfidenceThreshold as a cutoff for printing characters.
+ Just used for debugging. */
+ void printComponent(Component* comp);
+
+ void printWords();
+ /* Prints out a bitmap for each component, delimiting between words. */
+
+
+ int get_height();
+ int get_width();
+ inline BitMap * bmap() {return fBitMap;}
+ inline RLEMap * rmap() {return fRLEMap;}
+ inline Words * words() {return fWordList;}
+ inline Components * line(int i) {return fLineComponents[i];} // no bounds check on i
+
+ int numLines() {return fnumLines;}
+ LineMarker * lineinfo() {return flineinfo;}
+ int avgSpacing() {return favgSpacing;}
+
+private:
+ int fnumLines; // Number of lines
+ LineMarker * flineinfo; // for each line - start and end row
+ // in RLEMap
+ int favgSpacing ; // Avg spacing between components (in pixels)
+ Components ** fLineComponents; // A list of components for each line
+
+ Words * fWordList; // A list of words in the document
+ RLEMap * fRLEMap; // Pointer to an RLEMap representation
+ BitMap * fBitMap; // Pointer to BitMap representation
+
+ int spacing(ListElement * compa, ListElement * compb);
+ // helper function for extractWords
+ // Returns # of horizontal blank pixels between 2 components
+ void divideAndRecognize (Components * list, ListElement * ptr, Distance d);
+ void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
+};
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+