reference/ocr-new/Page.h
changeset 0 6b8091ca909a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Page.h	Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,188 @@
+/* Page.h
+   The Page class is the primary class used by the OCR system.
+   It holds two data representations of a page of text: an RLEMap
+   and a BitMap.
+
+*/
+
+#ifndef _PAGE_H
+#define _PAGE_H
+
+#include "system.h"
+#include "EqnMarker.h"
+#include "Component.h"
+#include "RLEMap.h"
+#include "BitMap.h"
+#include "LineMarker.h"
+#include "Word.h"
+#include "tcl_interface.h"
+#include "Zone.h"
+
+class Zones;
+
+class Page {
+public:
+	// Friends (driver/test entry points), constructor, destructor
+	friend int main();	// return type spelled out: implicit int is not valid C++
+	friend void testocr(int argc, char ** argv);
+	Page();
+	~Page();
+
+
+	// Reads a page from a 2-level TIFF file.
+	// Calls the BitMap function readMap, then converts to RLE.
+
+	MapStatus readMap(char * filename);    // Calls BitMap::readMap
+
+	Angle skewAngle();             // returns skew estimate
+	int deskew(int deskew_method); // deskew_method: 1 for RLE, 0 for bitmap rotation
+
+	MapStatus setLines();
+	/* Sets fnumLines to the number of text lines and their
+	   dimensions, and sets flineinfo to the start and end row
+	   of each line.  */
+	void display_line_boundaries();	// unqualified: "Page::" inside the class is ill-formed
+	/* Highlights the space between lines of text in Tcl/Tk */
+
+	MapStatus extractComponents(int horizMerge);
+	/* Extract component information for each line of text.
+	   Does connected component analysis, then projects up and
+	   down to catch circumflexes. A component list is created
+	   for each line of text. Assumes the image has already been
+	   deskewed using deskew() and that setLines() has been run to
+	   determine text line boundaries. */
+
+	MapStatus recognize();
+	/* Recognize the whole page. Run after extractComponents().
+	   learn() or readLearnedGroups() must also be run
+	   before this function. */
+
+	MapStatus recognize(int linenum);   // just one line
+	/* Recognize characters:
+	   perform character recognition on a line of components.
+	   Uses the global variable LearnedGroups for comparison.
+	 */
+
+	MapStatus extractWords();
+	/* Find the start and end of words using avgSpacing and
+	   add them to the word list fWordList */
+
+	void spellcheck();
+	/* Spellcheck the list of words (sets the field that flags a
+	   misspelling in each word) */
+
+	int send_words_to_tcl();
+	/* Send words to the user interface */
+
+	/* Output options */
+	int writeWordPos(char * filename);
+	/* Write upper-left point coordinates, confidence and translation
+	   to file */
+	int writeWordbox(char * filename, int xoffset, int yoffset,
+			 bool equationsOnly);
+	/* Write out in scanwrx format */
+
+	int writeAscii(char * filename);
+	/* Write words out to an ASCII file */
+
+	int writeEquations(char * filename, int lineoffset);
+	/* Write boundaries of equations */
+
+
+	void printComponents();
+	/* Prints out a little bitmap for each bad component in the list.
+	   Uses ConfidenceThreshold as a cutoff for printing characters.
+	   Just used for debugging. */
+	void printComponent(Component* comp);
+
+	void printWords();	// unqualified: "Page::" inside the class is ill-formed
+	/* Prints out a bitmap for each component, delimiting between words. */
+
+
+	int get_height();
+	int get_width();
+	int get_linenum(int x, int y); // returns line number of x,y coordinates
+	int get_linenum(Point p) { return get_linenum(p.x(), p.y()); }
+	int get_linenum(Component * comp) { return get_linenum(comp->center()); }
+	// Raw accessors: each returns the corresponding private member.
+	BitMap * bmap() { return fBitMap; }
+	RLEMap * rmap() { return fRLEMap; }
+	Words * words() { return fWordList; }
+	Components ** components() { return fLineComponents; }
+	Components * line(int i) { return fLineComponents[i]; }  // no bounds check on i
+
+	int numLines() { return fnumLines; }
+	LineMarker * lineinfo() { return flineinfo; }
+	int avgSpacing() { return favgSpacing; }
+	/* Functions to handle equation marking */
+	int addEquation(int startline, int startcol, int endline, int endcol);
+	int deleteEquation(int x, int y); // deletes equation with this coordinate.
+
+	Component * compAt(Point p); // returns pointer to the smallest
+	                             // component containing p
+	bool inEquation(int x, int y);
+	bool inEquation(ListElement * comp);
+	void join(Point a, Point b) { join(compAt(a), compAt(b)); }
+	void join(Component * a, Component * b);
+
+	int thinnestHorizontalSplit(Components * complist,
+				    ListElement * compptr);
+
+	int thinnestHorizontalSplit(Component * comp);
+
+	int horizontalCompSplit(Components * complist,
+				ListElement * compptr, int x);
+	int verticalSplit(Components * complist,
+			  ListElement * compptr, int y);
+
+private:
+	int fnumLines;                   // Number of lines
+	LineMarker * flineinfo;          // for each line - start and end row
+	                                 // in RLEMap
+	int favgSpacing;                 // Avg spacing between comp (in pixels)
+	Components ** fLineComponents;   // A list of components for each line
+
+	Words * fWordList;               // A list of words in the document
+	RLEMap * fRLEMap;                // Pointer to an RLEMap representation
+	BitMap * fBitMap;                // Pointer to BitMap representation
+	EqnMarkers * fEqnList;           // Pointer to equation list
+	int spacing(ListElement * compa, ListElement * compb);
+	  // helper function for extractWords
+	  // Returns # of horizontal blank pixels between 2 components
+	void divideAndRecognize(Components * list, ListElement * ptr, Distance d);
+	void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
+	void setTclDeleteVars(EqnMarker * eqn);  // helper function for deleteEquation
+
+
+};
+
+// A Page that additionally carries a pointer to a Zones object.
+class ZonedPage : public Page {
+public:
+  ZonedPage();
+  ~ZonedPage();  // NOTE(review): ~Page() is non-virtual; avoid deleting a ZonedPage via Page*
+  void autoZone(int horizMerge, int vertMerge);
+  Page* activate(int x, int y);  // activate the page at Point(x,y)
+  void recognizeAll();
+  Zones* zones();                // presumably returns fzones - confirm in the .cpp
+
+private:
+  Zones* fzones;                 // Zones is only forward-declared in this header
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+