reference/ocr-new/Page.h
author viric@llimona
Thu, 18 May 2006 23:12:51 +0200
changeset 0 6b8091ca909a
permissions -rw-r--r--
Init from working directory of svn repository.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     1
/* Page.h
   The Page class is the primary class used for the OCR system.
   It has two data representations of a page of text: an RLEMap
   and a BitMap.
*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     7
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     8
#ifndef _PAGE_H
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     9
#define _PAGE_H
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    10
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    11
#include "system.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    12
#include "EqnMarker.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    13
#include "Component.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    14
#include "RLEMap.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    15
#include "BitMap.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    16
#include "LineMarker.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    17
#include "Word.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    18
#include "tcl_interface.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    19
#include "Zone.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    20
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    21
class Zones;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    22
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    23
class Page {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    24
public:
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    25
	// Constructor, Destructor
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    26
	friend main();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    27
	friend void testocr(int argc, char ** argv);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    28
	Page();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    29
	~Page();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    30
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    31
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    32
	// Read is from 2 level TIFF files.  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    33
        // Calls BitMap function. readMap then converts to RLE
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    34
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    35
        MapStatus readMap(char * filename);    // Calls BitMap::readMap
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    36
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    37
	Angle skewAngle();	   	// returns skew estimate       
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    38
	int deskew(int deskew_method); /* one for rle, 0 for bitmap rot */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    39
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    40
        MapStatus setLines();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    41
        /* Sets fnumlines to the # of text lines
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    42
	   and dimensions and sets flineinfo for start and end row
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    43
	   for each line.  */ 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    44
	void Page::display_line_boundaries();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    45
	/* highlights the space between lines of text in TCL//TK */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    46
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    47
	 MapStatus extractComponents(int horizMerge);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    48
	 /* Extract Component information for each line of text
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    49
	    Does connected component analysis then projects up and
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    50
            down to catch circumflexes. A component list is created
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    51
	    for each line of text Assumes image has already been deskewed 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    52
	    using deskew and that  setLines has been run to determine
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    53
            text line boundaries ***/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    54
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    55
	MapStatus recognize();           
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    56
	/* Recongnize whole page. Run after extractComponents.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    57
	   learn() or readLearnedGroups() must also be run 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    58
           before this function **/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    59
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    60
	MapStatus recognize(int linenum);   // just one line
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    61
      	/* Recognize characters 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    62
	   Perform Character Recogition on a line of components.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    63
	   Use the global variable LearnedGroups for comparison.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    64
         ***/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    65
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    66
	MapStatus extractWords();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    67
	/* Find the start and end of words using avgSpacing and
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    68
           add to word list fWordList */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    69
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    70
	void spellcheck();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    71
	/* spellcheck the list of words (set the mispelled field
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    72
	   in each word) */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    73
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    74
	int send_words_to_tcl();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    75
        /* Send words to user interface */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    76
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    77
       /*  Output options */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    78
	int writeWordPos(char * filename); 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    79
	/* Write upper left point coordinates, confidence and translation to 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    80
           file*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    81
	int writeWordbox(char * filename, int xoffset, int yoffset, 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    82
			 bool equationsOnly);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    83
	/* Write out to scanwrx format */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    84
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    85
	int writeAscii(char * filename);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    86
	/** Write words out to acii file **/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    87
        
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    88
        int writeEquations(char * filename, int lineoffset);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    89
        /*** Write boundaries of equations */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    90
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    91
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    92
	void printComponents();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    93
	/* Prints out a little bitmap for each bad component in the list.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    94
	   Uses ConfidenceThreshold as a cutoff for printing characters. 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    95
	   Just used for debugging*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    96
	void printComponent(Component* comp);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    97
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    98
	void Page::printWords();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    99
	/* prints out bitmap for each component delimiting between words.*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   100
	
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   101
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   102
	int get_height();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   103
	int get_width();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   104
        int get_linenum(int x, int y); // returns line number of x,y coordinates
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   105
        int get_linenum(Point p) { return get_linenum(p.x(), p.y()); }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   106
        int get_linenum(Component * comp) {return get_linenum(comp->center()); }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   107
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   108
	inline BitMap * bmap() {return fBitMap;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   109
	inline RLEMap * rmap() {return fRLEMap;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   110
	inline Words * words() {return fWordList;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   111
  inline Components ** components() {return fLineComponents;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   112
	inline Components * line(int i) {return fLineComponents[i];};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   113
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   114
	int numLines() {return fnumLines;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   115
	LineMarker * lineinfo() {return flineinfo;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   116
	int avgSpacing() {return favgSpacing;};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   117
        /* Functions to handle equation marking */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   118
        int addEquation(int startline, int startcol, int endline, int endcol);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   119
        int deleteEquation(int x,int y); // deletes equation with this coordinate.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   120
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   121
       Component * compAt(Point p); // returns pointer to smallest 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   122
                                    //comp containing p
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   123
        bool inEquation(int x, int y);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   124
        bool inEquation(ListElement * comp);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   125
        void join(Point a, Point b) { join(compAt(a), compAt(b));};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   126
        void join(Component * a, Component * b);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   127
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   128
        int thinnestHorizontalSplit(Components * complist, 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   129
				     ListElement * compptr);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   130
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   131
        int thinnestHorizontalSplit(Component * comp);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   132
  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   133
        int horizontalCompSplit(Components * complist,
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   134
				ListElement * compptr, int x);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   135
        int verticalSplit(Components * complist, 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   136
			  ListElement * compptr, int y);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   137
  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   138
private:
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   139
	 int fnumLines;                   // Number of lines
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   140
	LineMarker * flineinfo;          // for each line - start and end row 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   141
	                                 // in RLEMap
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   142
        int favgSpacing  ;              // Avg spacing between comp (in pixels) 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   143
        Components ** fLineComponents;   // A list of components for each line
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   144
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   145
	Words * fWordList;                // A list of words in the document  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   146
        RLEMap * fRLEMap;                // Pointer to an RLEMap represntation
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   147
	BitMap * fBitMap;                // Pointer to BitMap representation
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   148
        EqnMarkers * fEqnList;            // pointer to equation list
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   149
        int spacing(ListElement * compa, ListElement * compb);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   150
	  // helper function for extractWords
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   151
	  // Returns # of horizontal blank pixels between 2 components
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   152
	void divideAndRecognize (Components * list, ListElement * ptr, Distance d);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   153
	void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   154
        void setTclDeleteVars(EqnMarker * eqn);  // helper function for deleteEquation
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   155
        
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   156
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   157
};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   158
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   159
class ZonedPage:public Page
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   160
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   161
public:
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   162
  ZonedPage();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   163
  ~ZonedPage();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   164
  void autoZone(int horizMerge, int vertMerge);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   165
  Page * activate(int x, int y); // activate the page at Point(x,y)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   166
  void recognizeAll();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   167
  Zones * zones();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   168
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   169
private:
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   170
  Zones * fzones;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   171
};
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   172
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   173
#endif
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   174
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   175
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   176
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   177
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   178
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   179
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   180
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   181
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   182
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   183
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   184
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   185
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   186
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   187
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   188