0
|
1 |
/* Page.h
   The Page class is the primary class used for the OCR system.
   It has two data representations of a page of text, an RLEMap
   and a BitMap.
*/
|
|
7 |
|
|
8 |
#ifndef _PAGE_H
|
|
9 |
#define _PAGE_H
|
|
10 |
|
|
11 |
#include "system.h"
|
|
12 |
#include "EqnMarker.h"
|
|
13 |
#include "Component.h"
|
|
14 |
#include "RLEMap.h"
|
|
15 |
#include "BitMap.h"
|
|
16 |
#include "LineMarker.h"
|
|
17 |
#include "Word.h"
|
|
18 |
#include "tcl_interface.h"
|
|
19 |
#include "Zone.h"
|
|
20 |
|
|
21 |
// Forward declaration: this header refers to Zones only through pointers.
class Zones;
|
|
22 |
|
|
23 |
class Page {
|
|
24 |
public:
|
|
25 |
// Constructor, Destructor
|
|
26 |
friend main();
|
|
27 |
friend void testocr(int argc, char ** argv);
|
|
28 |
Page();
|
|
29 |
~Page();
|
|
30 |
|
|
31 |
|
|
32 |
// Read is from 2 level TIFF files.
|
|
33 |
// Calls BitMap function. readMap then converts to RLE
|
|
34 |
|
|
35 |
MapStatus readMap(char * filename); // Calls BitMap::readMap
|
|
36 |
|
|
37 |
Angle skewAngle(); // returns skew estimate
|
|
38 |
int deskew(int deskew_method); /* one for rle, 0 for bitmap rot */
|
|
39 |
|
|
40 |
MapStatus setLines();
|
|
41 |
/* Sets fnumlines to the # of text lines
|
|
42 |
and dimensions and sets flineinfo for start and end row
|
|
43 |
for each line. */
|
|
44 |
void Page::display_line_boundaries();
|
|
45 |
/* highlights the space between lines of text in TCL//TK */
|
|
46 |
|
|
47 |
MapStatus extractComponents(int horizMerge);
|
|
48 |
/* Extract Component information for each line of text
|
|
49 |
Does connected component analysis then projects up and
|
|
50 |
down to catch circumflexes. A component list is created
|
|
51 |
for each line of text Assumes image has already been deskewed
|
|
52 |
using deskew and that setLines has been run to determine
|
|
53 |
text line boundaries ***/
|
|
54 |
|
|
55 |
MapStatus recognize();
|
|
56 |
/* Recongnize whole page. Run after extractComponents.
|
|
57 |
learn() or readLearnedGroups() must also be run
|
|
58 |
before this function **/
|
|
59 |
|
|
60 |
MapStatus recognize(int linenum); // just one line
|
|
61 |
/* Recognize characters
|
|
62 |
Perform Character Recogition on a line of components.
|
|
63 |
Use the global variable LearnedGroups for comparison.
|
|
64 |
***/
|
|
65 |
|
|
66 |
MapStatus extractWords();
|
|
67 |
/* Find the start and end of words using avgSpacing and
|
|
68 |
add to word list fWordList */
|
|
69 |
|
|
70 |
void spellcheck();
|
|
71 |
/* spellcheck the list of words (set the mispelled field
|
|
72 |
in each word) */
|
|
73 |
|
|
74 |
int send_words_to_tcl();
|
|
75 |
/* Send words to user interface */
|
|
76 |
|
|
77 |
/* Output options */
|
|
78 |
int writeWordPos(char * filename);
|
|
79 |
/* Write upper left point coordinates, confidence and translation to
|
|
80 |
file*/
|
|
81 |
int writeWordbox(char * filename, int xoffset, int yoffset,
|
|
82 |
bool equationsOnly);
|
|
83 |
/* Write out to scanwrx format */
|
|
84 |
|
|
85 |
int writeAscii(char * filename);
|
|
86 |
/** Write words out to acii file **/
|
|
87 |
|
|
88 |
int writeEquations(char * filename, int lineoffset);
|
|
89 |
/*** Write boundaries of equations */
|
|
90 |
|
|
91 |
|
|
92 |
void printComponents();
|
|
93 |
/* Prints out a little bitmap for each bad component in the list.
|
|
94 |
Uses ConfidenceThreshold as a cutoff for printing characters.
|
|
95 |
Just used for debugging*/
|
|
96 |
void printComponent(Component* comp);
|
|
97 |
|
|
98 |
void Page::printWords();
|
|
99 |
/* prints out bitmap for each component delimiting between words.*/
|
|
100 |
|
|
101 |
|
|
102 |
int get_height();
|
|
103 |
int get_width();
|
|
104 |
int get_linenum(int x, int y); // returns line number of x,y coordinates
|
|
105 |
int get_linenum(Point p) { return get_linenum(p.x(), p.y()); }
|
|
106 |
int get_linenum(Component * comp) {return get_linenum(comp->center()); }
|
|
107 |
|
|
108 |
inline BitMap * bmap() {return fBitMap;};
|
|
109 |
inline RLEMap * rmap() {return fRLEMap;};
|
|
110 |
inline Words * words() {return fWordList;};
|
|
111 |
inline Components ** components() {return fLineComponents;};
|
|
112 |
inline Components * line(int i) {return fLineComponents[i];};
|
|
113 |
|
|
114 |
int numLines() {return fnumLines;};
|
|
115 |
LineMarker * lineinfo() {return flineinfo;};
|
|
116 |
int avgSpacing() {return favgSpacing;};
|
|
117 |
/* Functions to handle equation marking */
|
|
118 |
int addEquation(int startline, int startcol, int endline, int endcol);
|
|
119 |
int deleteEquation(int x,int y); // deletes equation with this coordinate.
|
|
120 |
|
|
121 |
Component * compAt(Point p); // returns pointer to smallest
|
|
122 |
//comp containing p
|
|
123 |
bool inEquation(int x, int y);
|
|
124 |
bool inEquation(ListElement * comp);
|
|
125 |
void join(Point a, Point b) { join(compAt(a), compAt(b));};
|
|
126 |
void join(Component * a, Component * b);
|
|
127 |
|
|
128 |
int thinnestHorizontalSplit(Components * complist,
|
|
129 |
ListElement * compptr);
|
|
130 |
|
|
131 |
int thinnestHorizontalSplit(Component * comp);
|
|
132 |
|
|
133 |
int horizontalCompSplit(Components * complist,
|
|
134 |
ListElement * compptr, int x);
|
|
135 |
int verticalSplit(Components * complist,
|
|
136 |
ListElement * compptr, int y);
|
|
137 |
|
|
138 |
private:
|
|
139 |
int fnumLines; // Number of lines
|
|
140 |
LineMarker * flineinfo; // for each line - start and end row
|
|
141 |
// in RLEMap
|
|
142 |
int favgSpacing ; // Avg spacing between comp (in pixels)
|
|
143 |
Components ** fLineComponents; // A list of components for each line
|
|
144 |
|
|
145 |
Words * fWordList; // A list of words in the document
|
|
146 |
RLEMap * fRLEMap; // Pointer to an RLEMap represntation
|
|
147 |
BitMap * fBitMap; // Pointer to BitMap representation
|
|
148 |
EqnMarkers * fEqnList; // pointer to equation list
|
|
149 |
int spacing(ListElement * compa, ListElement * compb);
|
|
150 |
// helper function for extractWords
|
|
151 |
// Returns # of horizontal blank pixels between 2 components
|
|
152 |
void divideAndRecognize (Components * list, ListElement * ptr, Distance d);
|
|
153 |
void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
|
|
154 |
void setTclDeleteVars(EqnMarker * eqn); // helper function for deleteEquation
|
|
155 |
|
|
156 |
|
|
157 |
};
|
|
158 |
|
|
159 |
class ZonedPage:public Page
|
|
160 |
{
|
|
161 |
public:
|
|
162 |
ZonedPage();
|
|
163 |
~ZonedPage();
|
|
164 |
void autoZone(int horizMerge, int vertMerge);
|
|
165 |
Page * activate(int x, int y); // activate the page at Point(x,y)
|
|
166 |
void recognizeAll();
|
|
167 |
Zones * zones();
|
|
168 |
|
|
169 |
private:
|
|
170 |
Zones * fzones;
|
|
171 |
};
|
|
172 |
|
|
173 |
#endif
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
|
185 |
|
|
186 |
|
|
187 |
|
|
188 |
|