// Init from working directory of svn repository.
/* Page.h
The Page class is the primary class used by the OCR system.
It holds two data representations of a page of text: an RLEMap
and a BitMap.
*/
#ifndef _PAGE_H
#define _PAGE_H
#include "system.h"
#include "EqnMarker.h"
#include "Component.h"
#include "RLEMap.h"
#include "BitMap.h"
#include "LineMarker.h"
#include "Word.h"
#include "tcl_interface.h"
#include "Zone.h"
class Zones;
/* Page
   One page of text for the OCR system.  The page image is held in two
   representations (an RLEMap and a BitMap); the per-line component lists,
   the word list and the equation markers are kept alongside them.

   NOTE(review): ZonedPage derives publicly from Page but ~Page() is not
   virtual, so a ZonedPage must not be deleted through a Page *.  Confirm
   no caller does this before relying on it. */
class Page {
public:
  // Functions granted access to the private members.
  friend int main();  // return type spelled out: implicit int is not C++
  friend void testocr(int argc, char ** argv);

  // Constructor, Destructor
  Page();
  ~Page();

  /* Read is from 2 level TIFF files.
     Calls BitMap::readMap, then converts to RLE. */
  MapStatus readMap(char * filename);

  /* Returns the skew estimate. */
  Angle skewAngle();

  /* deskew_method: 1 for RLE, 0 for bitmap rotation. */
  int deskew(int deskew_method);

  /* Sets fnumLines to the # of text lines and dimensions, and sets
     flineinfo for the start and end row of each line. */
  MapStatus setLines();

  /* Highlights the space between lines of text in Tcl/Tk. */
  void display_line_boundaries();

  /* Extract component information for each line of text.
     Does connected component analysis, then projects up and down to
     catch circumflexes.  A component list is created for each line of
     text.  Assumes the image has already been deskewed using deskew()
     and that setLines() has been run to determine text line boundaries. */
  MapStatus extractComponents(int horizMerge);

  /* Recognize the whole page.  Run after extractComponents().
     learn() or readLearnedGroups() must also be run before this
     function. */
  MapStatus recognize();

  /* Recognize characters on just one line.
     Performs character recognition on a line of components, using the
     global variable LearnedGroups for comparison. */
  MapStatus recognize(int linenum);

  /* Find the start and end of words using avgSpacing and add them to
     the word list fWordList. */
  MapStatus extractWords();

  /* Spellcheck the list of words (sets the misspelled field in each
     word). */
  void spellcheck();

  /* Send words to the user interface. */
  int send_words_to_tcl();

  /* ---- Output options ---- */

  /* Write upper left point coordinates, confidence and translation to
     file. */
  int writeWordPos(char * filename);

  /* Write out to scanwrx format. */
  int writeWordbox(char * filename, int xoffset, int yoffset,
                   bool equationsOnly);

  /* Write words out to an ASCII file. */
  int writeAscii(char * filename);

  /* Write boundaries of equations. */
  int writeEquations(char * filename, int lineoffset);

  /* Prints out a little bitmap for each bad component in the list.
     Uses ConfidenceThreshold as a cutoff for printing characters.
     Just used for debugging. */
  void printComponents();
  void printComponent(Component * comp);

  /* Prints out a bitmap for each component, delimiting between words. */
  void printWords();

  int get_height();
  int get_width();

  /* Line number containing the given coordinates / point / component
     centre.  The Point and Component overloads forward to the (x, y)
     overload. */
  int get_linenum(int x, int y);
  int get_linenum(Point p) { return get_linenum(p.x(), p.y()); }
  int get_linenum(Component * comp) { return get_linenum(comp->center()); }

  /* Plain accessors for the private members below. */
  BitMap * bmap() { return fBitMap; }
  RLEMap * rmap() { return fRLEMap; }
  Words * words() { return fWordList; }
  Components ** components() { return fLineComponents; }
  Components * line(int i) { return fLineComponents[i]; }  // no bounds check
  int numLines() { return fnumLines; }
  LineMarker * lineinfo() { return flineinfo; }
  int avgSpacing() { return favgSpacing; }

  /* ---- Functions to handle equation marking ---- */

  int addEquation(int startline, int startcol, int endline, int endcol);

  /* Deletes the equation with this coordinate. */
  int deleteEquation(int x, int y);

  /* Returns a pointer to the smallest component containing p. */
  Component * compAt(Point p);

  bool inEquation(int x, int y);
  bool inEquation(ListElement * comp);

  /* Join two components; the Point overload looks each one up with
     compAt() first. */
  void join(Point a, Point b) { join(compAt(a), compAt(b)); }
  void join(Component * a, Component * b);

  /* Component splitting.
     NOTE(review): return values and the meaning of x / y are not
     documented in this header -- see the implementation. */
  int thinnestHorizontalSplit(Components * complist,
                              ListElement * compptr);
  int thinnestHorizontalSplit(Component * comp);
  int horizontalCompSplit(Components * complist,
                          ListElement * compptr, int x);
  int verticalSplit(Components * complist,
                    ListElement * compptr, int y);

private:
  int fnumLines;                  // Number of lines
  LineMarker * flineinfo;         // for each line - start and end row
                                  // in RLEMap
  int favgSpacing;                // Avg spacing between comp (in pixels)
  Components ** fLineComponents;  // A list of components for each line
  Words * fWordList;              // A list of words in the document
  RLEMap * fRLEMap;               // Pointer to an RLEMap representation
  BitMap * fBitMap;               // Pointer to BitMap representation
  EqnMarkers * fEqnList;          // pointer to equation list

  /* Helper function for extractWords.
     Returns # of horizontal blank pixels between 2 components. */
  int spacing(ListElement * compa, ListElement * compb);

  void divideAndRecognize(Components * list, ListElement * ptr, Distance d);
  void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);

  /* Helper function for deleteEquation. */
  void setTclDeleteVars(EqnMarker * eqn);
};
/* ZonedPage
   A Page that additionally carries a pointer to a Zones object (fzones). */
class ZonedPage : public Page {
public:
  ZonedPage();
  ~ZonedPage();

  // NOTE(review): presumably builds the zones automatically; the meaning
  // of horizMerge / vertMerge is not documented here -- confirm in the
  // implementation.
  void autoZone(int horizMerge, int vertMerge);

  // Activate the page at Point(x,y).
  Page * activate(int x, int y);

  void recognizeAll();

  Zones * zones();

private:
  Zones * fzones;
};
#endif