reference/ocr-simple/Page.cc
author viric@llimona
Thu, 18 May 2006 23:12:51 +0200
changeset 0 6b8091ca909a
permissions -rw-r--r--
Init from working directory of svn repository.

/** Page.cc contains the member functions for the primary OCR class Page */
#include "system.h"
#include "Page.h"
#include "convertMap.h"
#include "get_skew.h"
#include "Component.h"
#include "status_message.h"

/*** Member functions of class Page.     ***/

int Page::get_height()
/* Returns the image length reported by the page's RLE map. */
{
  const int rows = fRLEMap->imageLength();
  return rows;
}

int Page::get_width()
/* Returns the image width reported by the page's RLE map. */
{
  const int columns = fRLEMap->imageWidth();
  return columns;
}

int Page::send_words_to_tcl()
/*--------------------------------------------------------------
Primary Purpose:  Display words in tcl
Effects: Walks the word list and issues one "addword" command per
   word.  Each word is tagged, in order of precedence:
     UNKNOWN_CHAR  - confidence below VERY_LOW_CONFIDENCE
     LOW_PRECISION - confidence below LOW_CONFIDENCE
     MISPELLED     - flagged as mispelled and SPELLCHECK is set
     OK            - everything else
   When the user interface is enabled, the apparent word accuracy
   (percentage of words tagged OK) is written to the status line.
Return Value: the number of words sent
Constraints: extractWords must be run first (the word list is
   dereferenced without a check)
Rev - AR
---------------------------------------------------------------*/
{
  int word_count = 0;
  int unknown_char_count = 0;
  int low_precision_count = 0;
  int mispelled_count = 0;

  if(ENABLE_USER_INTERFACE) set_status("Displaying text");
  for(ListElement* ptr = (words())->first; ptr != NULL; ptr = ptr->next)
    {
      word_count++;
      set_text_display_status(word_count, fWordList->num_words);
      Word* temp_word = (Word*)ptr->item;
      char* send_chars = backslashify(temp_word->characters);

      // Pick the display tag and bump the matching error counter.
      const char* tag;
      if(temp_word->confid < VERY_LOW_CONFIDENCE)
	  {
	    tag = "UNKNOWN_CHAR";
	    unknown_char_count++;
	  }
      else if(temp_word->confid < LOW_CONFIDENCE)
	  {
	    tag = "LOW_PRECISION";
	    low_precision_count++;
	  }
      else if((temp_word->mispelled) && SPELLCHECK)
	  {
	    tag = "MISPELLED";
	    mispelled_count++;
	  }
      else
	  {
	    tag = "OK";
	  }

      docommand("addword \"%s\" %d %d %s", send_chars,
		temp_word->ul.x(), temp_word->ul.y(), tag);
      update();
    }

  if(ENABLE_USER_INTERFACE)
    {
      set_status("Done displaying text");
      // Only report an accuracy when there is something to divide by;
      // an empty word list would otherwise print 0/0.
      if(word_count > 0)
	{
	  const int flagged =
	    mispelled_count + unknown_char_count + low_precision_count;
	  set_status("Apparent word accuracy: %.3lf%%",
		     (100 - (100 * ((double)flagged / (double)word_count))));
	}
    }

  // The function is declared int; previously control fell off the end.
  return word_count;
}


int Page::deskew(int deskew_method)
/*--------------------------------------------------------------
Primary Purpose: Deskew the page
Arguments: deskew_method - RLE_DESKEW selects rotation of the
           RLEMap; any other value selects rotation of the BitMap
Return Value: 1 if the page was rotated, 0 if it was left alone
Effects: whichever map was rotated is converted into the other,
         so the bitmap and rlemap stay in step
Constraints: RLEMap Rotation is not currently reliable and probably
should not be used
Rev: AR
---------------------------------------------------------------*/
{
  if(deskew_method != RLE_DESKEW)
    {
      // BitMap path: measure the skew on the RLEMap, rotate the BitMap.
      const double angle = get_skew(fRLEMap);
      const bool worthRotating =
	(angle >= MINIMUM_SKEW_ANGLE) || (angle <= - MINIMUM_SKEW_ANGLE);
      if(!worthRotating)
	return 0;

      fBitMap->rotateMap(angle);
      convertMap(fBitMap, fRLEMap);
      return 1;
    }

  // RLE path: the RLEMap reports whether it rotated itself.
  if(!fRLEMap->deskew())
    return 0;

  convertMap(fRLEMap, fBitMap);
  return 1;
}

Page::Page()
/**Page::Page - constructor allocates bitmap and rlemap.
   Every other field is put into a defined "nothing yet" state so the
   destructor and later processing stages never read indeterminate
   values when an earlier stage was skipped or failed. */
{
  fBitMap = new BitMap;
  fRLEMap = new RLEMap;
  flineinfo = NULL;        // allocated by setLines
  fnumLines = 0;           // set by setLines
  fLineComponents = NULL;  // allocated by setLines
  fWordList = NULL;        // allocated by extractWords
  favgSpacing = 1;         // same fallback extractComponents uses
}

Page::~Page()
/*--------------------------------------------------------------
Primary Purpose:  Destructor deallocates private fields that
have been created.
Notes: flineinfo and fLineComponents are allocated with new[] in
   setLines, so they are released with delete[].  The per-line
   component lists are only visited when the array that holds them
   exists.
   NOTE(review): each fLineComponents[i] must be either NULL or a
   list created by extractComponents when this runs - confirm that
   no path leaves the slots unset.
Rev:
---------------------------------------------------------------*/
{
  delete [] flineinfo;

  if (fLineComponents)
    {
      for (int i = 0; i < fnumLines; i++)
	delete fLineComponents[i];
      delete [] fLineComponents;
    }

  // Deleting a null pointer is a no-op, so no guards are needed here.
  delete fBitMap;
  delete fRLEMap;
  delete fWordList;
}

Angle Page::skewAngle()
/*--------------------------------------------------------------
Primary Purpose: Determine the angle of rotation of this page
Arguments: none (the page's own RLEMap is measured)
Return Value: the angle returned by get_skew for the RLEMap
Code is in get_skew.cc
Rev: AR
---------------------------------------------------------------*/
{
  Angle detected = get_skew(fRLEMap);
  return detected;
}


MapStatus Page::readMap(char * filename)
 // Reads filename into the BitMap via BitMap::readMap, rebuilds the
 // RLEMap from it, and hands back the status of the read.
{
  const MapStatus readResult = fBitMap->readMap(filename);
  convertMap(fBitMap, fRLEMap);   // done regardless of readResult
  return readResult;
}



MapStatus Page::setLines()
/*--------------------------------------------------------------
Primary Purpose:  Set flineinfo array in Page class with the 
      starting and ending rows of each line of text.
      Also sets fnumLines to the number of lines
Arguments: none
Return Value: A Mapstatus either VALID, EMPTY if there is no
   data in the RLEMAP (zero or one image row), or OTHERERROR if the
   image is too short to hold a single line of MinLineSize rows or
   more lines are found than the line table can hold
Effects:  Allocates flineinfo and fills with starting and ending row
   of each line.  Allocates fLineComponents with one NULL slot per
   line.  On EMPTY and OTHERERROR fnumLines is set to 0 so later
   stages see an empty page.
   The following global variables are used as parameters
   in this function.  These are defined in system.cc
   NoiseTolerance - Rows whose number of pixels is less than  this value
                will be considered empty (current val 6). 
   MinVertSeparation - The minimum number of rows separating lines of text.
                 Lines will be merged if actual Separation is less than this
		 value. (current val 3)
   MinLineSize - The minimum number of rows in a line of text.  
                 Any smaller lines are discarded (currentval 5)

Constraints: Page::readMap() must be run first to fill fRLEMap 
   NOTE(review): calling this twice replaces flineinfo and
   fLineComponents without releasing the earlier allocations.
Rev: 10/26 KM
---------------------------------------------------------------*/
{

   int maxrow = fRLEMap->imageLength() - 1;      // maximum row number 

   // A zero-row image yields maxrow == -1; treat it like the one-row
   // case instead of asking for a negative-sized line table.
   if(maxrow <= 0)
     {
       fnumLines = 0;
       return EMPTY;
     }

   int actualSeparation = MinVertSeparation + 1; // must be bigger than min
                                                 // for line 0

   int linenum=0;                                // current line number
   int prvlinenum = 0;
   int lineSize;                                 // # rows in current line 

   int maxLines = maxrow/MinLineSize;           // max # of lines of text 

   flineinfo = new LineMarker[maxLines]; 

   // Fewer rows than MinLineSize: the table has no entries, so the
   // scan below must not touch flineinfo[0].
   if(maxLines <= 0)
     {
       fnumLines = 0;
       return OTHERERROR;
     }

   for (int i = 0; i < maxrow;)
	{
	  LineMarker & thisLine = flineinfo[linenum];
	  LineMarker & prevLine = flineinfo[prvlinenum];

	  // Skip blank (below-noise) rows, then consume the inked rows.
	  while (i < maxrow && fRLEMap->row(i)->numPixels < NoiseTolerance)
	    i++;
	  thisLine.fstartrow = i++;
	  while (i < maxrow &&fRLEMap->row(i)->numPixels > NoiseTolerance)
	    i++;
	  

	  lineSize = i - thisLine.fstartrow +1;

	  // If this line is less than MinVertSeparation away
	  //  from the last line.  Join the two together.
	  if (linenum > 0)
	    {
	      actualSeparation = thisLine.fstartrow - prevLine.fendrow;
	    }
	  if (actualSeparation < MinVertSeparation)
	    {
	     // If too small of a separation, add into prev row
	     prevLine.fendrow = i;
	   }
	  else if (lineSize >= MinLineSize)
	    {
	    thisLine.fendrow = i;
	    prvlinenum = linenum;
	    linenum++;

	  }
	  // The next iteration would bind flineinfo[linenum]; stop before
	  // that index leaves the table.
	  if (linenum >= maxLines)
	    {
	      fnumLines = 0;
	      return OTHERERROR;
	    }
	}

   fnumLines = linenum;   // Set number of lines in page class

   // Value-initialise so every slot is NULL until extractComponents
   // fills it in.
   fLineComponents = new Components*[fnumLines]();
   if((ENABLE_USER_INTERFACE) && DISPLAY_LINE_BOUNDARIES)
     {
       display_line_boundaries();
     }
   if(ENABLE_USER_INTERFACE) 
     update(); 
   return VALID;
 }

void Page::display_line_boundaries()
/*--------------------------------------------------------------
Primary Purpose: Display line boundaries in TCL/TK.  Called from
setLines if ENABLE_USER_INTERFACE and DISPLAY_LINE_BOUNDARIES are
set to TRUE
Effects:  Draws a blue line between each pair of adjacent text
lines, centred in the gap and as wide as the gap.  Only
fnumLines - 1 gaps exist, so the loop stops one short of the last
line; flineinfo[fnumLines] is never filled in by setLines.
Rev:  AR
---------------------------------------------------------------*/
{
  for(int j = 0; j + 1 < fnumLines; j++)
    {
      int centerline = (flineinfo[j].fendrow + flineinfo[j + 1].fstartrow) / 2;
      int width = flineinfo[j + 1].fstartrow - flineinfo[j].fendrow;

      // NOTE(review): scale() presumably adjusts its argument in place
      // to display coordinates - confirm against its declaration.
      scale(centerline);
      scale(width);
      /* having this pathname here is probably not such a good idea...*/
      
      docommand(".main_window.display.work_space create line %d %d %d %d -width %d -fill blue -tags {project_ray IMAGE_TAG} -stipple @/usr/sww/share/tclX-7.3a/tkX/3.6a/demos/bitmaps/grey.25", 0, centerline, bmap()->imageWidth(), centerline, width);
    }
}


int test_rlemap_lines(RLEMap* rmap)
/* Debugging aid: prints the pixel count of every row of rmap.
   Returns the number of rows printed (previously the function ran
   off its end without returning a value). */
{
  int length = rmap->imageLength();
  for(int i = 0; i < length; i++)
    printf("On line %d, numpixels = %d\n", i, rmap->fMapData[i]->numPixels);
  return length;
}


MapStatus Page::extractComponents()
/*--------------------------------------------------------------
                     Component extraction routines.
*
* Given the top and bottom line of a row we want to generate a list of
* components. The general method is to find the closest dot, trace its 
* connected dots, then project upwards and downwards and add anything we 
* find there to the component. We will erase the component from the RLEMap
* as it is added to the component list. By projecting up and down 
* from the piece we first find we should be able
* to completely encompass characters like :;i?|! The only problems are 
* italic or ligatured characters where we may pick up two or more 
* characters at a time (which would be bad) or characters fragmented 
* with a vertical gap.

Primary Purpose: Main extraction routine.
Effects: Makes new components and puts them in a list. Deletes components 
         from RLE map. Fills in component boundaries and calls 
	 Component::setProperties to set the property vector.
	 Assigns each component a charGroup from its position relative
	 to the top of the line and the detected baseline:
	   +2 top reaches the top of the line (tall like a T)
	   +1 bottom falls below the baseline (tail like a y)
	    4 bottom sits well above the baseline (floating like a ')
	 Sets favgSpacing to the mean horizontal gap between components.
         Lastly convertMap is run to rebuild the RLEMap
Return Value: VALID
Constraints: Page::setLines() must be run first 
Rev: 11/2 JMH
     11/8 KM add set properties and
     avgSpacing;
---------------------------------------------------------------*/
{
  int currentCol, startRow, endRow, rowHeight;
  ListElement* intrvl;
  ListElement* tempintrvl;
  Component* comp;
  int  totalSpacing = 0;  // total blank horizontal pixels between components
  int  baselines[MaxVertSize];     // array for finding the baseline
  last_status = 0.0;
  int compCounter = 0;
  int i;
  int j;
  printf("Extracting Components\n");
  for (i = 0; i < fnumLines; i++) {
    if(ENABLE_USER_INTERFACE)
      set_component_status(i, fnumLines);
    currentCol = 0;
    startRow = flineinfo[i].fstartrow;
    endRow = flineinfo[i].fendrow;
    rowHeight = endRow - startRow;
    assert(rowHeight > 0);

    for (j=0; j < MaxVertSize; j++)
      baselines[j] = 0;
    fLineComponents[i] = new Components();


    while (currentCol<=fRLEMap->imageWidth()) {  //until we reach the end of the page

	//Build component starting with closest black dot
	intrvl = fRLEMap->FindNearHorizDot(currentCol, startRow, endRow);
	if (intrvl == NULL) {
	  // Reached end of line
	  break;
	}
	comp = new Component(); //Make a new component named comp

	// The call has side effects, so it must not live inside assert():
	// with NDEBUG defined the whole expression would be compiled out.
	const bool seeded = comp->AddToComponent(intrvl, fRLEMap);
	assert(seeded);
	(void) seeded;

	//Now we want to extend upwards
	//First check if there is a blank space to the right
	tempintrvl = fRLEMap->FindNearHorizDot(comp->lr().x(), 
					       startRow, endRow);
	const bool gapToRight =
	  (tempintrvl != NULL) &&
	  (((RLEPair*) tempintrvl->item)->start >
	   comp->lr().x()+MinHorizSeparation+1);

	// With a gap to the right the search starts from the bottom of
	// the component, otherwise from its top.  Both cases keep adding
	// pieces until nothing more is found or a piece is rejected.
	// (A stray ';' used to end the second case after one pass.)
	while (comp->ul().y() < endRow) {
	  const int searchFrom = gapToRight ? comp->lr().y() : comp->ul().y();
	  intrvl = fRLEMap->FindNearVertDot(comp->ul().x(), 
					    comp->lr().x(), searchFrom,
					    startRow);
	  if (intrvl == NULL) break;
	  if (!comp->AddToComponent(intrvl, fRLEMap)) break;
	}

	//Now we want to extend downwards
	while (comp->lr().y() > startRow) {
	  intrvl = fRLEMap->FindNearVertDot(comp->ul().x(), comp->lr().x(), 
						    comp->lr().y(), endRow);
	  if (intrvl == NULL) break;
	  if (!comp->AddToComponent(intrvl, fRLEMap)) break;
	}

	// Now we toss out the noise.  A component whose corner went
	// negative is reported and counted as size 0 rather than being
	// compared through an uninitialised value.
	int size = 0;
	if (comp->ul() < Point(0,0))
	  printf("Here's a problem. %d, %d\n", comp->ul().x(), comp->ul().y());
	else
	  size = fBitMap->pixelsInRegion(comp->ul(), comp->lr());

	if (size < MinComponentSize) {
	  // Its pixels are already gone from the RLEMap, so the next
	  // search from currentCol moves on to the following dot.
	  delete comp;
	  comp = NULL;
	}
	else
	    {
	      compCounter++;
	      // display a rectangle around the component
	      if(ENABLE_USER_INTERFACE)
		  {
		    if(DISPLAY_BOUNDING_BOXES)
		      comp->display_bounding_box();
		  }
	  
	  // JMH - make an array of frequency of the y coord of bottom of comp
	      int vertOffset = endRow - comp->lr().y();
	      if(vertOffset < MaxVertSize && vertOffset >= 0)
		baselines[vertOffset]++;

	  
	      comp->setProperties(fBitMap);
	      if(fLineComponents[i]->last != NULL)
		totalSpacing += 
		  comp->ul().x() - 
		    ((Component *) (fLineComponents[i]->last->item))->lr().x();

	      fLineComponents[i]->Append(comp);       // add this component to list
	      currentCol = (comp->lr()).x() + 1;   // update position on page
	    }
      }
    
    // find most popular bottom of comp and call it the baseline.
    // Falls back to the bottom of the line when no component bottom
    // landed inside the histogram.
    int counter = 0;
    int baseline = endRow;
    for (j=0; j < MaxVertSize; j++) {
      if (counter < baselines[j]) {
	counter = baselines[j];
	baseline = endRow - j;
      }
    }

    // Now assign each character a group based on its location
    for (ListElement* ptr = fLineComponents[i]->first; ptr != NULL; 
	 ptr = ptr->next) {
      comp = (Component*) ptr->item;
      comp->charGroup = 0;
      
      // if top of char is higher than top - tolerance 
      if (comp->ul().y() < startRow + (rowHeight/TopLineTolerance)) {
	comp->charGroup += 2; //tall like a T
      }
      
      // if bottom of char is lower than base - tolerance
      if (comp->lr().y() > baseline + (rowHeight/BaseLineTolerance)) {
	comp->charGroup += 1; //has a tail like a y
      } else 
	if (comp->lr().y() < (baseline - (2*rowHeight/BaseLineTolerance))) {
	  comp->charGroup = 4; //floating like a '
	}
    }
  }

  last_status = 0.0;
  if(ENABLE_USER_INTERFACE)
    set_status("Done extracting characters");
  if((compCounter - fnumLines) > 0) /* don't want divide by zero */
    {
      favgSpacing = totalSpacing / (compCounter - fnumLines);
    }
  else
    {
      favgSpacing = 1;  
    }

  // Extraction erased the components from the RLEMap; rebuild it.
  delete fRLEMap;
  fRLEMap = new RLEMap;
  convertMap(fBitMap, fRLEMap);

  // Declared MapStatus; previously control fell off the end.
  return VALID;
}

void Page::printComponents()
/*--------------------------------------------------------------
Primary Purpose: Debugging routine that prints little bitmaps
of low confidence characters
---------------------------------------------------------------*/
{
  int compcounter = 0;
  for (int i = 0; i < fnumLines; i++) {
    Component* comp;
    for (ListElement* ptr = fLineComponents[i]->first; ptr != NULL; 
	 ptr = ptr->next) {
      compcounter++;
      comp = (Component *) ptr->item;
      if (comp->confid() < (ConfidenceThreshold-20) && comp->asciiId() == 'n')
      {
	printf("Here's a poorly recognized component ul=%d,%d, lr=%d,%d.\n\n", 
	   (comp->ul()).x(), (comp->ul()).y(),
	   (comp->lr()).x(), (comp->lr()).y());
	printComponent(comp);
	printf("properties: "); 
	printVector(comp->properties(), numProperties);
	printf("I think it's a -> %c <-   confidence: %d  line: %d  group: %d Comp#%d\n",
	       comp->asciiId(),
	       comp->confid(), i+1, comp->charGroup, compcounter);
	printf("\n*******************************************************\n");
      }
    }
  }
}

void Page::printComponent(Component* comp)
// Print the bitmap under a single component, one text row per pixel
// row.  At most 79 columns are printed: the right edge is clipped to
// the component's left edge + 78.
{
  const int leftCol = comp->ul().x();
  int lastCol = leftCol + 78;
  if (comp->lr().x() < lastCol) 
    lastCol = comp->lr().x();

  const int bottomRow = comp->lr().y();
  int rowIdx = comp->ul().y();
  while (rowIdx <= bottomRow) {
    int colIdx = leftCol;
    while (colIdx <= lastCol) {
      // eight pixels per byte: byte index, then bit index within it
      bitprint(fBitMap->row(rowIdx)[colIdx/8], colIdx%8);
      ++colIdx;
    }
    printf( "\n");
    ++rowIdx;
  }
}

// Forward declaration of a spacing helper used by extractWords.
// NOTE(review): the definition further down this file is the member
// Page::spacing, not a free function, so this namespace-scope
// declaration appears to have no matching definition here - confirm
// whether it is still needed.
int spacing(ListElement * compa, ListElement * compb);
// helper function for extractWords  (defined below)

MapStatus Page::extractWords()
/*--------------------------------------------------------------
Primary Purpose: Extract words from each lines components
Effects: sets fWordList to be a list of all of the words
in the document.  Within a line a word ends where the gap to the
next component exceeds 1.25 * favgSpacing, and always at the last
component of the line.  A separate "\n" word is appended after
every line.  Each word is also echoed to stdout.
Return Value: VALID
Constraints: extractComponents must be run first
Rev: KM 11/7/95
---------------------------------------------------------------*/
{
  bool inWord;
  ListElement * start = NULL;   // word Start
  int count = 0;   // counts the characters in the word
  int word_count = 0;
  int spacingThreshold = (int) (1.25 * ((float) (favgSpacing)));
  fWordList = new Words;
  last_status = 0.0;
  for (int i = 0; i < fnumLines; i++)
      {
	if(ENABLE_USER_INTERFACE)
	  set_extract_status(i, fnumLines);
	inWord = FALSE;
	for(ListElement *ptr = line(i)->first; ptr != NULL; ptr = ptr->next) {
	  if(!inWord)
	      {
		start = ptr;
		count = 1;
		inWord = TRUE;
	      }
	  // The last component of a line always closes the word.  Relying
	  // only on spacing()'s end-of-line sentinel (1000) would drop the
	  // final word whenever the threshold is not below that value.
	  if(ptr->next == NULL || spacing(ptr, ptr->next) > spacingThreshold)
	      {
		Word * newWord = new Word(start,count);
		(words())->Append(newWord);
		printf("%s ",newWord->characters);
		inWord = FALSE;
		word_count++;
	      }
	  else
	    count++;
	}
	// Add in a separate word for new line
	Word * newWord = new Word("\n",2);
        (words())->Append(newWord);
	printf("%s", newWord->characters);
	word_count++;
      }
  last_status = 0.0;
  fWordList->num_words = word_count;
  if(ENABLE_USER_INTERFACE)
    set_status("Done extracting words");
  return VALID;
}

void Page::spellcheck()
/*--------------------------------------------------------------
Primary Purpose: Run spell checker on word list.
Constraints: extractWords must be run first
Rev: AR
---------------------------------------------------------------*/
{
  int word_count = 0;
  Word* temp_word;
  for(ListElement* ptr = (words())->first; ptr != NULL; ptr = ptr->next)
    {
      word_count++;
      if(ENABLE_USER_INTERFACE)
	set_spellcheck_status(word_count, fWordList->num_words);
      temp_word = (Word*)ptr->item;
      if(0)
	printf("Spellchecking word %s\n", temp_word->characters);
      if(mispelled(temp_word->characters))
	{
	  temp_word->mispelled = TRUE;
	}
    }
}

int Page::spacing(ListElement * compa, ListElement * compb)
// Horizontal gap from the right edge of comp_a to the left edge of
// comp_b.
//   compb == NULL (end of line) -> 1000
//   overlapping components       -> 0 (never negative)
{
  if (compb == NULL) return 1000;  // end of line

  Component * a = ((Component *) (compa)->item);
  Component * b = ((Component *) (compb)->item);
  int gap = (b->ul().x() - a->lr().x());
  return (gap < 0) ? 0 : gap;
}


void Page::printWords()
// Prits out each component of each word. This can take a very long time
{

  Word * thisWord;
  for (ListElement * ptr = words()->first; ptr !=NULL; ptr= ptr->next)
      {
	thisWord = (Word *) ptr->item;
	printf("!!!!!! NEW WORD  %s  confid : %d !!!!!\n", thisWord->characters, thisWord->confid);
	for(int i = 0; i < thisWord->charCount; i++)
	    {
	      Component * comp = thisWord->character[i];
	      if (comp == NULL) continue;
	      printf("Printing a component ul=%d,%d, lr=%d,%d.\n\n", 
		     (comp->ul()).x(), (comp->ul()).y(),
		     (comp->lr()).x(), (comp->lr()).y());
	      for (int r = comp->ul().y(); 
		   r <= comp->lr().y(); r++){
		for (int c = comp->ul().x();
		     c <= comp->lr().x(); c++)
		  bitprint(fBitMap->row(r)[c/8], c%8);
		printf( "\n");
	      }
	      printf("properties: "); 
	      printVector(comp->properties(), numProperties);
	      printf("Identification:  %c distance: %d confidence %d\n",
		     comp->asciiId(),
		     comp->distance(&LearnedChars[comp->asciiId()]),
	             comp->confid());
	      printf("\n***********************************************\n");
	    }
      }
}

MapStatus Page::recognize()
/*--------------------------------------------------------------
Primary Purpose: Recognize entire page by running recognize(line)
on every line in turn.
Return Value: always VALID; the per-line status is not inspected
Constraints: extractComponents must be run first.
See recognize(line) below for more detailed info
Rev: KM
---------------------------------------------------------------*/
{
  printf("Recognizing document\n");
  last_status = 0.0;

  int lineIdx = 0;
  while (lineIdx < fnumLines)
      { 
	if(ENABLE_USER_INTERFACE)
	  set_recognize_status(lineIdx, fnumLines);
	recognize(lineIdx);
	++lineIdx;
      }

  last_status = 0.0;
  return VALID;
}


MapStatus Page::recognize(int linenum)
/*--------------------------------------------------------------
Primary Purpose: Recognize a line of connected components
Arguments:  linenum is line number to recognize
Effects: runs Component::recognize against LearnedGroups on every
component of the line.  When the resulting confidence is below
ConfidenceThreshold and the component is wider than 2*MinWidth,
divideAndRecognize is called to try splitting it in two.
Return Value: always VALID
Constraints: extractComponents must be run first
Rev: KM 11/9/95
---------------------------------------------------------------*/
{
  ListElement * node = line(linenum)->first;
  while (node != NULL)
      {
	Component * c = (Component *) node->item;
	Distance dist = c->recognize(LearnedGroups);

	const bool unsure = c->confid() < ConfidenceThreshold;
	if (unsure && c->width() > 2*MinWidth) // really wide
	  divideAndRecognize(line(linenum), node, dist);

	// Advance only after a possible split, so a component inserted
	// right after this one is visited next.
	node = node->next;
      }

  return VALID;
}



void Page::divideAndRecognize (Components *list, ListElement * ptr, Distance d)
/*--------------------------------------------------------------
Primary Purpose: Identify and separate merged characters
Arguments:list is the component list that ptr belongs to
          ptr is a pointer to a list element containing a component
          d is the current recognition distance on the component
Effects: Tries to subdivide the component into two parts.  The cut
         is placed at the column with the fewest set pixels, looking
	 only at columns at least MinWidth away from either edge.
	 No cut is made unless that column has fewer than
	 JoinTolerance pixels.
	 The left part is re-recognized in place; a new component is
	 built for the right part.  If the left part's new distance
	 beats d the new component is inserted after ptr, otherwise
	 the original boundaries are restored, the component is
	 recognized again and the new component is discarded.

	 Returns without doing anything if the confidence is above
	 ConfidenceThreshold or the width is < MinWidth*2
Rev: KM 11/24/95
---------------------------------------------------------------*/
{
  Component * comp = (Component *) ptr->item;
  bool allGroups = TRUE;

  // Snapshot of the original box, needed to undo a failed split
  Point savedLr = comp->lr();
  Point savedUl = comp->ul();
  int boxWidth = (int) comp->width();

  int left = comp->ul().x();
  int top = comp->ul().y();
  int right = comp->lr().x();
  int bottom = comp->lr().y();

  // Already good enough, or too narrow to hold two characters
  if (comp->confid() > ConfidenceThreshold || boxWidth < MinWidth*2)
    return;

  // Scan the interior columns for the thinnest one
  int thinnest = (int)comp->height();
  int splitX = comp->lr().x();
  int offset = MinWidth;
  while (offset < boxWidth - MinWidth)
      {
	int column = left + offset;
	int inked =
	  fBitMap->pixelsInRegion(Point(column,top), Point(column,bottom));
	if (inked < thinnest)
	    {
	      thinnest = inked;
	      splitX = column;
	    }
	++offset;
      }

  const bool canSplit = (splitX < right) && (thinnest < JoinTolerance);
  if (!canSplit)
    return;

  // Left half: shrink the existing component up to the cut
  comp->lr().x() = splitX;
  int shrunk = comp->vertShrink(fBitMap);
  comp->setProperties(fBitMap);
  // ignore group if we had to shrink down
  Distance leftDist = shrunk ? comp->recognize(LearnedGroups, allGroups)
			     : comp->recognize(LearnedGroups);

  // Right half: a fresh component from just past the cut to the old corner
  Component * rightHalf = new Component(Point(splitX+1, savedUl.y()), savedLr);
  rightHalf->vertShrink(fBitMap);
  rightHalf->setProperties(fBitMap);
  rightHalf->recognize(LearnedGroups, allGroups);

  if (!(leftDist < d))
      {
	// No improvement: put everything back the way it was
	comp->ul() = savedUl;
	comp->lr() = savedLr;
	comp->setProperties(fBitMap);
	comp->recognize(LearnedGroups);
	delete rightHalf;
	return;
      }

  list->insertAfter(ptr, rightHalf);
}


void Page::uniteAndRecognize (Components *list, ListElement * ptr, Distance d)
/*--------------------------------------------------------------
Primary Purpose: Identify and merge a separated character
Arguments:list is the component list that ptr belongs to
          ptr is a pointer to a list element containing a component;
              it is united with the element before it
          d is the current recognition distance on the component
Effects: Builds a component spanning from the upper left of the
    previous component to the lower right of this one and
    recognizes it.  If its distance beats d, both original list
    elements are removed and the united component takes their
    place; otherwise the candidate is discarded.
    Does nothing when ptr is NULL, when ptr has no predecessor, or
    when the two corners do not form a valid box.
    NOTE(review): whether removeAt also frees the removed
    components is not visible here - confirm to rule out a leak.
    
Rev: JMH 12/10/95
---------------------------------------------------------------*/
{
  // The first element of a line has nothing before it to unite with.
  if (ptr == NULL || ptr->previous == NULL)
    return;

  Component * part1 = (Component *) ptr->previous->item;
  Component * part2 = (Component *) ptr->item;
  Point ul, lr;
  ul = part1->ul();
  lr = part2->lr();
  if (ul.y() > lr.y() || ul.x() > lr.x())
    return;
  Component * newcomp = new Component(part1->ul(), part2->lr());

  newcomp->setProperties(fBitMap);

  // Combine the position groups of the two parts; a floating part (4)
  // contributes "tall" (2) to the other part's group.
  if (part1->charGroup <= 3 && part2->charGroup <= 3)
    newcomp->charGroup = (part1->charGroup | part2->charGroup);
  else if (part1->charGroup == 4)
    newcomp->charGroup = (part2->charGroup | 2);
  else
    newcomp->charGroup = (part1->charGroup | 2);
  if (newcomp->charGroup > 4) newcomp->charGroup = 4;

  Distance newdist = newcomp->recognize(LearnedGroups);

  if (newdist < d) {
    list->removeAt(ptr->previous);
    list->insertAfter(ptr, newcomp);
    list->removeAt(ptr); 
  } else
    delete newcomp;
}


int Page::writeWordPos(char * filename)
/*--------------------------------------------------------------
Primary Purpose: Writes word position, confidence, length and string to file
Arguments: output file name
Return Value: 1 if successful. 0 if an error occurred, including
   the case where no word list exists yet
Effects: Calls fWordList->writeWordPos
	  // Output format for each word
	      "%6d %6d %6d %6d %s\n", word->ul.x(), word->ul.y(),
		          word->confid, word->charCount, word->characters 
Constraints: extractWords must be run first
Rev: 11/25/95
---------------------------------------------------------------*/
{
  if (fWordList == NULL)   // extractWords has not been run
    return 0;
  return fWordList->writeWordPos(filename);
}

int Page::writeAscii(char * filename)
/*--------------------------------------------------------------
Primary Purpose: Write word list to ascii file
Arguments: filename to write to
Return Value:  1 if successful 0 if unsuccessful, including the
   case where no word list exists yet
Effects:  Calls fWordList->writeAscii(filename)
Writes words to file in text format using MinLineSize
to differentiate lines.
Constraints: extractWords must be run first
Rev: 11/25 KM
---------------------------------------------------------------*/
{
  if (fWordList == NULL)   // extractWords has not been run
    return 0;
  return fWordList->writeAscii(filename);
}