reference/ocr-new/Word.cc
author viric@llimona
Thu, 18 May 2006 23:12:51 +0200
changeset 0 6b8091ca909a
permissions -rw-r--r--
Init from working directory of svn repository.

#include "list.h"
#include "system.h"
#include "stdio.h"

extern Page * global_page;

Word::Word(char * word, int length)
/*--------------------------------------------------------------
Primary Purpose: Create a word without pointers to components
Arguments: word is the NUL-terminated character string to copy
           (a NULL word yields an empty string);
           length is the number of characters to keep from word,
           not counting the terminating NUL
Effects: characters receives a private, always NUL-terminated copy
         of at most length characters of word.  character is NULL,
         compCount is 0, confid is 255 and both corners are NOPNT.
Rev: 12/5/95

---------------------------------------------------------------*/
{
      // A negative length would become a huge size_t in strncpy below.
      if (length < 0)
        length = 0;

      // NOTE(review): charCount has always been fixed at 1 here,
      // whatever length is -- confirm callers rely on that before
      // changing it.
      charCount = 1;
      compCount = 0;

      // One byte more than length so the terminator always fits; the
      // previous new char[length] + strcpy wrote past the end of the
      // buffer whenever length was the plain string length.
      characters = new char[length + 1];
      characters[0] = '\0';
      if (word != NULL)
        strncpy(characters, word, length);   // never copies more than length
      characters[length] = '\0';             // strncpy does not terminate on truncation

      confid = 255;
      ul = NOPNT;
      lr = NOPNT;
      character = NULL;
      mispelled = 0;
}


Word::Word(ListElement * first, int count, int charlength)
/*--------------------------------------------------------------
Primary Purpose: Build a word from a run of consecutive components
Arguments: first is a pointer to a ListElement that contains
the first Component in the word (must not be NULL).  count is the
number of components in the word. charlength is the actual length
of the word in characters.

Effects:  Sets all data members of the word class
      characters  each component's fasciiId (asciiLen() characters
                  of it) concatenated, NUL terminated, never more
                  than charlength characters
      character   array of pointers to the components (the
                  components themselves are not copied)
      charCount   charlength
      compCount   number of components actually stored
      confid      lowest confid() found among the components
      ul          x of the first component, smallest ul y of all
      lr          lr() of the last component
Rev:  11/6/95
---------------------------------------------------------------*/
{
      mispelled = 0;
      Component * firstComp = (Component *) first->item;
      Component * item;
      Component * last = NULL;
      int i;
      int charOffset=0;
      ListElement * ptr;

      // Negative sizes would index before the buffers allocated below.
      if (count < 0)
        count = 0;
      if (charlength < 0)
        charlength = 0;

      characters = new char[charlength+1];  // ascii translation
      // Standard spelling of the array new; "new (Component *)[count]"
      // is an old GNU form that ISO C++ compilers reject.
      character = new Component *[count];
      charCount = charlength;
      ul = firstComp->ul();

      // Start above any expected confidence so the first component lowers it.
      // NOTE(review): assumes confid() never exceeds 255 -- confirm.
      confid = 256;

      // Stop at the end of the list as well as at count, so a short
      // list cannot make us dereference a null element.
      for (i = 0, ptr = first; i < count && ptr != NULL; ptr = ptr->next, i++)
        {
          item = (Component *)(ptr->item);

          int len = item->asciiLen();
          int room = charlength - charOffset;

          // assert(charOffset + len <= charlength);
          if (len > room)
            {
              // Same diagnostic as before, but the copy is now clipped
              // to the space left instead of overrunning the buffer.
              printf("\ncharOffset: %d charlength: %d", charOffset + len, charlength);
              len = room;
            }
          if (len > 0)
            {
              strncpy(&(characters[charOffset]), item->fasciiId, len);
              charOffset += len;
            }

          character[i] = item;
          if (item->confid() < confid)
            confid = item->confid();
          if(item->ul().y() < ul.y())
            ul.y() = item->ul().y();
          last = item;
        }

      compCount = i;              // equals count unless the list ended early
      if (last != NULL)
        lr = last->lr();          // lower right comes from the last character

      // charOffset never exceeds charlength here, so this terminates the
      // text right after the last copied character and leaves no
      // uninitialised bytes inside the string.
      characters[charOffset] = '\0';
      if(0)
        printf("Identified a word: %s\n", characters);

}


 Word::~Word()
/*--------------------------------------------------------------
Primary Purpose: Release the storage owned by a word
Effects: Frees the characters buffer and the character pointer
array.  Both are allocated with array new in the constructors, so
they have to be released with delete [] (plain delete on them is
undefined behaviour).  The Component objects that the character
array points to are not deleted here.
---------------------------------------------------------------*/
{
  // delete [] on a null pointer is a no-op, so no test is needed;
  // character is NULL for words built from a plain string.
  delete [] characters;
  delete [] character;
}


Words::~Words()
/*--------------------------------------------------------------
Primary Purpose: Destroy every Word stored in the list
Effects: Walks the list starting at first and deletes each
non-null item as a Word.  The list elements themselves are not
freed here.
---------------------------------------------------------------*/
{
  ListElement * node = first;
  while (node != NULL)
    {
      Word * doomed = (Word *) (node->item);
      if (doomed != NULL)
        delete doomed;
      node = node->next;
    }
}

int Words::writeWordPos(char * filename)
/*--------------------------------------------------------------
Primary Purpose: Write word position, confidence length and string to file
Arguments: output file name
Return Value: 1 for success 0 for file
Effects: create and write out to filename each word in the following format
All numeric fields are in fixed columns 7 characters wide
      upperleft x pos
      upperleft y pos
      word confidence
      character count
      string  
      \N
Rev: KM 11/25
---------------------------------------------------------------*/
{
  FILE * outfile;
  outfile = fopen(filename, "w");
  if (outfile == NULL)
      {
	printf("Error openning %s", filename);
	return 0;
      }
  
  for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
      {
	Word * word = (Word *) ptr->item;
	if (word->ul.x() == -1) continue; // dont print new lines.
	fprintf(outfile, " %6d %6d %6d %6d %s\n", word->ul.x(), word->ul.y(),
		          word->confid, word->charCount, word->characters );
      }
  fclose(outfile);
  return 1;
}


int Words::writeWordbox(char * filename, int xoffset=0, int yoffset=0,
			Page * page=global_page, bool equationsOnly=0)
/*--------------------------------------------------------------
Primary Purpose: Write Scanworx wordbox format 
Arguments: output file name x and y offset, parent page and bool for printing
equations only
Return Value: 1 for success 0 for file
Effects: create and write out to filename each word in the following format
      string
      upperleft x pos
      upperleft y pos

      lowerright x pos
      lowerright y pos

      upperright x pos
      upperright y pos

      lowerleft x pos
      lowerleft y pos

      \N
Rev: KM 11/25
---------------------------------------------------------------*/
{
  FILE * outfile;
  outfile = fopen(filename, "w");
  Point cntr;

  if (outfile == NULL)
      {
	printf("Error openning %s", filename);
	return 0;
      }
  fprintf(outfile, "\n"); // print new lines.
  for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
      {
	Word * word = (Word *) ptr->item;
	cntr = word->center();
	if (!(equationsOnly) || (page->inEquation(cntr.x(), cntr.y())))
	  {
	    if (word->ul.x() == -1) 
	      fprintf(outfile, "\n"); // print new lines.
	    else
	      fprintf(outfile, "%s %d %d %d %d %d %d %d %d \n",
		      word->characters,
		      word->ul.x() + xoffset, word->ul.y() + yoffset,
		      word->lr.x() + xoffset, word->lr.y() + yoffset,
		      word->lr.x() + xoffset, word->ul.y() + yoffset,
		      word->ul.x() + xoffset, word->lr.y() + yoffset);
	  }	
      }
  fclose(outfile);
  return 1;
}





int Words::writeAscii(char * filename)
/*--------------------------------------------------------------
Primary Purpose: Write word list to ascii file
Arguments: filename to write to; the file is created or truncated
Return Value:  1 if successful, 0 if the file could not be opened
Effects: Writes the text of every word to the file, each one
followed by a single blank.  An empty list produces an empty file.

Rev: 11/25 KM
---------------------------------------------------------------*/
{
  FILE * outfile;
  outfile = fopen(filename, "w");
  if (outfile == NULL)
      {
	printf("Error openning %s", filename);
	return 0;
      }

  // The list is only touched inside the loop: the former unused
  // "prev" local read first->item up front, which dereferenced a null
  // pointer whenever the list was empty.
  for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
      {
	Word * word = (Word *) ptr->item;
	fprintf(outfile, "%s ", word->characters );

      }
  fclose(outfile);
  return 1;

}