reference/ocr-new/Word.cc
changeset 0 6b8091ca909a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Word.cc	Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,233 @@
+#include "list.h"
+#include "system.h"
+#include "stdio.h"
+
+extern Page * global_page;
+
+Word::Word(char * word, int length)
+/*--------------------------------------------------------------
+Primary Purpose: Create a word without pointers to components
+Arguments: word is the NUL-terminated character string; length is
+the number of characters of word to keep (terminator not counted)
+Effects: Copies at most length characters of word into a newly
+allocated buffer that is always NUL-terminated.  The component
+array is left NULL, compCount is 0, confid is 255, both corner
+points are NOPNT and the mispelled flag is cleared.
+Note: charCount is set to 1 whatever length is, as this constructor
+has always done.  NOTE(review): confirm callers rely on that.
+Rev: 12/5/95
+---------------------------------------------------------------*/
+{
+      // A NULL string or a negative length is treated as an empty word.
+      int keep = (word != NULL && length > 0) ? length : 0;
+
+      charCount = 1;
+      compCount = 0;
+
+      // Reserve one extra byte for the terminator.  The previous
+      // new char[length] + strcpy wrote one byte past the buffer
+      // whenever length was the plain string length.
+      characters = new char[keep + 1];
+      if (keep > 0)
+	strncpy(characters, word, keep);
+      characters[keep] = '\0';
+
+      confid = 255;
+      ul = NOPNT;
+      lr = NOPNT;
+      character = NULL;
+      mispelled = 0;
+}
+
+
+Word::Word(ListElement * first, int count, int charlength)
+/*--------------------------------------------------------------
+Primary Purpose: Build a word from a run of list elements, each of
+which holds one Component
+Arguments: first is a pointer to a ListElement that contains
+the first Component in the word.  count is the number
+of components in the word. charlength is the actual length of the
+word in characters (terminator not counted)
+
+Effects:  Sets all data members of the word class:
+      characters  concatenation of each component's fasciiId,
+                  never more than charlength characters, always
+                  NUL-terminated
+      character   array of count pointers to the components
+      confid      lowest confid() among the components (starts at 256)
+      ul          first component's ul, with y lowered to the
+                  smallest ul y of any component
+      lr          lr of the last component
+A diagnostic is printed if the components supply more characters
+than charlength; the surplus is dropped instead of being written
+past the end of the buffer.
+Rev:  11/6/95
+---------------------------------------------------------------*/
+{
+      mispelled = 0;
+      Component * firstComp = (Component *) first->item;
+      Component * item;
+      int i;
+      int charOffset = 0;   // characters supplied so far; may pass charlength
+      ListElement * ptr;
+
+      characters = new char[charlength+1];  // ascii translation
+      // Array of count component pointers.  The earlier spelling
+      // new (Component *)[count] is not an array allocation in
+      // standard C++ and is rejected by current compilers.
+      character = new Component *[count];
+      charCount = charlength;
+      compCount = count;
+      ul = firstComp->ul();
+
+      confid = 256;
+
+      for (i = 0, ptr = first; i < count; ptr = ptr->next, i++)
+	{
+	  item = (Component *)(ptr->item);
+	  int pieceLen = item->asciiLen();
+
+	  // Copy only as much as still fits in front of the terminator.
+	  if (pieceLen > 0 && charOffset >= 0 && charOffset < charlength)
+	    {
+	      int room = charlength - charOffset;
+	      strncpy(&(characters[charOffset]), item->fasciiId,
+		      (pieceLen < room) ? pieceLen : room);
+	    }
+	  charOffset += pieceLen;
+
+	  // Components and charlength disagree: report it, nothing was
+	  // written outside the buffer.
+	  if (charOffset > charlength)
+	    {
+	      printf("\ncharOffset: %d charlength: %d", charOffset,charlength);
+	    }
+	  character[i] = item;
+	  if (item->confid() < confid)
+	    confid = item->confid();
+	  if(item->ul().y() < ul.y())
+	    ul.y() = item->ul().y();
+	  if(i == count-1)      // this is the last character
+	    lr = item->lr();
+	}
+
+      // Terminate right after the copied text when the components
+      // supplied fewer characters than charlength, so no
+      // uninitialised bytes are ever read as part of the string.
+      if (charOffset >= 0 && charOffset < charlength)
+	characters[charOffset] = '\0';
+      characters[charlength] = '\0';
+}
+
+
+ Word::~Word()
+/*--------------------------------------------------------------
+Primary Purpose: Release the storage owned by the word
+Effects: Frees the character buffer and the array of component
+pointers.  The Components the array points to are not deleted here.
+---------------------------------------------------------------*/
+{
+  // Both buffers are allocated with new[], so they must be released
+  // with delete[].  Deleting a NULL pointer is a no-op, so no test
+  // is needed for words built without components.
+  delete [] characters;
+  delete [] character;
+}
+
+
+Words::~Words()
+/*--------------------------------------------------------------
+Primary Purpose: Destroy every Word held in the list
+Effects: Walks the list starting at first and deletes each
+non-NULL item as a Word.  The list elements themselves are not
+freed in this function.
+---------------------------------------------------------------*/
+{
+  ListElement * node = first;
+  while (node != NULL)
+    {
+      Word * held = (Word *) (node->item);
+      if (held != NULL)
+	delete held;
+      node = node->next;
+    }
+}
+
+int Words::writeWordPos(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write position, confidence, length and text of
+each word to a file
+Arguments: filename is the output file name
+Return Value: 1 for success, 0 if the file could not be opened
+Effects: Creates (or truncates) filename and writes one line per
+word.  Each numeric field is a space followed by a 6 wide number:
+      upperleft x pos
+      upperleft y pos
+      word confidence
+      character count
+      string
+      \N
+Words whose upper left x is -1 (the new line markers) are skipped.
+Rev: KM 11/25
+---------------------------------------------------------------*/
+{
+  FILE * out = fopen(filename, "w");
+  if (out == NULL)
+    {
+      printf("Error openning %s", filename);
+      return 0;
+    }
+
+  ListElement * node = first;
+  while (node != NULL)
+    {
+      Word * entry = (Word *) node->item;
+      // Only real words are written; new line markers are skipped.
+      if (entry->ul.x() != -1)
+	{
+	  fprintf(out, " %6d %6d %6d %6d %s\n",
+		  entry->ul.x(), entry->ul.y(),
+		  entry->confid, entry->charCount, entry->characters);
+	}
+      node = node->next;
+    }
+
+  fclose(out);
+  return 1;
+}
+
+
+int Words::writeWordbox(char * filename, int xoffset=0, int yoffset=0,
+			Page * page=global_page, bool equationsOnly=0)
+/*--------------------------------------------------------------
+Primary Purpose: Write the word list in Scanworx wordbox format
+Arguments: filename is the output file name.  xoffset and yoffset
+are added to every x and y coordinate written.  page is the parent
+page, consulted only when equationsOnly is set.  equationsOnly
+limits the output to words whose center page->inEquation accepts.
+Return Value: 1 for success, 0 if the file could not be opened
+Effects: Creates (or truncates) filename, writes an empty line and
+then, for each selected word, one line holding
+      string
+      upperleft x pos
+      upperleft y pos
+
+      lowerright x pos
+      lowerright y pos
+
+      upperright x pos
+      upperright y pos
+
+      lowerleft x pos
+      lowerleft y pos
+
+      \N
+A selected word whose upper left x is -1 (a new line marker) is
+written as an empty line instead.
+Rev: KM 11/25
+---------------------------------------------------------------*/
+{
+  FILE * out;
+  out = fopen(filename, "w");
+  Point middle;
+
+  if (out == NULL)
+    {
+      printf("Error openning %s", filename);
+      return 0;
+    }
+
+  fprintf(out, "\n"); // print new lines.
+
+  for (ListElement * node = first; node != NULL; node = node->next)
+    {
+      Word * entry = (Word *) node->item;
+      middle = entry->center();
+
+      // In equations-only mode drop every word outside an equation.
+      if (equationsOnly && !(page->inEquation(middle.x(), middle.y())))
+	continue;
+
+      // New line markers become an empty output line.
+      if (entry->ul.x() == -1)
+	{
+	  fprintf(out, "\n"); // print new lines.
+	  continue;
+	}
+
+      // Corner order: upper left, lower right, upper right, lower left.
+      fprintf(out, "%s %d %d %d %d %d %d %d %d \n",
+	      entry->characters,
+	      entry->ul.x() + xoffset, entry->ul.y() + yoffset,
+	      entry->lr.x() + xoffset, entry->lr.y() + yoffset,
+	      entry->lr.x() + xoffset, entry->ul.y() + yoffset,
+	      entry->ul.x() + xoffset, entry->lr.y() + yoffset);
+    }
+
+  fclose(out);
+  return 1;
+}
+
+
+
+
+
+int Words::writeAscii(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word list to ascii file
+Arguments: filename to write to
+Return Value:  1 if successful, 0 if the file could not be opened
+Effects: Creates (or truncates) filename and writes the text of
+every word in list order, each followed by a single space.  An
+empty list produces an empty file.
+
+Rev: 11/25 KM
+---------------------------------------------------------------*/
+{
+  FILE * outfile;
+  outfile = fopen(filename, "w");
+  if (outfile == NULL)
+      {
+	printf("Error openning %s", filename);
+	return 0;
+      }
+
+  // The list is only touched inside the loop, so an empty list
+  // (first == NULL) is safe.  An unused local used to read
+  // first->item before this point and crashed on an empty list.
+  for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
+      {
+	Word * word = (Word *) ptr->item;
+	fprintf(outfile, "%s ", word->characters );
+      }
+  fclose(outfile);
+  return 1;
+
+}
+
+
+
+
+
+
+
+
+