reference/ocr-simple/Component.cc
changeset 0 6b8091ca909a
equal deleted inserted replaced
-1:000000000000 0:6b8091ca909a
       
     1 #include "system.h"
       
     2 #include "Component.h"
       
     3 #include "BitMap.h"
       
     4 #include <assert.h>
       
     5 #include "list.h"
       
     6 #include "tcl_interface.h"
       
     7 
       
     8 /*** Component.cc
       
     9   Member functions for Components 
       
    10   Component functions defined in Component.h
       
    11   rev 12/9/95 KM
       
    12 ***/
       
    13 
       
    14 
       
    15 Components::Components()
       
    16 :List()
       
    17 {
       
    18 }
       
    19 
       
    20 
       
    21 Components::~Components()
       
    22 {
       
    23   for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next) {
       
    24     if (ptr->item != NULL)
       
    25       delete (Component *) (ptr->item);
       
    26      }
       
    27   while(!IsEmpty())
       
    28     Remove();
       
    29 
       
    30 }
       
    31 
       
    32 
       
    33 int Component::AddToComponent(ListElement* intrvl, RLEMap* rlemap)
       
    34 //this needs to be fixed to trap page boundaries
       
    35 //or else pad the page with a blank line at top and bottom
       
    36 {
       
    37   assert(intrvl != NULL);
       
    38   List* list = new List();           //make a new queue
       
    39   ListElement* current;
       
    40   ListElement* nextelt;
       
    41   int counter = 0;
       
    42   int currentRow;
       
    43 
       
    44   if (intrvl->previous != NULL)
       
    45     intrvl->previous->next = intrvl->next;
       
    46   else rlemap->fMapData[((RLEPair *) intrvl->item)->row]->first = intrvl->next;
       
    47   if (intrvl->next != NULL)
       
    48     intrvl->next->previous = intrvl->previous;
       
    49   list->first = intrvl;              //put starting interval on queue
       
    50   list->last = intrvl;
       
    51   list->length = 1;
       
    52   intrvl->next = NULL;
       
    53   intrvl->previous = NULL;
       
    54 
       
    55   while ((intrvl = list->first) != NULL)  //Take an interval off queue
       
    56     {
       
    57       currentRow = ((RLEPair *) intrvl->item)->row;
       
    58       for (int i=-1; i < 2; i+=2) { 
       
    59 	current = rlemap->fMapData[currentRow+i]->first;
       
    60 	while ((current != NULL) 
       
    61 	       && (((RLEPair *) current->item)->start <= 
       
    62 		   ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
       
    63 
       
    64 //	  printf("Looking at an interval on row %d that goes from %d to %d\n",
       
    65 //		 currentRow, ((RLEPair *) intrvl->item)->start,
       
    66 //		  ((RLEPair *) intrvl->item)->end);
       
    67 
       
    68 	  if ((((RLEPair *) current->item)->end 
       
    69 	       >= ((RLEPair *) intrvl->item)->start-1) 
       
    70 	      && (((RLEPair *) current->item)->start <= 
       
    71 		  ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
       
    72 //	  printf("Adding connection for interval on row %d that goes from %d to %d\n", currentRow+i,
       
    73 //		 ((RLEPair *) current->item)->start,
       
    74 //		  ((RLEPair *) current->item)->end);
       
    75 	
       
    76 	    if (current->previous != NULL)
       
    77 	      current->previous->next = current->next;   //take off RLEMap
       
    78 	    else
       
    79 	      rlemap->fMapData[currentRow+i]->first = current->next;
       
    80 	    if (current->next != NULL)
       
    81 	      current->next->previous = current->previous;
       
    82 	    nextelt = current->next;
       
    83 	    list->last->next = current;                //add to queue
       
    84 	    current->previous = list->last;
       
    85 	    list->last = current;
       
    86 	    current->next = NULL;
       
    87 	    current = nextelt;
       
    88 	    list->length++;
       
    89 	  } else
       
    90 	    current = current->next;
       
    91 	}
       
    92       }
       
    93       
       
    94       if ((((RLEPair *) intrvl->item)->start < ful.x()) || (ful.x()==-1)) {
       
    95 	ful.x() = ((RLEPair *) intrvl->item)->start;
       
    96 //	printf("Changed ful.x to %d\n", ful.x());
       
    97       }
       
    98       if ((((RLEPair *) intrvl->item)->end > flr.x()) || (flr.x()==-1)) {
       
    99 	flr.x() = ((RLEPair *) intrvl->item)->end;
       
   100 //	printf("Changed flr.x to %d\n", flr.x());
       
   101       }
       
   102       if ((((RLEPair *) intrvl->item)->row < ful.y()) || (ful.y()==-1)) {
       
   103 	ful.y() = ((RLEPair *) intrvl->item)->row;
       
   104 //	printf("Changed ful.y to %d\n", ful.y());
       
   105       }
       
   106       if ((((RLEPair *) intrvl->item)->row > flr.y()) || (flr.y()==-1)) {
       
   107 	flr.y() = ((RLEPair *) intrvl->item)->row;
       
   108 //	printf("Changed flr.y to %d\n", flr.y());
       
   109       }      
       
   110       list->first = intrvl->next;
       
   111       if (intrvl->next != NULL)
       
   112 	intrvl->next->previous = NULL;
       
   113       delete ((RLEPair *) (intrvl->item));
       
   114       delete intrvl;               //so the letter O won't go forever
       
   115       counter++;
       
   116       list->length--;
       
   117     }
       
   118 
       
   119   delete list;
       
   120   return counter;
       
   121 
       
   122 }
       
   123 
       
   124 void Component::setProperties(BitMap * map)  // was BitMap
       
   125 /*--------------------------------------------------------------
       
   126 Primary Purpose: Set the property vector for this component
       
   127 Arguments:  The BitMap to which this component belongs
       
   128 Return Value:
       
   129 Effects: The component is divided into a 5 by 5 grid.  A gray
       
   130     scale (0 - 255) for each section is determined.  The gray scale
       
   131     is 0 for all white, 255 for all black, but normally will be somewhere
       
   132     between the two.  The gray scales are represented in properties
       
   133     0-24. 
       
   134     Property 25 is the grayscale accross the top.
       
   135     Property 26 is the grayscale accross the bottom.
       
   136     Property 27 is the width/height ratio again scaled to (0-255)
       
   137     Actually the formula for property 27 is
       
   138         width/ height * 255  if height > width
       
   139         1- height/width * 255 if width > height
       
   140     This way near 0 is very tall and thin
       
   141              near 128 height near width
       
   142 	     near 255 very wide
       
   143     property 26 is height/width ratio 255 if height > width.
       
   144     These values are put into fProperty array in this component.
       
   145     Property 27 is 255 if the component is vertically disjoint like i or ;
       
   146                or 0 if it is not.
       
   147     Property 28 is  Indicator of a vertically disjoint character
       
   148      like i and j.
       
   149     Also the total number of black pixels is set in fnumBits.
       
   150     // This is not used at this time.    
       
   151 Constraints: The data fields ful and flr must already be set 
       
   152    before calling this function. These fields specify a bounding
       
   153    box for the character within the BitMap.
       
   154 Rev: 12/9 KM
       
   155 ---------------------------------------------------------------*/
       
   156 {
       
   157   if (ful > flr) 
       
   158     printf("Problem\n");
       
   159   assert (ful <= flr);
       
   160   short int hflag[NumHorizDiv + 1];  // flags horizontal section dividers
       
   161   short int vflag[NumVertDiv + 1];   // flags vertical section dividers
       
   162   float height, width;
       
   163   int propNum;
       
   164   float darkest = 0;
       
   165   float lightest; 
       
   166   int darkrow = 0;
       
   167   int lightrow = 0;
       
   168 
       
   169   Point sectionLr, sectionUl;
       
   170   // Set Number of bits
       
   171   fnumBits = map->pixelsInRegion(ful, flr);
       
   172 
       
   173   setSectionFlags(hflag, vflag);
       
   174   for (int r = 0; r < NumVertDiv; r++)
       
   175     for (int c = 0; c < NumHorizDiv; c++)
       
   176       {
       
   177 	propNum = (r * NumHorizDiv) + c;
       
   178 	sectionUl = Point(hflag[c], vflag[r]);
       
   179 	sectionLr = Point(hflag[c+1]-1, vflag[r+1]-1);
       
   180 	if (sectionUl <= sectionLr)
       
   181 	  fproperty[propNum] = map->grayScale(sectionUl, sectionLr); 
       
   182 	assert(fproperty[propNum] >= 0 && fproperty[propNum] < 256);
       
   183       }
       
   184 
       
   185   // set the height/width ratio
       
   186   // 0 is very thin 128 is even 256 is very wide.
       
   187   width = flr.x() - ful.x() + 1;
       
   188   height = flr.y() - ful.y() + 1;
       
   189  
       
   190   // Grayscale across the top - Indicator of top bar
       
   191   sectionUl = Point(ful.x(), ful.y());
       
   192   sectionLr = Point(flr.x(), ful.y() + (int)(height/(NumVertDiv*2)));
       
   193   fproperty[25] = map->grayScale(sectionUl, sectionLr);
       
   194 
       
   195    // Grayscale across bottom - Indicator of a foot for l opposed to 1
       
   196   sectionUl = Point(ful.x(),  flr.y() -  (int)(height/(NumVertDiv*2)));
       
   197   sectionLr = Point(flr.x(),  flr.y());
       
   198   fproperty[26] = map->grayScale(sectionUl, sectionLr);
       
   199 
       
   200   float hdivw = (float)height/width;
       
   201   float wdivh = (float) width/height;
       
   202   if (width > height)
       
   203        fproperty[27]= (short int) ((1- hdivw/2)*255);
       
   204   else
       
   205     fproperty[27] = (short int)((wdivh/2)* 255);
       
   206 
       
   207   // is this a disjoint character like i or j  255 = yes 0 = no
       
   208   fproperty[28]=0;
       
   209   lightest = width;
       
   210   for(int row = ful.y(); row < flr.y(); row++)
       
   211       {
       
   212 	int pixelsThisRow = pixelsBetween(map->row(row), ful.x(), flr.x());
       
   213 	if(!(pixelsThisRow))
       
   214 	      fproperty[28]=255;
       
   215       }
       
   216 
       
   217   fproperty[29]= 0;
       
   218     for(int p = 0; p < numProperties; p++)	
       
   219       assert(fproperty[p] >= 0 && fproperty[p] < 256);
       
   220 
       
   221 
       
   222 }
       
   223 
       
   224 void Component::setSectionFlags(short int hflag[], short int vflag[])
       
   225 /*--------------------------------------------------------------
       
   226 Primary Purpose: Breaks this component into a grid NumHorizDiv X NumVertDiv
       
   227     for determining grayscale property vectors.
       
   228 Arguments:  hflag[] is an empty array to be filled by this procedure with
       
   229     the starting columns of each horizontal subdivision.  vflag[] will
       
   230     be filled with the vertical subdivisions.
       
   231 Effects:  fills hflag[] with the starting column for each subdivision.
       
   232      The last element of the array is actually the pixel immediately 
       
   233      following the last  subdivision.  The last subdivision contains any 
       
   234      remaining pixels that did not divide evenly amongst the divisions.
       
   235      vflag[NumHorizDiv] is comparable for vertical supdivisions.
       
   236      Example ful = (0,25) flr = (52,46) NumHorizDiv = NumVertDiv = 5
       
   237          hflag[6] = { 0,10,20,30,40,53 }
       
   238 	 vflag[6] = {25.29.33.37.41.47 } 
       
   239 Constraints: ful and flr must be set to mark the bounding box before
       
   240    calling this procedure.
       
   241 Rev:  10/27 KM
       
   242 ---------------------------------------------------------------*/
       
   243 {
       
   244   int ulx = ful.x();  int uly = ful.y();
       
   245   int lrx = flr.x();  int lry = flr.y();
       
   246 
       
   247   int width =  lrx - ulx+1;
       
   248   int height = lry - uly+1;
       
   249 
       
   250   int horizDiv = width/NumHorizDiv;
       
   251   int vertDiv = height/NumVertDiv;
       
   252 
       
   253   int horizExtra = width - horizDiv*NumHorizDiv;
       
   254   int vertExtra = height - vertDiv*NumVertDiv;
       
   255 
       
   256   int i, add;
       
   257   for (i = 0; i < NumHorizDiv; i++)
       
   258     {
       
   259     if(horizExtra - i > 0) add = i; else add = horizExtra;
       
   260     hflag[i] = ulx + (i*horizDiv)+ add;
       
   261   }
       
   262   hflag[i] = lrx + 1;              // Closes off last division
       
   263 
       
   264   int j;
       
   265   for(j = 0; j < NumVertDiv; j ++)
       
   266       {
       
   267     if(vertExtra - j > 0) add = j; else add = vertExtra;
       
   268     vflag[j] = uly + (j*vertDiv)+ add;
       
   269   }
       
   270   vflag[j] = lry + 1;
       
   271 
       
   272     
       
   273 
       
   274 
       
   275 }
       
   276 
       
   277 
       
   278 Distance Component::distance(Component * comp) 
       
   279 /*--------------------------------------------------------------
       
   280 Primary Purpose: Determines heuristic distance between two components
       
   281 Arguments:  Another component to compare
       
   282 Return Value: integer value which represents the distance between two 
       
   283    components. Distance = sum over i of
       
   284                        weight *square (this->fproperty[i] - comp->fproperty[i])
       
   285 		       weight for i == 27, 28 is 3 weight is 1 for all other 
       
   286 		       properties
       
   287 Constraints: setProperties must have been run on both components
       
   288 
       
   289 Rev: 11/1 KM
       
   290 ---------------------------------------------------------------*/
       
   291 {
       
   292   Property * a = fproperty;
       
   293   Property * b = comp->properties();
       
   294   Distance dist=0;
       
   295   int dif=0;
       
   296   int worst = 0;
       
   297   int weight = 1;
       
   298 
       
   299   for(int i= 0; i < numProperties; i++)
       
   300     { 
       
   301 
       
   302 
       
   303      if (i == 27 || i == 28) weight = 3;
       
   304      else weight = 1;
       
   305      
       
   306        dif = (a[i] - b[i]);      
       
   307       dist += weight*dif*dif;
       
   308     }
       
   309 
       
   310   return dist;
       
   311 }
       
   312 
       
   313 
       
   314 
       
   315 
       
   316 
       
   317 
       
   318 
       
   319 
       
   320 
       
   321 void printVector(short int vector[], int size)
       
   322 {
       
   323   for (int i = 0; i < size; i++)
       
   324     cout << vector[i] << " " ;
       
   325   cout << endl;
       
   326 
       
   327 }
       
   328 
       
   329 
       
   330 void testProperties(Component* c, BitMap * map)
       
   331 {
       
   332   short int hflag[NumHorizDiv + 1];  // flags horizontal section dividers
       
   333   short int vflag[NumVertDiv + 1];   // flags vertical section dividers
       
   334 
       
   335   cout << "First test subDivisions  " << endl;
       
   336   c->setSectionFlags(hflag, vflag);
       
   337   cout << "Horizontal flags" <<endl;
       
   338   printVector(hflag, NumHorizDiv + 1);
       
   339   cout << "Vertical flags" <<endl;
       
   340   printVector(vflag, NumHorizDiv + 1);
       
   341 
       
   342   cout << "Now lets look at the properties " << endl;
       
   343   // setSectionFlags will actually get called again within setProperties
       
   344   c->setProperties(map);
       
   345   printVector(c->properties(), NumHorizDiv*NumVertDiv + 1);
       
   346   cout << endl << " The distance of this component from itself: " << " ";
       
   347   cout << c->distance(c) << endl;
       
   348 
       
   349 }
       
   350 
       
   351 void Component::display_bounding_box()
       
   352 {
       
   353   int ulx = (ul()).x();
       
   354   int uly = (ul()).y();
       
   355   int lrx = (lr()).x();
       
   356   int lry = (lr()).y();
       
   357   scale(ulx); scale(uly); scale(lrx); scale(lry);
       
   358   
       
   359   docommand(".main_window.display.work_space create rectangle %d %d %d %d -outline blue -tags IMAGE_TAG",  ulx, uly, lrx, lry);
       
   360 }  
       
   361 
       
   362 Distance Component::recognize(Component * learnedchars)
       
   363 // This is out of date. Current recognize is below
       
   364 {
       
   365   Distance d, nextd;
       
   366   char id;
       
   367 
       
   368 //  printf("Another call to recognize\n");
       
   369   d = (256*256)*numProperties;  // this is the biggest distance
       
   370                         
       
   371 
       
   372 	       
       
   373     for (int i = 0; i < 256; i++)
       
   374 	{
       
   375 	  if(learnedchars[i].confid() != 0)
       
   376 	      {
       
   377 		nextd = distance(&learnedchars[i]);
       
   378 //		printf("Distance = %d, character = %c\n", nextd, i);
       
   379 		if (nextd < d)
       
   380 		    {
       
   381 		      d = nextd;
       
   382 		      id = (char) i;
       
   383 		    }
       
   384 	      }
       
   385 	  
       
   386 	}
       
   387   
       
   388   fasciiId = id;
       
   389   /*  printf("Recognized a Component: %c\n", id); */
       
   390   return d;
       
   391 }
       
   392 
       
   393 
       
   394 Distance Component::recognize(Components * learnedgroups, bool allGroups)
       
   395 {
       
   396   Distance d, worstDistance,nextd;
       
   397   char id;
       
   398   short int fontid;
       
   399   float tempd;
       
   400   worstDistance = 150000;
       
   401 
       
   402 //  printf("Another call to new recognize\n");
       
   403   d = (65536)*numProperties;  // this is the biggest distance
       
   404   
       
   405                         
       
   406     fconfid = 0;
       
   407 
       
   408     for(int g = 0; g < NumCharGroups &&
       
   409  	((fconfid < ConfidenceThreshold) || allGroups); g++)
       
   410     {
       
   411       int offset = (charGroup+g) % NumCharGroups;
       
   412       if (offset == 4 && charGroup != 4) continue;
       
   413       for (ListElement* ptr = learnedgroups[offset].first; ptr != NULL;
       
   414 	 ptr = ptr->next)
       
   415 	{
       
   416 	  Component * item = (Component *) ptr->item;
       
   417 	  nextd = distance(item);
       
   418 //	  printf("Distance = %d, character = %c\n", nextd, i);
       
   419 	  if (nextd < d)
       
   420 	      {
       
   421 		d = nextd;
       
   422 		id = item->fasciiId;
       
   423 		fontid = item->ffontId;
       
   424 	      }
       
   425 
       
   426 	}
       
   427 
       
   428       if (d >= worstDistance) 
       
   429 	  tempd = worstDistance - 1;
       
   430       else tempd = d;
       
   431 	
       
   432 	fconfid = (unsigned short int)
       
   433 	  (255 - (tempd/worstDistance)*256);
       
   434       if(charGroup == 4) break;  // dont check other groups for floaters
       
   435     }
       
   436   
       
   437 
       
   438   fasciiId = id;
       
   439 
       
   440 
       
   441   ffontId = fontid;
       
   442 //  printf("Recognized a Component: %c in font#%d  %d %u\n", 
       
   443 //	 id, fontid, fconfid, d);
       
   444 
       
   445 
       
   446 
       
   447   return d;
       
   448 }
       
   449 
       
   450 
       
   451 
       
   452 int Component::vertShrink(BitMap * bitmap)
       
   453 {
       
   454   int r;
       
   455   int shrunk = 0;
       
   456 	for(r = ful.y(); r < flr.y(); r++)
       
   457 	      if (pixelsBetween(bitmap->row(r), ful.x(), flr.x()))
       
   458 		  {
       
   459 		    ful.y() = r;
       
   460 		    shrunk = 1;
       
   461 		    break;
       
   462 		  }
       
   463 	for(r = flr.y(); r > ful.y(); r--)
       
   464 	      if (pixelsBetween(bitmap->row(r), ful.x(), flr.x()))
       
   465 		  {
       
   466 		    flr.y() = r;
       
   467 		    shrunk = 1;
       
   468 		    break;		    
       
   469 		  }
       
   470 		
       
   471   return shrunk;
       
   472       }
       
   473 
       
   474 
       
   475 
       
   476