reference/ocr-new/learn.cc
author viric@llimona
Thu, 18 May 2006 23:12:51 +0200
changeset 0 6b8091ca909a
permissions -rw-r--r--
Init from working directory of svn repository.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     1
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     2
 Learn.cc - 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     3
 readLearnFiles - sources the tcl file to change learn files
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     4
 learn(char * tifffile, char * asciifile)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     5
 Performs character learning by reading tiff and  ascii translation
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     6
 Characters are partitioned into character groups as described 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     7
 in system.cc.  See learn function for more details
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     8
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
     9
 writeLearnedGroups(char * filename) Writes learned character to file
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    10
 readLearnedGroups(char * filename) Reads saved learned characters 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    11
                                    from file. 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    12
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    13
#include "tcl_interface.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    14
#include "system.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    15
#include "learn.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    16
#include "Page.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    17
#include "list.h"
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    18
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    19
void readLearnFiles()
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    20
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    21
Primary Purpose: Sources learnfile.tcl where new learn files can be 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    22
specified without recompiling   **/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    23
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    24
  docommand("source learnfile.tcl");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    25
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    26
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    27
bool whitespace(char c)
// Reports whether c is one of the three characters this file treats as
// whitespace: space, tab or newline.  Any other character (including
// '\r' and '\0') is not whitespace.
{
  return (c == ' ' || c == '\t' || c == '\n');
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    34
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    35
bool blank(char * string)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    36
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    37
  if (string == NULL) return TRUE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    38
  int len = strlen(string);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    39
  for(int c=0; c< len; c++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    40
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    41
    if (!(whitespace(string[c])))
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    42
	return FALSE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    43
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    44
  return TRUE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    45
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    46
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    47
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    48
void printLearnedGroups()
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    49
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    50
  // Just print these guys out to make sure they are ok.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    51
    for(unsigned int i = 0; i < NumCharGroups; i++)   
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    52
	for(ListElement * ptr = LearnedGroups[i].first; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    53
	    ptr != NULL; ptr = ptr->next)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    54
	  { Component * item = (Component *) ptr->item;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    55
	  printf("learned char %s, group %d\n", item->fasciiId, 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    56
			  item->charGroup);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    57
	  }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    58
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    59
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    60
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    61
int lengthNextWord(char * buffer,int offset, int buflength)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    62
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    63
  // counts things in '< >' as one character
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    64
  int count;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    65
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    66
  for(int c=offset; c < buflength && !(whitespace(buffer[c])); c++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    67
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    68
	 if(buffer[c] == '<')
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    69
	   {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    70
	     while((buffer[c] != '>') && (c < buflength))
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    71
	       c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    72
	     count++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    73
	   }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    74
	 else
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    75
	   count++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    76
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    77
  return count;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    78
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    79
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    80
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
    81
int learn(Component * comp, char * id, Confidence threshold)
/*--------------------------------------------------------------
Primary Purpose: Make a copy of this component, label the copy with
                 id and add it to LearnedGroups.
                 The component is only learned if its confidence is
                 below threshold or if its current ascii id differs
                 from id.
Arguments: comp - component to learn
           id - ascii identification (NUL-terminated string)
           threshold - confidence threshold for learning
Return Value: 1 if component was learned, 0 otherwise
Effects: Appends the copy to LearnedGroups[charGroup of the copy];
         LearnedGroups must already be allocated by the caller.
Rev: 4/25/96
---------------------------------------------------------------*/
{
  // Learn on low confidence, or when the current id is wrong.  A
  // component without an id counts as a mismatch, which also keeps
  // strcmp() from being handed a NULL pointer.
  if (comp->confid() < threshold
      || comp->fasciiId == NULL
      || strcmp(comp->fasciiId, id) != 0)
    {
      Component * newcomp = comp->copy();

      // Replace the id carried over by copy() with the supplied one.
      // The id buffer is a char array, so it is released with delete[].
      // NOTE(review): assumes copy() allocates fasciiId with new[] (or
      // leaves it NULL) -- confirm in Component::copy().
      delete [] newcomp->fasciiId;
      newcomp->fasciiId = new char[strlen(id)+1];
      strcpy(newcomp->fasciiId , id);

      LearnedGroups[newcomp->charGroup].Append(newcomp);
      return 1;
    }
  return 0;
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   108
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   109
void learn(char * tifFile, char * asciiFile, bool synchwords)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   110
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   111
Primary Purpose:  Learns from TIFF and ascii file.  Groups learned
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   112
                  characters by baseline into LearnedGroups and
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   113
                   sets properties.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   114
Arguments: tiffFile name of a tiff file to learn from
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   115
           asciiFile name of an ascii translation file
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   116
Effects:  Assumes a one to one correspondence between each connected
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   117
component on a line of the tif file and each character on the corresponding
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   118
line of the ascii file. 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   119
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   120
Rev:  4/26/96
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   121
---------------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   122
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   123
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   124
  Page * learnPage = new Page;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   125
  initCharBitsSet();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   126
  if(learnPage->readMap(tifFile) != VALID)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   127
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   128
      printf("Problem opening the learn image file (file doesn't exist?)\n");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   129
      return;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   130
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   131
  learnPage->setLines();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   132
  learnPage->extractComponents(MinHorizSeparation);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   133
  learnPage->extractWords();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   134
  learn(learnPage, asciiFile, synchwords);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   135
  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   136
  //  delete learnPage; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   137
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   138
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   139
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   140
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   141
void learn(Page * learnPage, char * asciiFile, bool synchWords)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   142
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   143
Primary Purpose:  Learns from a Page and an ascii file.  Used from
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   144
                  tcl user interface under File/Learn option
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   145
		  Groups learned
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   146
                  characters by baseline into LearnedGroups and
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   147
                   sets properties.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   148
Arguments: tiffFile name of a tiff file to learn from
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   149
           asciiFile name of an ascii translation file
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   150
Effects:  Assumes a one to one correspondence between each connected
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   151
component on a line of the tif file and each character on the corresponding
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   152
line of the ascii file. 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   153
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   154
Rev:  4/26/96
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   155
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   156
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   157
  FILE * transFile;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   158
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   159
  transFile = fopen(asciiFile,"r");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   160
  if(!transFile)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   161
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   162
      printf("Could not open the ascii learn file");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   163
      return;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   164
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   165
  if (LearnedGroups == NULL)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   166
    LearnedGroups = new Components[NumCharGroups]; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   167
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   168
  int maxCharsPerLine = learnPage->bmap()->imageWidth() / MinLineSize; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   169
  char buffer[maxCharsPerLine];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   170
  int i = -1;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   171
  int buflength=0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   172
  bool instring= FALSE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   173
  bool emptyLine;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   174
  Components * components = NULL;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   175
  Words * words;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   176
  Component * item;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   177
  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   178
  double width, height = 0.0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   179
  int h;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   180
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   181
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   182
      words = learnPage->words();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   183
      int c = 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   184
      Word * word;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   185
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   186
      for (ListElement * ptr = words->first; ptr != NULL &&
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   187
	  (i < learnPage->numLines()) ; ptr = ptr->next)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   188
	{	
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   189
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   190
	  word = (Word *) ptr->item;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   191
	  // if new line get new  text line
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   192
	  if (word->characters[0] == '\n' || buflength == 0)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   193
	    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   194
	      char * ok;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   195
	      do {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   196
		ok =fgets(buffer, maxCharsPerLine, transFile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   197
	      } while (ok && blank(buffer)); // skip blank lines.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   198
	      buflength= strlen(buffer);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   199
              components = learnPage->line(++i);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   200
	      c =0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   201
	      if (word->characters[0] == '\n') continue;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   202
	    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   203
	  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   204
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   205
	  // skip over white space
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   206
	  while(whitespace(buffer[c]) && c < buflength)c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   207
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   208
	  // Make sure we have an equal # of components characters
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   209
	  if (synchWords && 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   210
	      (word->charCount == lengthNextWord(buffer,c,buflength)))
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   211
	    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   212
	      // skip over this word
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   213
	      while(!(whitespace(buffer[c])) && c < buflength)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   214
		c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   215
	      continue; // move on to the next word
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   216
	    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   217
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   218
	  for (int ch = 0; ch < word->charCount; ch++) 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   219
	    { 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   220
	      while(whitespace(buffer[c]) && c < buflength)c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   221
	      item = word->character[ch]; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   222
	      if (c >= buflength) break;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   223
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   224
		 // Link string translation to component.  Characters between
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   225
		 // brackets are for one component.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   226
		   if(buffer[c] == '<' && !instring)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   227
		     {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   228
		       instring = TRUE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   229
		       int startString = c;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   230
		       while(c++ < buflength && buffer[c] != '>');
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   231
		       int endString = c+1;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   232
		       
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   233
		       int stringSize = endString - startString;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   234
		       char newstring[stringSize+1];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   235
		       strncpy(newstring, &buffer[startString],stringSize);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   236
		       newstring[stringSize] = '\0';
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   237
		       // learn if id's don't match or below threshold
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   238
		       learn(item, newstring, ConfidenceThreshold);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   239
		       c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   240
		       instring = FALSE;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   241
		     }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   242
		   else
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   243
		     {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   244
                       char newstring[2];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   245
      		       newstring[0] = buffer[c++];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   246
		       newstring[1]= '\0';
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   247
		       learn(item, newstring, ConfidenceThreshold);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   248
		     }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   249
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   250
		   LearnedGroups[item->charGroup].Append(item);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   251
		   //ptr->item = NULL; // Set to Null in page so it wont get
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   252
	                    // clobbered on delete
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   253
		   h = item->lr().y() - item->ul().y();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   254
		   if (h > height) height = h;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   255
		   width = item->lr().x() - item->ul().x();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   256
		   if (height/width > MaxHWRatio)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   257
		     MaxHWRatio = height/width;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   258
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   259
		   if (h/width < MinHWRatio)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   260
		     MinHWRatio = h/width;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   261
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   262
		   if (width < MinWidth)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   263
		     MinWidth = (int) width;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   264
	  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   265
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   266
		 }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   267
	}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   268
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   269
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   270
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   271
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   272
  if (fgets(buffer, maxCharsPerLine, transFile))
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   273
      printf("Uh, oh. There are more characters to learn!\n");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   274
  /*  printf("Maximum height/width ratio = %f\n", MaxHWRatio); */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   275
  /*  printf("Minimum height/width ratio = %f\n", MinHWRatio); */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   276
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   277
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   278
  // printLearnedGroups();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   279
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   280
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   281
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   282
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   283
int writeLearnedGroups(char * filename)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   284
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   285
Primary Purpose:  Write Learned groups out to file for reading
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   286
                  in by readLearnedGroups
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   287
Arguments: filename to write learned chars to 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   288
Return Value: 1 if successful 0 if not
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   289
Effects:  Writes contents of LearnedGroups array out to filename
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   290
LearnedGroups is an array of lists of components that is decleared
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   291
in system.cc and initialized by the learn() function.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   292
For each group writes the number of Components the group contains
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   293
followed by the group data.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   294
Other learned values such as MinWidth MinHWRatio etc are written to
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   295
the file as well.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   296
Constraints: LearnedGroups must be initialized and filled with learned
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   297
chars before this function is invoked.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   298
Rev: 11/27 KM
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   299
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   300
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   301
  int status;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   302
  FILE * outfile;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   303
  assert(LearnedGroups != NULL);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   304
  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   305
  outfile = fopen(filename, "w");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   306
  if (outfile == NULL)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   307
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   308
	printf("error openning %s \n", filename);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   309
	return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   310
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   311
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   312
  // Write global information about learned characters
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   313
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   314
  fwrite(&NumCharGroups, sizeof(NumCharGroups),1, outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   315
  fwrite(&MaxHWRatio, sizeof(MaxHWRatio),1, outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   316
  fwrite(&MinWidth, sizeof(MinWidth),1,outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   317
  fwrite(&MinHWRatio, sizeof(MinHWRatio),1,outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   318
  for(unsigned int i = 0; i < NumCharGroups; i++)   
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   319
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   320
	unsigned int numChars = LearnedGroups[i].length;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   321
	// Write group number and number of characters
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   322
	fwrite(&i, sizeof(i), 1, outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   323
	status = fwrite(&numChars, sizeof(numChars),1,outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   324
	if (status == 0) return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   325
	for(ListElement * ptr = LearnedGroups[i].first; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   326
	    ptr != NULL; ptr = ptr->next)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   327
	    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   328
	      
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   329
	      Component * comp = (Component *) ptr->item;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   330
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   331
	      status = fwrite(comp, sizeof(Component),1,outfile);	     
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   332
//	      printf("\tChar:%c status:%d \n", comp->asciiId(), status);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   333
	      int stringSize = strlen(comp->fasciiId) +1;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   334
	      status = fwrite(&stringSize, sizeof(stringSize),1,outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   335
              status = fwrite(comp->fasciiId, stringSize,1,outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   336
	      for(int p = 0; p < numProperties; p++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   337
		  {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   338
		    status = fwrite(&(comp->fproperty[p]), 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   339
				  sizeof(Property),
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   340
				  1, outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   341
		    if (status == 0) 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   342
			{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   343
			  printf("Error writing properties of comp %c",
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   344
				 comp->asciiId());
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   345
			  return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   346
			}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   347
		  }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   348
	    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   349
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   350
  status = fclose(outfile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   351
  if (status == -1) return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   352
  else return 1;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   353
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   354
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   355
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   356
int readLearnedGroups(char * filename)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   357
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   358
Primary Purpose:  Read Learned groups from file that has been
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   359
                  created by writeLearnedGroups
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   360
Arguments: filename to read learned chars from 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   361
Return Value: 1 if successful 0 if not
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   362
Effects:  Reads contents of filename into LearnedGroups array
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   363
LearnedGroups is an array of lists of components that is decleared
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   364
in system.cc and initialized here or in the learn() function.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   365
Constraints: LearnedGroups must not yet be initialized
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   366
Rev: 11/27 KM
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   367
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   368
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   369
  int status;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   370
  FILE * infile;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   371
  unsigned int numGroups;           // # of groups stored in file.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   372
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   373
  initCharBitsSet();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   374
  if(LearnedGroups == NULL)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   375
    LearnedGroups = new Components[NumCharGroups];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   376
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   377
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   378
  infile = fopen(filename, "r");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   379
  if (infile == NULL)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   380
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   381
	printf("error openning %s \n", filename);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   382
	return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   383
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   384
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   385
  // Read Globals
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   386
  fread(&numGroups, sizeof(numGroups),1, infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   387
  assert(numGroups == NumCharGroups);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   388
  fread(&MaxHWRatio, sizeof(MaxHWRatio),1, infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   389
  fread(&MinWidth, sizeof(MinWidth),1,infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   390
  fread(&MinHWRatio, sizeof(MinHWRatio),1,infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   391
  for(unsigned int i = 0; i < NumCharGroups; i++)   
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   392
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   393
	unsigned int groupnum;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   394
	unsigned int numChars;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   395
	fread(&groupnum, sizeof(groupnum), 1, infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   396
	assert(groupnum == i);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   397
	fread(&numChars, sizeof(numChars),1,infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   398
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   399
	printf("\nReading group %d - %d characters\n",i,numChars);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   400
	for(unsigned int c = 0; c< numChars; c++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   401
	    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   402
	      Component * comp = new Component;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   403
	      short int * savepropptr = comp->fproperty;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   404
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   405
	      status = fread(comp, sizeof(Component),1,infile);	     
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   406
	      int stringSize;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   407
	      status = fread(&stringSize, sizeof(stringSize),1,infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   408
              comp->fasciiId = new  char[stringSize];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   409
              status = fread(comp->fasciiId, stringSize,1,infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   410
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   411
	      comp->fproperty = savepropptr;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   412
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   413
	      for(int p = 0; p < numProperties; p++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   414
		  {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   415
		    status = fread(&(comp->fproperty[p]), sizeof(Property),
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   416
			      1, infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   417
		    if (status == 0) 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   418
		      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   419
			printf("Error reading properties");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   420
			return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   421
		      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   422
		  }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   423
//	      printf("\tChar:%c status:%d ", comp->asciiId(), status);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   424
//	      printVector(comp->properties(), numProperties);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   425
	      LearnedGroups[i].Append(comp);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   426
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   427
	    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   428
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   429
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   430
  status = fclose(infile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   431
  if (status == -1) return 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   432
  else return 1;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   433
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   434
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   435
void testLearn()
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   436
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   437
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   438
  learn("/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/train.tif",
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   439
	   "/amd/nfs/cochise/home/ee/cs169/fa95class/cs169-ab/train.txt"); 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   440
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   441
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   442
/*****************************************************************
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   443
  FUNCTIONS BEYOND THIS POINT ARE FOR AVERAGING LEARNED CHARACTERS
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   444
  AND ARE NOT CURRENTLY USED.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   445
*******************************************************************/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   446
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   447
void initLearnedChars()
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   448
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   449
Primary Purpose: Initializes learned character array. Sets asciiId
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   450
to array offset.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   451
Rev: KM 11/6/95
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   452
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   453
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   454
  LearnedChars = new Component[256];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   455
 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   456
  for (int i=0; i < 256; i++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   457
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   458
      LearnedChars[i].asciiId() = (char)i;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   459
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   460
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   461
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   462
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   463
void oldlearn(char * tifFile, char * asciiFile)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   464
/*--------------------------------------------------------------
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   465
Primary Purpose:  builds property vectors for LearnedChars array
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   466
Arguments: tiffFile name of a tiff file to learn from
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   467
           asciiFile name of an ascii translation file
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   468
Effects:  Assumes a one to one correspondence between each connected
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   469
component on a line of the tif file and each character on the corresponding
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   470
line of the ascii file.  For learned characters confidence is set
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   471
to the number of examples.
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   472
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   473
Rev:  11/6/95
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   474
---------------------------------------------------------------*/
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   475
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   476
  FILE * transFile;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   477
  transFile = fopen(asciiFile,"r");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   478
  Page * learnPage = new Page;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   479
  initCharBitsSet();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   480
  learnPage->readMap(tifFile);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   481
  learnPage->setLines();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   482
  learnPage->extractComponents(MinHorizSeparation);         /* why minlinesize? */
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   483
  int maxCharsPerLine = learnPage->bmap()->imageWidth() / MinLineSize; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   484
  char buffer[maxCharsPerLine];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   485
  int i = 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   486
  int buflength;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   487
  bool emptyLine;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   488
  Components * components;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   489
  Component * item;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   490
  int count[256]; // a count of how many of each char have been encountered
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   491
  int prop[256][numProperties]; // Character property sums. Need ints so that 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   492
                                 // property sum does
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   493
                                // not exceed char boundaries
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   494
  char id;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   495
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   496
  initLearnedChars();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   497
  for (i = 0; i < 256; i++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   498
    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   499
      count[i] = 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   500
      for (int p  = 0; p < numProperties; p++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   501
	prop[i][p] = 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   502
    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   503
  i=0; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   504
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   505
  int offset;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   506
  while (i < learnPage->numLines() &&  
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   507
	 fgets(buffer, maxCharsPerLine, transFile))
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   508
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   509
	buflength = strlen(buffer);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   510
	components = learnPage->line(i++);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   511
	int c = 0;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   512
	for (ListElement* ptr = components->first; ptr != NULL; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   513
	     ptr = ptr->next) 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   514
	  {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   515
	  item = (Component *)(ptr->item);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   516
	  // skip over white space
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   517
	  while(whitespace(buffer[c]) && c < buflength)c++;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   518
	  if (c >= buflength)break;
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   519
	  id =  buffer[c++];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   520
	  count[id]++;  // increment character count
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   521
	  for (offset=0; offset < numProperties; offset++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   522
	    prop[id][offset] += (item->properties())[offset];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   523
	  LearnedChars[i].numBits() += item->numBits();
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   524
	}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   525
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   526
  // now divide by count and put in Learned character
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   527
  for(int j = 0; j < 256; j++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   528
      {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   529
	if(count[j] > 0)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   530
	    {
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   531
	      for (int offset=0; offset < numProperties; offset++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   532
		prop[j][offset] /= count[j];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   533
	      LearnedChars[j].numBits() /= count[j]; 
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   534
	      LearnedChars[j].confid() = count[j];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   535
	      for (offset=0; offset < numProperties; offset++)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   536
		(LearnedChars[j].properties())[offset] = prop[j][offset];
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   537
//	      printf("%d occurrences of %c\n", count[j], (char)j);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   538
	      printVector(LearnedChars[j].properties(), numProperties);
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   539
			   
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   540
	    }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   541
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   542
      }
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   543
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   544
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   545
void oldtestLearn()
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   546
{
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   547
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   548
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   549
  learn("train.tif", "train.txt");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   550
  if (ENABLE_USER_INTERFACE)
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   551
  docommand(".main_window.display.work_space delete IMAGE_TAG");
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   552
}
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   553
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   554
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   555
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   556
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   557
6b8091ca909a Init from working directory of svn repository.
viric@llimona
parents:
diff changeset
   558