Init from working directory of svn repository.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Bitmap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,59 @@
+#include "Bitmap.h"
+#include "Errors.h"
+#include "Histogram.h"
+
+Bitmap::Bitmap(int w, int h)
+{
+    // Allocates an h-row by w-column grid of cells. The cells are NOT
+    // cleared here; call setToZero() for an all-zero bitmap.
+    height = h;
+    width = w;
+    PointsPerLine = 0;      // these vectors are not allocated here
+    PointsPerColumn = 0;
+
+    // One array of row pointers plus one array per row. `new char*[n]` is
+    // the standard spelling; `new (char*)[n]` is not valid standard C++.
+    pixels = new char*[height];
+    for (int i = 0; i < height; i++)
+        pixels[i] = new char[width];
+}
+
+Bitmap::~Bitmap()
+{
+    // Free each row first, then the table of row pointers.
+    int row = 0;
+    while (row < height)
+        delete[] pixels[row++];
+    delete[] pixels;
+}
+
+void Bitmap::setToZero()
+{
+    // Clear every cell, one row at a time.
+    for (int row = 0; row < height; ++row) {
+        char *cells = pixels[row];
+        for (int col = 0; col < width; ++col) cells[col] = 0;
+    }
+}
+
+int Bitmap::points_per_row(int line)
+{
+    // Count the non-zero cells in the given row.
+    const char *cells = pixels[line];
+    int count = 0;
+    for (int col = 0; col < width; ++col)
+        count += (cells[col] != 0) ? 1 : 0;
+    return count;
+}
+
+void Bitmap::calcHistogram(Histogram* &myhist)
+{
+    // Builds a new Histogram with one weight per bitmap row and stores it in
+    // myhist; whatever myhist pointed to before is overwritten, not freed.
+    myhist = new Histogram(height);
+    Histogram &h = *myhist;
+
+    for (int row = 0; row < height; ++row)
+        h.weights[row] = points_per_row(row);
+    h.calcStatistics();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Bitmap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,32 @@
+#ifndef __BITMAP_H__
+#define __BITMAP_H__
+
+#include "Histogram.h"
+#include <vector>
+
+using namespace std;
+
+// Heap-allocated grid of char cells addressed as pixels[row][column].
+class Bitmap
+{
+    int width;
+    int height;
+    vector<int> *PointsPerLine;
+    vector<int> *PointsPerColumn;
+    // Copying is disabled: the destructor frees `pixels`, so a memberwise
+    // copy would free the same rows twice.
+    Bitmap(const Bitmap &);
+    Bitmap &operator=(const Bitmap &);
+public:
+    Bitmap(int w, int h);
+    ~Bitmap();
+    void setToZero();
+    int get_width() const { return width; }
+    int get_height() const { return height; }
+    int points_per_row(int row);
+    int points_per_column(int column);
+    void calcHistogram(Histogram* &myhist);
+    char **pixels;   // pixels[row][column]: `height` rows of `width` cells
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Errors.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,15 @@
+#include "Errors.h"
+#include <stdlib.h>
+#include <iostream>
+
+using namespace std;
+
+void Errors::Debug(const char *text)   // prints "Debug: <text>" on stdout
+{
+    cout << "Debug: " << text << '\n';
+}
+void Errors::Fatal(const char *text)   // prints "Fatal: <text>", then exits
+{
+    cout << "Fatal: " << text << '\n';
+    exit(1);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Errors.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,7 @@
+// Minimal diagnostics helpers; both write to standard output.
+class Errors
+{
+public:
+    static void Debug(const char *text);   // const: callers pass literals
+    static void Fatal(const char *text);   // prints, then calls exit(1)
+};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Histogram.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,44 @@
+#include "Histogram.h"
+#include <math.h>
+
+inline double sqr(double x) { return x * x; }   // x squared
+
+void Histogram::calc_mean()
+{
+    // mean = (sum of all weights) / rows, with the division done in double.
+    int sum = 0;
+    for (int idx = 0; idx < rows; ++idx)
+        sum += weights[idx];
+    mean = static_cast<double>(sum) / static_cast<double>(rows);
+}
+
+void Histogram::calc_variance()
+{
+    double acc = 0;   // running sum of squared deviations from `mean`
+    for (int idx = 0; idx < rows; ++idx)
+        acc += sqr(weights[idx] - mean);
+    variance = acc / static_cast<double>(rows);   // divides by rows, not rows-1
+}
+
+void Histogram::calc_std_dev()
+{
+ std_dev = sqrt(variance); // square root of the stored variance
+}
+
+void Histogram::calcStatistics()
+{
+ calc_mean(); // must run first: the variance is taken around the mean
+ calc_variance(); // must precede calc_std_dev(), which takes its square root
+ calc_std_dev();
+}
+
+Histogram::Histogram(int numrows)
+    : mean(0), variance(0), std_dev(0), rows(numrows),   // getters are safe before calcStatistics()
+      weights(new int[numrows]())   // the trailing () zero-initialises every weight
+{
+}
+
+Histogram::~Histogram()
+{
+ delete[] weights; // array form: weights was allocated with new[]
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Histogram.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,28 @@
+#ifndef __HISTOGRAM__H_
+#define __HISTOGRAM__H_
+// A list of integer weights plus their mean, variance and standard deviation.
+class Histogram
+{
+    double mean;
+    double variance;
+    double std_dev;
+    int rows;
+    void calc_mean();
+    void calc_variance();
+    void calc_std_dev();
+
+    // Not copyable: the destructor delete[]s `weights`, so a memberwise copy
+    // would release the same array twice.
+    Histogram(const Histogram &);
+    Histogram &operator=(const Histogram &);
+public:
+    int *weights;   // `numrows` entries, written directly by the user of the class
+    Histogram(int numrows);
+    ~Histogram();
+    void calcStatistics();   // recomputes mean, variance and std_dev from weights
+    float get_std_dev() const { return std_dev; }
+    float get_variance() const { return variance; }
+    float get_mean() const { return mean; }
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile.in Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,374 @@
+# Makefile.in generated automatically by automake 1.4-p6 from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = .
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS)
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+CXX = @CXX@
+MAKEINFO = @MAKEINFO@
+PACKAGE = @PACKAGE@
+VERSION = @VERSION@
+
+bin_PROGRAMS = prova
+prova_LDADD = -L/usr/lib/ -lnetpbm -lm
+prova_SOURCES = Text.cc Histogram.cc Bitmap.cc Page.cc prova.cc Errors.cc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_CLEAN_FILES =
+PROGRAMS = $(bin_PROGRAMS)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir)
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+prova_OBJECTS = Text.o Histogram.o Bitmap.o Page.o prova.o Errors.o
+prova_DEPENDENCIES =
+prova_LDFLAGS =
+CXXFLAGS = @CXXFLAGS@
+CXXCOMPILE = $(CXX) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@
+DIST_COMMON = README AUTHORS COPYING ChangeLog INSTALL Makefile.am \
+Makefile.in NEWS aclocal.m4 configure configure.in install-sh missing \
+mkinstalldirs
+
+
+DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
+
+TAR = tar
+GZIP_ENV = --best
+DEP_FILES = .deps/Bitmap.P .deps/Errors.P .deps/Histogram.P \
+.deps/Page.P .deps/Text.P .deps/prova.P
+SOURCES = $(prova_SOURCES)
+OBJECTS = $(prova_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .S .c .cc .o .s
+$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(BUILT_SOURCES)
+ cd $(top_builddir) \
+ && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(ACLOCAL_M4): configure.in
+ cd $(srcdir) && $(ACLOCAL)
+
+config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ $(SHELL) ./config.status --recheck
+$(srcdir)/configure: $(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)
+ cd $(srcdir) && $(AUTOCONF)
+
+mostlyclean-binPROGRAMS:
+
+clean-binPROGRAMS:
+ -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+distclean-binPROGRAMS:
+
+maintainer-clean-binPROGRAMS:
+
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ if test -f $$p; then \
+ echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \
+ $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \
+ else :; fi; \
+ done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ list='$(bin_PROGRAMS)'; for p in $$list; do \
+ rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \
+ done
+
+.s.o:
+ $(COMPILE) -c $<
+
+.S.o:
+ $(COMPILE) -c $<
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+prova: $(prova_OBJECTS) $(prova_DEPENDENCIES)
+ @rm -f prova
+ $(CXXLINK) $(prova_LDFLAGS) $(prova_OBJECTS) $(prova_LDADD) $(LIBS)
+.cc.o:
+ $(CXXCOMPILE) -c $<
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP)
+ list='$(SOURCES) $(HEADERS)'; \
+ unique=`for i in $$list; do echo $$i; done | \
+ awk ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ here=`pwd` && cd $(srcdir) \
+ && mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS)'; \
+ unique=`for i in $$list; do echo $$i; done | \
+ awk ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \
+ || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+
+# This target untars the dist file and tries a VPATH configuration. Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+ -rm -rf $(distdir)
+ GZIP=$(GZIP_ENV) $(TAR) zxf $(distdir).tar.gz
+ mkdir $(distdir)/=build
+ mkdir $(distdir)/=inst
+ dc_install_base=`cd $(distdir)/=inst && pwd`; \
+ cd $(distdir)/=build \
+ && ../configure --srcdir=.. --prefix=$$dc_install_base \
+ && $(MAKE) $(AM_MAKEFLAGS) \
+ && $(MAKE) $(AM_MAKEFLAGS) dvi \
+ && $(MAKE) $(AM_MAKEFLAGS) check \
+ && $(MAKE) $(AM_MAKEFLAGS) install \
+ && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+ && $(MAKE) $(AM_MAKEFLAGS) dist
+ -rm -rf $(distdir)
+ @banner="$(distdir).tar.gz is ready for distribution"; \
+ dashes=`echo "$$banner" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ echo "$$dashes"
+dist: distdir
+ -chmod -R a+r $(distdir)
+ GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir)
+ -rm -rf $(distdir)
+dist-all: distdir
+ -chmod -R a+r $(distdir)
+ GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir)
+ -rm -rf $(distdir)
+distdir: $(DISTFILES)
+ -rm -rf $(distdir)
+ mkdir $(distdir)
+ -chmod 777 $(distdir)
+ here=`cd $(top_builddir) && pwd`; \
+ top_distdir=`cd $(distdir) && pwd`; \
+ distdir=`cd $(distdir) && pwd`; \
+ cd $(top_srcdir) \
+ && $(AUTOMAKE) --include-deps --build-dir=$$here --srcdir-name=$(top_srcdir) --output-dir=$$top_distdir --gnu Makefile
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pr $$d/$$file $(distdir)/$$file; \
+ else \
+ test -f $(distdir)/$$file \
+ || ln $$d/$$file $(distdir)/$$file 2> /dev/null \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+
+DEPS_MAGIC := $(shell mkdir .deps > /dev/null 2>&1 || :)
+
+-include $(DEP_FILES)
+
+mostlyclean-depend:
+
+clean-depend:
+
+distclean-depend:
+ -rm -rf .deps
+
+maintainer-clean-depend:
+
+%.o: %.c
+ @echo '$(COMPILE) -c $<'; \
+ $(COMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
+ @-cp .deps/$(*F).pp .deps/$(*F).P; \
+ tr ' ' '\012' < .deps/$(*F).pp \
+ | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \
+ >> .deps/$(*F).P; \
+ rm .deps/$(*F).pp
+
+%.lo: %.c
+ @echo '$(LTCOMPILE) -c $<'; \
+ $(LTCOMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
+ @-sed -e 's/^\([^:]*\)\.o[ ]*:/\1.lo \1.o :/' \
+ < .deps/$(*F).pp > .deps/$(*F).P; \
+ tr ' ' '\012' < .deps/$(*F).pp \
+ | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \
+ >> .deps/$(*F).P; \
+ rm -f .deps/$(*F).pp
+
+%.o: %.cc
+ @echo '$(CXXCOMPILE) -c $<'; \
+ $(CXXCOMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
+ @-cp .deps/$(*F).pp .deps/$(*F).P; \
+ tr ' ' '\012' < .deps/$(*F).pp \
+ | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \
+ >> .deps/$(*F).P; \
+ rm .deps/$(*F).pp
+
+%.lo: %.cc
+ @echo '$(LTCXXCOMPILE) -c $<'; \
+ $(LTCXXCOMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
+ @-sed -e 's/^\([^:]*\)\.o[ ]*:/\1.lo \1.o :/' \
+ < .deps/$(*F).pp > .deps/$(*F).P; \
+ tr ' ' '\012' < .deps/$(*F).pp \
+ | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \
+ >> .deps/$(*F).P; \
+ rm -f .deps/$(*F).pp
+info-am:
+info: info-am
+dvi-am:
+dvi: dvi-am
+check-am: all-am
+check: check-am
+installcheck-am:
+installcheck: installcheck-am
+install-exec-am: install-binPROGRAMS
+install-exec: install-exec-am
+
+install-data-am:
+install-data: install-data-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-am
+uninstall-am: uninstall-binPROGRAMS
+uninstall: uninstall-am
+all-am: Makefile $(PROGRAMS)
+all-redirect: all-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+mostlyclean-am: mostlyclean-binPROGRAMS mostlyclean-compile \
+ mostlyclean-tags mostlyclean-depend mostlyclean-generic
+
+mostlyclean: mostlyclean-am
+
+clean-am: clean-binPROGRAMS clean-compile clean-tags clean-depend \
+ clean-generic mostlyclean-am
+
+clean: clean-am
+
+distclean-am: distclean-binPROGRAMS distclean-compile distclean-tags \
+ distclean-depend distclean-generic clean-am
+
+distclean: distclean-am
+ -rm -f config.status
+
+maintainer-clean-am: maintainer-clean-binPROGRAMS \
+ maintainer-clean-compile maintainer-clean-tags \
+ maintainer-clean-depend maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-am
+ -rm -f config.status
+
+.PHONY: mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \
+maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile tags mostlyclean-tags distclean-tags \
+clean-tags maintainer-clean-tags distdir mostlyclean-depend \
+distclean-depend clean-depend maintainer-clean-depend info-am info \
+dvi-am dvi check check-am installcheck-am installcheck install-exec-am \
+install-exec install-data-am install-data install-am install \
+uninstall-am uninstall all-redirect all-am all installdirs \
+mostlyclean-generic distclean-generic clean-generic \
+maintainer-clean-generic clean mostlyclean distclean maintainer-clean
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Page.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,334 @@
+#include "Page.h" // Inclou pam.h
+#include "Bitmap.h"
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "Errors.h"
+
+Page::Page(const char *filename)
+    : map(NULL), text(NULL), hist(NULL)
+{
+    // Start with no bitmap, text or histogram, then load the image;
+    // readMap() looks at `map` to decide whether an old bitmap must go.
+    readMap(filename);
+}
+
+Page::~Page()
+{   // The page owns all three objects; deleting a NULL pointer is a no-op.
+    delete map; delete hist; delete text;
+}
+
+void Page::readMap(const char *filename)
+{
+    // Loads a Netpbm image from `filename` ("-" means standard input) and
+    // replaces the page's bitmap with it. Only the first sample of each
+    // tuple is looked at: a zero sample is stored as 1, anything else as 0.
+    FILE *fp;
+    tuple *tuplerow;
+    const bool fromStdin = !strcmp(filename, "-");
+
+    if (fromStdin)
+        fp = stdin;
+    else if (!(fp = fopen(filename, "rb")))   // "rb": raw Netpbm files are binary
+        Errors::Fatal("Cannot open filename.");   // FIXME: exception
+
+    // If a bitmap already existed, drop it (delete on NULL is a no-op).
+    delete map;
+    map = NULL;
+
+    pnm_readpaminit(fp, &pamImage, sizeof(pamImage));
+    // FIXME: Read ok
+
+    width = pamImage.width;
+    height = pamImage.height;
+
+    map = new Bitmap(width, height);
+    tuplerow = pnm_allocpamrow(&pamImage);
+
+    for (int i = 0; i < height; i++)
+    {
+        pnm_readpamrow(&pamImage, tuplerow);
+        for (int j = 0; j < width; j++)
+        {
+            // We only read the first sample (changing 0 and 1)
+            map->pixels[i][j] = tuplerow[j][0] ? 0 : 1;
+        }
+    }
+    pnm_freepamrow(tuplerow);
+
+    // stdin was not opened here, so it is left open for the caller.
+    if (!fromStdin)
+        fclose(fp);
+}
+
+void Page::writeMap(const char *filename) const
+{
+    // Writes the bitmap to `filename` ("-" means standard output) as a
+    // Netpbm image, starting from a copy of the stored `pamImage` header
+    // with the current width and height. Only the first sample of each
+    // tuple is set: 0 for a non-zero bitmap cell, 1 for a zero one.
+    pam outpam;
+    FILE *fp;
+    tuple *tuplerow;
+    const bool toStdout = !strcmp(filename, "-");
+
+    outpam = pamImage;
+    outpam.width = width;
+    outpam.height = height;
+
+    fprintf(stderr,"Writting %s: w:%i,h:%i\n", filename, width, height);
+
+    if (toStdout)
+        fp = stdout;
+    else if (!(fp = fopen(filename, "wb")))   // "wb": raw Netpbm files are binary
+        Errors::Fatal("Cannot open filename.");   // FIXME: exception
+
+    outpam.file = fp;
+    pnm_writepaminit(&outpam);
+    tuplerow = pnm_allocpamrow(&outpam);
+
+    for (int i = 0; i < height; i++)
+    {
+        for (int j = 0; j < width; j++)
+            tuplerow[j][0] = map->pixels[i][j] ? 0 : 1;
+        pnm_writepamrow(&outpam, tuplerow);
+    }
+    pnm_freepamrow(tuplerow);
+
+    // stdout was not opened here, so flush it instead of closing it.
+    if (toStdout)
+        fflush(fp);
+    else
+        fclose(fp);
+}
+
+/*
+static void Page::Initialize()
+{
+ pnm_init(NULL,NULL);
+}
+*/
+
+float Page::ratioBlackWhite() const
+{
+    // Ratio of set (black) to clear (white) cells. The counters are integers
+    // because a float counter stops growing at 2^24 (about 16.7M cells).
+    long white = 0, black = 0;
+    for (int i = 0; i < height; i++)
+        for (int j = 0; j < width; j++)
+            if (map->pixels[i][j] == 0)
+                white++;
+            else
+                black++;
+    return (float) black / (float) white;
+}
+
+void Page::calcHistogram()
+{
+    // Throw away any earlier histogram (delete on NULL is a no-op) and
+    // have the bitmap build a new one into `hist`.
+    delete hist;
+
+    map->calcHistogram(hist);
+}
+
+void Page::rotateMap(float angle)
+{
+    // Build the rotated bitmap first, then swap it in for the current
+    // one and refresh the cached dimensions from it.
+    Bitmap *rotated;
+    rotateMap(angle, rotated);
+
+    delete map;
+    map = rotated;
+    width = map->get_width();
+    height = map->get_height();
+}
+
+void Page::rotateMap(float angle, Bitmap* &mapNew)
+{
+    // Rotates the page bitmap by `angle` degrees about its centre into a
+    // newly allocated Bitmap stored in mapNew (the caller must delete it).
+    // The page's own bitmap is not modified. Every destination cell is
+    // mapped back to a source cell; cells that land outside the source
+    // stay 0.
+    // Code taken from OCRchie
+
+    // pi from atan() rather than a rounded literal such as 3.142.
+    const double pi = 4.0 * atan(1.0);
+    const double radians = -(angle) * pi / 180.0;
+    const double cosval = cos(radians);
+    const double sinval = sin(radians);
+
+    const int oldheight = height;
+    const int oldwidth = width;
+
+    // Size of the box that encloses the rotated image.
+    const int newwidth = (int)abs((int)(oldwidth*cosval)) + (int)abs((int)(oldheight*sinval));
+    const int newheight = (int)abs((int)(-oldwidth*sinval)) + (int)abs((int)(oldheight*cosval));
+
+    const int halfnewheight = newheight / 2;
+    const int halfnewwidth = newwidth / 2;
+    const int halfoldwidth = oldwidth / 2;
+    const int halfoldheight = oldheight / 2;
+
+    mapNew = new Bitmap(newwidth, newheight);
+    mapNew->setToZero();
+
+    for (int i = 0; i < newheight; i++)
+    {
+        for (int j = 0; j < newwidth; j++)
+        {
+            // Rotate the offset from the destination centre, then move
+            // it to the source centre.
+            int nx = (int)( (j - halfnewwidth)*cosval + (i - halfnewheight)*sinval);
+            int ny = (int)( -((j - halfnewwidth)*sinval) + (i - halfnewheight)*cosval);
+            nx = nx + halfoldwidth;
+            ny = ny + halfoldheight;
+
+            // Row 0 and column 0 are valid source cells, hence >= 0.
+            if ((nx >= 0) && (ny >= 0) && (nx < oldwidth) && (ny < oldheight))
+            {
+                mapNew->pixels[i][j] = map->pixels[ny][nx];
+            }
+        }
+    }
+}
+
+void Page::tryAngles(float min, float max, float step)
+{
+    // Diagnostic sweep: for every angle from min to max in increments of
+    // step, print the angle and its row-histogram deviation on stderr.
+    float angle = min;
+
+    while (angle <= max)
+    {
+        const float dev = std_dev_lines_angle(angle);
+        fprintf(stderr, "Angle: %f, Dev: %f\n", angle, dev);
+        angle += step;
+    }
+}
+
+float Page::std_dev_lines_angle(float angle)
+{
+    // Standard deviation of the per-row point counts of the bitmap as it
+    // would look after rotating by `angle`. The page's own bitmap is not
+    // modified; the temporary bitmap and histogram are freed before returning.
+    Histogram *rowHist;
+
+    if (angle != 0)
+    {
+        Bitmap *rotated;
+        rotateMap(angle, rotated);
+        rotated->calcHistogram(rowHist);
+        delete rotated;
+    }
+    else
+    {
+        // No rotation needed: measure the current bitmap directly.
+        map->calcHistogram(rowHist);
+    }
+
+    const float deviation = rowHist->get_std_dev();
+    delete rowHist;
+
+    return deviation;
+}
+
+float Page::getSkew(int depth, float margin)
+{
+ float x1,x2,x3; // bracket of candidate angles: left edge, best so far, right edge
+ float y1,y2,y3; // std_dev_lines_angle() measured at x1, x2 and x3
+ float newx, newy; // probe angle and the deviation measured there
+ int i;
+ // Searches [-margin, margin] for the angle with the largest std_dev_lines_angle(); runs `depth` passes and returns x2.
+ x1 = -margin;
+ x2 = 0;
+ x3 = margin;
+
+ /* Bret's Method */
+ /*
+ for (i=0; i<depth; i++)
+ {
+ y1 = std_dev_lines_angle(x1);
+ y2 = std_dev_lines_angle(x2);
+ y3 = std_dev_lines_angle(x3);
+ fprintf(stderr, "New stddevs: %f, %f, %f\n", y1, y2, y3);
+
+ newx = x2 - 0.5 * ((x2-x1)*(x2-x1) * (y2-y3) -
+ (x2-x3)*(x2-x3) * (y2-y1)) /
+ ((x2-x1) * (y2-y3) - (x2-x3) * (y2-y1));
+
+ // Checks
+
+ // Result
+ if (newx > x1 && newx < x2)
+ {
+ x3 = x2;
+ x2 = newx;
+ }
+ else if (newx > x2 && newx < x3)
+ {
+ x1 = x2;
+ x2 = newx;
+ }
+ else
+ Errors::Fatal("No convergence!");
+ fprintf(stderr, "Nous punts: %f, %f, %f\n", x1,x2,x3);
+ }
+ */
+ // NOTE(review): y1 and y3 are assigned below but never read afterwards.
+ /* Golden Slice */
+ y1 = std_dev_lines_angle(x1);
+ y2 = std_dev_lines_angle(x2);
+ y3 = std_dev_lines_angle(x3);
+ // Each pass probes the midpoint of the left flank, then of the right flank.
+ for (i=0; i<depth; i++)
+ {
+
+ // Left flank: probe halfway between x1 and x2
+ newx = (x2+x1)/2;
+ newy = std_dev_lines_angle(newx);
+
+ // Result: a worse probe becomes the new left edge
+ if (newy < y2 )
+ {
+ x1 = newx;
+ y1 = newy;
+ }
+ else // newy >= y2: the probe becomes the best point, the old best the right edge
+ {
+ x3 = x2;
+ y3 = y2;
+ x2 = newx;
+ y2 = newy;
+ }
+
+ // Right flank: probe halfway between x2 and x3
+ newx = (x3+x2)/2;
+ newy = std_dev_lines_angle(newx);
+
+ // Result: a worse probe becomes the new right edge
+ if (newy < y2 )
+ {
+ x3 = newx;
+ y3 = newy;
+ }
+ else // newy >= y2: the probe becomes the best point, the old best the left edge
+ {
+ x1 = x2;
+ y1 = y2;
+ x2 = newx;
+ y2 = newy;
+ }
+ fprintf(stderr, "Nous punts: %f, %f, %f\n", x1,x2,x3);
+ }
+ return x2;
+}
+
+void Page::getText()
+{
+    // Create the Text object on first use, then scan the bitmap for lines.
+    if (!text)
+        text = new Text();
+    text->getLines(map);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Page.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,57 @@
+#ifndef __PAGE_H__
+#define __PAGE_H__
+extern "C" {
+#include <pam.h>
+}
+#include "Bitmap.h"
+#include "Histogram.h"
+#include "Text.h"
+
+typedef char *tRow;
+
+// A page image: a Netpbm file loaded into a Bitmap, with skew estimation,
+// rotation and text-line detection entry points.
+class Page
+{
+public:
+    Page(const char *filename);
+    ~Page();
+
+    int imageWidth() const { return width; }
+    int imageHeight() const { return height; }
+
+    //writeMap(char *filename) const;
+    /*
+    Angle skewAngle();
+    */
+    void rotateMap(float angle);
+    void rotateMap(float angle, Bitmap* &mapNew);
+
+    void calcHistogram();
+
+    /* A rather useless function */
+    void tryAngles(float min, float max, float step);
+    float getSkew(int depth = 5, float margin = 3);
+
+    float ratioBlackWhite() const;
+    // OCR
+    void getText();
+
+    //static void Initialize();
+    void writeMap(const char *filename) const;
+    void readMap(const char *filename);
+
+private:
+    // Not copyable: the destructor deletes `map`, so a memberwise copy
+    // would delete the same Bitmap twice.
+    Page(const Page &);
+    Page &operator=(const Page &);
+    float std_dev_lines_angle(float angle);
+    int width;
+    int height;
+    pam pamImage;
+    Bitmap *map;
+    Text *text;
+    Histogram *hist;
+};
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,13 @@
+Pàgina del projecte:
+http://http.cs.berkeley.edu/~fateman/kathey/ocrchie.html
+
+Fitxers a baixar:
+
+Advanced OCRchie:
+http://http.cs.berkeley.edu/~fateman/kathey/OCRchie.new.tar.gz
+
+Older stable OCRchie:
+http://http.cs.berkeley.edu/~fateman/kathey/ancientsimpleOCRchie.tar.gz
+
+Newly updated version of simpler OCRchie:
+http://http.cs.berkeley.edu/~fateman/kathey/simpleocr.tar.gz
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Rectangle.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,31 @@
+#ifndef __RECTANGLE_H__
+#define __RECTANGLE_H__
+
+// Box given by two corners: (ulx, uly) and (drx, dry) -- presumably the
+// upper-left and lower-right ones; confirm against the callers.
+class Rectangle
+{
+    int ulx, uly, drx, dry;
+
+public:
+    // Build from the four coordinates, or as a copy of another box.
+    Rectangle(int _ulx, int _uly, int _drx, int _dry)
+        : ulx(_ulx), uly(_uly), drx(_drx), dry(_dry) {}
+
+    Rectangle(const Rectangle &a)
+        : ulx(a.ulx), uly(a.uly), drx(a.drx), dry(a.dry) {}
+
+    // Read access to each coordinate.
+    int get_ulx() const { return ulx; }
+    int get_uly() const { return uly; }
+    int get_drx() const { return drx; }
+    int get_dry() const { return dry; }
+
+    // Write access to each coordinate.
+    void set_ulx(int num) { ulx = num; }
+    void set_uly(int num) { uly = num; }
+    void set_drx(int num) { drx = num; }
+    void set_dry(int num) { dry = num; }
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Text.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,54 @@
+#include <Text.h>
+#include <stdio.h>
+void Text::getLines(Bitmap *map)
+{
+    // Scans the bitmap top to bottom and stores one Rectangle per detected
+    // text line in `Lines`, replacing its previous contents. Each accepted
+    // line is also reported on stderr.
+    //
+    // NOTE(review): a row with exactly NoiseTolerance points can start a
+    // line but cannot continue one (`<` vs `>` below) -- confirm intent.
+    // TODO: joining lines closer than MinVertSeparation was planned, but
+    // the old code for it could never run (its line counter was never
+    // incremented), so it has been removed until it is implemented.
+
+    const int map_height = map->get_height();
+    const int map_width = map->get_width();
+
+    // FIXME: Checks
+
+    Lines.clear();
+
+    for (int i = 0; i < map_height; i++)
+    {
+        // Skip rows until the start of a line
+        while (i < map_height &&
+               map->points_per_row(i) < NoiseTolerance)
+            i++;
+        if (i >= map_height)
+            break;    // ran off the image without finding another line
+
+        const int line_top = i++;
+
+        // Advance until the end of the line
+        while (i < map_height &&
+               map->points_per_row(i) > NoiseTolerance)
+            i++;
+
+        // NOTE(review): i is now the first row past the line, so the
+        // stored bottom is one past its last row and this size is one
+        // more than its row count -- kept unchanged; confirm the convention.
+        const int line_bottom = i;
+        const int lineSize = line_bottom - line_top + 1;
+
+        if (lineSize < MinLineSize)
+            continue;
+
+        // The x extent of a line is the bitmap width, not its height.
+        Lines.push_back(Rectangle(0, line_top,
+                                  map_width, line_bottom));
+
+        fprintf(stderr, "Line from %i to %i : %i.\n", line_top,
+                line_bottom, line_bottom - line_top + 1);
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Text.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,18 @@
+#include <vector>
+#include "Rectangle.h"
+#include "Bitmap.h"
+
+using namespace std;
+
+class Text // finds text lines in a Bitmap and keeps their rectangles
+{
+ vector<Rectangle> Lines; // cleared and refilled by getLines()
+
+public:
+ void getLines(Bitmap *map);
+
+ // Constants referenced by getLines()
+ static const int MinVertSeparation=3; // compared with the gap to the previous line
+ static const int MinLineSize=5; // smallest line size that is kept
+ static const int NoiseTolerance=6; // compared with points_per_row() of each row
+};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configure.in Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,19 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_INIT(prova.cc)
+
+AM_INIT_AUTOMAKE(prova,0.1)
+
+dnl Checks for programs.
+AC_PROG_CXX
+AC_PROG_INSTALL
+
+dnl Checks for libraries.
+
+dnl Checks for header files.
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+
+dnl Checks for library functions.
+
+AC_OUTPUT(Makefile)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/prova.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,26 @@
+#include "Page.h"
+#include <stdio.h>
+
+int main(int argn, char **argv)
+{
+    // Usage: prova [input [output]]
+    // Deskews the input image (default "rotated.pbm") and, when an output
+    // name is given, writes the rotated image there.
+    pnm_init(&argn, argv);
+
+    // A stack object, so the page is destroyed when main returns.
+    Page Imatge(argn >= 2 ? argv[1] : "rotated.pbm");
+
+    //printf("B/W: %f\n", Imatge.ratioBlackWhite());
+    //Imatge.rotateMap(90);
+    //Imatge.writeMap("rotated.pbm");
+    //Imatge.tryAngles(-10,10,1);
+
+    /* Fix the skew angle */
+    Imatge.rotateMap(Imatge.getSkew(10));
+    if (argn >= 3)
+        Imatge.writeMap(argv[2]);
+
+    //Imatge.getText();
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/4 Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,679 @@
+
+654 164 217 211 236 212 217 164 236
+
+THE 659 211 727 233 728 211 659 233
+AMERICAN 738 211 928 233 929 211 738 233
+ECONOMIC 938 211 1131 233 1132 211 938 233
+REVIEW 1142 212 1286 235 1287 212 1142 235
+
+SEPTEMBER 1557 217 1766 239 1768 217 1557 239
+1980 1781 220 1846 239 1847 220 1781 239
+
+dispersion 161 307 340 344 341 307 160 343
+and 356 307 418 336 419 307 356 336
+models 434 307 557 336 558 307 434 336
+of 574 307 608 336 609 307 574 336
+temporal 624 308 780 346 781 308 623 344
+price 796 308 882 346 883 308 794 344
+dis 898 308 937 337 938 308 898 337
+- 938 317 959 337 960 317 938 337
+
+persion. 161 355 296 393 297 355 160 392
+Most 315 355 403 385 405 355 315 385
+models 420 355 543 385 544 355 420 385
+of 559 355 591 385 592 355 559 385
+spatial 608 356 721 394 722 356 607 393
+price 739 356 824 394 825 356 738 393
+disper 840 356 937 394 938 356 839 393
+- 938 366 959 386 960 366 938 386
+
+sion, 161 400 240 429 242 400 161 429
+such 259 400 336 429 337 400 259 429
+as 355 409 388 429 389 409 355 429
+the 407 400 457 429 458 400 407 429
+Salop-Stiglitz 475 400 711 438 712 400 474 437
+model 728 401 837 431 838 401 728 431
+or 855 411 889 431 890 411 855 431
+the 907 401 959 431 960 401 907 431
+
+Wilde-Schwartz 160 445 439 474 440 445 160 474
+model, 458 445 576 474 577 445 458 474
+have 597 446 675 475 676 446 597 475
+equilibria 695 446 864 484 865 446 694 483
+with 883 446 957 475 959 446 883 475
+
+specific 160 491 290 529 291 491 159 527
+prices 314 491 414 529 415 491 312 527
+being 439 491 535 529 536 491 438 527
+charged 559 491 698 529 699 491 558 527
+with 724 492 797 522 798 492 724 522
+positive 823 492 957 530 959 492 822 529
+
+probability 159 537 354 575 355 537 158 574
+mass. 367 546 459 566 460 546 367 566
+The 473 537 540 566 542 537 473 566
+a 555 546 555 566 555 546 555 566
+? 555 546 575 566 576 546 555 566
+bove 576 537 657 566 659 537 576 566
+argument 672 538 840 576 842 538 670 575
+shows 855 538 957 568 959 538 855 568
+
+that 160 583 227 612 229 583 160 612
+such 253 583 330 612 331 583 253 612
+strategies 357 583 517 621 518 583 356 621
+cannot 544 583 664 612 666 583 544 612
+be 690 584 729 614 731 584 690 614
+profit-max 757 584 935 622 936 584 755 622
+- 936 594 956 614 957 594 936 614
+
+imizing 160 630 289 668 290 630 159 668
+Nash 311 630 401 660 402 630 311 660
+behavior 426 630 581 660 582 630 426 660
+in 604 631 635 661 636 631 604 661
+a 660 641 677 661 679 641 660 661
+temporal 705 631 862 669 863 631 703 669
+ran 888 641 936 661 937 641 888 661
+- 937 641 957 661 959 641 937 661
+
+domizing 160 677 322 715 323 677 159 715
+model. 340 677 454 707 455 677 340 707
+
+Since 203 724 294 753 295 724 203 753
+there 322 724 407 753 408 724 322 753
+are 434 733 485 753 486 733 434 753
+no 513 733 555 753 556 733 513 753
+point 582 724 673 761 674 724 581 761
+masses 701 734 819 754 820 734 701 754
+in 849 725 878 754 879 725 849 754
+the 907 725 957 754 959 725 907 754
+
+equilibrium 160 768 363 806 364 768 159 806
+density, 380 768 512 806 513 768 379 806
+the 531 768 582 798 583 768 531 798
+cumulative 600 770 792 799 793 770 600 799
+distribu 810 770 936 799 937 770 810 799
+- 937 779 957 799 959 779 937 799
+
+tion 161 814 226 844 227 814 161 844
+function 248 814 394 844 395 814 248 844
+will 415 814 475 844 477 814 415 844
+be 497 814 535 844 536 814 497 844
+a 557 824 572 844 574 824 557 844
+continuous 594 814 788 844 790 814 594 844
+function 811 816 957 845 959 816 811 845
+
+on 159 870 201 890 203 870 159 890
+(p 222 871 248 898 249 871 220 898
+* 249 870 269 890 270 870 249 890
+, 270 871 292 890 294 871 270 890
+r). 308 871 348 890 349 871 308 890
+Let 370 861 425 890 426 861 370 890
+F(p) 445 863 530 898 531 863 444 898
+be 551 861 589 890 590 861 551 890
+the 610 862 660 891 661 862 610 891
+cumulative 681 862 876 891 877 862 681 891
+dis 895 862 935 891 936 862 895 891
+- 936 871 956 891 957 871 936 891
+
+tribution 160 905 314 935 315 905 160 935
+function 348 905 493 935 494 905 348 935
+for 526 905 576 935 577 905 526 935
+f(p); 602 909 695 944 696 909 601 944
+thus 735 907 805 936 806 907 735 936
+f(p)= 833 907 950 944 951 907 832 944
+
+F‚( 160 951 210 981 211 951 160 981
+p) 218 962 257 989 258 962 217 989
+almost 275 951 390 981 392 951 275 981
+everywhere. 408 951 615 989 616 951 407 989
+
+We 201 998 257 1027 258 998 201 1027
+can 275 1007 334 1027 335 1007 275 1027
+now 351 1007 422 1027 424 1007 351 1027
+construct 440 998 602 1027 603 998 440 1027
+the 620 998 672 1027 673 998 620 1027
+expected 689 999 843 1037 844 999 688 1037
+profit 861 999 956 1037 957 999 859 1037
+
+function 160 1042 305 1072 307 1042 160 1072
+for 328 1042 376 1072 377 1042 328 1072
+a 396 1052 413 1072 414 1052 396 1072
+representative 434 1042 680 1080 681 1042 433 1080
+store. 701 1044 793 1073 794 1044 701 1073
+When 816 1044 918 1073 920 1044 816 1073
+a 940 1053 956 1073 957 1053 940 1073
+
+store 161 1088 244 1118 245 1088 161 1118
+charges 262 1088 392 1126 393 1088 261 1126
+price 409 1088 493 1126 494 1088 408 1126
+p, 504 1099 538 1126 539 1099 503 1126
+exactly 557 1088 681 1126 682 1088 556 1126
+two 700 1090 761 1119 762 1090 700 1119
+events 779 1090 889 1119 890 1090 779 1119
+are 907 1099 959 1119 960 1099 907 1119
+
+relevant. 160 1135 309 1164 310 1135 160 1164
+It 338 1135 364 1164 366 1135 338 1164
+may 389 1144 461 1172 462 1144 388 1172
+be 488 1135 526 1164 527 1135 488 1164
+that 555 1135 621 1164 622 1135 555 1164
+p 641 1146 661 1174 662 1146 640 1174
+is 688 1136 713 1165 714 1136 688 1165
+the 741 1136 792 1165 793 1136 741 1165
+smallest 819 1136 959 1165 960 1136 819 1165
+
+price 160 1181 245 1218 246 1181 159 1218
+being 261 1181 356 1218 357 1181 259 1218
+charged, 370 1181 517 1218 518 1181 369 1218
+in 535 1181 565 1210 566 1181 535 1210
+which 581 1181 683 1210 685 1181 581 1210
+case, 701 1191 781 1211 783 1191 701 1211
+the 799 1182 851 1211 852 1182 799 1211
+given 866 1182 959 1220 960 1182 865 1220
+
+store 161 1225 244 1255 245 1225 161 1255
+gets 274 1225 338 1263 340 1225 272 1263
+all 369 1225 407 1255 408 1225 369 1255
+of 438 1225 470 1255 471 1225 438 1255
+the 500 1225 551 1255 552 1225 500 1255
+informed 582 1225 741 1255 742 1225 582 1255
+customers. 772 1227 957 1256 959 1227 772 1256
+
+This 160 1273 236 1302 237 1273 160 1302
+event 266 1273 357 1302 359 1273 266 1302
+happens 387 1273 531 1311 532 1273 386 1311
+only 561 1273 635 1311 636 1273 559 1311
+if 666 1274 688 1303 689 1274 666 1303
+all 718 1274 757 1303 758 1274 718 1303
+the 787 1274 839 1303 840 1274 787 1303
+other 870 1274 960 1303 961 1274 870 1303
+
+stores 160 1316 258 1346 259 1316 160 1346
+charge 278 1316 393 1354 394 1316 277 1354
+prices 412 1316 511 1354 512 1316 411 1354
+higher 531 1316 641 1354 642 1316 530 1354
+than 660 1318 738 1347 739 1318 660 1347
+p, 752 1328 784 1355 785 1328 751 1355
+an 805 1327 845 1347 846 1327 805 1347
+event 865 1318 957 1347 959 1318 865 1347
+
+which 160 1362 263 1392 264 1362 160 1392
+has 281 1362 337 1392 338 1362 281 1392
+probability 356 1362 550 1400 551 1362 355 1400
+(1 568 1372 595 1392 596 1372 568 1392
+…F(p)) 610 1366 735 1401 737 1366 609 1401
+? 737 1374 755 1393 757 1374 737 1393
+? 757 1374 775 1393 777 1374 757 1393
+‚. 777 1374 816 1393 817 1374 777 1393
+On 837 1364 888 1393 889 1364 837 1393
+the 907 1364 957 1393 959 1364 907 1393
+
+other 160 1410 250 1439 251 1410 160 1439
+hand, 268 1410 364 1439 366 1410 268 1439
+there 386 1410 470 1439 471 1410 386 1439
+may 488 1419 562 1448 563 1419 487 1448
+be 579 1410 620 1439 621 1410 579 1439
+some 638 1420 726 1440 727 1420 638 1440
+store 746 1413 830 1440 831 1413 746 1440
+with 848 1411 921 1440 922 1411 848 1440
+a 940 1420 959 1440 960 1420 940 1440
+
+lower 161 1453 256 1483 257 1453 161 1483
+price, 274 1453 367 1491 368 1453 272 1491
+in 386 1453 416 1483 418 1453 386 1483
+which 434 1453 537 1483 538 1453 434 1483
+case 556 1463 628 1483 629 1463 556 1483
+the 647 1455 698 1484 699 1455 647 1484
+store 716 1455 798 1484 799 1455 716 1484
+in 818 1455 850 1484 851 1455 818 1484
+ques 868 1464 936 1492 937 1464 866 1492
+- 937 1464 957 1484 959 1464 937 1484
+
+tion 162 1500 229 1529 230 1500 162 1529
+only 252 1500 328 1537 329 1500 251 1537
+gets 351 1500 416 1537 418 1500 350 1537
+its 440 1500 478 1529 479 1500 440 1529
+share 503 1500 592 1529 594 1500 503 1529
+of 617 1501 650 1530 651 1501 617 1530
+the 675 1501 727 1530 728 1501 675 1530
+uninformed 753 1501 959 1530 960 1501 753 1530
+
+customers. 161 1546 347 1575 348 1546 161 1575
+This 370 1546 444 1575 445 1546 370 1575
+event 466 1546 558 1575 559 1546 466 1575
+happens 579 1546 725 1583 726 1546 578 1583
+with 748 1547 822 1576 823 1547 748 1576
+proba 844 1547 937 1585 938 1547 843 1585
+- 938 1556 959 1576 960 1556 938 1576
+
+bility 162 1593 251 1631 252 1593 161 1631
+1 285 1602 294 1622 295 1602 285 1622
+… 308 1602 331 1622 333 1602 308 1622
+(1 342 1602 369 1622 370 1602 342 1622
+… 383 1602 407 1622 408 1602 383 1622
+F( 418 1600 455 1622 457 1600 418 1622
+p 465 1603 485 1631 486 1603 464 1631
+))fl 493 1600 535 1622 536 1600 493 1622
+- 544 1603 558 1622 559 1603 544 1622
+I 571 1600 576 1622 577 1600 571 1622
+(By 620 1594 677 1632 679 1594 618 1632
+Proposition 709 1594 911 1632 912 1594 708 1632
+3 942 1603 957 1624 959 1603 942 1624
+
+we 161 1646 206 1666 207 1646 161 1666
+can 229 1646 288 1666 289 1646 229 1666
+neglect 309 1637 431 1674 432 1637 308 1674
+the 453 1637 503 1666 504 1637 453 1666
+probability 525 1637 720 1674 721 1637 524 1674
+of 742 1638 774 1667 775 1638 742 1667
+any 796 1647 857 1675 858 1647 794 1675
+ties.) 879 1638 959 1667 960 1638 879 1667
+
+Hence 164 1683 272 1712 274 1683 164 1712
+the 297 1683 348 1712 349 1683 297 1712
+expected 372 1683 523 1720 524 1683 370 1720
+profit 546 1683 643 1720 644 1683 545 1720
+of 667 1684 700 1713 701 1684 667 1713
+a 724 1693 740 1713 741 1693 724 1713
+representa 764 1684 937 1722 938 1684 762 1722
+- 938 1693 959 1713 960 1693 938 1713
+
+tive 161 1730 222 1759 223 1730 161 1759
+store 240 1730 322 1759 323 1730 240 1759
+is 341 1730 364 1759 366 1730 341 1759
+
+f 178 1846 253 1868 255 1846 178 1868
+? 255 1855 269 1868 270 1855 255 1868
+( 270 1855 284 1868 285 1855 270 1868
+? 285 1855 300 1868 301 1855 285 1868
+s(p)(l 301 1846 387 1877 388 1846 300 1877
+? 438 1855 388 1867 388 1854 438 1868
+(p))fl 438 1846 574 1877 575 1846 437 1877
+? 575 1855 589 1868 590 1855 575 1868
+
+p 196 1886 210 1908 211 1886 194 1908
+
+1 424 1979 432 1999 433 1979 424 1999
+… 446 1979 468 1999 470 1979 446 1999
+(1 481 1979 510 1999 511 1979 481 1999
+…F(p)) 525 1970 661 2007 662 1970 524 2007
+? 662 1979 682 1999 683 1979 662 1999
+? 683 1979 703 1999 705 1979 683 1999
+1 705 1979 725 1999 726 1979 705 1999
+j 738 1979 746 2007 747 1979 737 2007
+JJ(o) 760 1970 863 1999 864 1970 760 1999
+dp 876 1971 911 2009 912 1971 875 2009
+
+where 162 2070 265 2100 266 2070 162 2100
+r 322 2079 433 2100 434 2079 322 2100
+? 434 2079 454 2100 455 2079 434 2100
+(p)=p(U+I)…c(U+i) 455 2070 803 2108 804 2070 454 2108
+
+? 316 2173 405 2192 406 2173 316 2192
+? 406 2173 425 2192 426 2173 406 2192
+(p)=pU 426 2164 544 2200 545 2164 425 2200
+? 545 2173 564 2192 565 2173 545 2192
+c(U) 565 2164 643 2192 644 2164 565 2192
+
+The 203 2261 269 2291 270 2261 203 2291
+maximization 289 2261 530 2291 531 2261 289 2291
+problem 550 2261 695 2299 696 2261 549 2299
+of 716 2262 751 2292 752 2262 716 2292
+the 772 2262 823 2292 824 2262 772 2292
+firm 844 2262 915 2292 916 2262 844 2292
+is 936 2262 960 2292 961 2262 936 2292
+
+to 162 2306 194 2336 196 2306 162 2336
+choose 212 2306 330 2336 331 2306 212 2336
+the 350 2306 400 2336 401 2306 350 2336
+density 418 2306 543 2344 544 2306 416 2344
+function 563 2306 708 2336 709 2306 563 2336
+f(p) 721 2307 803 2345 804 2307 720 2345
+so 823 2317 857 2337 858 2317 823 2337
+as 876 2317 908 2337 909 2317 876 2337
+to 927 2307 960 2337 961 2307 927 2337
+
+maximize 162 2351 328 2381 329 2351 162 2381
+expected 361 2351 512 2389 513 2351 360 2389
+profits 544 2351 657 2389 659 2351 543 2389
+subject 689 2352 812 2390 813 2352 688 2390
+to 845 2352 876 2382 877 2352 845 2382
+the 908 2352 960 2382 961 2352 908 2382
+
+constraints: 164 2398 367 2428 368 2398 164 2428
+
+f(p)>=0; 327 2506 493 2538 494 2506 325 2538
+frj(p)dp=l 545 2506 790 2538 791 2506 544 2538
+
+p 564 2554 578 2577 579 2554 563 2577
+
+expected 1047 312 1200 350 1201 312 1046 349
+profit; 1224 312 1332 350 1333 312 1223 349
+for 1361 312 1410 342 1411 312 1361 342
+if 1433 312 1457 342 1458 312 1433 342
+some 1481 322 1568 342 1569 322 1481 342
+price 1592 314 1679 351 1680 314 1590 350
+yields 1703 314 1803 351 1804 314 1701 350
+a 1828 323 1846 343 1846 323 1828 343
+
+greater 1047 357 1168 395 1169 357 1046 394
+profit 1183 357 1280 395 1281 357 1182 394
+than 1298 357 1372 387 1373 357 1298 387
+some 1388 367 1477 387 1478 367 1388 387
+other 1494 359 1582 388 1583 359 1494 388
+price 1599 359 1684 396 1685 359 1598 395
+it 1701 359 1722 388 1723 359 1701 388
+would 1738 359 1844 388 1846 359 1738 388
+
+pay 1046 413 1107 441 1109 413 1045 440
+to 1135 403 1165 433 1166 403 1135 433
+increase 1192 403 1333 433 1334 403 1192 433
+the 1360 403 1411 433 1412 403 1360 433
+frequency 1438 403 1614 441 1615 403 1437 440
+with 1640 405 1714 434 1716 405 1640 434
+which 1740 405 1844 434 1846 405 1740 434
+
+the 1047 450 1097 479 1098 450 1047 479
+more 1111 459 1197 479 1198 459 1111 479
+profitable 1211 450 1383 487 1384 450 1210 486
+price 1396 450 1481 487 1482 450 1394 486
+were 1494 460 1574 480 1575 460 1494 480
+charged. 1587 451 1737 488 1738 451 1586 487
+Since 1753 451 1843 480 1844 451 1753 480
+
+we 1045 504 1090 524 1091 504 1045 524
+require 1107 494 1228 532 1229 494 1106 531
+zero 1243 504 1314 524 1315 504 1243 524
+profits 1331 494 1444 532 1445 494 1329 531
+due 1461 494 1521 524 1522 494 1461 524
+to 1538 496 1569 525 1570 496 1538 525
+free 1586 496 1651 525 1652 496 1586 525
+entry, 1667 496 1765 533 1766 496 1666 532
+this 1783 496 1842 525 1843 496 1783 525
+
+common 1046 550 1198 570 1200 550 1046 570
+level 1218 540 1295 570 1296 540 1218 570
+of 1315 540 1350 570 1351 540 1315 570
+profit 1368 540 1466 578 1468 540 1367 577
+must 1487 542 1569 571 1570 542 1487 571
+be 1590 542 1629 571 1631 542 1590 571
+zero. 1650 551 1727 571 1729 551 1650 571
+6 1729 551 1749 571 1750 551 1729 571
+This 1768 542 1842 571 1843 542 1768 571
+
+argument 1046 587 1211 624 1212 587 1045 624
+yields 1227 587 1325 624 1326 587 1225 624
+
+PROPOSITION 1047 677 1333 707 1334 677 1047 707
+4: 1354 687 1384 707 1385 687 1354 707
+if 1413 685 1445 707 1446 685 1413 707
+f(p) 1450 680 1531 715 1533 680 1449 715
+>0, 1543 689 1607 708 1608 689 1543 708
+then 1627 681 1694 708 1696 681 1627 708
+
+r 1316 786 1322 806 1322 786 1316 806
+? 1322 786 1342 806 1344 786 1322 806
+(p)(l 1344 777 1445 814 1446 777 1342 814
+…F(p)) 1459 777 1603 814 1605 777 1458 814
+? 1605 786 1625 806 1626 786 1605 806
+? 1626 786 1646 806 1647 786 1626 806
+
+1(1 1334 891 1422 911 1423 891 1334 911
+F(p)) 1471 882 1574 920 1575 882 1470 920
+? 1575 891 1595 911 1596 891 1575 911
+? 1596 891 1616 911 1618 891 1596 911
+‚j 1618 891 1658 920 1659 891 1616 920
+=0 1673 891 1727 911 1729 891 1673 911
+
+(Of 1087 999 1143 1028 1144 999 1087 1028
+course, 1162 1008 1283 1028 1285 1008 1162 1028
+Proposition 1306 999 1508 1037 1509 999 1305 1037
+4 1527 1009 1546 1029 1547 1009 1527 1029
+also 1566 1000 1633 1029 1634 1000 1566 1029
+follows 1653 1000 1777 1029 1778 1000 1653 1029
+di 1797 1000 1821 1029 1822 1000 1797 1029
+- 1822 1009 1842 1029 1843 1009 1822 1029
+
+rectly 1045 1044 1140 1081 1142 1044 1044 1081
+from 1170 1044 1253 1073 1254 1044 1170 1073
+the 1282 1044 1332 1073 1333 1044 1282 1073
+application 1360 1044 1559 1081 1560 1044 1359 1081
+of 1587 1045 1620 1074 1621 1045 1587 1074
+the 1650 1045 1700 1074 1701 1045 1650 1074
+Kuhn 1731 1045 1821 1074 1822 1045 1731 1074
+- 1822 1054 1842 1074 1843 1054 1822 1074
+
+Tucker 1045 1090 1170 1119 1171 1090 1045 1119
+theorem 1192 1090 1333 1119 1334 1090 1192 1119
+to 1357 1090 1387 1119 1388 1090 1357 1119
+the 1411 1090 1462 1119 1463 1090 1411 1119
+specified 1485 1091 1639 1129 1640 1091 1484 1129
+maximiza 1661 1091 1822 1120 1823 1091 1661 1120
+- 1823 1100 1843 1120 1844 1100 1823 1120
+
+tion 1046 1137 1112 1166 1113 1137 1046 1166
+problem.) 1142 1137 1311 1175 1312 1137 1140 1175
+Rearranging 1341 1137 1561 1175 1562 1137 1340 1175
+this 1590 1138 1651 1168 1652 1138 1590 1168
+equation, 1680 1138 1843 1176 1844 1138 1679 1176
+
+we 1046 1191 1091 1211 1092 1191 1046 1211
+have 1126 1182 1205 1211 1207 1182 1126 1211
+a 1242 1191 1260 1211 1260 1191 1242 1211
+formula 1294 1182 1431 1211 1432 1182 1294 1211
+for 1468 1182 1516 1211 1517 1182 1468 1211
+the 1553 1183 1603 1212 1605 1183 1553 1212
+equilibrium 1640 1183 1843 1221 1844 1183 1639 1221
+
+cumulative 1046 1230 1238 1260 1240 1230 1046 1260
+distribution 1256 1230 1462 1260 1463 1230 1256 1260
+function: 1481 1230 1639 1260 1640 1230 1481 1260
+
+l 1158 1371 1295 1400 1296 1371 1158 1400
+? 1296 1380 1316 1400 1318 1380 1296 1400
+F(P)=( 1318 1371 1440 1400 1442 1371 1318 1400
+? 1442 1380 1462 1400 1463 1380 1442 1400
+($¼1s(p) 1463 1380 1626 1409 1627 1380 1462 1409
+)nlt 1644 1372 1724 1401 1725 1372 1644 1401
+
+Note 1046 1502 1131 1531 1132 1502 1046 1531
+that 1149 1502 1215 1531 1216 1502 1149 1531
+the 1233 1502 1281 1531 1282 1502 1233 1531
+denominator 1298 1502 1525 1531 1527 1502 1298 1531
+of 1541 1503 1574 1533 1575 1503 1541 1533
+this 1592 1503 1651 1533 1652 1503 1592 1533
+fraction 1667 1503 1803 1533 1804 1503 1667 1533
+is 1821 1503 1843 1533 1844 1503 1821 1533
+
+negative 1046 1547 1190 1585 1191 1547 1045 1585
+for 1209 1547 1259 1576 1260 1547 1209 1576
+any 1276 1556 1337 1585 1338 1556 1275 1585
+p 1348 1557 1371 1585 1372 1557 1347 1585
+between 1390 1547 1534 1576 1535 1547 1390 1576
+p 1546 1559 1564 1586 1566 1559 1544 1586
+* 1566 1557 1586 1577 1587 1557 1566 1577
+and 1608 1548 1671 1577 1672 1548 1608 1577
+r. 1688 1559 1712 1577 1713 1559 1688 1577
+Hence 1733 1548 1843 1577 1844 1548 1733 1577
+
+the 1047 1595 1097 1625 1098 1595 1047 1625
+numerator 1130 1595 1314 1625 1315 1595 1130 1625
+must 1346 1595 1430 1625 1431 1595 1346 1625
+be 1462 1595 1500 1625 1501 1595 1462 1625
+negative 1531 1596 1678 1634 1679 1596 1530 1634
+so 1711 1606 1745 1626 1746 1606 1711 1626
+that 1777 1596 1843 1626 1844 1596 1777 1626
+
+profits 1046 1639 1157 1677 1158 1639 1045 1677
+in 1179 1639 1209 1668 1210 1639 1179 1668
+the 1233 1639 1282 1668 1283 1639 1233 1668
+event 1303 1639 1397 1668 1398 1639 1303 1668
+of 1418 1639 1451 1668 1452 1639 1418 1668
+failure 1471 1639 1582 1668 1583 1639 1471 1668
+are 1605 1650 1657 1670 1658 1650 1605 1670
+definitely 1679 1640 1843 1678 1844 1640 1678 1678
+
+negative. 1046 1685 1200 1723 1201 1685 1045 1723
+The 1223 1685 1288 1714 1289 1685 1223 1714
+construction 1309 1685 1529 1714 1530 1685 1309 1714
+of 1550 1686 1585 1716 1586 1686 1550 1716
+(1 1606 1696 1633 1716 1634 1696 1606 1716
+…F(p)) 1648 1688 1779 1724 1781 1688 1647 1724
+? 1781 1697 1800 1716 1800 1697 1781 1716
+
+is 1045 1730 1067 1759 1068 1730 1045 1759
+illustrated 1091 1730 1267 1759 1268 1730 1091 1759
+in 1290 1730 1319 1759 1320 1730 1290 1759
+Figure 1345 1730 1457 1768 1458 1730 1344 1768
+1. 1487 1740 1508 1761 1509 1740 1487 1761
+At 1533 1731 1574 1761 1575 1731 1533 1761
+each 1598 1731 1677 1761 1678 1731 1598 1761
+p 1694 1742 1716 1769 1717 1742 1693 1769
+where 1739 1731 1842 1761 1843 1731 1739 1761
+
+f(p)>O 1038 1778 1184 1814 1185 1778 1037 1814
+we 1207 1785 1251 1805 1253 1785 1207 1805
+can 1275 1785 1332 1805 1333 1785 1275 1805
+construct 1357 1776 1521 1805 1522 1776 1357 1805
+rf(P) 1543 1779 1640 1807 1641 1779 1543 1807
+and 1664 1777 1727 1807 1729 1777 1664 1807
+? 1751 1787 1729 1805 1729 1785 1751 1807
+s(P) 1751 1777 1842 1807 1843 1777 1751 1807
+
+as 1045 1833 1078 1853 1079 1833 1045 1853
+illustrated 1107 1823 1280 1853 1281 1823 1107 1853
+and 1308 1823 1372 1853 1373 1823 1308 1853
+take 1401 1823 1472 1853 1474 1823 1401 1853
+the 1503 1824 1554 1854 1555 1824 1503 1854
+relevant 1582 1824 1724 1854 1725 1824 1582 1854
+ratio. 1753 1824 1841 1854 1842 1824 1753 1854
+
+Proposition 1046 1868 1247 1906 1248 1868 1045 1906
+4 1264 1877 1283 1898 1283 1877 1264 1898
+gives 1299 1868 1384 1906 1385 1868 1298 1906
+us 1401 1877 1437 1898 1438 1877 1401 1898
+an 1455 1877 1495 1898 1496 1877 1455 1898
+explicit 1512 1869 1640 1907 1641 1869 1511 1907
+expression 1658 1869 1842 1907 1843 1869 1657 1907
+
+for 1045 1914 1093 1944 1094 1914 1045 1944
+the 1116 1914 1166 1944 1168 1914 1116 1944
+equilibrium 1189 1914 1391 1952 1392 1914 1188 1952
+distribution 1414 1914 1619 1944 1620 1914 1414 1944
+function 1641 1915 1789 1945 1790 1915 1641 1945
+at 1812 1915 1842 1945 1843 1915 1812 1945
+
+those 1046 1960 1136 1990 1137 1960 1046 1990
+values 1152 1960 1260 1990 1261 1960 1152 1990
+of 1277 1960 1309 1990 1311 1960 1277 1990
+p 1321 1971 1341 1998 1342 1971 1320 1998
+where 1359 1960 1463 1990 1464 1960 1359 1990
+f( 1474 1967 1505 1990 1507 1967 1474 1990
+p)> 1514 1972 1590 1999 1592 1972 1512 1999
+0. 1602 1972 1629 1991 1631 1972 1602 1991
+If 1648 1961 1675 1991 1677 1961 1648 1991
+this 1693 1961 1752 1991 1753 1961 1693 1991
+is 1770 1961 1794 1991 1795 1961 1770 1991
+to 1811 1961 1843 1991 1844 1961 1811 1991
+
+be 1046 2006 1084 2036 1085 2006 1046 2036
+a 1107 2016 1125 2036 1126 2016 1107 2036
+legitimate 1149 2006 1318 2044 1319 2006 1148 2044
+candidate 1341 2006 1512 2036 1514 2006 1341 2036
+for 1536 2007 1586 2037 1587 2007 1536 2037
+a 1609 2017 1628 2037 1628 2017 1609 2037
+cumulative 1651 2007 1843 2037 1844 2007 1651 2037
+
+distribution 1046 2053 1248 2083 1249 2053 1046 2083
+function, 1279 2053 1435 2083 1436 2053 1279 2083
+it 1465 2053 1487 2083 1488 2053 1465 2083
+should 1516 2055 1632 2084 1633 2055 1516 2084
+be 1661 2055 1699 2084 1700 2055 1661 2084
+an 1729 2064 1769 2084 1770 2064 1729 2084
+in 1800 2055 1822 2084 1823 2055 1800 2084
+- 1823 2064 1843 2084 1844 2064 1823 2084
+
+creasing 1045 2097 1188 2135 1189 2097 1044 2135
+function 1204 2097 1348 2127 1350 2097 1204 2127
+of 1366 2097 1399 2127 1400 2097 1366 2127
+p. 1410 2108 1440 2135 1442 2108 1409 2135
+This 1458 2097 1533 2127 1534 2097 1458 2127
+is 1550 2098 1573 2128 1574 2098 1550 2128
+easy 1590 2108 1664 2136 1665 2108 1589 2136
+to 1681 2098 1712 2128 1713 2098 1681 2128
+verify: 1729 2098 1838 2136 1840 2098 1727 2136
+
+PROPOSITION 1046 2188 1339 2218 1340 2188 1046 2218
+5: 1368 2198 1397 2218 1398 2198 1368 2218
+ƒj(p)/( 1427 2199 1595 2226 1596 2199 1426 2226
+? 1596 2200 1615 2219 1615 2200 1596 2219
+(p) 1615 2200 1674 2227 1675 2200 1614 2227
+… 1692 2199 1717 2219 1718 2199 1692 2219
+? 1733 2199 1718 2218 1718 2198 1733 2219
+s(P)) 1733 2189 1842 2219 1843 2189 1733 2219
+
+is 1044 2238 1067 2265 1068 2238 1044 2265
+strictly 1084 2238 1197 2273 1198 2238 1083 2273
+decreasing 1216 2238 1393 2273 1394 2238 1215 2273
+in 1411 2238 1438 2265 1439 2238 1411 2265
+p. 1452 2246 1482 2273 1483 2246 1451 2273
+
+PROOF: 1046 2326 1198 2356 1200 2326 1046 2356
+
+Taking 1085 2371 1208 2409 1209 2371 1084 2409
+the 1231 2371 1281 2401 1282 2371 1231 2401
+derivative 1305 2371 1476 2401 1477 2371 1305 2401
+it 1500 2371 1521 2401 1522 2371 1500 2401
+suffices 1544 2372 1674 2402 1675 2372 1544 2402
+to 1699 2372 1730 2402 1731 2372 1699 2402
+show 1753 2372 1842 2402 1843 2372 1753 2402
+
+that 1045 2417 1111 2447 1112 2417 1045 2447
+
+It 206 2625 231 2655 232 2625 206 2655
+is 251 2625 275 2655 276 2625 251 2655
+clear 295 2625 377 2655 379 2625 295 2655
+that 398 2625 462 2655 464 2625 398 2655
+all 483 2625 522 2655 523 2625 483 2655
+prices 543 2625 642 2663 643 2625 542 2663
+that 663 2626 731 2656 732 2626 663 2656
+are 751 2636 801 2656 803 2636 751 2656
+charged 823 2626 961 2664 962 2626 822 2664
+
+with 162 2669 237 2698 238 2669 162 2698
+positive 264 2669 398 2707 399 2669 263 2707
+density 424 2669 548 2707 549 2669 422 2707
+must 575 2669 657 2698 659 2669 575 2698
+yield 683 2670 768 2708 770 2670 682 2708
+the 797 2670 848 2700 849 2670 797 2700
+same 875 2679 961 2700 962 2679 875 2700
+
+6 1080 2600 1100 2617 1101 2600 1080 2617
+One 1101 2593 1151 2617 1152 2593 1101 2617
+can 1175 2600 1222 2617 1223 2600 1175 2617
+also 1244 2593 1296 2617 1298 2593 1244 2617
+formulate 1319 2593 1456 2617 1457 2593 1319 2617
+the 1479 2593 1520 2617 1521 2593 1479 2617
+model 1543 2594 1628 2618 1629 2594 1543 2618
+with 1651 2594 1711 2618 1712 2594 1651 2618
+a 1735 2601 1748 2618 1748 2601 1735 2618
+fixed 1770 2594 1837 2618 1838 2594 1770 2618
+
+number 1038 2640 1145 2664 1146 2640 1038 2664
+of 1159 2640 1187 2664 1188 2640 1159 2664
+firms. 1202 2640 1277 2664 1279 2640 1202 2664
+In 1295 2640 1321 2664 1322 2640 1295 2664
+this 1338 2640 1384 2664 1385 2640 1338 2664
+case, 1399 2648 1464 2664 1465 2648 1399 2664
+expected 1481 2642 1602 2671 1603 2642 1479 2671
+profits 1618 2642 1709 2671 1710 2642 1616 2671
+must 1725 2642 1790 2665 1791 2642 1725 2665
+be 1805 2642 1836 2665 1837 2642 1805 2665
+
+equal 1037 2679 1111 2709 1112 2679 1035 2709
+to 1126 2679 1151 2703 1152 2679 1126 2703
+H 1168 2679 1174 2703 1174 2679 1168 2703
+1 1174 2687 1190 2703 1191 2687 1174 2703
+(r). 1191 2687 1257 2703 1259 2687 1191 2703
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/BitMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,463 @@
+ /* *****************************************************************
+ * BitMap.cc - Member functions for a BitMap *
+ * because of the complexity of the many Bit functions, they are *
+ * not all in this file. In this file are only the following *
+ * functions:
+ *
+ * BitMap() - Constructor
+ * ~BitMap() - Destructor
+ *
+ * int imageLength();
+ * int imageWidth();
+ * MapStatus & status;
+ * MapStatus readMap(char * filename) ;
+ * MapStatus writeMap(char * filename);
+ *
+ * // Data Access and low level manipulation functions
+ * uchar * row(int i) - Returns a pointer to row i
+ * MapStatus setBit(Point point, Color clr);
+ * Color readBit(Point point);
+ *
+ *
+ ***************************************************************/
+
+#include "BitMap.h"
+#include <iostream.h>
+#include <stdio.h>
+#include "status_message.h"
+
+inline int set_pixel_value(uchar** new_data, int y, int x, int new_val)
+{   // ORs new_val into bit (7 - x%8) of byte x/8 in row y; it can set a bit but never clear one
+    return new_data[y][x/8] |= (uchar)(new_val << (7-(x%8)));  // now returns the updated byte
+}
+
+inline int get_pixel_value(uchar** data, int y, int x)
+{
+    // Pixels are packed eight per byte, highest bit first:
+    // column x lives in bit (7 - x%8) of byte x/8 of row y.
+    const int mask = 1 << (7 - (x%8));
+    return (data[y][x/8] & mask) ? 1 : 0;
+}
+
+
+
+BitMap::BitMap()
+:fImageWidth(0), fImageLength(0), fStatus(EMPTY), fMapData(NULL)
+/*--------------------------------------------------------------
+Primary Function: Default constructor
+Effects: Creates an empty map: width and length are 0, status is
+         EMPTY and no row storage is attached (fMapData is NULL)
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{ }
+
+
+
+ BitMap::~BitMap()
+/*--------------------------------------------------------------
+Primary Purpose: destructor
+Effects: Releases each row buffer, then the array of row pointers.
+         readMap() allocates both with new[], so delete[] is required.
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+    if (fMapData != NULL)
+    {
+        int i;
+        // delete each row (allocated as new uchar[numChars])
+        for (i=0; i< fImageLength; i++)
+            delete[] fMapData[i];
+
+        // delete array of rows
+        delete[] fMapData;
+    }
+}
+
+int BitMap::readBit(Point p)  // 1 if the pixel at column p.x(), row p.y() is set, else 0
+{ const int r = p.y(); const int c = p.x(); return get_pixel_value(fMapData, r, c); }
+
+uchar * BitMap::row(int i)
+/*--------------------------------------------------------------
+Primary Purpose: Hand back the packed pixel buffer of one row
+Arguments: i is the index of the wanted row
+Constraints: caller must ensure i < fImageLength; no check is made
+Rev: KM 10/15
+---------------------------------------------------------------*/
+{
+    uchar * const wanted = *(fMapData + i);
+    return wanted;
+}
+
+
+
+MapStatus BitMap::readMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read an BitMap from a TIFF file
+Arguments: filename of TIFF file
+Return Value: VALID, or OPENERROR when the file cannot be opened
+Effects:
+ * BitMap::readMap(filename) will read a two level TIFF file
+ * and place it in an BitMap.  The private fields of the BitMap
+ * set are:
+        fImageWidth - the pixel width of the image
+        fImageLength - the vertical pixel length of the image
+        fStatus - the status after the read: VALID or OPENERROR
+        fMapData - an array of pointers to uchar arrays (one per row)
+Constraints: filename must be a two level TIFF file
+Rev: 10/15/95 KM Portions Borrowed from Assignment 1
+---------------------------------------------------------------*/
+{
+    TIFF *tif;
+    short photometric;
+
+    // Open File - Read length and width
+
+    tif = TIFFOpen (filename, "r");
+    if(tif == NULL)
+    {   fStatus= OPENERROR;
+        return OPENERROR;
+    }
+
+    TIFFGetField (tif, TIFFTAG_IMAGELENGTH, &fImageLength);
+    TIFFGetField (tif, TIFFTAG_IMAGEWIDTH, &fImageWidth);
+    TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photometric);
+
+    // One row pointer per scanline.  The type-id must not be parenthesized
+    fMapData = new uchar *[fImageLength];   // here: "new (uchar *)[n]" is not an array new
+
+    printf("open succeeded on file %s. length = %d. width = %d\n",
+           filename, fImageLength, fImageWidth);
+    /*  if(photometric == PHOTOMETRIC_MINISWHITE)
+           printf("min-is-white format\n");
+        else if(photometric == PHOTOMETRIC_MINISBLACK )
+           printf("min-is-black format\n"); */
+
+    if((photometric != PHOTOMETRIC_MINISWHITE) &&
+       (photometric != PHOTOMETRIC_MINISBLACK))
+        printf("with an unknown(!) photometric: %d\n", photometric);
+
+    // Calculate number of chars in a row
+    int numChars = (fImageWidth / 8 ) +1 ;
+    if(ENABLE_USER_INTERFACE)
+        set_status("Reading %s...", filename);
+    last_status = 0.0;
+
+    for (int row = 0; row < fImageLength; ++row)
+    {
+        if(ENABLE_USER_INTERFACE)
+            set_read_status(row, fImageLength);
+        fMapData[row] = new uchar[numChars];
+        fMapData[row][numChars - 1] = 0;   // last byte is zeroed before the scanline is read
+        TIFFReadScanline(tif,fMapData[row],row,0);
+        if(photometric != PHOTOMETRIC_MINISWHITE)
+            invertBitsInBuffer(fMapData[row], numChars);
+        // need to clear top and bottom row
+        if(row == fImageLength-1 || row == 0)
+            clearBitsInBuffer(fMapData[row], numChars);
+
+    }
+    last_status = 0.0;
+    if(ENABLE_USER_INTERFACE)
+        set_status("Done reading %s", filename);
+    TIFFClose(tif);
+    fStatus = VALID;
+    return VALID;
+
+}
+
+
+MapStatus BitMap::writeTclMap(char * filename,
+                             Point ul, Point lr, int scaledown)
+// Dumps the width, the length and every stored byte of every row (in hex)
+// to the text file `filename`.  Returns OPENERROR if the file cannot be
+// created, VALID otherwise.  ul, lr and scaledown are currently unused.
+{
+    int numChars= fImageWidth /8 + 1;   // bytes stored per row, same formula as readMap()
+    FILE * outfile = fopen(filename, "w");
+    if(!outfile)
+    {   cout << " Could not open " << filename << endl; return OPENERROR; }  // was: wrote to NULL
+
+    fprintf(outfile, "%s_width %d\n",filename,fImageWidth);
+    fprintf(outfile, "%s_height %d\n",filename,fImageLength);
+    fprintf(outfile, "static char %s_bits[] {\n",filename);
+
+    // NOTE(review): each byte is followed by "," and then " ," -- confirm the intended layout
+    for (int r = 0; r < fImageLength; r++)
+    {
+        for(int col=0; col < numChars; col++)
+        {
+            fprintf(outfile, "%#4x,", fMapData[r][col]);  // '#' flag must precede the width
+
+            if (!(r == fImageLength-1 && col == numChars-1))
+                fprintf(outfile, " ,");
+
+            if (((r*numChars + col) % 15)==0)
+                fprintf(outfile,"\n");
+        }
+    }
+    fprintf(outfile,"}\n");
+
+    fclose(outfile);
+    return VALID;   // the function is declared to return MapStatus but returned nothing
+}
+
+
+
+short int BitMap::grayScale(Point ul, Point lr)
+// Share of set pixels in the rectangle ul..lr (corners inclusive), scaled to 0..255
+{
+    int numPixels = pixelsInRegion( ul, lr);
+    int area = (lr.x() - ul.x()+1) * (lr.y() - ul.y()+1);
+    if (area < numPixels) {
+        printf("Uh oh!  Area = %d and pixels = %d\n", area, numPixels);
+        assert(area >= numPixels);
+    }
+    if (area == 0) return 0;   // empty region: avoid 0/0, whose result cannot be cast to short
+    short int gscale =(short int)(((float)numPixels/area) * 255);
+    return gscale;
+}
+
+
+
+const int BitMap::pixelsInRegion (Point ul, Point lr)
+// Counts the set pixels in the rectangle with corners ul and lr by summing
+// pixelsBetween() over rows ul.y()..lr.y(); returns 0 when ul > lr.
+{
+    assert (ul >= Point(0,0));  /* did someone overload these? */
+    if (!(lr <= Point(fImageWidth+8, fImageLength+8)))
+        printf("problem\n");
+    //  assert (lr <= Point(fImageWidth+8, fImageLength+8));
+
+    if(ul > lr)return 0;
+
+    int ulx = ul.x(); int uly = ul.y();
+    int lrx = lr.x(); int lry = lr.y();
+    int pixCount = 0;
+    // lr is tolerated up to 8 rows past the image, but those rows do not exist:
+    if (lry > fImageLength-1) lry = fImageLength-1;   // never index beyond the row array
+    for(int r = uly; r <= lry; r++)
+    {
+        uchar * curRow = row(r);
+        // Add this row's count for columns ulx..lrx (pixelsBetween is defined elsewhere)
+        pixCount += pixelsBetween(curRow, ulx, lrx);
+//      cout << pixelsBetween(curRow,ulx,lrx) <<" ";
+//      cout << pixCount << endl;
+    }
+    return pixCount;
+}
+
+
+int BitMap::minThickness(Point top, Point bottom)
+     // returns the minimum number of pixels joined from top to bottom
+     // requires that top.x == bottom.x
+{
+  assert(top.x() == bottom.x());   // was "=", which assigned instead of comparing
+  int thickness = 0;               // length of the run currently being scanned
+  int min = pixelsInRegion(top,bottom);   // starting value; only lowered below
+  for(Point p = top; p.y() <= bottom.y(); (p.y())++)
+    {
+      if(readBit(p))
+        thickness++;
+      else
+        {
+          // run ended: keep it if shortest so far, and always restart the count
+          if(thickness > 0 && thickness < min)
+            min = thickness;
+          thickness = 0;
+        }
+    }
+  if(thickness > 0 && thickness < min)   // a run that reaches the bottom
+    min = thickness;
+  return min;
+}
+
+void testBitMap(char * filename)
+// Reads in BitMap and prints on screen, one text line per image row;
+// each byte of a row is drawn by byteprint.
+{
+  BitMap * m = new BitMap;
+  m->readMap(filename);
+  int numChars = (m->imageWidth() / 8) + 1;   // bytes printed per row
+
+  for (int r = 0; r < m->imageLength(); r++) {
+    for (int c = 0; c < numChars; c++) byteprint(m->row(r)[c]);
+    printf("\n");
+  }
+  delete m;   // the map was only needed for this dump
+}
+
+
+void byteprint(char d) // prints 8 characters: "X" for a set bit, " " for a clear one, bit 7 first
+{
+  int shift = 8;
+  while (shift-- > 0) {
+    printf("%s", ((d >> shift) & 1) ? "X" : " ");
+  }
+}
+
+void bitprint(char d, int x)
+{ // prints "X" when the bit x places below bit 7 of d is set, otherwise " "
+  const char * mark = ((d >> (7 - x)) & 1) ? "X" : " ";
+  cout << mark;
+}
+
+
+class Page;
+
+void testPixelsInRegion(BitMap * bmap, RLEMap * rmap)
+// Prints BitMap::pixelsInRegion next to RLEMap::pixelsInRegion for the
+// whole page and for five small boxes, so the two can be compared by eye.
+{
+  int bmapcnt, rmapcnt;
+
+  cout << "Testing pixelsInRegion " << endl;
+
+  // Each map is queried over its own full extent.
+  bmapcnt = bmap->pixelsInRegion(Point(0,0),
+                Point(bmap->imageWidth()-1, bmap->imageLength()-1));
+  rmapcnt = rmap->pixelsInRegion(Point(0,0),
+                Point(rmap->imageWidth()-1, rmap->imageLength()-1));
+
+  cout << "For whole page:";
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start on char edge end on edge (8,8) (16,21) ";
+  bmapcnt = bmap->pixelsInRegion(Point(8,8), Point(16,21));
+  rmapcnt = rmap->pixelsInRegion(Point(8,8), Point(16,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start on char edge, end mid char (0,8) (50,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(0,8), Point(50,21));
+  rmapcnt = rmap->pixelsInRegion(Point(0,8), Point(50,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start mid char, end on edge (2,8) (7,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(7,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(7,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start mid char, end mid char (2,8) (9,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(9,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(9,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  // The corners now match the printed label; the calls used to pass Point(2,4).
+  cout << "Start and, end same char (2,8) (4,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(4,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(4,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+}
+
+MapStatus BitMap::rotateMap(Angle angle)
+/*
+ Rotates the bitmap in place by "angle" degrees about its centre: a new
+ buffer sized for the rotated image is filled by sampling the old one and
+ then replaces it.  Returns fStatus, which the rotation does not change.
+ Thanks to Clint Staley and S. Jacques @calpoly for this bitmap rotation
+ alg.; copied and slightly modified since it was a pain getting the RLE
+ rotate to work and I think this might be decently fast -AR
+*/
+{
+  int nx,ny,newheight,newwidth,oldheight,oldwidth,i,j,halfnewheight,halfnewwidth;
+  int halfoldheight,halfoldwidth;
+  double radians;
+  double cosval,sinval;
+  uchar** newMapData;
+  const double pi = 4.0 * atan(1.0);   // replaces the rounded constant 3.142
+  fprintf(stderr,"Rotating Image %lf Degrees\n",(double)angle);
+  radians = -(angle) / (180 / pi);
+  cosval = cos(radians);
+  sinval = sin(radians);
+
+  oldheight = fImageLength;
+  oldwidth = fImageWidth;
+  // size of the box that encloses the rotated image
+  newwidth = (int)abs((int)(oldwidth*cosval)) + (int)abs((int)(oldheight*sinval));
+  newheight = (int)abs((int)(-oldwidth*sinval)) + (int)abs((int)(oldheight*cosval));
+
+  halfnewheight = newheight / 2;
+  halfnewwidth = newwidth / 2;
+  halfoldwidth = oldwidth /2;
+  halfoldheight = oldheight /2 ;
+
+  newMapData = new uchar*[newheight];   // standard spelling of new (uchar*) [n]
+
+  int num_chars = (newwidth / 8) + 1;   // bytes allocated per new row
+
+  for (int row = 0; row < newheight; ++row)
+    {
+      newMapData[row] = new uchar[num_chars];
+      for (int k = 0; k < num_chars; k++)
+        newMapData[row][k] = '\0';
+    }
+
+  last_status = 0.0;
+  for(i=0;i < newheight;i++)
+    {
+      if(ENABLE_USER_INTERFACE)
+        set_rotation_status((int)i, (int)newheight);
+      for(j=0;j < newwidth;j++)
+        {
+          // map destination pixel (j,i) back to a source pixel (nx,ny)
+/*        set_pixel_value(newMapData, i, j, 0);
+          break; */
+
+          nx =(int)( (j - halfnewwidth)*cosval + (i-halfnewheight)*sinval);
+          ny =(int)( -((j - halfnewwidth)*sinval) + (i - halfnewheight)*cosval);
+          nx = nx + halfoldwidth;
+          ny = ny + halfoldheight;
+          if ((nx < oldwidth) && (ny < oldheight) && (nx > 0) && (ny > 0))
+            {
+              if(get_pixel_value(fMapData, ny, nx))
+                set_pixel_value(newMapData, i, j, 1);
+              else
+                set_pixel_value(newMapData, i, j, 0);
+            }
+          else
+            {
+              set_pixel_value(newMapData, i, j, 0);
+            }
+        }
+    }
+  if(ENABLE_USER_INTERFACE)
+    set_status("Rotating Image: Done");
+  last_status = 0.0;
+
+/* free up the old storage; rows are allocated with new[] (in readMap and in
+   the loop above), so they are released with delete[] rather than free() */
+  for(i = 0; i < fImageLength; i++)
+    delete [] fMapData[i];
+  delete [] fMapData;   // NOTE(review): assumes the row table came from new[] too
+
+/* assign pointer, etc to the new stuff */
+  fMapData = newMapData;
+  fImageLength = newheight;
+  fImageWidth = newwidth;
+  return fStatus;   // the function used to fall off the end without a value
+}
+
+/*
+
+int set_pixel_value(uchar** new_data, int y, int x, int new_val)
+{
+ new_data[y][x/8] |= (uchar)(new_val << (7-(x%8)));
+}
+
+int get_pixel_value(uchar** data, int y, int x)
+{
+ if((data[y][x/8]) & (1 << (7 - (x%8))))
+ return 1;
+ else
+ return 0;
+}
+
+*/
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/BitMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,78 @@
+#ifndef _BITMAP_H
+#define _BITMAP_H
+#include "system.h"
+
+/** A BitMap representation stores image in an array of unsigned character
+ arrays. There is one uchar array per row. Each bit of the uchar
+ represents a pixel.
+***/
+
+
+class RLEMap;
+class Point;
+class BitMap;
+
+extern void byteprint(char d);
+extern void bitprint(char d, int x);
+
+
+class BitMap{
+ public:
+  friend MapStatus convertMap(RLEMap *,BitMap*,Point,Point);
+  friend MapStatus convertMap(BitMap *,RLEMap*,Point,Point);
+
+  BitMap();
+
+  ~BitMap();
+
+  int & imageLength()        // number of rows, returned by reference
+    {return fImageLength;};
+
+  int & imageWidth()         // number of pixel columns, returned by reference
+    {return fImageWidth;};
+
+  int readBit(Point p);      // return type spelled out; it used to be implicit int
+
+  MapStatus & status()
+    {return fStatus; };
+
+  uchar * row(int i); // returns a pointer to row i
+
+
+  // I/O operations. readMap and writeMap are from/to 2 level TIFF files
+
+  MapStatus readMap(char * filename);
+  MapStatus writeMap(char * filename); // not done
+
+  // Write out BitMap format for TCL/TK display
+  MapStatus writeTclMap(char * filename, Point ul, Point lr, int scaledown);
+  // Detect skew Angle
+  Angle skewAngle();
+
+  //Rotate the map designated angle.
+  MapStatus rotateMap(Angle angl);
+
+
+  // Return the ratio of black pixels to all pixels in the box,
+  // scaled to 255 0 = all white 255=all black
+  short int grayScale(Point ul, Point lr);
+
+  // number of black pixels in bounding box
+  const int pixelsInRegion( Point ul, Point lr);   // in-class names take no BitMap::
+  int minThickness(Point ul, Point lr);
+private:
+
+  // fMapData holds one uchar array per row; each bit is one pixel.
+  int fImageWidth;
+  int fImageLength;
+  MapStatus fStatus;
+  uchar ** fMapData;
+
+} ;
+
+
+
+#endif
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Boolean.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,25 @@
+// Copywrite David Wolfe, Rob Meyers, Doug Young, Edouard Serban-Schreiber
+// DO NOT DISTRIBUTE WITHOUT PERMISSION OF AN AUTHOR
+// See README
+
+#ifndef BOOLEAN_H
+#define BOOLEAN_H
+
+// Boolean and Direction are both plain int typedefs.
+#include "assert.h"
+typedef int Boolean;
+typedef Boolean Direction;
+
+#define TRUE 1
+#define FALSE 0
+#define true 1
+#define false 0
+// NOTE(review): the two lower-case macros above replace the C++ keywords
+// true and false with int constants in every file that includes this header.
+#define HORIZONTAL true
+#define VERTICAL false
+#define FIRSTPLAY true
+
+// NOTE: Boolean here is int, not the C++ bool type.
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Component.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,607 @@
+
+#include "system.h"
+#include "Component.h"
+#include "BitMap.h"
+#include <assert.h>
+#include "list.h"
+#include "tcl_interface.h"
+
+/*** Component.cc
+ Member functions for Components
+ Component functions defined in Component.h
+ rev 12/9/95 KM
+***/
+
+extern Page * global_page;
+extern Page * active_page;
+
+
+// Default-constructs the List base; Components adds no state of its own.
+Components::Components()
+  : List()
+{}
+
+
+Components::~Components()
+// Deletes the Component behind every list element, stopping early at the
+// first element whose item is NULL, then removes elements until IsEmpty().
+{
+  ListElement * elt = first;
+  while (elt != NULL && elt->item != NULL) {
+    delete (Component *) (elt->item);
+    elt = elt->next;
+  }
+  while (!IsEmpty()) Remove();
+}
+
+Component * Components::compAt(Point p)
+/*--------------------------------------------------------------
+Primary Purpose: Return smallest component that contains this point
+Arguments: Point of request
+Return Value: Component pointer or NULL if there is no component here
+Effects: none on the list; it is only read.  A component contains p when
+  p > ul() and p < lr() under Point's comparison operators.  The scan
+  stops at the first list element whose item is NULL, and an empty list
+  simply yields NULL.
+Rev 4/22/96
+---------------------------------------------------------------*/
+{
+  Component * returnComp = NULL;
+  double size = 0;              // area of returnComp once one is found
+
+  for (ListElement *ptr = first; ptr != NULL && ptr->item!=NULL;
+       ptr = ptr->next)
+    {
+      Component * item = (Component *) ptr->item;
+      if ( p > item->ul() && p < item->lr())
+        {
+          double area = item->area();
+          // Keep the candidate only if it is the first hit or strictly
+          // smaller than the best so far.  Every hit used to replace the
+          // result, so the last match won whatever its area.
+          if (returnComp == NULL || area < size)
+            {
+              size = area;
+              returnComp = item;
+            }
+        }
+    }
+  return returnComp;
+}
+
+
+
+Component * Component::copy()
+// Returns a newly allocated copy of this component, id string included.
+{
+  Component * newcomp = new Component(ful, flr);
+  newcomp->numBits() = fnumBits;
+  if (fasciiId != NULL)   // the id may still be unset; +1 leaves room for '\0'
+    newcomp->fasciiId = strcpy(new char[strlen(fasciiId) + 1], fasciiId);
+  newcomp->fconfid = confid();
+  newcomp->charGroup = charGroup;
+  newcomp->ffontId = fontId();
+  for (int i = 0; i < numProperties; i++) newcomp->property(i) = fproperty[i];
+  return newcomp;
+}
+
+
+
+int Component::AddToComponent(ListElement* intrvl, RLEMap* rlemap,
+ int horizMerge)
+{ // Grows this component's bounding box over the intervals connected to intrvl.
+ assert(intrvl != NULL);
+ List* list = new List(); //make a new queue
+ ListElement* current; // interval being examined on a neighbouring row
+ ListElement* nextelt; // successor of current, saved before current is re-linked
+ int counter = 0; // intervals consumed so far; this is the return value
+ int currentRow; // row of the interval most recently taken off the queue
+// Detach the starting interval from its row list in rlemap before queueing it.
+ if (intrvl->previous != NULL)
+ intrvl->previous->next = intrvl->next;
+ else rlemap->fMapData[((RLEPair *) intrvl->item)->row]->first = intrvl->next;
+ if (intrvl->next != NULL)
+ intrvl->next->previous = intrvl->previous;
+ list->first = intrvl; //put starting interval on queue
+ list->last = intrvl;
+ list->length = 1;
+ intrvl->next = NULL;
+ intrvl->previous = NULL;
+ currentRow = 0;
+ while ((intrvl = list->first) != NULL &&
+ currentRow < rlemap->imageLength()-1) //Take an interval off queue
+ {
+ currentRow = ((RLEPair *) intrvl->item)->row;
+// NOTE(review): rows currentRow-1 and currentRow+1 are indexed below with no bounds check.
+ for (int i=-1; i < 2; i+=2) {
+ current = rlemap->fMapData[currentRow+i]->first;
+ while ((current != NULL)
+ && (((RLEPair *) current->item)->start <=
+ ((RLEPair *) intrvl->item)->end+horizMerge)) {
+// Only intervals starting at or before intrvl's end + horizMerge are visited.
+// printf("Looking at an interval on row %d that goes from %d to %d\n",
+// currentRow, ((RLEPair *) intrvl->item)->start,
+// ((RLEPair *) intrvl->item)->end);
+// Connected: current ends at or after intrvl's start - 1 and starts in reach.
+ if ((((RLEPair *) current->item)->end
+ >= ((RLEPair *) intrvl->item)->start-1)
+ && (((RLEPair *) current->item)->start <=
+ ((RLEPair *) intrvl->item)->end+horizMerge)) {
+// printf("Adding connection for interval on row %d that goes from %d to %d\n", currentRow+i,
+// ((RLEPair *) current->item)->start,
+// ((RLEPair *) current->item)->end);
+// Move current from its row list in rlemap to the tail of the queue.
+ if (current->previous != NULL)
+ current->previous->next = current->next; //take off RLEMap
+ else
+ rlemap->fMapData[currentRow+i]->first = current->next;
+ if (current->next != NULL)
+ current->next->previous = current->previous;
+ nextelt = current->next;
+ list->last->next = current; //add to queue
+ current->previous = list->last;
+ list->last = current;
+ current->next = NULL;
+ current = nextelt;
+ list->length++;
+ } else
+ current = current->next;
+ }
+ }
+// Stretch the bounding box over this interval; -1 marks a coordinate not yet set.
+ if ((((RLEPair *) intrvl->item)->start < ful.x()) || (ful.x()==-1)) {
+ ful.x() = ((RLEPair *) intrvl->item)->start;
+// printf("Changed ful.x to %d\n", ful.x());
+ }
+ if ((((RLEPair *) intrvl->item)->end > flr.x()) || (flr.x()==-1)) {
+ flr.x() = ((RLEPair *) intrvl->item)->end;
+// printf("Changed flr.x to %d\n", flr.x());
+ }
+ if ((((RLEPair *) intrvl->item)->row < ful.y()) || (ful.y()==-1)) {
+ ful.y() = ((RLEPair *) intrvl->item)->row;
+// printf("Changed ful.y to %d\n", ful.y());
+ }
+ if ((((RLEPair *) intrvl->item)->row > flr.y()) || (flr.y()==-1)) {
+ flr.y() = ((RLEPair *) intrvl->item)->row;
+// printf("Changed flr.y to %d\n", flr.y());
+ }
+ list->first = intrvl->next; // pop the interval just processed off the queue
+ if (intrvl->next != NULL)
+ intrvl->next->previous = NULL;
+ delete ((RLEPair *) (intrvl->item));
+ delete intrvl; //so the letter O won't go forever
+ counter++;
+ list->length--;
+ }
+// NOTE(review): intervals still queued when the row test ends the loop are not counted.
+ delete list;
+ return counter;
+
+}
+
+void Component::setProperties(BitMap * map) // was BitMap
+/*--------------------------------------------------------------
+Primary Purpose: Set the property vector for this component
+Arguments: The BitMap to which this component belongs
+Return Value: none
+Effects: The component is divided into a NumHorizDiv by NumVertDiv grid
+ (5 by 5 according to the original note). A gray
+ scale (0 - 255) for each section is determined. The gray scale
+ is 0 for all white, 255 for all black but normally will be somewhere
+ between the two. The gray scales are stored row by row from property 0.
+ Property 25 is the grayscale across the top.
+ Property 26 is the grayscale across the bottom.
+ Property 27 is the width/height ratio again scaled to (0-255)
+ Actually the formula for property 27 is
+ (width/height)/2 * 255 if height >= width
+ (1 - (height/width)/2) * 255 if width > height
+ This way near 0 is very tall and thin
+ near 128 height near width
+ near 255 very wide
+ Property 28 is Indicator of a vertically disjoint character
+ like i and j: 255 if some row except the last has no pixels, else 0.
+ Property 29 is set to 0.
+ Also the total number of black pixels is set in fnumBits.
+Constraints: The data fields ful and flr must already be set
+ before calling this function. These fields specify a bounding
+ box for the character within the BitMap.
+Rev: 12/9 KM
+---------------------------------------------------------------*/
+{
+ if (ful > flr)
+ printf("Problem\n");
+ assert (ful <= flr);
+ short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+ short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+ float height, width;
+ int propNum;
+ float darkest = 0; // not read anywhere in this function
+ float lightest; // assigned below but never read
+ int darkrow = 0; // not read anywhere in this function
+ int lightrow = 0; // not read anywhere in this function
+
+ Point sectionLr, sectionUl;
+ // Set Number of bits
+ fnumBits = map->pixelsInRegion(ful, flr);
+ // One gray scale per grid cell; a cell whose corners are out of order is skipped.
+ setSectionFlags(hflag, vflag);
+ for (int r = 0; r < NumVertDiv; r++)
+ for (int c = 0; c < NumHorizDiv; c++)
+ {
+ propNum = (r * NumHorizDiv) + c;
+ sectionUl = Point(hflag[c], vflag[r]);
+ sectionLr = Point(hflag[c+1]-1, vflag[r+1]-1);
+ if (sectionUl <= sectionLr)
+ fproperty[propNum] = map->grayScale(sectionUl, sectionLr);
+ assert(fproperty[propNum] >= 0 && fproperty[propNum] < 256);
+ }
+ // NOTE(review): the fixed indices 25..29 below assume the grid fills properties 0..24.
+ // set the height/width ratio
+ // 0 is very thin, about 127 is square, 255 is very wide.
+ width = flr.x() - ful.x() + 1;
+ height = flr.y() - ful.y() + 1;
+
+ // Grayscale across the top - Indicator of top bar
+ sectionUl = Point(ful.x(), ful.y());
+ sectionLr = Point(flr.x(), ful.y() + (int)(height/(NumVertDiv*2)));
+ fproperty[25] = map->grayScale(sectionUl, sectionLr);
+
+ // Grayscale across bottom - Indicator of a foot for l opposed to 1
+ sectionUl = Point(ful.x(), flr.y() - (int)(height/(NumVertDiv*2)));
+ sectionLr = Point(flr.x(), flr.y());
+ fproperty[26] = map->grayScale(sectionUl, sectionLr);
+
+ float hdivw = (float)height/width;
+ float wdivh = (float) width/height;
+ if (width > height)
+ fproperty[27]= (short int) ((1- hdivw/2)*255);
+ else
+ fproperty[27] = (short int)((wdivh/2)* 255);
+
+ // is this a disjoint character like i or j 255 = yes 0 = no
+ fproperty[28]=0;
+ lightest = width;
+ for(int row = ful.y(); row < flr.y(); row++) // the last row, flr.y(), is not examined
+ {
+ int pixelsThisRow = pixelsBetween(map->row(row), ful.x(), flr.x());
+ if(!(pixelsThisRow))
+ fproperty[28]=255;
+ }
+
+ fproperty[29]= 0;
+ for(int p = 0; p < numProperties; p++)
+ assert(fproperty[p] >= 0 && fproperty[p] < 256);
+
+
+}
+
+void Component::setSectionFlags(short int hflag[], short int vflag[])
+/*--------------------------------------------------------------
+Primary Purpose: Breaks this component into a grid NumHorizDiv X NumVertDiv
+ for determining grayscale property vectors.
+Arguments: hflag[] is an empty array to be filled by this procedure with
+ the starting columns of each horizontal subdivision. vflag[] will
+ be filled with the vertical subdivisions.
+Effects: fills hflag[] with the starting column for each subdivision.
+ The last element of the array is actually the pixel immediately
+ following the last subdivision. Pixels left over after the even
+ split are handed out one each to the leading subdivisions.
+ vflag[] is filled the same way for the vertical subdivisions.
+ Example ful = (0,25) flr = (52,46) NumHorizDiv = NumVertDiv = 5
+ hflag[6] = { 0,11,22,33,43,53 }
+ vflag[6] = {25,30,35,39,43,47 }
+Constraints: ful and flr must be set to mark the bounding box before
+ calling this procedure.
+Rev: 10/27 KM
+---------------------------------------------------------------*/
+{
+ int ulx = ful.x(); int uly = ful.y();
+ int lrx = flr.x(); int lry = flr.y();
+
+ int width = lrx - ulx+1;
+ int height = lry - uly+1;
+ // size of each subdivision before the remainder is handed out
+ int horizDiv = width/NumHorizDiv;
+ int vertDiv = height/NumVertDiv;
+ // pixels left over after the even split
+ int horizExtra = width - horizDiv*NumHorizDiv;
+ int vertExtra = height - vertDiv*NumVertDiv;
+
+ int i, add;
+ for (i = 0; i < NumHorizDiv; i++)
+ {
+ if(horizExtra - i > 0) add = i; else add = horizExtra; // add = min(i, horizExtra)
+ hflag[i] = ulx + (i*horizDiv)+ add;
+ }
+ hflag[i] = lrx + 1; // Closes off last division
+
+ int j;
+ for(j = 0; j < NumVertDiv; j ++)
+ {
+ if(vertExtra - j > 0) add = j; else add = vertExtra; // add = min(j, vertExtra)
+ vflag[j] = uly + (j*vertDiv)+ add;
+ }
+ vflag[j] = lry + 1; // closes off the last vertical division
+
+
+
+
+}
+
+
+Distance Component::distance(Component * comp)
+/*--------------------------------------------------------------
+Primary Purpose: Determines heuristic distance between two components
+Arguments: Another component to compare
+Return Value: value which represents the distance between two
+   components.  Distance = sum over every property index i of
+   weight(i) * square (this->fproperty[i] - comp->fproperty[i])
+   weight(27) is 50, weight(28) is 3, and the weight is 1 for all
+   other properties
+Constraints: setProperties must have been run on both components
+
+Rev: 11/1 KM
+---------------------------------------------------------------*/
+{
+  Property * mine = fproperty;
+  Property * theirs = comp->properties();
+  Distance total = 0;
+
+  for (int i = 0; i < numProperties; i++)
+    {
+      int weight;
+      switch (i)
+        {
+        case 27:  weight = 50; break;
+        case 28:  weight = 3;  break;
+        default:  weight = 1;  break;
+        }
+
+      // weighted squared difference of the two values, in int arithmetic
+      int delta = (mine[i] - theirs[i]);
+      total += weight*delta*delta;
+    }
+
+  return total;
+}
+
+
+
+
+
+
+
+
+
+void printVector(short int vector[], int size)
+// Prints the first "size" entries on one line, each followed by a space.
+{
+  int idx = 0;
+  while (idx < size) cout << vector[idx++] << " ";
+  cout << endl;
+}
+
+
+void testProperties(Component* c, BitMap * map)
+// Prints c's section dividers, its property vector and its distance to itself.
+{
+  short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+  short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+
+  cout << "First test subDivisions " << endl;
+  c->setSectionFlags(hflag, vflag);
+  cout << "Horizontal flags" <<endl;
+  printVector(hflag, NumHorizDiv + 1);
+  cout << "Vertical flags" <<endl;
+  printVector(vflag, NumVertDiv + 1);   // vflag holds NumVertDiv + 1 entries
+
+  cout << "Now lets look at the properties " << endl;
+  // setSectionFlags will actually get called again within setProperties
+  c->setProperties(map);
+  printVector(c->properties(), NumHorizDiv*NumVertDiv + 1);
+  cout << endl << " The distance of this component from itself: " << " ";
+  cout << c->distance(c) << endl;
+}
+
+// Draws this component's bounding box in blue.
+void Component::display_bounding_box()
+{ display_bounding_box("blue"); }
+
+// Draws the box in "color" on the main work space, but only while the
+// global page is the active page.
+void Component::display_bounding_box(char * color)
+{
+  if (global_page != active_page) return;
+  display_bounding_box(color, SCALE_FACTOR, ".main_window.display.work_space");
+}
+
+// Scales the four box coordinates by scaleFactor and, when the user
+// interface is enabled, asks "window" to create the outline rectangle.
+void Component::display_bounding_box(char * color, double scaleFactor,
+                                     char * window)
+{
+  int left = ul().x();
+  int top = ul().y();
+  int right = lr().x();
+  int bottom = lr().y();
+  scale(left,scaleFactor); scale(top,scaleFactor);
+  scale(right,scaleFactor); scale(bottom,scaleFactor);
+
+  if(ENABLE_USER_INTERFACE)
+    docommand("%s create rectangle %d %d %d %d -outline %s -tags IMAGE_TAG ", window, left, top, right, bottom, color);
+}
+
+
+Distance Component::recognize(Component * learnedchars)
+// This is out of date. Current recognize is below
+// Compares this component with each of the 256 entries of learnedchars
+// whose confidence is non-zero, stores the index of the closest one as a
+// one-character ascii id and returns that closest distance.
+{
+  Distance d, nextd;
+  char id = '\0';   // stays empty when no entry has been learned
+
+  d = (256*256)*numProperties; // this is the biggest distance
+  for (int i = 0; i < 256; i++)
+    {
+      if(learnedchars[i].confid() != 0)
+        {
+          nextd = distance(&learnedchars[i]);
+          if (nextd < d)
+            {
+              d = nextd;
+              id = (char) i;
+            }
+        }
+    }
+
+  // fasciiId outlives this call and is released by the destructor, so it
+  // needs heap storage; the address of the local "id" would dangle.
+  // NOTE(review): any previous fasciiId is not freed here -- confirm ownership.
+  fasciiId = new char[2];
+  fasciiId[0] = id;
+  fasciiId[1] = '\0';
+  return d;
+}
+
+
+Distance Component::recognize(Components * learnedgroups, bool allGroups)
+// Finds the learned component closest to this one, starting with this
+// component's own char group and moving on to the following groups while
+// the confidence stays below ConfidenceThreshold (or always, if allGroups).
+// Copies the winner's ascii id and font id into this component, sets
+// fconfid from the best distance, and returns that distance.
+{
+  Distance d, worstDistance, nextd;
+  char * id = NULL;             // id of the best match; NULL until one is found
+  short int fontid = ffontId;   // keeps the current font when nothing matches
+  float tempd;
+  worstDistance = 150000;
+
+  d = (65536)*numProperties; // this is the biggest distance
+
+  fconfid = 0;
+
+  for(int g = 0; g < NumCharGroups &&
+        ((fconfid < ConfidenceThreshold) || allGroups); g++)
+    {
+      int offset = (charGroup+g) % NumCharGroups;
+      // if (offset == 4 && charGroup != 4) continue;
+      for (ListElement* ptr = learnedgroups[offset].first; ptr != NULL;
+           ptr = ptr->next)
+        {
+          Component * item = (Component *) ptr->item;
+          nextd = distance(item);
+          if (nextd < d)
+            {
+              d = nextd;
+              id = item->fasciiId;
+              fontid = item->ffontId;
+            }
+        }
+
+      // the distance used for the confidence is capped at worstDistance - 1
+      if (d >= worstDistance)
+        tempd = worstDistance - 1;
+      else tempd = d;
+
+      fconfid = (unsigned short int)
+        (255 - (tempd/worstDistance)*256);
+      if(charGroup == 4) break; // dont check other groups for floaters
+    }
+
+  // With no learned components (or none carrying an id) there is nothing
+  // to copy; "id" used to be read uninitialised in that case.
+  // NOTE(review): a previous fasciiId is not freed here -- confirm ownership.
+  if (id != NULL)
+    {
+      fasciiId = new char[strlen(id)+1];
+      strcpy(fasciiId,id);
+    }
+  ffontId = fontid;
+  return d;
+}
+
+
+
+int Component::vertShrink(BitMap * bitmap)
+// Pulls the top edge down to the first row (above the bottom row) that
+// holds a pixel between ful.x() and flr.x(), then pulls the bottom edge up
+// to the last such row below the new top.  Returns 1 if either scan found
+// a row, otherwise 0.
+{
+  int found = 0;
+
+  // top edge
+  int top = ful.y();
+  while (top < flr.y() && !pixelsBetween(bitmap->row(top), ful.x(), flr.x()))
+    top++;
+  if (top < flr.y()) { ful.y() = top; found = 1; }
+
+  int bottom = flr.y();
+  while (bottom > ful.y() && !pixelsBetween(bitmap->row(bottom), ful.x(), flr.x()))
+    bottom--;
+  if (bottom > ful.y()) { flr.y() = bottom; found = 1; }
+
+  return found;
+}
+
+
+int Component::horizontalShrink(BitMap * bitmap)
+// Moves the left edge right to the first column (before the right edge)
+// that contains a pixel between ful.y() and flr.y(), then moves the right
+// edge left to the last such column after the new left edge.
+// Columns are tested with pixelsInRegion over the box's full height.
+// Returns 1 only if an edge actually moved, otherwise 0.
+{
+  int moved = 0;
+
+  // left edge
+  int left = ful.x();
+  while (left < flr.x()
+         && !bitmap->pixelsInRegion(Point(left, ful.y()), Point(left, flr.y())))
+    left++;
+  // a hit in the starting column leaves the edge (and the result) alone
+  if (left < flr.x() && left != ful.x())
+    { ful.x() = left; moved = 1; }
+
+  // now start from the other side
+  int right = flr.x();
+  while (right > ful.x()
+         && !bitmap->pixelsInRegion(Point(right, ful.y()), Point(right, flr.y())))
+    right--;
+  if (right > ful.x() && right != flr.x())
+    { flr.x() = right; moved = 1; }
+
+  return moved;
+}
+
+
+
+
+
+
+void Component::join(Component * comp)
+// Grows this component's box to the union of its box and comp's box.
+// Both old boxes are redrawn in white first; the merged box is drawn blue.
+{
+  display_bounding_box("white");
+  comp->display_bounding_box("white");
+
+  if (comp->ul().x() <= ful.x()) ful.x() = comp->ul().x();
+  if (comp->ul().y() <= ful.y()) ful.y() = comp->ul().y();
+  if (comp->lr().x() >= flr.x()) flr.x() = comp->lr().x();
+  if (comp->lr().y() >= flr.y()) flr.y() = comp->lr().y();
+
+  display_bounding_box("blue");
+}
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Component.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,167 @@
+#ifndef _COMPONENT_H
+#define _COMPONENT_H
+#include "stdlib.h"
+#include <iostream.h>
+#include "Point.h"
+#include "system.h"
+#include "list.h"
+#include "RLEMap.h"
+
+
+class BitMap;
+
+class Component;
+
+// A List of Component pointers.  It declares its own constructor and
+// destructor and one lookup, compAt.
+class Components : public List
+{
+public:
+  Components();
+  ~Components();
+
+  // Smallest component that contains p.
+  Component * compAt(Point p);
+};
+
+class Component{
+ public:
+  friend class Word;
+  Component(Point ul, Point lr)
+    :ful(ul), flr(lr), fnumBits(0),fasciiId(NULL), fconfid(0),
+     charGroup(0), ffontId(0)
+    { fproperty = new Property[numProperties];
+      for (int i = 0; i < numProperties; i++)
+        fproperty[i] = 0;};
+
+  Component()
+    :ful(-1), flr(-1), fnumBits(0),fasciiId(0),fconfid(0),
+     charGroup(0), ffontId(0)
+    { fproperty = new Property[numProperties];
+      for (int i = 0; i < numProperties; i++)
+        fproperty[i] = 0;};
+  ~Component(){
+    delete [] fproperty;   // allocated with new[]; delete[] of NULL is a no-op
+    delete [] fasciiId;    // id strings are allocated with new char[...]
+  }
+  // NOTE(review): no copy constructor or assignment is declared, so a by-value
+  // copy shares fproperty and fasciiId with the original; use copy() instead.
+  Component * copy(); // make a new copy of the component
+  inline Point & ul() {return ful;};
+  inline Point & lr() {return flr;};
+
+  inline double width() {return (double)(flr.x() - ful.x() + 1); };
+  inline double height() {return (double)(flr.y() - ful.y() + 1); };
+  inline double area() { return width()*height(); };
+  inline int & numBits(){return fnumBits;};
+  short int charGroup; // 0=acemno, 1=gpqy, 2=dfhikl, 3=j([}; recognize treats 4 as floaters
+  int vertShrink(BitMap * bitmap);
+  int horizontalShrink(BitMap * bitmap);
+
+  int AddToComponent(ListElement* intrvl, RLEMap* rlemap, int horizMerge);
+  /*--------------------------------------------------------------
+    Primary Purpose: Extend boundaries of component by connected intervals. Arguments: intrvl is interval to start from
+    Return Value: Number of intervals added.
+    Effects: Updates component's LR and UL. Deletes all added intervals.
+    ----------------------------------------------------------------*/
+
+  // Access entire property vector
+  inline Property * properties()
+    {return fproperty;}
+
+  // Set or read one value in property vector
+  inline Property & property(int p) // Possibly use overload []
+    {return fproperty[p];};
+
+  // Set or read ascii id; the id must already be set (non-NULL)
+  inline Character & asciiId(){ return fasciiId[0];};
+  // Length of the ascii id; 0 while no id has been set
+  inline int asciiLen(){return (fasciiId != NULL) ? (int) strlen(fasciiId) : 0;}
+  // Set or read font id;
+  inline short int & fontId() {return ffontId;};
+
+  inline Confidence & confid(){ return fconfid; };
+  // Determine heuristic distance between this and comp
+  Distance distance(Component * comp);
+  // Midpoint of the bounding box (integer division)
+  Point center() { return (Point((ful.x() + flr.x())/2,
+                                 (ful.y() +flr.y())/2)); };
+  // Use the BitMap map to set the property vector for
+  // this component
+
+
+  void setProperties(RLEMap * map);
+  void setProperties(BitMap * map);
+  void setSectionFlags(short int hflag[], short int vflag[]);
+
+
+  void display_bounding_box();
+  void display_bounding_box(char * color);
+  void display_bounding_box(char * color, double scaleFactor,
+                            char * window);
+  Distance recognize(Component * learnedchars);
+  // find best match in learned characters. Set ascii value
+  // and return distance.
+
+
+  Distance recognize(Components * learnedgroups, bool allGroups=FALSE);
+  // find best match from learned character groups. Set ascii value
+  // and return distance.
+
+  void join(Component * comp);
+  inline bool operator < (Component * comp)
+    {if((ul().y() < comp->ul().y())) return TRUE;
+     if((ul().y() > comp->ul().y())) return FALSE;
+     // otherwise if y values are equal
+     return ((ul().x() <= comp->ul().x()) ? TRUE : FALSE);
+    }
+  inline bool operator > (Component * comp)
+    {if((ul().y() > comp->ul().y())) return TRUE;
+     if((ul().y() < comp->ul().y())) return FALSE;
+     // otherwise if y values are equal
+     return((ul().x() > comp->ul().x()) ? TRUE : FALSE);
+
+    }
+  inline bool xoverlap(Component * comp)
+    {
+      if (ful.x() <= comp->ul().x() && comp->ul().x() <= flr.x())
+        return TRUE;
+      if (comp->ul().x() < ful.x() && ful.x() < comp->lr().x())
+        return TRUE;
+      return FALSE;
+    }
+
+  Property * fproperty;
+  char * fasciiId;
+ private:
+  Point ful;
+  Point flr;
+  int fnumBits;
+
+
+
+  short int ffontId;
+  Confidence fconfid;
+};
+
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/EqnMarker.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,36 @@
+#ifndef _EqnMarker_H
+#define _EqnMarker_H
+
+#include "list.h"
+
+// Four public integers: a start (line, column) and an end (line, column).
+class EqnMarker {
+public:
+  // Default marker: all four fields are zero.
+  EqnMarker() : startline(0), startcol(0), endline(0), endcol(0) {}
+  // Marker with explicit start line/column and end line/column.
+  EqnMarker(int sl, int sc, int el, int ec)
+    : startline(sl), startcol(sc), endline(el), endcol(ec) {}
+  ~EqnMarker() {}
+  int startline;
+  int startcol;
+  int endline;
+  int endcol;
+};
+
+
+// List subclass for equation markers; it adds no members of its own.
+class EqnMarkers : public List {
+public:
+  EqnMarkers() : List() {}
+  ~EqnMarkers() {}
+
+};
+
+
+
+#endif
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/LineMarker.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,28 @@
+#ifndef _LINEMARKER_H
+#define _LINEMARKER_H
+/** LineMarker.h
+ There is a LineMarker array member of Page, which records the
+ starting and ending row of each line of text.
+**/
+
+// Line Marker marks the starting and ending row of a line of text in a
+// Page.  Page is declared a friend; both fields are public as well.
+class LineMarker {
+public:
+  friend class Page;
+
+  int fstartrow;   // starting row of the text line
+  int fendrow;     // ending row of the text line
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Makefile Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,124 @@
+#Makefile for OCRchie
+
+CC = g++
+#CC = purify -cache-dir=/tmp/ocrchie/cache -collector=/usr/sww/lib/gcc-lib/hppa1.1-hp-hpux9.03/2.6.3/ld g++
+
+#CC = quantify -cache-dir=/tmp/ocrchie/cache -collector=/usr/sww/lib/gcc-lib/hppa1.1-hp-hpux9.03/2.6.3/ld g++
+# Only DEBUG reaches CCFLAGS; OPTIMIZE is defined but not used in it.
+DEBUG = -g
+OPTIMIZE = -O
+CCFLAGS = $(DEBUG)
+EXECUTABLE = ocrchie
+
+# TCL root directory
+# on .cs machines
+TCLBASE = /usr/sww/tcl-7.4
+
+# X11 root directory
+X11BASE = /usr/sww/X11
+
+# math library directory
+MATHLIB = /lib/pa1.1
+
+# Place where object files need to be stored
+ODIR = /tmp/ocrchie/
+# ODIR keeps its trailing "/" because object names are written $(ODIR)name.o
+# That's it. Now just do a gmake.
+# NOTE(review): -lM below is upper case; confirm that is the library name meant for the target system.
+INCLUDE = -I${TCLBASE}/include -I${X11BASE}/include
+LIBDIRS = -L${TCLBASE}/lib -L${X11BASE}/lib -L${MATHLIB}
+LIBS = ${LIBDIRS} -ltk -ltcl -lX11 -lM -ltiff
+HEADERS = system.h list.h Component.h \
+ BitMap.h RLEPair.h RLEMap.h LineMarker.h Page.h Point.h \
+ convertMap.h Component.h Word.h tcl_interface.h \
+ EqnMarker.h Zone.h
+# NOTE(review): Component.h is listed twice in HEADERS above.
+
+OFILES = $(ODIR)main.o $(ODIR)project.o $(ODIR)histogram.o \
+ $(ODIR)get_skew.o $(ODIR)tcl_interface.o $(ODIR)Zone.o \
+ $(ODIR)RLEMap.o $(ODIR)RLEPair.o $(ODIR)list.o \
+ $(ODIR)convertMap.o $(ODIR)Component.o $(ODIR)Page.o\
+ $(ODIR)BitMap.o $(ODIR)system.o $(ODIR)testocr.o \
+ $(ODIR)Word.o $(ODIR)learn.o $(ODIR)status_message.o \
+ $(ODIR)link.o
+
+#The Executable project
+ocrchie: $(OFILES)
+ $(CC) -g -o $(EXECUTABLE) ${INCLUDE} $(OFILES) $(LIBS)
+
+
+# The .o files are compiled into ODIR because of space considerations
+
+$(ODIR)link.o: link.cc link.h
+ $(CC) $(CCFLAGS) ${INCLUDE} -o $(ODIR)link.o -c link.cc
+$(ODIR)status_message.o: status_message.cc status_message.h
+ $(CC) $(CCFLAGS) -o $(ODIR)status_message.o -c status_message.cc
+
+$(ODIR)system.o: system.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)system.o -c system.cc
+
+$(ODIR)Page.o: $(HEADERS) Page.cc RLEMap.h RLEPair.h BitMap.h
+ $(CC) $(CCFLAGS) -o $(ODIR)Page.o -c Page.cc
+
+$(ODIR)RLEMap.o: $(HEADERS) system.cc\
+ RLEMap.cc RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEMap.o -c RLEMap.cc
+
+$(ODIR)BitMap.o: $(HEADERS) system.cc BitMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)BitMap.o -c BitMap.cc
+
+$(ODIR)RLEPair.o: $(HEADERS) RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEPair.o -c RLEPair.cc
+
+$(ODIR)Component.o: $(HEADERS) Component.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Component.o -c Component.cc
+
+
+$(ODIR)Zone.o: $(HEADERS) Zone.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Zone.o -c Zone.cc
+
+
+$(ODIR)list.o: list.h list.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)list.o -c list.cc
+
+$(ODIR)Word.o: Word.h Word.cc Component.h list.h list.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Word.o -c Word.cc
+
+$(ODIR)convertMap.o: $(HEADERS) convertMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)convertMap.o -c convertMap.cc
+
+$(ODIR)learn.o: system.h learn.h learn.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)learn.o -c learn.cc
+
+$(ODIR)main.o: main.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)main.o -c ${INCLUDE} main.cc
+
+$(ODIR)testocr.o: testocr.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)testocr.o -c testocr.cc
+
+$(ODIR)project.o: project.cc project.h bitmap.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)project.o -c project.cc
+
+
+$(ODIR)histogram.o: histogram.cc histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)histogram.o -c histogram.cc
+
+$(ODIR)get_skew.o: get_skew.cc get_skew.h project.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)get_skew.o -c get_skew.cc
+
+$(ODIR)tcl_interface.o: tcl_interface.cc tcl_interface.h
+ $(CC) $(CCFLAGS) -o $(ODIR)tcl_interface.o -c ${INCLUDE} tcl_interface.cc
+
+#$(ODIR)deskew.o: deskew.cc deskew.h
+# $(CC) $(CCFLAGS) -c deskew.cc
+
+#$(ODIR)rotate.o: rotate.cc
+# $(CC) $(CCFLAGS) -c rotate.cc
+
+
+
+clean:
+	rm -f core *.o *~ $(OFILES)
+#
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Page.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,1492 @@
+/** Page.cc contains the member functions for the primary OCR class Page */
+#include "system.h"
+#include "Page.h"
+#include "convertMap.h"
+#include "get_skew.h"
+#include "Component.h"
+#include "status_message.h"
+
+/*** Member functions of class Page. ***/
+
+int Page::get_height()
+// Height of the page: the image length reported by the run-length map.
+{
+  const int rows = fRLEMap->imageLength();
+  return rows;
+}
+
+int Page::get_width()
+// Width of the page: the image width reported by the run-length map.
+{
+  const int cols = fRLEMap->imageWidth();
+  return cols;
+}
+
+int Page::get_linenum(int col, int row)
+/*--------------------------------------------------------------
+Primary Purpose: Map a page coordinate to the text line that contains it.
+   Only the row is consulted; col is currently ignored.
+   Called from proc equation_mark in new_ui.tcl
+Return value: index of the first line whose [fstartrow, fendrow] range
+   contains row, or -1 if no line is here.
+Effects: when ENABLE_USER_INTERFACE is set and a line was found, the Tcl
+   variables curline, curline_startrow, curline_endrow, curline_startcol
+   and curline_endcol are set through docommand.
+Requires: setLines be run first
+Rev: 4/21/96
+---------------------------------------------------------------*/
+{
+  assert (flineinfo != NULL);
+
+  // Locate the first line marker that brackets the requested row.
+  int found = -1;
+  int idx = 0;
+  while (idx < fnumLines && found < 0)
+    {
+      if (row >= flineinfo[idx].fstartrow && row <= flineinfo[idx].fendrow)
+        found = idx;
+      else
+        idx++;
+    }
+
+  if (found < 0)
+    return -1;
+
+  if (ENABLE_USER_INTERFACE)
+    {
+      // Publish the selected line to the Tcl side.
+      docommand("set curline %d",found);
+      docommand("set curline_startrow %d",flineinfo[found].fstartrow);
+      docommand("set curline_endrow %d",flineinfo[found].fendrow);
+
+      // The column range is the full page width; this will change with zoning.
+      docommand("set curline_startcol %d",0);
+      docommand("set curline_endcol %d",get_width());
+    }
+
+  return found;
+}
+
+int Page::send_words_to_tcl()
+/*--------------------------------------------------------------
+Primary Purpose: Display words in tcl
+Effects: Issues one "addword" Tcl command per entry of the word list,
+   tagged UNKNOWN_CHAR, LOW_PRECISION, MISPELLED or OK depending on the
+   word's confidence and spelling flag, and reports an apparent word
+   accuracy through set_status when the user interface is enabled.
+Return Value: number of words sent.  (The function previously fell off
+   the end without returning a value.)
+Constraints: extractWords must be run first; fWordList is dereferenced.
+Rev - AR
+---------------------------------------------------------------*/
+{
+  int word_count = 0;
+  int suspect_count = 0;  // unknown + low precision + mispelled words
+
+  if(ENABLE_USER_INTERFACE) set_status("Displaying text");
+  for(ListElement* ptr = (words())->first; ptr != NULL; ptr = ptr->next)
+    {
+      word_count++;
+      set_text_display_status(word_count, fWordList->num_words);
+      Word* temp_word = (Word*)ptr->item;
+      // NOTE(review): ownership of the string returned by backslashify is
+      // not visible from here -- confirm whether it has to be released.
+      char* send_chars = backslashify(temp_word->characters);
+
+      // Classify the word; the first matching test wins.
+      const char* tag = "OK";
+      bool suspect = true;
+      if(temp_word->confid < VERY_LOW_CONFIDENCE)
+        tag = "UNKNOWN_CHAR";
+      else if(temp_word->confid < LOW_CONFIDENCE)
+        tag = "LOW_PRECISION";
+      else if((temp_word->mispelled) && SPELLCHECK)
+        tag = "MISPELLED";
+      else
+        suspect = false;
+      if(suspect)
+        suspect_count++;
+
+      docommand("addword \"%s\" %d %d %s", send_chars,
+                temp_word->ul.x(), temp_word->ul.y(), tag);
+      update();
+    }
+  if(ENABLE_USER_INTERFACE)
+    {
+      set_status("Done displaying text");
+      // Skip the accuracy figure for an empty word list: it would be 0/0.
+      if(word_count > 0)
+        set_status("Apparent word accuracy: %.3lf%%",
+                   (100 - (100 * ((double)suspect_count / (double)word_count))));
+    }
+  return word_count;
+}
+
+
+int Page::deskew(int deskew_method)
+/*--------------------------------------------------------------
+Primary Purpose: Deskew the page
+Arguments: deskew_method == RLE_DESKEW selects RLEMap rotation;
+   any other value selects BitMap rotation.
+Return Value: 1 if the page was rotated, 0 otherwise
+Effects: whichever map was rotated is converted into the other one, so
+   the bitmap and rlemap of the page stay in step.
+Constraints: RLEMap Rotation is not currently reliable and probably
+   should not be used
+Rev: AR
+---------------------------------------------------------------*/
+{
+  if(deskew_method == RLE_DESKEW)
+    {
+      // The RLE map rotates itself and reports whether it did anything.
+      if(!fRLEMap->deskew())
+        return 0;
+      convertMap(fRLEMap, fBitMap);
+      return 1;
+    }
+
+  // Bitmap path: rotate only when the measured skew reaches the minimum
+  // angle in either direction.
+  double angle = get_skew(fRLEMap);
+  bool tilted = (angle >= MINIMUM_SKEW_ANGLE) || (angle <= - MINIMUM_SKEW_ANGLE);
+  if(!tilted)
+    return 0;
+
+  fBitMap->rotateMap(angle);
+  convertMap(fBitMap, fRLEMap);
+  return 1;
+}
+
+Page::Page()
+/**Page::Page - constructor allocates bitmap, rlemap and equation list.
+   Every other field is put into a defined "nothing computed yet" state:
+   the destructor and get_linenum read flineinfo and fnumLines, and
+   extractWords reads favgSpacing, so none of them may be left
+   uninitialized. */
+{
+  fBitMap = new BitMap;
+  fRLEMap = new RLEMap;
+  fEqnList = new EqnMarkers;
+  flineinfo = NULL;        // filled in by setLines
+  fnumLines = 0;
+  fLineComponents = NULL;  // filled in by extractComponents
+  fWordList = NULL;        // filled in by extractWords
+  favgSpacing = 1;         // same fallback extractComponents uses
+}
+
+Page::~Page()
+/*--------------------------------------------------------------
+Primary Purpose: Destructor deallocates private fields that
+have been created.
+Notes: flineinfo and fLineComponents are allocated with new[] (in
+   setLines and extractComponents) and are therefore released with
+   delete[].  The per-line component lists are only visited when the
+   array holding them exists.  Deleting a null pointer is a no-op, so
+   the remaining fields need no explicit test.
+Rev:
+---------------------------------------------------------------*/
+{
+  if (fLineComponents != NULL)
+    {
+      for (int i = 0; i < fnumLines; i++)
+        delete fLineComponents[i];
+      delete [] fLineComponents;
+    }
+  delete [] flineinfo;
+  delete fBitMap;
+  delete fRLEMap;
+  delete fWordList;
+  delete fEqnList;
+}
+
+Angle Page::skewAngle()
+/*--------------------------------------------------------------
+Primary Purpose: Report the rotation angle detected for this page's
+   RLEMap.
+Arguments: none (operates on fRLEMap)
+Return Value: the value returned by get_skew
+Code is in get_skew.cc
+Rev: AR
+---------------------------------------------------------------*/
+{
+  Angle detected = get_skew(fRLEMap);
+  return detected;
+}
+
+
+MapStatus Page::readMap(char * filename)
+   // Loads the bitmap through BitMap::readMap, rebuilds the RLE map from
+   // it, and hands back the status of the read.  The conversion runs
+   // whatever the read status was.
+{
+  const MapStatus readResult = fBitMap->readMap(filename);
+  convertMap(fBitMap, fRLEMap);
+  return readResult;
+}
+
+
+
+MapStatus Page::setLines()
+/*--------------------------------------------------------------
+Primary Purpose: Set flineinfo array in Page class with the
+   starting and ending rows of each line of text.
+   Also sets fnumLines to the number of lines
+Arguments: none
+Return Value: A Mapstatus either VALID, EMPTY if there is no
+   data in the RLEMAP, or OTHERERROR if more lines were found than the
+   page can hold (fnumLines is not updated in that case)
+Effects: Allocates flineinfo and fills with starting and ending row
+   of each line.  The following global variables are used as parameters
+   in this function.  These are defined in system.cc
+   NoiseTolerance - Rows whose number of pixels is less than this value
+        will be considered empty (current val 6).
+   MinVertSeparation - The minimum number of rows separating lines of text.
+        Lines will be merged if actual Separation is less than this
+        value. (current val 3)
+   MinLineSize - The minimum number of rows in a line of text.
+        Any smaller lines are discarded (currentval 5)
+
+Constraints: Page::readMap() must be run first to fill fRLEMap
+Rev: 10/26 KM
+---------------------------------------------------------------*/
+{
+  int maxrow = fRLEMap->imageLength() - 1;       // maximum row number
+  int actualSeparation = MinVertSeparation + 1;  // must be bigger than min
+                                                 // for line 0
+
+  int linenum = 0;      // current line number
+  int prvlinenum = 0;   // last line that was accepted
+  int lineSize;         // # rows in current line
+
+  int maxLines = maxrow/MinLineSize;   // max # of lines of text
+
+  // A zero-length image gives maxrow == -1, so test <= 0 rather than == 0.
+  if(maxrow <= 0) return EMPTY;
+
+  // One spare slot is allocated: the loop always writes the candidate line
+  // into flineinfo[linenum] before checking linenum against maxLines, and
+  // maxLines is 0 whenever maxrow < MinLineSize.
+  // NOTE(review): an array left over from an earlier call is not released
+  // here -- confirm whether setLines can run more than once per Page.
+  flineinfo = new LineMarker[maxLines + 1];
+
+  for (int i = 0; i < maxrow;)
+    {
+      LineMarker & thisLine = flineinfo[linenum];
+      LineMarker & prevLine = flineinfo[prvlinenum];
+
+      // Skip rows that count as empty, then consume the rows of the line.
+      // NOTE(review): a row with exactly NoiseTolerance pixels stops both
+      // scans (< above, > below) -- confirm which side it should fall on.
+      while (i < maxrow && fRLEMap->row(i)->numPixels < NoiseTolerance)
+        i++;
+      thisLine.fstartrow = i++;
+      while (i < maxrow && fRLEMap->row(i)->numPixels > NoiseTolerance)
+        i++;
+
+      lineSize = i - thisLine.fstartrow + 1;
+
+      // If this line is less than MinVertSeparation away
+      // from the last line.  Join the two together.
+      if (linenum > 0)
+        {
+          actualSeparation = thisLine.fstartrow - prevLine.fendrow;
+        }
+      if (actualSeparation < MinVertSeparation)
+        {
+          // If too small of a separation, add into prev row
+          prevLine.fendrow = i;
+        }
+      else if (lineSize >= MinLineSize)
+        {
+          // Tall enough to be a line of its own: accept it.
+          thisLine.fendrow = i;
+          prvlinenum = linenum;
+          linenum++;
+        }
+      // Candidates shorter than MinLineSize are dropped; their slot is
+      // reused on the next pass.
+      if (linenum >= maxLines) return OTHERERROR;
+    }
+
+  fnumLines = linenum;   // Set number of lines in page class
+
+  if((ENABLE_USER_INTERFACE) && DISPLAY_LINE_BOUNDARIES)
+    {
+      display_line_boundaries();
+    }
+  if(ENABLE_USER_INTERFACE)
+    update();
+  return VALID;
+}
+
+void Page::display_line_boundaries()
+/*--------------------------------------------------------------
+Primary Purpose: Display line boundaries in TCL/TK.  Called from
+setLines if ENABLE_USER_INTERFACE and DISPLAY_LINE_BOUNDARIES are
+set to TRUE
+Effects: Draws a blue line between each pair of consecutive lines of
+   text.  With fnumLines lines there are fnumLines - 1 gaps; the loop
+   stops one short of the last line so that flineinfo[j + 1] always
+   refers to a line found by setLines.
+Rev: AR
+---------------------------------------------------------------*/
+{
+  int centerline, width;
+  for(int j = 0; j + 1 < fnumLines; j++)
+    {
+      // Middle and thickness of the gap between line j and line j + 1.
+      centerline = (flineinfo[j].fendrow + flineinfo[j + 1].fstartrow) / 2;
+      width = flineinfo[j + 1].fstartrow - flineinfo[j].fendrow;
+
+      scale(centerline);
+      scale(width);
+      /* having this pathname here is probably not such a good idea...*/
+
+      docommand(".main_window.display.work_space create line %d %d %d %d -width %d -fill blue -tags {project_ray IMAGE_TAG} -stipple @/usr/sww/share/tclX-7.3a/tkX/3.6a/demos/bitmaps/grey.25", 0, centerline, bmap()->imageWidth(), centerline, width);
+    }
+}
+
+
+int test_rlemap_lines(RLEMap* rmap)
+// Debugging aid: prints the pixel count of every row of rmap.
+// Returns the number of rows printed.  (The function previously fell off
+// the end without returning a value.)
+{
+  int length = rmap->imageLength();
+  for(int i = 0; i < length; i++)
+    printf("On line %d, numpixels = %d\n", i, rmap->fMapData[i]->numPixels);
+  return length;
+}
+
+
+MapStatus Page::extractComponents(int horizMerge)
+/*--------------------------------------------------------------
+   Component extraction routines.
+*
+* Given the top and bottom line of a row we want to generate a list of
+* components.  The general method is to find the closest dot, trace its
+* connected dots, then project upwards and downwards and add anything we
+* find there to the component.  We will erase the component from the RLEMap
+* as it is added to the component list.  By projecting up and down
+* from the piece we first find we should be able
+* to completely encompass characters like :;i?|!  The only problems are
+* italic or ligatured characters where we may pick up two or more
+* characters at a time (which would be bad) or characters fragmented
+* with a vertical gap.
+
+Primary Purpose: Main extraction routine.
+Arguments: horizMerge is forwarded to Component::AddToComponent and used
+   when testing for blank space to the right of a new component.
+Return Value: VALID
+Effects: Makes new components and puts them in a list.  Deletes components
+   from RLE map.  Fills in component boundaries and calls
+   Component::setProperties to set the property vector.
+   Assigns each component a charGroup from its position relative to the
+   top of the line and the detected baseline, and sets favgSpacing.
+   Lastly convertMap is run to rebuild the RLEMap
+Constraints: Page::setLines() must be run first
+Rev: 4/28/96
+---------------------------------------------------------------*/
+{
+  int currentCol, startRow, endRow, rowHeight;
+  ListElement* intrvl;
+  ListElement* tempintrvl;
+  Component* comp;
+  int totalSpacing = 0;        // total blank horizontal pixels between components
+  int baselines[MaxVertSize];  // array for finding the baseline
+  last_status = 0.0;
+  int compCounter = 0;
+  int i;
+  int j;
+  int upwardBound;             // Projection distances different for equations
+  int downwardBound;           // and non-equations
+
+  bool inEqn;                  // Variables for finding if the center of a comp
+  int centerx;                 // is in an equation.
+  int centery;
+
+  printf("Extracting Components\n");
+  fLineComponents = new Components*[fnumLines];
+  for (i = 0; i < fnumLines; i++) {
+    if(ENABLE_USER_INTERFACE)
+      set_component_status(i, fnumLines);
+    currentCol = 0;
+    startRow = flineinfo[i].fstartrow;
+    endRow = flineinfo[i].fendrow;
+    rowHeight = endRow - startRow;
+    assert(rowHeight > 0);
+
+    for (j = 0; j < MaxVertSize; j++)
+      baselines[j] = 0;
+    fLineComponents[i] = new Components();
+
+    while (currentCol <= fRLEMap->imageWidth()) {  // until we reach the end of the page
+
+      // Build component starting with closest black dot
+      intrvl = fRLEMap->FindNearHorizDot(currentCol, startRow, endRow);
+      if (intrvl == NULL)
+        break;                                     // reached end of line
+
+      comp = new Component();
+      // The call is kept outside assert() so that it still happens when
+      // assertions are compiled out.
+      bool added = comp->AddToComponent(intrvl, fRLEMap, horizMerge);
+      assert(added);
+      (void) added;
+
+      // Defaults for regular text; refined inside the upward loop.
+      upwardBound = startRow;
+      downwardBound = endRow;
+
+      // Now we want to extend upwards
+      // First check if there is a blank space to the right
+      tempintrvl =
+        fRLEMap->FindNearHorizDot(comp->lr().x(), startRow, endRow);
+      bool gapToRight = (tempintrvl != NULL &&
+                         ((RLEPair*) tempintrvl->item)->start >
+                         comp->lr().x()+horizMerge+1);
+
+      while (comp->ul().y() < endRow) {
+        // find the center of the component to check if we are in an equation
+        centerx = (comp->ul().x() + comp->lr().x())/2;
+        centery = (comp->ul().y() + comp->lr().y())/2;
+        inEqn = inEquation(centerx, centery);
+        // Determine projection distance.  Only project for non Equations.
+        if(inEqn)
+          {
+            upwardBound = comp->ul().y()+1;
+            downwardBound = comp->lr().y() - 1;
+          }
+        else  // regular text
+          {
+            upwardBound = startRow;
+            downwardBound = endRow;
+          }
+
+        // With blank space to the right the search starts from the bottom
+        // of the component, otherwise from its top.
+        // NOTE(review): confirm the bottom start row is intended.
+        int fromRow = gapToRight ? comp->lr().y() : comp->ul().y();
+        intrvl = fRLEMap->FindNearVertDot(comp->ul().x(), comp->lr().x(),
+                                          fromRow, upwardBound);
+        if (intrvl == NULL) break;
+        // A stray ';' used to end this test early in the no-gap case, so
+        // the loop always stopped after a single pass.
+        if (!comp->AddToComponent(intrvl, fRLEMap, horizMerge))
+          break;
+      }
+
+      // Now we want to extend downwards
+      while (comp->lr().y() > startRow) {
+        intrvl = fRLEMap->FindNearVertDot(comp->ul().x(), comp->lr().x(),
+                                          comp->lr().y(), downwardBound);
+        if (intrvl == NULL) break;
+        if (!comp->AddToComponent(intrvl, fRLEMap, horizMerge))
+          break;
+      }
+
+      // Now we toss out the noise.  A component with a negative corner is
+      // reported and then discarded as noise (size stays 0).
+      int size = 0;
+      if (comp->ul() < Point(0,0))
+        printf("Here's a problem. %d, %d\n", comp->ul().x(), comp->ul().y());
+      else
+        size = fBitMap->pixelsInRegion(comp->ul(), comp->lr());
+
+      // Previous component on this line, if there is one yet.
+      Component * prev = NULL;
+      if (fLineComponents[i]->last != NULL)
+        prev = (Component *)(fLineComponents[i]->last->item);
+
+      if (size < MinComponentSize) {
+        delete comp;
+        comp = NULL;
+      }
+      else if (prev != NULL &&
+               abs(comp->ul().x() - prev->ul().x()) <= 1 &&
+               abs(comp->lr().x() - prev->lr().x()) <= 1)
+        {
+          // Check and see if this and the previous component have the
+          // same x boundaries, if so merge the two.  Good for = and :
+          prev->join(comp);
+          prev->setProperties(fBitMap);
+          delete comp;
+          comp = NULL;
+        }
+      else
+        {
+          compCounter++;
+          // display a rectangle around the component
+          if(ENABLE_USER_INTERFACE)
+            {
+              if(DISPLAY_BOUNDING_BOXES)
+                comp->display_bounding_box();
+            }
+
+          // JMH - make an array of frequency of the y coord of bottom of comp
+          int vertOffset = endRow - comp->lr().y();
+          if(vertOffset < MaxVertSize && vertOffset >= 0)
+            baselines[vertOffset]++;
+
+          comp->setProperties(fBitMap);
+          if(prev != NULL)
+            {
+              int thisSpacing = comp->ul().x() - prev->lr().x();
+              // if a realy big space, make space the width of this comp
+              if (thisSpacing > 200)
+                thisSpacing = 2*(comp->lr().x() - comp->ul().x());
+              totalSpacing += thisSpacing;
+            }
+
+          fLineComponents[i]->Append(comp);   // add this component to list
+          currentCol = (comp->ul()).x() + 1;  // update position on page
+        }
+    }
+
+    // find most popular bottom of comp and call it the baseline; fall back
+    // to the bottom of the line when no bottom was recorded.
+    int counter = 0;
+    int baseline = endRow;
+    for (j = 0; j < MaxVertSize; j++) {
+      if (counter < baselines[j]) {
+        counter = baselines[j];
+        baseline = endRow - j;
+      }
+    }
+
+    // Now assign each character a group based on it's location
+    for (ListElement* ptr = fLineComponents[i]->first; ptr != NULL;
+         ptr = ptr->next) {
+      comp = (Component*) ptr->item;
+      comp->charGroup = 0;
+
+      // if top of char is higher than top - tolerance
+      if (comp->ul().y() < startRow + (rowHeight/TopLineTolerance)) {
+        comp->charGroup += 2;   // tall like a T
+      }
+
+      // if bottom of char is lower than base - tolerance
+      if (comp->lr().y() > baseline + (rowHeight/BaseLineTolerance)) {
+        comp->charGroup += 1;   // has a tail like a y
+      } else if (comp->lr().y() < (baseline - (2*rowHeight/BaseLineTolerance))) {
+        comp->charGroup = 4;    // floating like a '
+      }
+    }
+  }
+
+  last_status = 0.0;
+  if(ENABLE_USER_INTERFACE)
+    set_status("Done extracting characters");
+  if((compCounter - fnumLines) > 0)  /* don't want divide by zero */
+    {
+      favgSpacing = totalSpacing / (compCounter - fnumLines);
+    }
+  else
+    {
+      favgSpacing = 1;
+    }
+
+  // Extraction erased the components from the RLE map; rebuild it.
+  delete fRLEMap;
+  fRLEMap = new RLEMap;
+  convertMap(fBitMap, fRLEMap);
+  return VALID;
+}
+
+void Page::printComponents()
+/*--------------------------------------------------------------
+Primary Purpose: Debugging routine that prints little bitmaps
+of low confidence characters.  Only components identified as 'n' with
+a confidence below ConfidenceThreshold-20 are printed; the running
+component number counts every component on the page.
+---------------------------------------------------------------*/
+{
+  int seen = 0;
+  for (int lineIdx = 0; lineIdx < fnumLines; lineIdx++) {
+    ListElement* node = fLineComponents[lineIdx]->first;
+    while (node != NULL) {
+      seen++;
+      Component* comp = (Component *) node->item;
+      node = node->next;
+
+      // Skip everything that is not a poorly recognized 'n'.
+      if (!(comp->confid() < (ConfidenceThreshold-20) && comp->asciiId() == 'n'))
+        continue;
+
+      printf("Here's a poorly recognized component ul=%d,%d, lr=%d,%d.\n\n",
+             (comp->ul()).x(), (comp->ul()).y(),
+             (comp->lr()).x(), (comp->lr()).y());
+      printComponent(comp);
+      printf("properties: ");
+      printVector(comp->properties(), numProperties);
+      printf("I think it's a -> %c <- confidence: %d line: %d group: %d Comp#%d\n",
+             comp->asciiId(),
+             comp->confid(), lineIdx+1, comp->charGroup, seen);
+      printf("\n*******************************************************\n");
+    }
+  }
+}
+
+void Page::printComponent(Component* comp)
+// Prints the bitmap of a single component, one text row per pixel row.
+// At most 79 columns are printed, starting at the component's left edge.
+{
+  const int left = comp->ul().x();
+  const int top = comp->ul().y();
+  const int bottom = comp->lr().y();
+
+  // Clamp the right edge to 78 columns past the left one.
+  int right = left + 78;
+  if (comp->lr().x() < right)
+    right = comp->lr().x();
+
+  int r = top;
+  while (r <= bottom) {
+    int c = left;
+    while (c <= right) {
+      // Each byte of a bitmap row holds eight pixels.
+      bitprint(fBitMap->row(r)[c/8], c%8);
+      c++;
+    }
+    printf( "\n");
+    r++;
+  }
+}
+
+int spacing(ListElement * compa, ListElement * compb);
+// Forward declaration of the spacing helper used by extractWords.
+// NOTE(review): the definition further down in this file is the member
+// Page::spacing, so this free-function declaration does not appear to be
+// the one that gets defined -- confirm it is still needed.
+
+MapStatus Page::extractWords()
+/*--------------------------------------------------------------
+Primary Purpose: Extract words from each lines components
+Return Value: VALID
+Effects: sets the fWordsList to be a list of all of the words
+   in the document.  A word ends at a component that is followed by a gap
+   wider than 1.25 * favgSpacing, that lies inside an equation, or that
+   is the last one on its line.  After each line a separate "\n" word is
+   appended.  Every word is also echoed to stdout.
+   count is the number of components in the word and wordlength the sum
+   of their identification string lengths (1 for a component without
+   one).  Both are accumulated before the end-of-word test, so each
+   component is counted exactly once.
+Constraints: extractComponents must be run first
+Rev: KM 11/7/95
+---------------------------------------------------------------*/
+{
+  bool inWord;
+  ListElement * start = NULL;  // word Start
+  int count = 0;               // counts the components in the word
+  int wordlength = 0;          // counts the characters in the word
+  int word_count = 0;
+  int spacingThreshold = (int) (1.25 * ((float) (favgSpacing)));
+  fWordList = new Words;
+  last_status = 0.0;
+  for (int i = 0; i < fnumLines; i++)
+    {
+      if(ENABLE_USER_INTERFACE)
+        set_extract_status(i, fnumLines);
+      inWord = FALSE;
+      for(ListElement *ptr = line(i)->first; ptr != NULL; ptr = ptr->next) {
+        Component * item = (Component *) ptr->item;
+        if(!inWord)
+          {
+            // This component opens a new word.
+            start = ptr;
+            count = 0;
+            wordlength = 0;
+            inWord = TRUE;
+          }
+
+        // Account for this component.
+        count++;
+        if (item->fasciiId == NULL)
+          wordlength++;
+        else
+          wordlength += (int) strlen(item->fasciiId);
+
+        // The explicit end-of-line test keeps the last word of a line even
+        // if the spacing threshold reaches the sentinel spacing() returns
+        // for a missing neighbour.
+        if( ptr->next == NULL ||
+            spacing(ptr, ptr->next) > spacingThreshold ||
+            inEquation( ptr))
+          {
+            Word * newWord = new Word(start,count,wordlength);
+            (words())->Append(newWord);
+            printf("%s ",newWord->characters);
+            inWord = FALSE;
+            word_count++;
+          }
+      }
+      // Add in a separate word for new line
+      Word * newWord = new Word("\n",2);
+      (words())->Append(newWord);
+      printf("%s", newWord->characters);
+      word_count++;
+    }
+  last_status = 0.0;
+  fWordList->num_words = word_count;
+  if(ENABLE_USER_INTERFACE)
+    set_status("Done extracting words");
+  return VALID;
+}
+
+void Page::spellcheck()
+/*--------------------------------------------------------------
+Primary Purpose: Run spell checker on word list.
+Effects: sets the mispelled flag of every word whose characters the
+   mispelled() helper rejects; the flag of other words is left alone.
+Constraints: extractWords must be run first
+Rev: AR
+---------------------------------------------------------------*/
+{
+  int checked = 0;
+  ListElement* node = (words())->first;
+  while (node != NULL)
+    {
+      checked++;
+      if(ENABLE_USER_INTERFACE)
+        set_spellcheck_status(checked, fWordList->num_words);
+
+      Word* candidate = (Word*)node->item;
+      if(0)
+        printf("Spellchecking word %s\n", candidate->characters);
+      if(mispelled(candidate->characters))
+        candidate->mispelled = TRUE;
+
+      node = node->next;
+    }
+}
+
+int Page::spacing(ListElement * compa, ListElement * compb)
+// Horizontal spacing from the right edge of comp_a to the left edge of
+// comp_b.  Returns 1000 when compb is NULL (end of line) and 0 when the
+// two components overlap horizontally.
+{
+  if (compb == NULL) return 1000;  // end of line
+
+  Component * a = ((Component *) (compa)->item);
+  Component * b = ((Component *) (compb)->item);
+  int gap = (b->ul().x() - a->lr().x());
+  if (gap < 0)
+    return 0;
+  return gap;
+}
+
+
+void Page::printWords()
+// Prints out each component of each word: its bounds, its bitmap, its
+// property vector and its identification.  This can take a very long time
+{
+  ListElement * node = words()->first;
+  while (node != NULL)
+    {
+      Word * thisWord = (Word *) node->item;
+      printf("!!!!!! NEW WORD %s confid : %d !!!!!\n", thisWord->characters, thisWord->confid);
+
+      for(int k = 0; k < thisWord->charCount; k++)
+        {
+          Component * comp = thisWord->character[k];
+          if (comp != NULL)
+            {
+              printf("Printing a component ul=%d,%d, lr=%d,%d.\n\n",
+                     (comp->ul()).x(), (comp->ul()).y(),
+                     (comp->lr()).x(), (comp->lr()).y());
+
+              // Dump the full bounding box, one text row per pixel row.
+              const int left = comp->ul().x();
+              const int right = comp->lr().x();
+              const int bottom = comp->lr().y();
+              for (int r = comp->ul().y(); r <= bottom; r++){
+                for (int c = left; c <= right; c++)
+                  bitprint(fBitMap->row(r)[c/8], c%8);
+                printf( "\n");
+              }
+
+              printf("properties: ");
+              printVector(comp->properties(), numProperties);
+              printf("Identification: %c distance: %d confidence %d\n",
+                     comp->asciiId(),
+                     comp->distance(&LearnedChars[comp->asciiId()]),
+                     comp->confid());
+              printf("\n***********************************************\n");
+            }
+        }
+      node = node->next;
+    }
+}
+
+MapStatus Page::recognize()
+/*--------------------------------------------------------------
+Primary Purpose: Recognize entire page by running recognize(line) on
+   every line in turn.
+Return Value: always VALID; the per-line result is not inspected
+Effects: resets last_status before and after the pass and reports
+   progress through set_recognize_status when the UI is enabled
+Constraints: extractComponents must be run first.
+See recognize(line) below for more detailed info
+Rev: KM
+---------------------------------------------------------------*/
+{
+  printf("Recognizing document\n");
+  last_status = 0.0;
+
+  int lineIdx = 0;
+  while (lineIdx < fnumLines)
+    {
+      if(ENABLE_USER_INTERFACE)
+        set_recognize_status(lineIdx, fnumLines);
+      recognize(lineIdx);
+      lineIdx++;
+    }
+
+  last_status = 0.0;
+  return VALID;
+}
+
+
+MapStatus Page::recognize(int linenum)
+/*--------------------------------------------------------------
+Primary Purpose: Recognize a line of connected components
+Arguments: linenum is line number to recognize
+Return Value: always VALID
+Effects: runs Component::recognize against LearnedGroups on every
+   component of the line.  If the resulting confidence is below
+   ConfidenceThreshold and the component is wider than 2*MinWidth,
+   divideAndRecognize is called to try splitting the component.
+Constraints: extractComponents must be run first
+Rev: KM 11/9/95
+---------------------------------------------------------------*/
+{
+  ListElement *node = line(linenum)->first;
+  while (node != NULL)
+    {
+      Component * comp = (Component *) node->item;
+      Distance d = comp->recognize(LearnedGroups);
+
+      bool unsure = comp->confid() < ConfidenceThreshold;
+      if (unsure && comp->width() > 2*MinWidth)  // really wide
+        divideAndRecognize(line(linenum), node, d);
+
+      /*** Disabled: merge with the previous component when either is unsure.
+      if (comp->confid() < ConfidenceThreshold ||
+          (ptr != line(linenum)->first &&
+           ((Component *) ptr->previous->item)->confid() < ConfidenceThreshold))
+        uniteAndRecognize(line(linenum), ptr, d);
+      ***/
+
+      // Read next only now: divideAndRecognize may have inserted a new
+      // element right after this one.
+      node = node->next;
+    }
+
+  return VALID;
+}
+
+
+
+void Page::divideAndRecognize (Components *list, ListElement * ptr, Distance d)
+/*--------------------------------------------------------------
+Primary Purpose: Identify and separate merged characters
+Arguments: list is the component list that ptr belongs to
+          ptr is a pointer to a list element containing a component
+          d is the current recognition distance on the component
+Effects: Subdivides component into two parts, Division is made at
+         the minimum vertical height of the component.  If the
+         minHeight >= JoinTolerance no divison will be made.
+         (JoinTolerance is a global var that determines
+         the maximum number of merged pixels that are allowed in a
+         column for a division to be made)
+         When a division is made.  The component's boundaries are
+         adjusted accordingly and a new component is inserted into
+         the list.  The split is kept only if the left part recognizes
+         with a distance below d and the right part reaches 60% of
+         ConfidenceThreshold; otherwise the original bounds are restored
+         and the component is recognized again.
+
+         Returns without change if confidence is above
+         ConfidenceThreshold or width of component is < MinWidth*2
+Rev: KM 11/24/95
+---------------------------------------------------------------*/
+{
+  Component * comp = (Component *) ptr->item;
+  bool allGroups = TRUE;
+
+  // Save the original component boundaries just in case we cant improve
+  Point oldlr = comp->lr();
+  Point oldul = comp->ul();
+  int oldwidth = (int) comp->width();
+
+  // Some easy access x,y coordinates
+  int ulx = comp->ul().x();
+  int uly = comp->ul().y();
+  int lrx = comp->lr().x();
+  int lry = comp->lr().y();
+
+  Distance newdist;
+  int bestlrx;
+
+  if (comp->confid() > ConfidenceThreshold)
+    return;
+
+  if (oldwidth < MinWidth*2)  // cant be split in two
+    return;
+
+  // Determine where to split.  Split at the thinnest point
+  // within JoinTolerance (maximum number of pixels that might be fused).
+  // Columns closer than MinWidth to either edge are not considered.
+  int minHeight = (int)comp->height();
+  bestlrx = comp->lr().x();
+  for(int i = MinWidth; i < oldwidth - MinWidth; i++)
+    {
+      int newHeight =
+        fBitMap->pixelsInRegion(Point(ulx+i,uly), Point(ulx+i,lry));
+      if (newHeight < minHeight)
+        {
+          minHeight = newHeight;
+          bestlrx = ulx+i;
+        }
+    }
+
+  if (bestlrx < lrx && minHeight < JoinTolerance)
+    {
+      // Left part: shrink the existing component to end at the split.
+      comp->lr().x() = bestlrx;
+      int shrunk = comp->vertShrink(fBitMap);
+      comp->setProperties(fBitMap);
+      if (shrunk)  // ignore group if we had to shrink down
+        newdist = comp->recognize(LearnedGroups, allGroups);
+      else
+        newdist = comp->recognize(LearnedGroups);
+
+      // Right part: a new component from just past the split to the old
+      // lower right corner.
+      Component * newcomp = new Component(Point(bestlrx+1, oldul.y())
+                                          , oldlr);
+      newcomp->vertShrink(fBitMap);
+      newcomp->setProperties(fBitMap);
+      newcomp->recognize(LearnedGroups,allGroups);
+
+      if ((newdist < d) && (newcomp->confid() > ConfidenceThreshold*.6))
+        {
+          list->insertAfter(ptr, newcomp);
+          // Only draw when the user interface is enabled, as
+          // extractComponents does for its bounding boxes.
+          if(ENABLE_USER_INTERFACE)
+            {
+              newcomp->display_bounding_box("red");
+              comp->display_bounding_box("red");
+            }
+        }
+      else
+        {
+          // No improvement: undo the split.
+          comp->ul() = oldul;
+          comp->lr() = oldlr;
+          comp->setProperties(fBitMap);
+          comp->recognize(LearnedGroups);
+          delete newcomp;
+        }
+    }
+}
+
+
+void Page::uniteAndRecognize (Components *list, ListElement * ptr, Distance d)
+/*--------------------------------------------------------------
+Primary Purpose: Identify and merge a separated character
+Arguments: list is the component list that ptr belongs to
+          ptr is a pointer to a list element containing a component
+          d is the current recognition distance on the component
+Effects: Builds a component spanning from the upper left of the previous
+         element to the lower right of this one.  If it recognizes with
+         a distance below d and a confidence above ConfidenceThreshold,
+         both originals are removed from the list and the united
+         component takes their place; otherwise nothing changes.
+
+Rev: 5/6/96
+---------------------------------------------------------------*/
+{
+  ListElement * before = ptr->previous;
+  if (before == NULL) return;
+
+  Component * part1 = (Component *) before->item;
+  Component * part2 = (Component *) ptr->item;
+
+  Point ul = part1->ul();
+  Point lr = part2->lr();
+  // The two corners must describe a proper rectangle.
+  if (ul.y() > lr.y() || ul.x() > lr.x())
+    return;
+
+  Component * united = new Component(ul, lr);
+  united->setProperties(fBitMap);
+
+  // Combine the character groups of the two parts; the result is capped
+  // at group 4.
+  int group;
+  if (part1->charGroup <= 3 && part2->charGroup <= 3)
+    group = (part1->charGroup | part2->charGroup);
+  else if (part1->charGroup == 4)
+    group = (part2->charGroup | 2);
+  else
+    group = (part1->charGroup | 2);
+  if (group > 4) group = 4;
+  united->charGroup = group;
+
+  int newdist = united->recognize(LearnedGroups);
+
+  bool better = (newdist < d && united->confid() > ConfidenceThreshold);
+  if (!better)
+    {
+      delete united;
+      return;
+    }
+
+  list->removeAt(ptr->previous);
+  list->insertAfter(ptr, united);
+  list->removeAt(ptr);
+}
+
+
+int Page::writeWordPos(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Writes word position, confidence, length and string to file
+Arguments: output file name
+Return Value: 1 if successful. 0 if an error occured, which includes
+   being called before extractWords has built the word list
+Effects: Calls fWordList->printWordPos
+   // Output format for each word
+    "%6d %6d %6d %6d %s\n", word->ul.x(), word->ul.y(),
+            word->confid, word->charCount, word->characters
+Rev: 11/25/95
+---------------------------------------------------------------*/
+{
+  if (fWordList == NULL)
+    return 0;
+  return fWordList->writeWordPos(filename);
+}
+
+int Page::writeWordbox(char * filename, int xoffset= 0, int yoffset = 0,
+                       bool equationsOnly = FALSE)
+/*--------------------------------------------------------------
+Primary Purpose: Write out word to scanworks wordbox file
+Arguments: output file, xoffset, yoffset, equationsOnly bool if we only want
+equations.
+Return Value: the result of fWordList->writeWordbox, or 0 when called
+   before extractWords has built the word list
+Effects: calls fWordList->writeWordbox
+   // output format for each word
+    "%s %d %d %d %d %d %d %d % \n",
+            word->characters,
+            word->ul.x(), word->ul.y(),
+            word->lr.x(), word->lr.y(),
+            word->lr.x(), word->ul.y(),
+            word->ul.x(), word->lr.y() );
+       New line between lines of text
+NOTE(review): the default arguments are repeated on this out-of-class
+   definition -- confirm the declaration in Page.h does not specify them
+   as well.
+Rev: 11/25/95
+---------------------------------------------------------------*/
+{
+  if (fWordList == NULL)
+    return 0;
+  return fWordList->writeWordbox(filename, xoffset, yoffset, this, equationsOnly);
+}
+
+
+int Page::writeAscii(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word list to ascii file
+Arguments: filename to write to
+Return Value: 1 if successful 0 if unsuccessful, which includes being
+   called before extractWords has built the word list
+Effects: Calls fWordList->writeAscii(filename)
+Writes words to file in text format using MinLineSize
+to differentiate lines.
+Rev: 11/25 KM
+---------------------------------------------------------------*/
+{
+  if (fWordList == NULL)
+    return 0;
+  return fWordList->writeAscii(filename);
+}
+
+
+
+int Page::addEquation(int startline, int startcol, int endline, int endcol)
+/*--------------------------------------------------------------
+Primary Purpose: Add an equation to the equation list
+Arguments: boundaries of equation
+Return Value: 1.  (The function previously fell off the end without
+   returning a value.)
+Effects: Adds an element to fEqnList, inserted in order of startline
+Rev: 4/21/96
+---------------------------------------------------------------*/
+{
+  EqnMarker * newEqn = new EqnMarker(startline, startcol, endline, endcol);
+  fEqnList->SortedInsert(newEqn, startline);
+  return 1;
+}
+
+int Page::deleteEquation(int col, int row)
+/*--------------------------------------------------------------
+Primary Purpose: deletes the first equation containing this coordinate.
+Arguments: coordinate of equation to remove
+Return Value: 1 if element was remove, 0 otherwise (including when row
+   does not fall on any text line)
+Effects: removes the first equation in fEqnList that contains this
+   coordinate.  The Tcl variables describing it are set through
+   setTclDeleteVars before the marker is freed.
+Rev: 4/21/96
+---------------------------------------------------------------*/
+{
+  // first determine line number.
+  int linenum = -1;
+  for (int i = 0; i < fnumLines; i++)
+    if (flineinfo[i].fstartrow <= row && flineinfo[i].fendrow >= row)
+      {
+        linenum = i;
+        break;
+      }
+  if (linenum < 0)
+    return 0;  // row is not on a text line, so no equation can match
+
+  for(ListElement *ptr = fEqnList->first; ptr != NULL; ptr = ptr->next)
+    {
+      EqnMarker * eqn = (EqnMarker *) ptr->item;
+      bool hit;
+      if (linenum == eqn->startline && linenum == eqn->endline)
+        {
+          // Single-line equation: the column must lie inside it.
+          hit = (col >= eqn->startcol && col <= eqn->endcol);
+        }
+      else
+        {
+          // Multi-line equation: right of the start on its first line,
+          // anywhere on an interior line, left of the end on its last line.
+          hit = (linenum == eqn->startline && col >= eqn->startcol)
+             || (linenum > eqn->startline && linenum < eqn->endline)
+             || (linenum == eqn->endline && col <= eqn->endcol);
+        }
+
+      if (hit)
+        {
+          // Report to Tcl while the marker is still alive, then free it.
+          setTclDeleteVars(eqn);
+          delete eqn;
+          fEqnList->removeAt(ptr);
+          return 1;
+        }
+    }
+
+  return 0;
+}
+
+void Page::setTclDeleteVars(EqnMarker * eqn)
+/*--------------------------------------------------------------
+Primary Purpose: helper for deleteEquation; publishes the boundaries
+    of an equation to Tcl
+Arguments: eqn is the equation marker being deleted
+Effects: when ENABLE_USER_INTERFACE is set, issues a series of Tcl
+    "set" commands through docommand(); otherwise does nothing
+---------------------------------------------------------------*/
+{
+    if (!ENABLE_USER_INTERFACE)
+        return;
+
+    LineMarker & lastLine = flineinfo[eqn->endline];
+    LineMarker & firstLine = flineinfo[eqn->startline];
+
+    docommand("set deleted 1");
+
+    // the "cur" variables describe the end of the equation
+    docommand("set curline %d",eqn->endline);
+    docommand("set curline_startrow %d",lastLine.fstartrow);
+    docommand("set curline_endrow %d",lastLine.fendrow);
+    docommand("set curx %d", eqn->endcol);
+
+    // prevlines are actually starting lines but allowed same use of
+    // tcl add equation code
+    docommand("set prevline %d",eqn->startline);
+    docommand("set prevline_startrow %d",firstLine.fstartrow);
+    docommand("set prevline_endrow %d",firstLine.fendrow);
+    docommand("set prevx %d", eqn->startcol);
+
+    // this will change with zoning
+    docommand("set curline_startcol %d",0);
+    docommand("set curline_endcol %d",get_width());
+}
+
+Component * Page::compAt(Point p)
+/*--------------------------------------------------------------
+Primary Purpose: Look up the component at point p by delegating to
+    Components::compAt on the text line that contains p (described
+    by the original author as returning the smallest component
+    containing p)
+Return Value: Pointer to the component, or NULL if p is on no line
+    (negative line number) or the line has no component there
+Effects: Prints the outcome of the lookup to stdout
+Rev: 4/25/96
+---------------------------------------------------------------*/
+{
+    Component * found = NULL;
+    const int linenum = get_linenum(p.x(), p.y());
+
+    if (linenum >= 0)
+        found = line(linenum)->compAt(p);
+
+    if (found != NULL)
+    {
+        printf("Component found at ( %d, %d)\n ul = (%d, %d) lr = (%d, %d)\n "
+               , p.x(), p.y(), found->ul().x(), found->ul().y(),
+               found->lr().x(), found->lr().y());
+    }
+    else
+    {
+        printf("No component found at ( %d, %d)\n ", p.x(), p.y());
+    }
+
+    return found;
+}
+
+
+bool Page::inEquation(int col, int row)
+/*--------------------------------------------------------------
+Primary Purpose: determine if the coordinate (col, row) is inside
+    an equation
+Arguments: col, row - the x,y coordinates to test
+Return Value: true if in an equation, false otherwise
+Effects: walks fEqnList looking for an equation covering the text
+    line of (col, row) at this column; the list is not modified
+Rev: 11/25/95
+---------------------------------------------------------------*/
+{
+    // first determine line number.
+    const int linenum = get_linenum(col, row);
+
+    ListElement * ptr = fEqnList->first;
+    while (ptr != NULL)
+    {
+        EqnMarker * eqn = (EqnMarker *) ptr->item;
+        const bool onFirstLine = (linenum == eqn->startline);
+        const bool onLastLine = (linenum == eqn->endline);
+
+        if (onFirstLine && onLastLine)
+        {
+            // single-line equation: both column bounds apply
+            if (col >= eqn->startcol && col <= eqn->endcol)
+                return true;
+        }
+        else if (onFirstLine)
+        {
+            if (col >= eqn->startcol)
+                return true;
+        }
+        else if (onLastLine)
+        {
+            if (col <= eqn->endcol)
+                return true;
+        }
+        else if (linenum > eqn->startline && linenum < eqn->endline)
+        {
+            // a line strictly between the first and last is covered
+            return true;
+        }
+
+        ptr = ptr->next;
+    }
+    return false;
+}
+
+bool Page::inEquation(ListElement * comp)
+/*--------------------------------------------------------------
+Primary Purpose: determine if the component held by this list
+    element is in an equation
+Arguments: A list element from a component list
+Return Value: true if in equation, false otherwise
+Effects: tests the component's upper left corner with
+    inEquation(x,y), which does the real work
+Rev: 4/21/96
+---------------------------------------------------------------*/
+{
+    Component * component = (Component *) comp->item;
+    const int col = component->ul().x();
+    const int row = component->ul().y();
+    return inEquation(col, row);
+}
+
+
+int Page::writeEquations(char * filename, int lineOffset)
+/*--------------------------------------------------------------
+Primary Purpose: Writes the boundaries of all equations to a file
+Arguments: filename - output file name
+    lineOffset - added to the start and end line of every equation
+Return Value: 1 if successful, 0 if the file cannot be opened
+Effects: Writes one text line to filename for each equation:
+    startline, startcol, endline, endcol
+Rev: 11/25/95
+---------------------------------------------------------------*/
+{
+    FILE * outfile = fopen(filename, "w");
+    if (!outfile)
+    {
+        printf("Error openning %s", filename);
+        return 0;
+    }
+
+    ListElement * ptr = fEqnList->first;
+    while (ptr != NULL)
+    {
+        EqnMarker * eqn = (EqnMarker *) ptr->item;
+        fprintf(outfile, " %6d %6d %6d %6d\n",
+                eqn->startline+lineOffset, eqn->startcol,
+                eqn->endline+lineOffset, eqn->endcol);
+        ptr = ptr->next;
+    }
+
+    fclose(outfile);
+    return 1;
+}
+
+void Page::join(Component * a, Component * b)
+/*--------------------------------------------------------------
+Primary Purpose: Merge two components of the same text line
+Arguments: a, b - the components to merge; either may be NULL
+Effects: The component at the lower address absorbs the other one
+    through Component::join, and the absorbed component is removed
+    from its line's component list.  Nothing happens when a == b or
+    when either pointer is NULL.
+Constraints: both components must be on the same line (asserted)
+---------------------------------------------------------------*/
+{
+    // join(Point, Point) forwards compAt() results, and compAt()
+    // returns NULL when there is no component at a point, so NULL
+    // has to be tolerated here rather than dereferenced.
+    if (a == NULL || b == NULL || a == b) return;
+
+    Component * primary = ((a < b) ? a : b);
+    Component * secondary = ((primary == a) ? b : a);
+    assert(primary != secondary);
+    assert(get_linenum(a) == get_linenum(b));
+
+    primary->join(secondary);
+
+    // remove secondary component from component list.
+    int linenum = get_linenum(secondary);
+    line(linenum)->removeElement(secondary);
+}
+
+
+
+
+
+int Page::thinnestHorizontalSplit(Components * complist,
+                                  ListElement * compptr)
+/*--------------------------------------------------------------
+Primary Purpose: Splits this component at its thinnest point
+Arguments: the component list that contains the component and
+    a pointer to its list element
+Return Value: the result of horizontalCompSplit (1 if the split was
+    performed, 0 otherwise).  This function used to end without a
+    return statement even though it is declared int.
+Effects: Adds a new component to the list
+Constraints:
+Rev: 4/26
+---------------------------------------------------------------*/
+{
+    Component * comp = (Component *) compptr->item;
+    // Some easy access x,y coordinates
+    int ulx = comp->ul().x();
+    int uly = comp->ul().y();
+    int lry = comp->lr().y();
+
+    // Determine where to split: the column with the fewest set pixels.
+    // If no column beats the full component height, the split position
+    // stays at the right edge of the component.
+    int minHeight = (int) comp->height();
+    int oldwidth = (int) comp->width();
+    int bestlrx = comp->lr().x();
+
+    // MinWidth is the minimum width of a learned character, so columns
+    // closer than that to either edge are not considered
+    for (int i = MinWidth; i < oldwidth - MinWidth; i++)
+    {
+        int newHeight =
+            fBitMap->pixelsInRegion(Point(ulx+i,uly), Point(ulx+i,lry));
+        if (newHeight < minHeight)
+        {
+            minHeight = newHeight;
+            bestlrx = ulx+i;
+        }
+    }
+
+    return horizontalCompSplit(complist, compptr, bestlrx);
+}
+
+int Page::thinnestHorizontalSplit(Component * comp)
+/*--------------------------------------------------------------
+Primary Purpose: Find comp in the component list of its text line
+    and split it at its thinnest point
+Arguments: comp - the component to split
+Return Value: 1 if comp was found in its line's list (a split was
+    attempted), 0 if comp is on no valid line or is not in the list
+Effects: calls thinnestHorizontalSplit(complist, compptr)
+---------------------------------------------------------------*/
+{
+    int i = get_linenum(comp);
+
+    // compAt() treats a negative line number as "not on any line";
+    // check the range here too instead of indexing fLineComponents
+    // out of bounds.
+    if (i < 0 || i >= fnumLines)
+        return 0;
+
+    Components * complist = fLineComponents[i];
+
+    for (ListElement * ptr = complist->first; ptr != NULL; ptr = ptr->next)
+    {
+        if ((Component *) (ptr->item) == comp)
+        {
+            thinnestHorizontalSplit(complist, ptr);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+int Page::horizontalCompSplit(Components * complist,
+                              ListElement * compptr, int x)
+/*--------------------------------------------------------------
+Primary Purpose: Split the component in this list element into two
+    components at the indicated x coordinate
+Arguments: complist - the list holding the component
+    compptr - the list element of the component to split
+    x - x coordinate of the split; the new (right) component
+        starts at x and the old one ends at x-1
+Return Value: 1 if the split is performed, 0 otherwise
+Effects: Inserts a new element after compptr.  Both halves are
+    shrunk vertically, get their properties set and are recognized
+    again; bounding boxes are redrawn.
+Constraints: ul().x() <= x <= lr().x() of the component
+Rev: 4/26/96
+---------------------------------------------------------------*/
+{
+    Component * comp = (Component *) compptr->item;
+    bool allGroups = TRUE;
+    comp->display_bounding_box("white");
+
+    const bool inRange = (x >= comp->ul().x() && x <= comp->lr().x());
+    if (!inRange)
+    {
+        cout << " Cant split component " << x << "is not between"
+             << comp->ul().x() << "and" << comp->lr().x() << endl;
+        return 0;
+    }
+
+    // The right half is built from the old lower right corner, so it
+    // must be created before the left half is narrowed.
+    Component * newcomp = new Component(Point(x,comp->ul().y()),
+                                        comp->lr());
+    comp->lr().x() = x-1;
+
+    // left half
+    int compShrunk = comp->vertShrink(fBitMap);
+    comp->setProperties(fBitMap);
+    if (!compShrunk)
+        comp->recognize(LearnedGroups);
+    else
+        comp->recognize(LearnedGroups, allGroups);
+
+    // right half
+    int newCompShrunk = newcomp->vertShrink(fBitMap);
+    newcomp->setProperties(fBitMap);
+    if (!newCompShrunk)
+        newcomp->recognize(LearnedGroups);
+    else                          // ignore group if shrunk
+        newcomp->recognize(LearnedGroups, allGroups);
+
+    complist->insertAfter(compptr, newcomp);
+    comp->display_bounding_box("blue");
+    newcomp->display_bounding_box("blue");
+
+    return 1;
+}
+
+
+ZonedPage::ZonedPage()
+    : Page(), fzones(new Zones())
+{
+    // fzones starts out as a newly allocated Zones list; it is
+    // deleted in ~ZonedPage()
+}
+
+ZonedPage::~ZonedPage()
+// Releases the zone list.  The Page base class destructor runs
+// automatically once this body finishes; it used to be invoked
+// explicitly here as well, which destroyed the base part twice.
+{
+    delete fzones;
+}
+
+Zones * ZonedPage::zones()
+// Accessor for the zone list held in fzones
+{
+    return this->fzones;
+}
+
+Page * ZonedPage::activate(int x, int y)
+/*--------------------------------------------------------------
+Primary Purpose: activate the page at Point(x,y)
+Arguments: x, y - coordinates used to look up the zone
+Return Value: the Page of the zone returned by findZone(x,y), or
+    NULL if findZone returns no zone
+Effects: sets the Tcl variables cur_xoffset and cur_yoffset to the
+    zone's upper left corner; if the zone has no page yet, builds
+    it with Zone::buildPage(this)
+---------------------------------------------------------------*/
+{
+    Zone * activeZone = zones()->findZone(x,y);
+    if (activeZone != NULL)
+    {
+        docommand("set cur_xoffset %d", activeZone->ul().x());
+        docommand("set cur_yoffset %d", activeZone->ul().y());
+
+        const bool pageMissing = (activeZone->page() == NULL);
+        if (pageMissing)
+            activeZone->buildPage(this);
+
+        return activeZone->page();
+    }
+    return NULL;
+}
+
+void ZonedPage::autoZone(int horizMerge, int vertMerge)
+/*--------------------------------------------------------------
+Primary Purpose: autoZone tries to automatically zone the page
+Arguments: horizMerge - passed on to extractComponents()
+    vertMerge - largest vertical distance (lower item's top minus
+        main item's bottom) at which two components that overlap
+        in x are still merged
+Effects: Re-extracts the components, merges overlapping components
+    until nothing changes, then creates one Zone per remaining
+    component (grown by one pixel on every side), appends it to
+    zones() and announces it to Tcl with start_region / end_region.
+---------------------------------------------------------------*/
+{
+    int changed = 1;
+
+    // NOTE(review): components() is an array of per-line lists; a
+    // scalar delete frees none of the lists, and may not match how
+    // the array was allocated (allocation not visible here).
+    if (components() != NULL)
+        delete components();
+
+    extractComponents(horizMerge);
+
+    while(changed)
+    {
+        changed = 0;
+        for (int i=0; i < numLines(); i++)
+        {
+            for(ListElement * mptr = line(i)->first; mptr != NULL; mptr=mptr->next)
+            {
+                Component * mainitem = (Component *) mptr->item;
+                for (int j= i; j < numLines(); j++)
+                {
+                    ListElement * ptr = line(j)->first;
+                    while (ptr != NULL)
+                    {
+                        // Fetch the successor first: ptr is handed to
+                        // removeAt() below and must not be read again
+                        // after that.
+                        ListElement * next = ptr->next;
+                        Component * item = (Component *) ptr->item;
+
+                        if( (item->ul().y() - mainitem->lr().y()) <= vertMerge &&
+                            (mainitem != item) &&
+                            mainitem->xoverlap(item))
+                        {
+                            mainitem->join(item);
+                            (line(j))->removeAt(ptr);
+                            changed = 1;
+                        }
+                        ptr = next;
+                    }
+                }
+            }
+        }
+    }
+
+    for (int i=0; i < numLines(); i++)
+    {
+        for(ListElement * mptr = line(i)->first; mptr != NULL; mptr=mptr->next)
+        {
+            Component * mainitem = (Component *) mptr->item;
+
+            // the zone is the component's box grown by one pixel
+            Point ul = Point(mainitem->ul().x() -1,mainitem->ul().y() -1);
+            Point lr = Point( mainitem->lr().x() +1, mainitem->lr().y() +1);
+            docommand("start_region %d %d", (int)(ul.x()*ZONING_SCALE_FACTOR),
+                      (int)(ul.y()*ZONING_SCALE_FACTOR));
+
+            docommand("end_region %d %d", (int)(lr.x()*ZONING_SCALE_FACTOR),
+                      (int)(lr.y()*ZONING_SCALE_FACTOR));
+
+            Zone * newzone = new Zone(ul,lr);
+            zones()->Append(newzone);
+        }
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Page.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,188 @@
+/* Page.h
+ The Page class is the primary class used for the OCR system
+ It has two data representations of a page of text, an RLEMap
+ and a BitMap.
+
+*/
+
+#ifndef _PAGE_H
+#define _PAGE_H
+
+#include "system.h"
+#include "EqnMarker.h"
+#include "Component.h"
+#include "RLEMap.h"
+#include "BitMap.h"
+#include "LineMarker.h"
+#include "Word.h"
+#include "tcl_interface.h"
+#include "Zone.h"
+
+class Zones;
+
+class Page {
+public:
+    // main() and testocr() may access the private members.
+    // (main is given an explicit int return type; the implicit-int
+    // form "friend main();" is not valid C++.)
+    friend int main();
+    friend void testocr(int argc, char ** argv);
+
+    // Constructor, Destructor
+    // NOTE(review): ZonedPage derives from Page but this destructor is
+    // not virtual, so a ZonedPage must not be deleted through a Page *.
+    Page();
+    ~Page();
+
+
+    // Read is from 2 level TIFF files.
+    // Calls BitMap function. readMap then converts to RLE
+
+    MapStatus readMap(char * filename); // Calls BitMap::readMap
+
+    Angle skewAngle(); // returns skew estimate
+    int deskew(int deskew_method); /* one for rle, 0 for bitmap rot */
+
+    MapStatus setLines();
+    /* Sets fnumlines to the # of text lines
+       and dimensions and sets flineinfo for start and end row
+       for each line. */
+    void display_line_boundaries();
+    /* highlights the space between lines of text in TCL//TK */
+
+    MapStatus extractComponents(int horizMerge);
+    /* Extract Component information for each line of text
+       Does connected component analysis then projects up and
+       down to catch circumflexes. A component list is created
+       for each line of text. Assumes image has already been deskewed
+       using deskew and that setLines has been run to determine
+       text line boundaries ***/
+
+    MapStatus recognize();
+    /* Recognize whole page. Run after extractComponents.
+       learn() or readLearnedGroups() must also be run
+       before this function **/
+
+    MapStatus recognize(int linenum); // just one line
+    /* Recognize characters
+       Perform Character Recognition on a line of components.
+       Use the global variable LearnedGroups for comparison.
+       ***/
+
+    MapStatus extractWords();
+    /* Find the start and end of words using avgSpacing and
+       add to word list fWordList */
+
+    void spellcheck();
+    /* spellcheck the list of words (set the misspelled field
+       in each word) */
+
+    int send_words_to_tcl();
+    /* Send words to user interface */
+
+    /* Output options */
+    int writeWordPos(char * filename);
+    /* Write upper left point coordinates, confidence and translation to
+       file*/
+    int writeWordbox(char * filename, int xoffset, int yoffset,
+                     bool equationsOnly);
+    /* Write out to scanwrx format */
+
+    int writeAscii(char * filename);
+    /** Write words out to ascii file **/
+
+    int writeEquations(char * filename, int lineoffset);
+    /*** Write boundaries of equations */
+
+
+    void printComponents();
+    /* Prints out a little bitmap for each bad component in the list.
+       Uses ConfidenceThreshold as a cutoff for printing characters.
+       Just used for debugging*/
+    void printComponent(Component* comp);
+
+    void printWords();
+    /* prints out bitmap for each component delimiting between words.*/
+
+
+    int get_height();
+    int get_width();
+    int get_linenum(int x, int y); // returns line number of x,y coordinates
+    int get_linenum(Point p) { return get_linenum(p.x(), p.y()); }
+    // line number of the component's center point
+    int get_linenum(Component * comp) {return get_linenum(comp->center()); }
+
+    // Plain accessors for the page's data representations
+    inline BitMap * bmap() {return fBitMap;}
+    inline RLEMap * rmap() {return fRLEMap;}
+    inline Words * words() {return fWordList;}
+    inline Components ** components() {return fLineComponents;}
+    // component list of line i; i is not range checked
+    inline Components * line(int i) {return fLineComponents[i];}
+
+    int numLines() {return fnumLines;}
+    LineMarker * lineinfo() {return flineinfo;}
+    int avgSpacing() {return favgSpacing;}
+
+    /* Functions to handle equation marking */
+    int addEquation(int startline, int startcol, int endline, int endcol);
+    int deleteEquation(int x,int y); // deletes equation with this coordinate.
+
+    Component * compAt(Point p); // returns pointer to smallest
+                                 // comp containing p
+    bool inEquation(int x, int y);
+    bool inEquation(ListElement * comp);
+    // joins the components found at the two points
+    void join(Point a, Point b) { join(compAt(a), compAt(b));}
+    void join(Component * a, Component * b);
+
+    int thinnestHorizontalSplit(Components * complist,
+                                ListElement * compptr);
+
+    int thinnestHorizontalSplit(Component * comp);
+
+    int horizontalCompSplit(Components * complist,
+                            ListElement * compptr, int x);
+    int verticalSplit(Components * complist,
+                      ListElement * compptr, int y);
+
+private:
+    int fnumLines; // Number of lines
+    LineMarker * flineinfo; // for each line - start and end row
+                            // in RLEMap
+    int favgSpacing ; // Avg spacing between comp (in pixels)
+    Components ** fLineComponents; // A list of components for each line
+
+    Words * fWordList; // A list of words in the document
+    RLEMap * fRLEMap; // Pointer to an RLEMap representation
+    BitMap * fBitMap; // Pointer to BitMap representation
+    EqnMarkers * fEqnList; // pointer to equation list
+    int spacing(ListElement * compa, ListElement * compb);
+    // helper function for extractWords
+    // Returns # of horizontal blank pixels between 2 components
+    void divideAndRecognize (Components * list, ListElement * ptr, Distance d);
+    void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
+    void setTclDeleteVars(EqnMarker * eqn); // helper function for deleteEquation
+
+
+};
+
+/* A Page that also carries a list of zones (fzones).  The page of a
+   zone is reached through activate(). */
+class ZonedPage : public Page
+{
+public:
+    ZonedPage();
+    ~ZonedPage();
+
+    Zones * zones();                // the zone list of this page
+    Page * activate(int x, int y);  // activate the page at Point(x,y)
+    void autoZone(int horizMerge, int vertMerge);
+    void recognizeAll();
+
+private:
+    Zones * fzones;
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Point.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,53 @@
+#ifndef _POINT_H
+#define _POINT_H
+
+/* A point with integer x,y coordinates.  A default constructed
+   Point is (-1,-1). */
+class Point{
+ public:
+    Point(int xCoord = -1, int yCoord= -1)
+        : fx(xCoord), fy(yCoord) {}
+
+    // Writable access to the coordinates
+    inline int & x() { return fx; }
+    inline int & y() { return fy; }
+
+    // Read-only access, so that const Points can be inspected
+    inline int x() const { return fx; }
+    inline int y() const { return fy; }
+
+
+    // Relational operators.
+    // The ordering operators hold only when BOTH coordinates satisfy
+    // the relation, so this is a partial order: for two points
+    // neither a < b nor a >= b may be true.
+
+    inline bool operator ==(const Point & p) const
+        { return (fx == p.fx && fy == p.fy); }
+    inline bool operator !=(const Point & p) const
+        { return (fx != p.fx || fy != p.fy); }
+    inline bool operator < (const Point & p) const
+        { return (fx < p.fx) && (fy < p.fy); }
+    inline bool operator > (const Point & p) const
+        { return (fx > p.fx) && (fy > p.fy); }
+    inline bool operator <=(const Point & p) const
+        { return (fx <= p.fx) && (fy <= p.fy); }
+    inline bool operator >=(const Point & p) const
+        { return (fx >= p.fx) && (fy >= p.fy); }
+
+ private:
+    int fx;
+    int fy;
+};
+
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/RLEMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,578 @@
+#include "system.h"
+#include "RLEMap.h"
+#include "RLEPair.h"
+#include "tcl_interface.h"
+#include "status_message.h"
+
+ /* *****************************************************************
+ * RLEMap.cc - Member functions for an RLEMap *
+ *
+ * RLEMap() - Constructor
+ * ~RLEMap() - Destructor
+ *
+ * int imageLength();
+ * int imageWidth();
+ * MapStatus & status();
+ *
+ * Below is an index of the other functions and the files where they
+ * appear.
+ *
+ * MapStatus readMap(char * filename) - defined in this file
+ * MapStatus writeMap(char * filename);
+ *
+ * // Data Access and low level manipulation functions
+ * RLEPairs * row(int i) - Returns a pointer to the list of RLEPairs
+ * for row i.
+ * MapStatus setBit(Point point, Color clr);
+ * Color readBit(Point point);
+ *
+ *
+ ***************************************************************/
+
+RLEMap::RLEMap()
+    : fMapData(NULL),
+      fImageLength(0),
+      fImageWidth(0),
+      fStatus(EMPTY)
+/*--------------------------------------------------------------
+Primary Purpose: Constructor
+Effects: The map starts with no row data, zero length and width,
+    and status EMPTY
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+}
+
+
+
+ RLEMap::~RLEMap()
+/*--------------------------------------------------------------
+Primary Purpose: destructor
+Effects: Deletes each row of RLEPairs then the array of rows.
+    Does nothing when no row data was ever allocated.
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+    if (fMapData != NULL)
+    {
+        // delete each row
+        for (int i = 0; i < fImageLength; i++)
+        {
+            delete fMapData[i];
+        }
+        // delete array of rows.  The array is allocated with new[]
+        // (see readMap and tilt), so it must be released with
+        // delete[]; a scalar delete here is undefined behavior.
+        delete [] fMapData;
+    }
+}
+
+int & RLEMap::imageLength()
+/*--------------------------------------------------------------
+Return Value: reference to the vertical length of the image in
+    pixels; being a non-const reference, it can also be assigned to
+Constraints: readMap() must have been run and fStatus be VALID
+    for the value to be meaningful
+Rev: 10/6 KM
+---------------------------------------------------------------*/
+{
+    return this->fImageLength;
+}
+
+
+int & RLEMap::imageWidth()
+/*--------------------------------------------------------------
+Return Value: reference to the horizontal width of the image in
+    pixels; being a non-const reference, it can also be assigned to
+Constraints: readMap() must have been run and fStatus be VALID
+    for the value to be meaningful
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+    return this->fImageWidth;
+}
+
+
+MapStatus & RLEMap::status()
+/*--------------------------------------------------------------
+Return Value: reference to the current status (EMPTY, VALID etc..);
+    being a non-const reference, it can also be assigned to
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+    return this->fStatus;
+}
+
+
+
+RLEPairs * RLEMap::operator [](int i)
+/*--------------------------------------------------------------
+Arguments: i is the row # of the RLEPair list to be returned
+Return Value: A pointer to the list of RLEPairs in row i
+Constraints: i is not range checked
+Rev: 10/20/95 KM
+---------------------------------------------------------------*/
+{
+    // same lookup as row(i)
+    return row(i);
+}
+
+
+RLEPairs * RLEMap::row(int i)
+// Returns the list of RLEPairs stored for row i, like the overloaded
+// [] operator.  i is not range checked.
+{
+    RLEPairs * pairs = fMapData[i];
+    return pairs;
+}
+
+
+
+MapStatus RLEMap::readMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read an RLEMap from a TIFF file
+Arguments: filename of TIFF file
+Return Value: A MapStatus, either VALID or READERROR (READERROR
+    when the file cannot be opened)
+Effects:
+    RLEMap::readMap(filename) reads a two level TIFF file and
+    places it in this RLEMap.  The fields set are:
+      fImageWidth  - the pixel width of the image
+      fImageLength - the vertical pixel length of the image
+      fStatus      - the status of the image, VALID or READERROR
+      fMapData     - an array of pointers to lists of RLEPairs,
+                     one list per row
+    Every scanline is inverted unless the file is min-is-white, and
+    the first and the last row are cleared.
+    Prints the image dimensions and photometric format to stdout.
+Constraints: filename must be a two level TIFF file
+Rev: 10/20/95 Portions Borrowed from Assignment 1
+---------------------------------------------------------------*/
+{
+    TIFF *tif;
+    unsigned char * buf;
+    short photometric;
+
+    // Open File - Read length and width
+
+    tif = TIFFOpen (filename, "r");
+    if(tif == NULL)
+    {
+        fStatus = READERROR;
+        return READERROR;
+    }
+
+    TIFFGetField (tif, TIFFTAG_IMAGELENGTH, &fImageLength);
+    TIFFGetField (tif, TIFFTAG_IMAGEWIDTH, &fImageWidth);
+    TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photometric);
+
+    printf("open succeeded on file %s. length = %d. width = %d ",
+           filename, fImageLength, fImageWidth);
+    if(photometric == PHOTOMETRIC_MINISWHITE)
+        printf("min-is-white format\n");
+    else if(photometric == PHOTOMETRIC_MINISBLACK )
+        printf("min-is-black format\n");
+    else
+        printf("with an unknown photometric: %d\n", photometric);
+
+    // allocate buffer and array for data
+    int numCharsInBuf = fImageWidth / 8 +1 ;
+    buf = new unsigned char[numCharsInBuf];
+    // Standard array-new syntax, as used in tilt(); the parenthesized
+    // form "new (RLEPairs*)[n]" is not valid C++.
+    fMapData = new RLEPairs*[fImageLength+1];
+    // The array has one spare slot past the last row; give it a
+    // defined value.
+    fMapData[fImageLength] = NULL;
+
+    for (int row = 0; row < fImageLength; ++row)
+    {
+        TIFFReadScanline(tif,buf,row,0);
+        if(photometric != PHOTOMETRIC_MINISWHITE) /* invert anything except white */
+            invertBitsInBuffer(buf, numCharsInBuf);
+        if(row==0 || row == fImageLength -1)
+            clearBitsInBuffer(buf,numCharsInBuf);
+        // Create a list of RLEPairs for this row and fill with buffer data
+        fMapData[row] = new RLEPairs(row);
+        fMapData[row]->fill(buf, numCharsInBuf, row);
+    }
+
+    // the scanline buffer is only needed while reading
+    delete [] buf;
+    TIFFClose(tif);
+
+    fStatus = VALID;
+    return VALID;
+}
+
+short int RLEMap::grayScale(Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose: gray level of a rectangular region (marked by the
+    original author as a dummy function for now)
+Arguments: ul, lr - upper left and lower right corner, inclusive
+Return Value: the fraction of set pixels in the region scaled to
+    the range 0..255
+Effects: prints a message and asserts if more pixels are counted
+    than the region has
+---------------------------------------------------------------*/
+{
+    int pixelCount = pixelsInRegion(ul, lr);
+    int area = (lr.x() - ul.x()+1) * (lr.y() - ul.y()+1);
+
+    if (pixelCount > area)
+    {
+        printf("Uh oh! Area = %d and pixels = %d\n", area, pixelCount);
+        assert(area >= pixelCount);
+    }
+
+    return (short int)(((float)pixelCount/area) * 255);
+}
+
+int RLEMap::pixelsInRegion(Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose: count the pixels in a rectangular region
+Arguments: ul, lr - upper left and lower right corner, inclusive
+Return Value: sum of pixelsBetween(ul.x, lr.x) over the rows
+    ul.y .. lr.y
+Constraints: (0,0) <= ul <= lr <= (fImageWidth, fImageLength),
+    checked with assert.
+    NOTE(review): the last assert still accepts lr.y() ==
+    fImageLength, which is one past the last row - confirm whether
+    callers rely on that.
+---------------------------------------------------------------*/
+{
+    assert (ul >= Point(0,0));
+    assert (ul <= lr);
+    assert (lr <= Point(fImageWidth, fImageLength));
+
+    int left = ul.x();
+    int right = lr.x();
+    int lastRow = lr.y();
+    int total = 0;
+
+    int r = ul.y();
+    while (r <= lastRow)
+    {
+        total += row(r)->pixelsBetween(left, right);
+        r++;
+    }
+
+    return total;
+}
+
+ListElement*
+RLEMap::FindNearVertDot(int startCol, int endCol, int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Return the closest interval to startRow within the
+    bounds of startCol and endCol, searching in the direction of
+    endRow.  Finds the closest dot vertically from startRow.
+Arguments: startRow is the row to start from, startCol and endCol
+    are the left and right boundaries of the search.  The search
+    moves towards endRow and begins two rows away from startRow.
+Return Value: An RLE interval - pointer to a list element in
+    RLEPairs - or NULL if no interval overlaps the column range
+Effects:
+Constraints: searches downwards when startRow < endRow, upwards
+    otherwise; rows are not range checked
+---------------------------------------------------------------*/
+{
+    const int step = (startRow < endRow) ? 1 : -1;
+
+    for (int i = startRow + 2 * step;
+         (step > 0) ? (i <= endRow) : (i >= endRow);
+         i += step)
+    {
+        for (ListElement * current = fMapData[i]->first;
+             current != NULL; current = current->next)
+        {
+            RLEPair * pair = (RLEPair *) current->item;
+            // the first interval overlapping [startCol, endCol] wins
+            if ((pair->start <= endCol) && (pair->end >= startCol))
+                return current;
+        }
+    }
+    return NULL;
+}
+
+
+
+
+
+ListElement*
+RLEMap::FindNearHorizDot(int startCol, int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Return the closest interval to startCol within the
+    bounds of startRow and endRow (startRow is lower).  Finds the
+    closest dot horizontally from startCol.
+Arguments: startCol is the column to start from, startRow and
+    endRow are the upper and lower boundaries of the search
+Return Value: An RLE interval - pointer to a list element in
+    RLEPairs - or NULL if no row has an interval ending at or after
+    startCol that starts left of fImageWidth
+Effects:
+Constraints: startRow < endRow
+---------------------------------------------------------------*/
+{
+    ListElement* answer = NULL;
+    int closest = fImageWidth;
+
+    for (int i = startRow; i <= endRow; i++)
+    {
+        // skip the intervals of this row that end before startCol
+        ListElement* current = fMapData[i]->first;
+        for (; current != NULL; current = current->next)
+        {
+            if (((RLEPair *) current->item)->end >= startCol)
+                break;
+        }
+        if (current == NULL)
+            continue;
+
+        // keep the candidate with the smallest start column so far
+        RLEPair * pair = (RLEPair *) current->item;
+        if (pair->start < closest)
+        {
+            answer = current;
+            closest = pair->start;
+        }
+    }
+    return answer;
+}
+
+
+
+
+void testRLEMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Test the reading of tiff files into RLE format
+Effects: Reads filename and puts it into RLE format, prints the
+    map if it has fewer than 100 rows, then runs testpixelsBetween.
+    If the file cannot be read, a message is printed and the tests
+    are skipped instead of running on an empty map.
+Rev: 10/7/95 KM
+---------------------------------------------------------------*/
+{
+    RLEMap m;
+
+    if (m.readMap(filename) != VALID)
+    {
+        printf("testRLEMap: could not read %s\n", filename);
+        return;
+    }
+    if (m.imageLength() < 100) printMap(&m);
+    testpixelsBetween(&m); // In RLEPairs.cc - tests pixelsBetween function
+}
+
+
+void printMap(RLEMap * map)
+/*--------------------------------------------------------------
+Primary Purpose: Print the map to cout as text
+Effects: For every row prints an "X" for each column covered by
+    an RLE interval and a blank for each column in the gaps before
+    and between intervals, followed by a newline
+---------------------------------------------------------------*/
+{
+    RLEMap & m = *map;
+
+    for (int r = 0; r < m.imageLength(); r++)
+    {
+        RLEPairs * rowdata = m[r];
+        int nextCol = 0;   // first column that has not been printed yet
+
+        for (ListElement* ptr = rowdata->first; ptr != NULL; ptr = ptr->next)
+        {
+            RLEPair * item = (RLEPair *)(ptr->item);
+            const int from = item->start;
+            const int to = item->end;
+
+            for (int pos = nextCol; pos < from; pos++)
+                cout << " ";
+            for (int pos = from; pos <= to; pos++)
+                cout << "X";
+
+            nextCol = to + 1;
+        }
+        cout << "" << endl;
+    }
+}
+
+void RLEMap::printPairs(int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Prints the RLE pairs of this map from startRow to
+    endRow (both inclusive) to cout, one output line per row, each
+    pair as (start,end)
+Rev:11/2 KM
+---------------------------------------------------------------*/
+{
+    cout << "printing rows " << startRow << " to " << endRow << endl;
+
+    int r = startRow;
+    while (r <= endRow)
+    {
+        RLEPairs * rowdata = (*this)[r];
+
+        cout << "row " << r << " ";
+
+        for (ListElement *ptr = rowdata->first; ptr != NULL; ptr = ptr->next)
+        {
+            RLEPair * item = (RLEPair *)(ptr->item);
+            const int startX = item->start;
+            const int endX = item->end;
+            cout << "(" << startX << "," << endX <<")";
+        }
+        cout << endl;
+        r++;
+    }
+}
+
+void testpixelsBetween(RLEMap * map)
+// Checks pixelsBetween on every row of the map: the pixel counts of
+// consecutive sub ranges of 29 columns (0-28, 29-57, ...) must add up
+// to the row's numPixels.
+// The rows belong to the map, so nothing is deleted here.  A
+// "delete pairs" used to follow the loop; it freed the last row a
+// second time when the map was destroyed, and deleted an
+// uninitialized pointer when the map had no rows.
+{
+    for (int row = 0; row < map->imageLength(); row++)
+    {
+        RLEPairs * pairs = (*map)[row];
+        int sum = 0;
+
+        for (int start = 0, end = 28; start <= map->imageWidth();
+             start += 29, end += 29)
+        {
+            sum += pairs->pixelsBetween(start, end);
+        }
+
+        if (sum !=0)
+            printf("row %d sum was %d , should be %d\n", row, sum, pairs->numPixels);
+        assert(sum == pairs->numPixels);
+    }
+}
+
+int RLEMap::deskew()
+/* going to be a (near-blind) steal from fateman */
+/*--------------------------------------------------------------
+Primary Purpose: deskewing an RLEMap
+Arguments: none
+Return Value: 1 if the page is altered, 0 if not
+Effects: RLEMap is straightened out with tilt_and_slant when the
+    negated get_skew() estimate is at least MINIMUM_SKEW_ANGLE in
+    magnitude
+Constraints: RLE shouldn't be tilted too much (< 10deg)
+    NOTE(review): the skew is divided by (180 / M_PI) before tan()
+    is applied, so the code treats it as degrees although the
+    original comment called it radians - confirm against get_skew().
+Rev: AR 11/1/95
+---------------------------------------------------------------*/
+{
+    const double skew = -get_skew(this);
+
+    const bool largeEnough =
+        (skew >= MINIMUM_SKEW_ANGLE) || (skew <= - MINIMUM_SKEW_ANGLE);
+    if (!largeEnough)
+        return 0;
+
+    const double h = tan(skew / (180 / M_PI));
+    if (h > 0)
+    {
+        tilt_and_slant(1/h, 1); /* clockwise */
+        return 1;
+    }
+    if (h < 0)
+    {
+        tilt_and_slant(-(1/h), -1); /* counter clockwise */
+        return 1;
+    }
+    return 0;
+}
+
+#define DEBUG_TILT_AND_SLANT 1
+void RLEMap::tilt_and_slant(double step, int direction)
+/*--------------------------------------------------------------
+Primary Purpose: do the work of shifting the RLEMap
+Arguments: step - how many rows/columns to go before shifting by
+        one (deskew passes 1 / tan(skew))
+    direction - positive for clockwise, otherwise counter clockwise
+Return Value: none
+Effects: rotates the RLEMap by some amount by combining a tilt and
+    a slant.  For a positive direction the map is tilted and then
+    slanted; otherwise it is slanted (with the direction negated)
+    and then tilted.  Not an exact rotation.
+    Prints its arguments while DEBUG_TILT_AND_SLANT is non-zero.
+Constraints:
+Rev: AR 11/1/95
+---------------------------------------------------------------*/
+{
+    if(DEBUG_TILT_AND_SLANT)
+        printf("Call to tilt_and_slant: step = %lf, direction = %d\n ", step, direction);
+
+    const bool clockwise = (direction > 0);
+    if (!clockwise)
+    {
+        slant(step, -direction);
+        tilt(step, direction);
+        return;
+    }
+
+    tilt(step, direction);
+    slant(step, direction);
+}
+
+#define DEBUG_SLANT 1
+/* Slant a picture by shifting lines horizontally 1 bit every step rows.
+   direction 1 means shift to the right as the row number increases,
+   direction -1 means shift to the left.
+   This does not rotate the picture, since each row is unchanged in
+   itself; the effect of a positive direction is to "italicize".
+   fImageWidth grows by fImageLength / step columns.
+   Prints its arguments while DEBUG_SLANT is non-zero.
+*/
+void RLEMap::slant(double step, int direction)
+{
+    if(DEBUG_SLANT)
+        printf("Slant called, step = %lf, dir = %d\n", step, direction);
+
+    fImageWidth += (int)((double)fImageLength / (double)step);
+
+    const int rowsPerShift = (int)step;
+    int shift_amount = direction;
+    int num_steps = 1;
+
+    int i = 0;
+    while (i < fImageLength)
+    {
+        /* if we have gone through step rows, increment the shift */
+        if (i > (num_steps*rowsPerShift))
+        {
+            shift_amount += direction;
+            num_steps++;
+        }
+        fMapData[i]->shift(shift_amount);
+        i++;
+    }
+}
+
+
+void RLEMap::display_intervals(char* color)
+// Draws the map in the given color on the canvas
+// .main_window.display.work_space, scaled by SCALE_FACTOR.
+{
+    display_intervals(".main_window.display.work_space",
+                      SCALE_FACTOR,
+                      color);
+}
+
+void RLEMap::display_intervals(char* window, double scaleFactor, char * color)
+/*--------------------------------------------------------------
+Primary Purpose: Draw the rows of the map in a Tcl window
+Arguments: window - name of the Tcl widget to draw in
+    scaleFactor - display scale, must be > 0 (asserted)
+    color - color passed on to draw_pairs
+Effects: Does nothing unless DISPLAY_IMAGE is set.  Otherwise
+    deletes everything in the window, then calls draw_pairs for the
+    rows int(j / scaleFactor), j = 0, 1, 2, ... while the row is
+    below fImageLength, updating the status display as it goes.
+    NOTE(review): the row index is computed from j before j is
+    incremented, so row 0 is drawn twice - confirm whether that is
+    intended.
+---------------------------------------------------------------*/
+{
+    assert(scaleFactor > 0);
+    if(!DISPLAY_IMAGE)
+        return;
+
+    last_status = 0.0;
+    printf("scaleFactor = %lf ", scaleFactor);
+    const double skip = 1.0 / scaleFactor;
+    printf("Skip = %lf\n", skip);
+
+    /* delete any garbage hanging around */
+    docommand("%s delete all", window);
+
+    set_status("Displaying Image: 0%...");
+
+    int i = 0;
+    int j = 0;
+    while (i < fImageLength)
+    {
+        set_display_status((int)(i*skip), fImageLength);
+        fMapData[i]->draw_pairs(window, scaleFactor,
+                                i, color, 1.0/skip);
+
+        // next row to draw; j is advanced only afterwards
+        i = int(j * skip);
+        j++;
+    }
+
+    last_status = 0.0;
+    update();
+    set_status("Displaying Image: Done");
+}
+
+void RLEMap::tilt(double step, int direction)
+/*--------------------------------------------------------------
+Primary Purpose: Tilt the map into a new, taller array of rows
+Arguments: step, direction - passed on to tilt_row for every row
+Effects: Allocates fImageWidth / step extra rows, lets tilt_row
+    distribute every old row over the new rows, then replaces
+    fMapData and fImageLength and redraws the map in black.
+    NOTE(review): the old row array is not freed here (the original
+    author noted "probably want to delete old data").
+---------------------------------------------------------------*/
+{
+    const int old_height = fImageLength;
+    const int new_height = /* ceiling */ (int)(((double)fImageWidth) / step) + old_height;
+    const int delta = old_height - new_height;
+
+    RLEPairs ** new_data = new RLEPairs*[new_height];
+    for (int row = 0; row < new_height; row++)
+        new_data[row] = new RLEPairs(row);
+
+    for (int row = 0; row < old_height; row++)
+        tilt_row(row, delta, new_data, step, direction);
+
+    fMapData = new_data; /* probably want to delete old data */
+    fImageLength = new_height;
+    display_intervals("black");
+}
+
+
+void RLEMap::tilt_row(int old_row_index, int old_new_row_diff, RLEPairs** new_data, double step, int direction)
+/*--------------------------------------------------------------
+Primary Purpose: Spread one old row over the rows of new_data
+Arguments: old_row_index - row of fMapData to read
+    old_new_row_diff - old height minus new height (see tilt)
+    new_data - the new array of rows being filled
+    step - width in columns of each extracted piece
+    direction - row increment applied after each piece
+Effects: Starting at row old_row_index + old_new_row_diff *
+    direction, extracts pieces of step columns from the old row
+    and merges each into the current target row, moving one row in
+    direction after each piece.  Stops when the next piece would
+    reach fImageWidth or the target row leaves 0..fImageLength-1.
+---------------------------------------------------------------*/
+{
+    double cur_x = 0; /* I don't know what will happen with negative rows */
+    int cur_y = old_row_index + (old_new_row_diff * direction);
+
+    for (;;)
+    {
+        const double new_x = cur_x + step;
+        if (!(new_x < fImageWidth))
+            break;
+        if (cur_y < 0 || !(cur_y < fImageLength))
+            break;
+
+        RLEPairs* piece =
+            (fMapData[old_row_index])->extract((int) cur_x, (int) new_x);
+        new_data[cur_y]->merge(piece);
+
+        cur_x = new_x + 1;
+        cur_y += direction;
+    }
+}
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/RLEMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,105 @@
+#ifndef _RLEMap_H
+#define _RLEMap_H
+#include "system.h"
+#include "RLEPair.h"
+#include <math.h>
+
+
+
+/* The RLEMap Class is a run length encoded representation of a bitmap
+ The I/O functions defined for this class read and write data from
+ TIFF format files using the Silicon Graphics TIFF library.
+
+*/
+
+class RLEMap{
+ public:
+ // No copy constructor or assignment operator is declared below.
+ // Constructor, Destructor and copy functions
+ RLEMap();
+ ~RLEMap();
+
+ // Selector and Modifier functions
+ // (the three accessors return references that callers may assign through)
+ int & imageLength();
+ int & imageWidth();
+ inline int pixels_between(int start, int finish, int row_num){
+ return fMapData[row_num]->pixelsBetween(start, finish);
+ };
+
+ MapStatus & status();
+
+ // display_intervals draws the runs of the rows on a Tk canvas
+ void display_intervals(char* color);
+ void display_intervals(char* window, double scaleFactor,
+ char * color);
+ ListElement* FindNearHorizDot(int startCol, int startRow, int endRow);
+ ListElement* FindNearVertDot(int startCol, int endCol,
+ int startRow, int endRow);
+
+ // I/O operations. Read and Write are from/to 2 level TIFF files
+
+ MapStatus readMap(char * filename);
+ MapStatus writeMap(char * filename);
+
+ // Data Access and low level manipulation functions
+ // Access a row of the Map
+ // usage: rmap[i] returns row i of the RLEMap.
+ // row performs the same function
+ RLEPairs * operator [](int i);
+ RLEPairs * row(int i);
+
+ int pixelsInRegion(Point ul, Point lr);
+ short int grayScale(Point ul, Point lr);
+
+ // Shearing: tilt() swaps fMapData for a taller array filled by tilt_row()
+ void tilt_and_slant(double, int);
+ int deskew();
+ void tilt(double , int);
+ void slant(double, int);
+ void tilt_row(int, int, RLEPairs** , double, int);
+ RLEPairs ** fMapData;
+ // Array length = fImageLength One list
+ // for each row
+
+
+ // prints RLEMap representation for row range
+ void printPairs(int startRow, int endRow);
+
+private:
+ // Size and status info
+ int fImageLength;
+ int fImageWidth;
+
+ MapStatus fStatus;
+};
+
+void printMap(RLEMap * map);
+void testpixelsBetween(RLEMap * map);
+double get_skew(RLEMap*);
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/RLEPair.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,360 @@
+#include "system.h"
+#include "tcl_interface.h"
+
+
+
+
+/**** RLEPair.cc
+ Member functions for RLEPairs
+ RLEPair functions defined in the function header
+ rev 10/20 Kathey Marsden
+*/
+
+
+
+
+RLEPairs::RLEPairs(int rownum)
+// Creates an empty run list for image row `rownum`.
+  : List(),
+    numPixels(0),
+    row(rownum)
+{}
+
+
+RLEPairs::~RLEPairs()
+// Frees every RLEPair held by the list, then unlinks the list elements.
+{
+  ListElement *node = first;
+  while (node != NULL) {
+    if (node->item != NULL) delete (RLEPair *) (node->item);
+    node = node->next;
+  }
+  while (!IsEmpty()) Remove();
+}
+
+void RLEPairs::print_pairs()
+// Prints every run of this row as "(start->end)", then a single newline.
+{
+  for (ListElement *node = first; node != NULL; node = node->next) {
+    const RLEPair *run = (RLEPair *)(node->item);
+    printf("(%d->%d)", run->start, run->end);
+  }
+  printf("\n");
+}
+
+void RLEPairs::draw_pairs(char * window, double scaleFactor,
+                          int y_coord, char* color, double width)
+// Sends one "create line" command per run to canvas `window`; y_coord and
+// the run ends go through scale().  -width is only sent when width > 1.
+{
+  const bool thick = (width > 1);
+  scale(y_coord,scaleFactor);
+  for (ListElement *node = first; node != NULL; node = node->next) {
+    RLEPair *run = (RLEPair *)(node->item);
+    int x0 = run->start;
+    int x1 = run->end;
+    scale(x0,scaleFactor);
+    scale(x1, scaleFactor);
+    if (!thick)
+      docommand("%s create line %d %d %d %d -fill %s", window, x0, y_coord, x1, y_coord, color);
+    else
+      docommand("%s create line %d %d %d %d -fill %s -width %d", window, x0, y_coord, x1, y_coord, color, (int)width);
+  }
+}
+
+void RLEPairs::shift(int bits)
+/*--------------------------------------------------------------
+Primary Purpose: Move every run of this row right by `bits` columns
+                 (left when bits is negative).
+Arguments: bits - signed column offset added to both ends of each run
+Return Value: none
+Effects: Rewrites start and end of every RLEPair in place.  No
+         clipping against the image width is done here.
+Rev: 11/1 AR
+---------------------------------------------------------------*/
+{
+  ListElement *node = first;
+  while (node != NULL) {
+    RLEPair *run = (RLEPair *)(node->item);
+    run->start += bits;
+    run->end += bits;
+    node = node->next;
+  }
+}
+
+void RLEPairs::fill(unsigned char * contents, int contentsLength,
+                    int contentsRow)
+/*--------------------------------------------------------------
+Primary Purpose: Replace the contents of this row with the runs of
+black pixels found in a packed, one bit per pixel scanline
+Arguments: contents is the result of a TIFFReadScanline function;
+             the high bit of a byte is its leftmost pixel, 1 is black
+           contentsLength is the number of unsigned chars in contents
+           contentsRow is the Row that this interval belongs to
+Return Value: none
+Effects: Unlinks the old list elements, restarts numPixels at 0 and
+         appends one RLEPair per run of black pixels.  A run that is
+         still open when the scanline ends is closed at its last black
+         column (it used to be dropped).
+Constraints: columns are kept in short ints, like the fields of RLEPair
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+  if(contentsRow != row)
+    printf("Warning: merging to %d what appears to belong at %d\n", row, contentsRow);
+
+  bool inPair = FALSE;       // TRUE while inside a run of black pixels
+  short int startCol = 0;
+  short int endCol = 0;
+
+  // Unlink the old pairs.  The successor is read before removeAt() so the
+  // walk never touches an element that has already been removed.
+  // NOTE(review): whether removeAt() also frees the RLEPair is not visible
+  // from here - confirm against List.
+  for (ListElement *ptr = first; ptr != NULL; )
+    {
+      ListElement *next = ptr->next;
+      removeAt(ptr);
+      ptr = next;
+    }
+  numPixels = 0;
+
+  for (int c = 0; c < contentsLength; c++)
+    {
+      const unsigned char nextChar = contents[c];
+
+      // All white byte and no run open: nothing to record.
+      if (nextChar == 0 && !inPair) continue;
+
+      if (nextChar == 255)     // eight black pixels: start or extend a run
+        {
+          if (!inPair)
+            {
+              startCol = c*8;
+              inPair = TRUE;
+            }
+          endCol = c*8+7;
+          continue;
+        }
+
+      // Any other byte is walked bit by bit, high bit on the left.
+      for (int i = 7; i >= 0; i--)
+        {
+          const short int curCol = 7 + c * 8 - i;
+          if ((nextChar>>i)&1)   // black pixel
+            {
+              if (!inPair)
+                {
+                  inPair = TRUE;
+                  startCol = curCol;
+                }
+              endCol = curCol;
+            }
+          else if (inPair)       // white pixel ends the current run
+            {
+              Append(new RLEPair(startCol, endCol, contentsRow));
+              numPixels += endCol - startCol + 1;
+              inPair = FALSE;
+            }
+        }
+    }
+
+  // Close a run that touches the right edge of the scanline.
+  if (inPair)
+    {
+      Append(new RLEPair(startCol, endCol, contentsRow));
+      numPixels += endCol - startCol + 1;
+    }
+}
+
+
+#define DEBUG_PIXELS_BETWEEN 0
+int RLEPairs::pixelsBetween(int startCol, int endCol)
+/*--------------------------------------------------------------
+Primary Purpose: Count the black pixels of this row inside a column range
+Arguments: startCol - first column of the range
+           endCol - last column of the range
+Return Value: Returns the number of black pixels between column
+              startCol and column endCol, both columns included.
+Effects: Leaves the list untouched; may print to stdout.
+Constraints: 0 <= startCol < endCol; endCol < imageWidth of RLEMap.
+             If startCol is not below endCol a warning is printed and
+             startCol is replaced by endCol.  The runs are assumed to
+             be stored in increasing column order.
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+  if (DEBUG_PIXELS_BETWEEN)
+    printf("Call to pixels between: start = %d, finish = %d\n", startCol, endCol);
+
+  /* assert(startCol < endCol) was relaxed to a warning */
+  if (startCol >= endCol)
+    {
+      printf("Warning, startcol %d not less then endcol %d (setting startcol = endcol)\n", startCol, endCol);
+      startCol = endCol;
+    }
+
+  // Sum of the overlaps found so far.
+  int total = 0;
+  ListElement *node = first;
+
+  // Walk the runs from left to right.
+  while (node != NULL)
+    {
+      const RLEPair *run = (RLEPair *)(node->item);
+      const int runStart = run->start;
+      const int runEnd = run->end;
+
+      // Nothing further right can overlap the range.
+      if (runStart > endCol) break;
+
+      // A run that ends left of startCol contributes nothing.
+      if (runEnd >= startCol)
+        {
+          // Length of the overlap of runStart..runEnd with
+          // startCol..endCol, both ends included.
+          const int from = (runStart > startCol) ? runStart : startCol;
+          const int to = (runEnd < endCol) ? runEnd : endCol;
+          total += to - from + 1;
+        }
+
+      node = node->next;
+    }
+
+  return total;
+}
+
+
+RLEPairs * RLEPairs::extract(int startCol, int endCol)
+/*--------------------------------------------------------------
+Primary Purpose: Makes a copy of a section of this row
+Arguments: startCol - starting column to extract
+           endCol - ending column to extract
+Return Value: a pointer to a new RLEPairs (to be freed by the caller)
+  holding the parts of this row's runs that lie in startCol..endCol.
+  Each copied RLEPair keeps the row number of the pair it came from.
+Effects: this is left unchanged.  The row of each visited pair used to
+  be stored into this->row as a side effect; a local is used instead.
+  numPixels of the returned list counts the pixels copied into it.
+Constraints: the runs of this row are assumed to be ordered by column
+Rev: KM 11/16
+---------------------------------------------------------------*/
+{
+  RLEPairs * returnPairs = new RLEPairs(row);
+
+  for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next) {
+    const RLEPair * item = (RLEPair *)(ptr->item);
+    const int pairStart = item->start;
+    const int pairEnd = item->end;
+    const int pairRow = item->row;
+
+    // Don't loop anymore if past endCol
+    if (pairStart > endCol) break;
+
+    // range starts after this pair
+    if (pairEnd < startCol) continue;
+
+    int from;   // first column copied from this pair
+    int to;     // last column copied from this pair
+
+    // range starts and ends in this pair
+    if (pairStart <= startCol && pairEnd >= endCol)
+      {
+        from = startCol;
+        to = endCol;
+      }
+    // range starts in this pair but ends later
+    else if (pairStart <= startCol && pairEnd <= endCol)
+      {
+        from = startCol;
+        to = pairEnd;
+      }
+    // range includes this whole pair
+    else if (pairStart >= startCol && pairEnd <= endCol)
+      {
+        from = pairStart;
+        to = pairEnd;
+      }
+    // range ends in the middle of this pair
+    else
+      {
+        from = pairStart;
+        to = endCol;
+      }
+
+    returnPairs->Append(new RLEPair(from, to, pairRow));
+    returnPairs->numPixels += to - from + 1;
+  }
+/* printf("Extract returning: ");
+ returnPairs->print_pairs(); */
+  return returnPairs;
+}
+
+
+
+void RLEPairs::merge(RLEPairs * pairs)
+/*--------------------------------------------------------------
+Primary Purpose: Inserts the black regions of pairs into this.
+Arguments: pairs - RLEPair list to be combined with this one.
+Return Value: none
+Effects: Rebuilds this as the union of both run lists.  pairs is only
+  read: it is neither changed nor freed and stays owned by the caller.
+Constraints: run columns must not be negative
+Rev: 11/16/95
+---------------------------------------------------------------*/
+{
+  if((!pairs) || (pairs->length == 0))
+    return;
+
+  // Rightmost column used by either list.  this may still be empty (a
+  // freshly constructed row), in which case it has no last element to ask.
+  int lastCol = ((RLEPair *) (pairs->last->item))->end;
+  if ((length != 0) && (last != NULL))
+    {
+      const int ownLast = ((RLEPair *) (last->item))->end;
+      if (ownLast > lastCol) lastCol = ownLast;
+    }
+
+  // One bit per column, plus a spare all white byte so that fill() always
+  // meets a white pixel after the last run.
+  const int numChars = lastCol/8 + 2;
+  uchar * buffer = new uchar[numChars];
+  for(int i = 0; i < numChars; i++) buffer[i] = 0;
+
+  for (ListElement * ptr2 = pairs->first; ptr2 != NULL; ptr2 = ptr2->next)
+    {
+      const RLEPair * item = (RLEPair *)(ptr2->item);
+      setRange(buffer, item->start, item->end);
+    }
+
+  for (ListElement * ptr=first; ptr != NULL; ptr = ptr->next)
+    {
+      const RLEPair * item = (RLEPair *)(ptr->item);
+      setRange(buffer, item->start, item->end);
+    }
+
+  // fill() drops the old elements of this and rescans the bitmap
+  fill(buffer, numChars, row);
+  delete [] buffer;
+/* printf("Merged to form: "); print_pairs(); */
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/RLEPair.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,69 @@
+#ifndef _RLEPair_H
+#define _RLEPair_H
+
+#include "list.h"
+
+
+// Indicates a series of black pixels in a row.
+class RLEPair {
+public:
+  // An empty pair at column 0 that belongs to no row (row == -1).
+  RLEPair()
+    :start(0),end(0),row(-1) {};
+  // A run of black pixels covering columns first..last of row thisRow.
+  RLEPair(int first, int last, int thisRow)
+    :start(first), end(last), row(thisRow) {};
+  RLEPair(int first, int last)
+    :start(first), end(last), row(-1) {};
+  ~RLEPair() {};
+  // Columns and row are stored as short int, so values must fit in a short.
+  short int start;
+  short int end;
+  short int row;
+};
+
+
+
+class RLEPairs :public List {
+ public:
+ // One image row: a List whose items are RLEPair runs of black pixels.
+ RLEPairs(int row_num);
+ ~RLEPairs();
+
+ // Create RLEPair Representation of contents read from TIFF file
+ void fill(unsigned char * contents, int contentsLength, int contentsRow);
+ int pixelsBetween(int start, int end);
+ int numPixels;
+ void shift(int);
+ void draw_pairs(char * window, double scaleFactor,
+ int y_coord, char* color, double width);
+ void print_pairs();
+ // numPixels (above) is increased by fill() for every run it appends.
+ RLEPairs * extract(int startcol, int endcol);
+ // create a copy of this from startcol to endcol.
+ // and return pointer to RLEPairs
+ // (the copy is allocated with new and not tracked by this)
+ void merge(RLEPairs * pairs);
+ // Merges pairs into this.
+
+ private:
+ int row;
+ // row: the row number given to the constructor; fill() warns when it
+ // is handed data for a different row.
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Word.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,233 @@
+#include "list.h"
+#include "system.h"
+#include "stdio.h"
+
+extern Page * global_page;
+
+Word::Word(char * word, int length)
+/*--------------------------------------------------------------
+Primary Purpose: Create a word without pointers to components (character is NULL)
+Arguments: word is the character string length is the length
+Effects: characters holds the whole string and its NUL even if length is smaller
+Rev: 12/5/95
+---------------------------------------------------------------*/
+{
+  charCount = 1;
+  compCount = 0;
+  const int textLen = (int) strlen(word);   // strcpy into new char[length] overran
+  characters = new char[(textLen >= length ? textLen : length) + 1];
+  strcpy(characters, word);
+  confid = 255;
+  ul = NOPNT;
+  lr = NOPNT;
+  character = NULL;
+  mispelled = 0;
+}
+
+
+Word::Word(ListElement * first, int count, int charlength)
+/*--------------------------------------------------------------
+Primary Purpose: Build a word from a run of consecutive list elements
+Arguments: first is a pointer to a ListElement that contains
+the first Component in the word. count is the number
+of components in the word. charlength is the actual length of the word
+
+Effects: Sets all data members of the word class.  confid becomes the
+lowest component confidence, ul.y() the smallest component ul.y()
+and lr the lr of the last component.  Text that does not fit in
+charlength characters is cut off instead of overrunning the buffer.
+Rev: 11/6/95
+---------------------------------------------------------------*/
+{
+  mispelled = 0;
+  Component * firstComp = (Component *) first->item;
+  int charOffset = 0;
+  ListElement * ptr = first;
+
+  characters = new char[charlength+1];    // ascii translation
+  memset(characters, 0, charlength+1);    // no stale bytes if the text is short
+  character = new Component *[count];     // was the non standard new (T *)[n]
+  charCount = charlength;
+  compCount = count;
+  ul = firstComp->ul();
+
+  // NOTE(review): the other constructor starts from 255; whether 256 fits
+  // in Confidence is not visible from here - confirm.
+  confid = 256;
+
+  for (int i = 0; i < count; ptr = ptr->next, i++)
+    {
+      Component * item = (Component *)(ptr->item);
+      int len = item->asciiLen();
+      if (charOffset + len > charlength)
+        {
+          printf("\ncharOffset: %d charlength: %d", charOffset + len, charlength);
+          // Keep the copy inside the buffer instead of running past it.
+          len = (charOffset < charlength) ? charlength - charOffset : 0;
+        }
+      strncpy(&(characters[charOffset]), item->fasciiId, len);
+      charOffset += len;
+
+      character[i] = item;
+      if (item->confid() < confid)
+        confid = item->confid();
+      if(item->ul().y() < ul.y())
+        ul.y() = item->ul().y();
+      if(i == count-1)   // this is the last character
+        lr = item->lr();
+    }
+
+  characters[charlength] = '\0';
+}
+
+
+ Word::~Word()
+{
+  delete [] characters;   // both arrays come from new[]; delete [] on NULL is a no-op
+  delete [] character;
+}
+
+
+Words::~Words()
+// Deletes every Word stored in the list (list elements are left linked).
+{
+  for (ListElement *node = first; node != NULL; node = node->next) {
+    Word *word = (Word *) (node->item);
+    if (word != NULL) delete word;
+  }
+}
+
+int Words::writeWordPos(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word position, confidence, length and string to file
+Arguments: output file name
+Return Value: 1 for success, 0 when the file cannot be opened
+Effects: create filename and write one line per word holding
+ upperleft x pos
+ upperleft y pos
+ word confidence
+ character count
+ string
+Each of the four numbers is printed in a fixed column 7 characters wide.
+Words whose ul.x() is -1 (the new line entries) are left out.
+Rev: KM 11/25
+---------------------------------------------------------------*/
+{
+  FILE * out = fopen(filename, "w");
+  if (out == NULL)
+    {
+      printf("Error openning %s", filename);
+      return 0;
+    }
+  ListElement * node = first;
+  while (node != NULL)
+    {
+      Word * entry = (Word *) node->item;
+      node = node->next;
+      if (entry->ul.x() != -1)   // dont print new lines.
+        fprintf(out, " %6d %6d %6d %6d %s\n", entry->ul.x(), entry->ul.y(),
+                entry->confid, entry->charCount, entry->characters );
+    }
+  fclose(out);
+  return 1;
+}
+
+
+int Words::writeWordbox(char * filename, int xoffset=0, int yoffset=0,
+ Page * page=global_page, bool equationsOnly=0)
+/*--------------------------------------------------------------
+Primary Purpose: Write Scanworx wordbox format
+Arguments: output file name, x and y offset added to every coordinate,
+parent page, and a bool for printing equations only
+Return Value: 1 for success, 0 when the file cannot be opened
+Effects: create filename, write an empty line and then, for each word,
+ string
+ upperleft x pos, upperleft y pos
+ lowerright x pos, lowerright y pos
+ upperright x pos, upperright y pos
+ lowerleft x pos, lowerleft y pos
+on one line.  A word whose ul.x() is -1 gives an empty line instead.
+With equationsOnly set, words whose center is not inside an equation
+of page (page->inEquation) are skipped.
+Rev: KM 11/25
+---------------------------------------------------------------*/
+{
+  FILE * out = fopen(filename, "w");
+  Point cntr;
+
+  if (!out)
+    {
+      printf("Error openning %s", filename);
+      return 0;
+    }
+
+  fprintf(out, "\n"); // print new lines.
+  for (ListElement * node = first; node != NULL; node = node->next)
+    {
+      Word * entry = (Word *) node->item;
+      cntr = entry->center();
+
+      // In equations only mode, leave out words centred outside every equation.
+      if (equationsOnly && !(page->inEquation(cntr.x(), cntr.y())))
+        continue;
+
+      if (entry->ul.x() == -1)
+        {
+          fprintf(out, "\n"); // print new lines.
+          continue;
+        }
+
+      // Corners in the order ul, lr, ur, ll.
+      fprintf(out, "%s %d %d %d %d %d %d %d %d \n",
+              entry->characters,
+              entry->ul.x() + xoffset, entry->ul.y() + yoffset,
+              entry->lr.x() + xoffset, entry->lr.y() + yoffset,
+              entry->lr.x() + xoffset, entry->ul.y() + yoffset,
+              entry->ul.x() + xoffset, entry->lr.y() + yoffset);
+    }
+  fclose(out);
+  return 1;
+}
+
+
+
+
+
+int Words::writeAscii(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word list to ascii file
+Arguments: filename to write to
+Return Value: 1 if successful 0 if unsuccessful
+Effects: Writes the text of every word to the file, each followed
+by one blank.
+Rev: 11/25 KM
+---------------------------------------------------------------*/
+{
+  // The unused "prev" local that read first->item was removed: it
+  // dereferenced first even when the list holds no element.
+
+  FILE * outfile = fopen(filename, "w");
+  if (outfile == NULL)
+    {
+      printf("Error openning %s", filename);
+      return 0;
+    }
+
+  for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
+    {
+      Word * word = (Word *) ptr->item;
+      fprintf(outfile, "%s ", word->characters );
+    }
+
+  fclose(outfile);
+  return 1;
+}
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Word.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,73 @@
+
+#ifndef _WORD_H_
+#define _WORD_H_
+
+#include "list.h"
+#include "Component.h"
+#include "string.h"
+
+class Page;
+
+class Word {
+ public:
+ // A recognized word: its text, its bounding box and the components behind it.
+ // Constructor scans through count components starting at first
+ // to set data fields. length is the actual length of the string
+ Word(ListElement * first, int count, int charlength);
+ Word(char * word, int length);
+ ~Word();
+ // center(): midpoint of the box spanned by ul and lr.
+ Point center() { return (Point((ul.x() + lr.x())/2,
+ (ul.y() +lr.y())/2)); };
+ char * characters; // text string
+ int charCount; // Number of characters in the string
+ int compCount;
+ // compCount: number of entries in character (0 for text only words).
+ short mispelled;
+ // spelled correctly? only valid if the word
+ // has been spellchecked
+ // Both constructors in Word.cc start mispelled at 0.
+ Confidence confid; // Lowest component confidence (Word.cc takes the minimum, not an average)
+ Point ul; // upper left point of word
+ Point lr; // lower right point of word
+ // NOTE: character is NULL when the word was built from plain text.
+ Component ** character; // an array of components that
+ // make up this word
+};
+
+
+class Words:public List{
+  // List of Word objects; the destructor deletes the stored words.
+ public:
+  int num_words;   // starts at 0; not updated by the methods in Word.cc
+  Words():List(), num_words(0) {};
+  ~Words();
+  int writeWordPos(char * filename);
+  int writeWordbox(char * filename, int xoffset, int yoffset,
+                   Page * page, bool equationsOnly);
+  int writeAscii(char * filename);
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Zone.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,72 @@
+#include "Zone.h"
+#include "system.h"
+
+
+Zone::Zone(Point ulp, Point lrp, ZonedPage * parentPage)
+  : ful(ulp), flr(lrp), fpage(new Page), fparent(parentPage)
+// Fills the new Page's rmap() from the parent's bmap() limited to ul..lr, then its bmap() from that rmap().
+{
+  convertMap(fparent->bmap(),fpage->rmap(), ful, flr);
+  convertMap(fpage->rmap(), fpage->bmap(), NOPNT, NOPNT);
+}
+
+void Zone::buildPage(ZonedPage * parentPage)
+// (Re)builds this zone's Page from parentPage; any Page built earlier is
+// deleted first.  ful and flr must already be set.
+{
+  delete fpage;              // no-op when fpage is NULL
+  fparent = parentPage;
+  fpage = new Page;
+  Page * const page = fpage;
+  convertMap(fparent->bmap(), page->rmap(), ful, flr);
+  convertMap(page->rmap(), page->bmap(), NOPNT, NOPNT);
+}
+
+Zone::~Zone()
+{
+  if (fpage != NULL) delete fpage;   // only the zone's own Page; fparent is left alone
+}
+
+Zones::Zones()   // starts as an empty List
+  : List()
+{ }
+
+Zone * Zones::findZone(int x, int y)
+// First zone whose ul..lr rectangle (edges included) contains (x, y), else NULL; the walk ends at a NULL item.
+{
+  ListElement *node = first;
+  while (node != NULL && node->item != NULL)
+    {
+      Zone * zone = (Zone *) node->item;
+      const bool insideX = x >= zone->ul().x() && x <= zone->lr().x();
+      if (insideX && y >= zone->ul().y() && y <= zone->lr().y())
+        return zone;
+      node = node->next;
+    }
+  return NULL;
+}
+
+
+Zones::~Zones()
+// Deletes every stored Zone and then empties the list.  Elements with a
+// NULL item are skipped rather than ending the walk, so zones stored after
+// such an element are no longer leaked.
+{
+  for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next)
+    if (ptr->item != NULL)
+      delete (Zone *) (ptr->item);
+  while(!IsEmpty()) Remove();
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/Zone.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,70 @@
+#ifndef _ZONE_H
+#define _ZONE_H
+#include "stdlib.h"
+#include <iostream.h>
+#include "Point.h"
+#include "system.h"
+#include "list.h"
+#include "RLEMap.h"
+#include "BitMap.h"
+#include "Page.h"
+
+class Page;
+class ZonedPage;
+
+extern Point NOPNT;
+
+class Zone
+{
+ // A rectangular region (ul..lr) of a parent page together with a Page of
+ // its own built from that region.  The zone deletes its Page, not the parent.
+public:
+ Zone()
+ : ful(NOPNT) , flr(NOPNT) , fpage(NULL), fparent(NULL)
+ {};
+ Zone(Point ulp, Point lrp)
+ :ful(ulp), flr(lrp), fpage(NULL), fparent(NULL)
+ {};
+ // The two constructors above only record corners; this one also builds the Page.
+ Zone(Point ulp, Point lrp, ZonedPage * parentPage);
+
+ ~Zone();
+ // Replaces any existing Page with one rebuilt from parentPage (uses ful/flr).
+ void buildPage(ZonedPage * parentPage);
+
+ // Accessors: the corners are returned by value, the pages as raw pointers.
+ inline Point ul() {return ful;};
+ inline Point lr() {return flr;};
+ inline Page * page() {return fpage;};
+ inline Page * parent() {return fparent;};
+
+private:
+ Point ful;
+ Point flr;
+ Page * fpage;
+ Page * fparent; // page of which this is a part
+};
+// Zones: a List whose items are Zone pointers.  The destructor deletes the
+// zones it holds (see Zone.cc).
+class Zones : public List
+{
+public:
+ Zones();
+ Zone * findZone(int x, int y);
+ ~Zones();
+ // findZone returns the first zone whose rectangle contains (x, y), else NULL.
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/browser.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,35 @@
+#!/usr/sww/bin/wish -f
+# Minimal file browser: lists a directory in a listbox; a double click opens the selected entries.
+scrollbar .scroll -command ".list yview"
+listbox .list -yscrollcommand ".scroll set" -relief raised -geometry 20x20
+pack append . .scroll {right filly} .list {left expand fill}
+# Directory to show: the first command line argument, otherwise the current directory.
+if {$argc > 0} {
+ set dir [lindex $argv 0]
+} else {
+ set dir .
+}
+foreach i [exec ls -aF $dir] {
+ .list insert end $i
+}
+proc browse {dir file} {
+ if {$dir != "."} {set file $dir/$file}
+ if [file isdirectory $file] {
+ exec browser.tcl $file &
+ } else {
+ if [file isfile $file] {
+ exec mx $file &
+ } else {
+ puts stdout "This is neither dir nor regular file"
+ }
+ }
+}
+# Control-c closes the window; a double click runs browse on each selected entry (directories start another browser.tcl, regular files are opened with mx).
+bind .list <Control-c> {destroy .}
+bind .list <Double-Button-1> {
+ foreach i [selection get] {browse $dir $i}
+}
+focus .list
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/caution.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define WarningBits_width 32
+#define WarningBits_height 32
+static char WarningBits_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x10,0x01,
+ 0x00,0x00,0x08,0x07,0x00,0x00,0x08,0x0e,0x00,0x00,0x04,0x0e,0x00,0x00,0x04,
+ 0x1c,0x00,0x00,0x02,0x1c,0x00,0x00,0xe2,0x38,0x00,0x00,0xf1,0x39,0x00,0x00,
+ 0xf1,0x71,0x00,0x80,0xf0,0x71,0x00,0x80,0xf0,0xe1,0x00,0x40,0xf0,0xe1,0x00,
+ 0x40,0xf0,0xc1,0x01,0x20,0xf0,0xc1,0x01,0x20,0xf0,0x81,0x03,0x10,0xe0,0x80,
+ 0x03,0x10,0xe0,0x00,0x07,0x08,0xe0,0x00,0x07,0x08,0xe0,0x00,0x0e,0x04,0x00,
+ 0x00,0x0e,0x04,0xe0,0x00,0x1c,0x02,0xf0,0x01,0x1c,0x02,0xf0,0x01,0x38,0x01,
+ 0xe0,0x00,0x38,0x01,0x00,0x00,0x70,0x01,0x00,0x00,0x70,0xff,0xff,0xff,0x7f,
+ 0xf8,0xff,0xff,0x3f,0x00,0x00,0x00,0x00};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/comp_menu.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,133 @@
+set Comp_menu_up 0
+# Comp_menu_up (above): 1 while the component menu is displayed.  curCompId: current component ascii value
+set curCompId " "
+# Screen position of the component menu: set by init_component_menu and
+# reused by component_learn to place its dialog next to it.
+set comp_menux 0
+set comp_menuy 0
+# init_component_menu: pop up the component menu next to position x y (w is not used).
+proc init_component_menu { w x y } {
+ global BACKGROUND FOREGROUND FONT Comp_menu_up comp_menux \
+ comp_menuy validComponent
+ # (a stray word "global" was dropped from the list above)
+ # Only one menu at a time: if it is already up, just give it the focus.
+ if { $Comp_menu_up == 1 } {
+ focus .comp_menu.menu
+ return 1
+ }
+ # Remember the menu position; component_learn places its dialog relative to it.
+ set Comp_menu_up 1
+ toplevel .comp_menu
+ set comp_menux [expr int([expr $x+50])+ 200]
+ set comp_menuy [expr int([expr $y])+ 100]
+
+ wm geometry .comp_menu +$comp_menux+$comp_menuy
+ wm title .comp_menu "Component..."
+ frame .comp_menu.menu -background $BACKGROUND
+ # One button per component operation; each command is a proc of this file.
+ button .comp_menu.menu.join -text "Join" -command component_join \
+ -bg $BACKGROUND -fg $FOREGROUND -font $FONT
+ button .comp_menu.menu.horiz -text "Horizontal Split" \
+ -command component_horizontal_split \
+ -bg $BACKGROUND -fg $FOREGROUND -font $FONT
+ # NOTE(review): component_vertical_split is not defined in this file - confirm it exists elsewhere.
+ button .comp_menu.menu.vert -text "Vertical Split" -command \
+ component_vertical_split \
+ -bg $BACKGROUND -fg $FOREGROUND -font $FONT
+ button .comp_menu.menu.learn -text "Learn" -command component_learn \
+ -bg $BACKGROUND -fg $FOREGROUND -font $FONT
+ button .comp_menu.menu.close -text "Cancel" -command {
+ grab release .comp_menu
+ destroy .comp_menu; set Comp_menu_up 0 ; } \
+ -bg $BACKGROUND -fg $FOREGROUND -font $FONT
+
+ pack .comp_menu.menu .comp_menu.menu.join .comp_menu.menu.horiz \
+ .comp_menu.menu.vert .comp_menu.menu.learn \
+ .comp_menu.menu.close -fill x
+ focus .comp_menu.menu
+ #grab .comp_menu
+
+}
+# component_join: close the component menu and arm a one-shot <Double-1>
+# binding on the work space that selects a component and runs JOIN_COMP.
+proc component_join { } {
+global Comp_menu_up
+puts stdout "component_join"
+grab release .comp_menu.menu
+#temporarily bind <Double-1> to join components
+bind .main_window.display.work_space <Double-1> {
+ component_select %W %x %y
+ # joins this selection with next selected component
+ JOIN_COMP
+ # NOTE(review): raw event coordinates above, canvas coordinates below - confirm intended
+ #put <Double -1> back to the old setting
+ bind .main_window.display.work_space <Double-1> {
+ component_select %W [%W canvasx %x] [%W canvasy %y]
+ init_component_menu %W [%W canvasx %x] [%W canvasy %y]
+ }
+}
+destroy .comp_menu
+set Comp_menu_up 0
+}
+# component_horizontal_split: run SPLIT_COMP_HORIZ, then close the component menu.
+proc component_horizontal_split { } {
+global Comp_menu_up
+puts stdout "component_horizontal_split"
+grab release .comp_menu.menu
+SPLIT_COMP_HORIZ
+
+destroy .comp_menu
+set Comp_menu_up 0
+
+}
+# component_learn: ask for an id in a small dialog and pass it to LEARN_COMP;
+# the component menu itself is closed as soon as the dialog has been built.
+proc component_learn { } {
+ global Comp_menu_up curCompId comp_menux comp_menuy
+puts stdout "component_learn"
+#pops up a little window to put in id for component
+ global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT write_image
+
+ # The dialog is placed 20 pixels right of and below the component menu and takes the grab.
+ toplevel .learn_comp -background $BACKGROUND
+ wm geometry .learn_comp +[expr $comp_menux +20]+[expr $comp_menuy +20]
+ wm title .learn_comp "Learn Component Id"
+ grab set .learn_comp
+
+
+ # Contents: a bitmap, a prompt with an entry bound to curCompId, and OK / Cancel buttons.
+ label .learn_comp.image -bitmap @$write_image -foreground $FOREGROUND -background $BACKGROUND
+ frame .learn_comp.s -background $BACKGROUND
+ label .learn_comp.s.txt -text "Learn Component as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+entry .learn_comp.s.ent -relief sunken -bd 2 -textvariable curCompId -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ pack .learn_comp.s.txt .learn_comp.s.ent -side top
+ frame .learn_comp.buttons
+button .learn_comp.buttons.ok -text OK -command { LEARN_COMP $curCompId ; \
+ grab release .learn_comp; destroy .learn_comp} -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+button .learn_comp.buttons.cancel -text Cancel -command {destroy .learn_comp } -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .learn_comp.buttons.ok .learn_comp.buttons.cancel -side left -expand 1 -fill x
+ pack .learn_comp.image .learn_comp.s .learn_comp.buttons -side top
+ .learn_comp.s.ent icursor 0
+ .learn_comp.s.ent select range 0 10
+ focus .learn_comp.s.ent
+ # Return in the entry does the same as the OK button.
+ bind .learn_comp.s.ent <Return> {
+ LEARN_COMP $curCompId
+ grab release .learn_comp
+ destroy .learn_comp
+ }
+
+destroy .comp_menu
+set Comp_menu_up 0
+}
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/convertMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,187 @@
+/** convertMap.cc
+
+Functions for converting from one Map form to another
+and extracting smaller maps. There are four polymorphic
+functions. The general format is
+
+convertMap(MapType * source,MapType * target, Point ul, Point lr)
+
+MapTypes can be - BitMap or RLEMap,
+source - the map to be converted from
+target - The map to be converted to. Memory will be allocated
+ for target's contents.
+ul and lr mark a range to copy. A corner passed as the global var NOPNT
+defaults to that corner of the source, so two NOPNTs copy the entire map.
+
+Functions return a MapStatus which will be VALID or OTHERERROR
+if there was an error in the conversion. An error might be due
+to a user specifying an out of range ul and lr
+
+**************************************************************/
+#include "system.h"
+#include "Point.h"
+#include "BitMap.h"
+#include "RLEMap.h"
+#include "convertMap.h"
+
+/**** BitMap Conversion ***/
+
+MapStatus convertMap(BitMap * source, RLEMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose: Convert a BitMap (or a rectangle of it) to an RLEMap
+Arguments: source and target maps; ul and lr are the corners of the
+  region to convert, NOPNT standing for the matching corner of source.
+  NOTE: columns are taken from the byte that holds ul.x(), so a ul.x()
+  that is not a multiple of 8 is not honoured exactly.
+Return Value: VALID, or OTHERERROR when lr > ul does not hold.
+Effects: Sets width, length and status of target and allocates one
+  RLEPairs per row in target->fMapData.
+Constraints: target is a pointer to an RLEMap
+Rev: 10/24/95
+---------------------------------------------------------------*/
+{
+  assert(target != NULL); // target must be allocated with new RLEMap
+                          // before being passed to this function.
+  assert(source !=NULL);
+  if (ul == NOPNT) ul = Point(0,0);
+  if (lr == NOPNT) lr = Point(source->imageWidth()-1,
+                              source->imageLength()-1);
+  if (!(lr > ul)) return OTHERERROR;
+
+  target->imageWidth() = lr.x() - ul.x()+1;
+  target->imageLength() = lr.y() - ul.y()+1;
+  target->status() = source->status();
+
+  const int numRows = target->imageLength();
+  // number of char entries handed to fill() for every row
+  const int numChars = target->imageWidth() /8 + 1;
+  const int firstByte = ul.x()/8;
+
+  // was "new (RLEPairs*)[n]", a form current compilers reject
+  target->fMapData = new RLEPairs*[numRows];
+  for (int i = 0; i < numRows; i++)
+    {
+      // Create a list of RLEPairs for this row and fill with buffer data
+      uchar * rowdata = source->row(i+ul.y());
+      target->fMapData[i] = new RLEPairs(i);
+      target->fMapData[i]->fill(&(rowdata[firstByte]), numChars, i);
+    }
+
+  return VALID;
+}
+
+
+
+/**** RLEMap Conversion ***/
+MapStatus convertMap(RLEMap * source, BitMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Purpose: Converts an RLEMap to a BitMap;
+Arguments: source and target maps.  ul and lr are accepted for symmetry
+  with the other overloads but are not used: the whole map is converted.
+Return Value: always VALID
+Effects: Copies width, length and status to target and allocates one
+  zeroed row of imageWidth()/8 + 1 bytes per image row, in which the
+  columns of every run are then marked with setRange().
+Constraints: target is a pointer to a BitMap which must be previously
+allocated with new BitMap;
+Rev: 10/24/95
+---------------------------------------------------------------*/
+{
+  assert(target != NULL); // target must be allocated with new BitMap
+                          // before being passed to this function.
+  assert(source !=NULL);
+
+  // Same geometry and status as the source.
+  target->imageWidth() = source->imageWidth();
+  target->imageLength() = source->imageLength();
+  target->status() = source->status();
+
+  const int numRows = source->imageLength();
+  // One bit per column; the + 1 leaves at least one spare bit at the row end
+  const int numChars = source->imageWidth() /8 + 1; // number of char entries
+
+  // NOTE(review): rows that target may already own are not released here -
+  // confirm that callers always pass a freshly created BitMap.
+  // was "new (uchar *)[n]", a form current compilers reject
+  target->fMapData = new uchar *[target->imageLength()];
+
+  for (int i = 0; i < numRows; i++)
+    {
+      // Start from an all white row
+      uchar * bits = new uchar[numChars];
+      for(int j=0; j < numChars; j++) bits[j] = 0;
+      target->fMapData[i] = bits;
+
+      // convert this row from RLE to uchars
+      RLEPairs * rmapRowData = source->row(i);
+
+      // Every run turns its columns start..end black.
+      for (ListElement *ptr = rmapRowData->first; ptr != NULL; ptr = ptr->next)
+        {
+          const RLEPair * item = (RLEPair *)(ptr->item);
+          const int startX = item->start;
+          const int endX = item->end;
+          setRange(bits, startX, endX);
+        }
+
+    }
+
+  return VALID;
+}
+
+
+//MapStatus convertMap(RLEMap * source, RLEMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose:
+Arguments:
+Return Value:
+Effects:
+Constraints:
+Rev:
+---------------------------------------------------------------*/
+//{
+
+//return EMPTY;
+//}
+
+
+
+void testConvertMap(char * filename)
+// Reads in BitMap and converts to RLEMap then prints on screen
+// Next converts back to BitMap, back to RLEMap and prints
+{
+  BitMap * m = new BitMap;
+  RLEMap * rm = new RLEMap;
+
+  // First direction: file -> BitMap -> RLEMap
+  m->readMap(filename);
+  convertMap(m,rm);
+
+  cout << "length chk " << m->imageLength()<< "==" << rm->imageLength()<< endl;
+  cout << "width chk " << m->imageWidth()<< "==" << rm->imageWidth() << endl;
+  cout << "status chk"<<m->status() << "==" << rm->status() << endl;
+
+  printMap(rm);
+
+  // Now test conversion the other way
+  delete m;
+  m = new BitMap;
+  convertMap(rm,m);
+  delete rm;
+  rm = new RLEMap;
+  convertMap(m,rm);
+  printMap(rm);
+  delete rm;   // both maps used to be leaked on return
+  delete m;
+}
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/convertMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,53 @@
+#ifndef _CONVERTMAP_H
+#define _CONVERTMAP_H
+
+/** convertMap.h
+Functions for converting from one Map form to another
+and extracting smaller maps. There are four overloaded
+functions. The general format is
+
+convertMap(MapType * source,MapType * target, Point ul, Point lr)
+
+MapTypes can be - BitMap, RLEMap, or VRLEMap
+source - the map to be converted from
+target - The map to be converted to. Memory will be allocated
+ for target's contents.
+ul and lr mark a range to copy. If either are the global var NOPNT,
+the entire map will be copied.
+
+Functions return a MapStatus which will be VALID or OTHERERROR
+if there was an error in the conversion. An error might be due
+to a user specifying an out of range ul and lr.
+
+**************************************************************/
+#include "system.h"
+#include "Point.h"
+#include "BitMap.h"
+#include "RLEPair.h"
+#include "RLEMap.h"
+#include "Point.h"
+
+
+
+/**** BitMap Conversion ***/
+MapStatus convertMap(BitMap * source, BitMap * target, Point ul = NOPNT,
+ Point lr= NOPNT);
+MapStatus convertMap(BitMap * source, RLEMap * target, Point ul= NOPNT,
+ Point lr= NOPNT);
+
+
+/**** RLEMap Conversion ***/
+MapStatus convertMap(RLEMap * source, BitMap * target, Point ul = 0,
+ Point lr = 0);
+MapStatus convertMap(RLEMap * source, RLEMap * target, Point ul= 0,
+ Point lr= 0);
+#endif
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/deskew.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,14 @@
+#include "deskew.h"
+#include "get_skew.h"
+#include "rotate.h"
+#include "bitmap.h"
+#include "RLEMap.h"
+
+void deskew(RLEMap* r)
+{
+  // Pass the angle reported by get_skew() straight to r->rotate().
+  r->rotate(get_skew(r));
+}
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/deskew.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,7 @@
+#ifndef DESKEW
+#define DESKEW 1
+#include "bitmap.h"
+// NOTE(review): deskew.cc defines deskew(RLEMap*), not deskew(Bitmap*) - confirm.
+void deskew(Bitmap* b);
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/edit2.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define editor_width 32
+#define editor_height 32
+static char editor_bits[] = {
+ 0xe0,0xff,0x0f,0x00,0x30,0x00,0x08,0x00,0x28,0x70,0x09,0x00,0x24,0x00,0x08,
+ 0x00,0x22,0xd0,0x09,0x00,0x3f,0x00,0x08,0x00,0x01,0x00,0x08,0x00,0x71,0x01,
+ 0x08,0x00,0x01,0x00,0x08,0x00,0x01,0x00,0x08,0x00,0x61,0xb7,0x08,0x00,0x01,
+ 0x00,0x08,0x00,0xa1,0xdd,0x08,0x00,0x01,0x00,0x08,0x00,0x61,0xdb,0x08,0x00,
+ 0x01,0x00,0x08,0x00,0xa1,0xbb,0xe0,0x01,0x01,0x00,0x18,0x06,0x61,0xbb,0x04,
+ 0x08,0x01,0x00,0x62,0x10,0xe1,0xd6,0x99,0x20,0x01,0xf8,0xff,0x23,0x61,0x07,
+ 0x72,0x40,0x01,0x00,0x04,0xc0,0x01,0x00,0x18,0xa0,0x01,0x00,0xe8,0xd0,0x01,
+ 0x00,0x08,0xeb,0xff,0xff,0x0f,0xf4,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0xf0,
+ 0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0xc0};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/eye.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define eye_width 32
+#define eye_height 32
+static char eye_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0x07,0x00,0xff,0xff,0x3f,
+ 0x00,0x00,0xfc,0xff,0x00,0x00,0xc0,0xff,0x03,0x00,0x00,0xfe,0x0f,0x00,0x00,
+ 0xf0,0x1f,0x00,0x00,0xc0,0x3f,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0xfc,0x55,
+ 0x05,0x00,0xf0,0xaa,0x2a,0x00,0xc0,0x15,0xc0,0x00,0x00,0xaa,0x02,0x00,0x00,
+ 0x55,0x15,0x00,0x00,0xff,0xbf,0x00,0x00,0xff,0xff,0x01,0x00,0xff,0xff,0x07,
+ 0x00,0xff,0x3f,0x1f,0x00,0xff,0xff,0x70,0x00,0xff,0xff,0xc3,0x00,0xbf,0xff,
+ 0x87,0x01,0xd7,0xff,0x1f,0x02,0x97,0x9f,0x3c,0x04,0x0e,0x4f,0x78,0x04,0x38,
+ 0x20,0xe0,0x08,0xf0,0x00,0xe0,0x01,0x80,0x0f,0x20,0x03,0x00,0xf8,0x07,0x01,
+ 0x00,0x00,0x38,0x00,0x00,0x00,0x40,0x02};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/face_happy.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define noname_width 32
+#define noname_height 32
+static char noname_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x0f,0x00,0x00,0x1c,0x70,
+ 0x00,0x00,0x02,0x80,0x00,0x80,0x01,0x00,0x03,0x40,0x00,0x00,0x04,0x40,0x00,
+ 0x00,0x04,0x20,0x00,0x00,0x08,0x10,0x20,0x08,0x10,0x10,0x20,0x08,0x10,0x10,
+ 0x20,0x08,0x10,0x08,0x20,0x08,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,
+ 0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,
+ 0x20,0x10,0x00,0x00,0x10,0x10,0x08,0x20,0x10,0x10,0x04,0x40,0x10,0x20,0x0a,
+ 0xa0,0x08,0x40,0x30,0x18,0x04,0x40,0xc0,0x07,0x04,0x80,0x01,0x00,0x03,0x00,
+ 0x02,0x80,0x00,0x00,0x1c,0x70,0x00,0x00,0xe0,0x0f,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/fh Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,8 @@
+/*--------------------------------------------------------------
+Primary Purpose:
+Arguments:
+Return Value:
+Effects:
+Constraints:
+Rev:
+---------------------------------------------------------------*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/get_skew.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,97 @@
+#include "get_skew.h"
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+#include "tcl_interface.h"
+
+int docommand(char* fmt, ...);
+
+struct angle_and_sd
+{
+ double angle;
+ double sd;
+};
+
+#define DEBUG_GET_SKEW 1
+
+double get_skew(RLEMap* r)
+/*--------------------------------------------------------------
+Primary Purpose: Estimate the skew angle of r: two hill-climbing passes
+ (step 1, then step 0.1) maximise the std. dev. of project_histogram(r, angle)
+Arguments: r - pointer to the RLEMap handed to project_histogram
+Return Value: the tested angle with the largest standard deviation. Rev: AR
+---------------------------------------------------------------*/
+{
+ // NOTE(review): Histograms returned by project_histogram are never freed here - confirm ownership.
+ int i,j,max_index;
+ double max_sd = 0;
+ double test_angle, angular_skip, angle_increment;
+ Histogram* temp_hist;
+ angle_and_sd Angles_and_Sds[20];
+
+ struct timeval tv;
+ double start_time;
+ gettimeofday(&tv, NULL);
+ start_time = (double)tv.tv_sec;
+ printf("Starting deskew process: time = %lf\n", ((double) tv.tv_sec) - start_time);
+ if(ENABLE_USER_INTERFACE)
+ set_status("Finding Skew. . .");
+/* pass 0 starts at angle 0 with loop_step 1; pass 1 restarts at the best angle with 0.1 */
+
+ test_angle = -1;
+ angular_skip = 1;
+ double loop_start_angle = 0;
+ double loop_step = 1;
+ for(i = 0; i < 2; i++)
+ {
+ temp_hist = project_histogram(r, loop_start_angle);
+ Angles_and_Sds[0].angle = loop_start_angle;
+ Angles_and_Sds[0].sd = temp_hist->get_standard_dev();
+
+ temp_hist = project_histogram(r, loop_start_angle + loop_step);
+ Angles_and_Sds[1].angle = loop_start_angle + loop_step;
+ Angles_and_Sds[1].sd = temp_hist->get_standard_dev();
+ // choose the step direction from whichever of the two samples has the larger sd
+ if(Angles_and_Sds[1].sd > Angles_and_Sds[0].sd)
+ {
+ angle_increment = loop_step;
+ max_index = 1;
+ test_angle = Angles_and_Sds[1].angle + angle_increment;
+ }
+ else
+ {
+ max_index = 0;
+ angle_increment = -loop_step;
+ test_angle = Angles_and_Sds[0].angle + angle_increment;
+ }
+ for(j = 2; j < 10; j++)
+ {
+ temp_hist = project_histogram(r, test_angle);
+ Angles_and_Sds[j].angle = test_angle;
+ Angles_and_Sds[j].sd = temp_hist->get_standard_dev();
+ if(Angles_and_Sds[j].sd > Angles_and_Sds[max_index].sd)
+ {
+ max_index = j;
+ test_angle += angle_increment;
+ }
+ else
+ break;
+ }
+ loop_start_angle = Angles_and_Sds[max_index].angle;
+ loop_step = 0.1;
+ }
+ gettimeofday(&tv, NULL);
+ /* printf("Done findinge skew. Elapsed time = %lf\n", ((double) tv.tv_sec) - start_time); */
+ if(ENABLE_USER_INTERFACE)
+ {
+ set_status("Skew angle = %.3lf", Angles_and_Sds[max_index].angle);
+ update();
+ }
+ return Angles_and_Sds[max_index].angle;
+}
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/get_skew.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,8 @@
+#ifndef GETSKEW
+#define GETSKEW 1
+#include "project.h"
+#include "histogram.h"
+
+double get_skew(RLEMap* r);
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/global_menu.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,1 @@
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/grey.25 Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,6 @@
+#define grey_width 16
+#define grey_height 16
+static char grey_bits[] = {
+ 0x11, 0x11, 0x44, 0x44, 0x11, 0x11, 0x44, 0x44, 0x11, 0x11, 0x44, 0x44,
+ 0x11, 0x11, 0x44, 0x44, 0x11, 0x11, 0x44, 0x44, 0x11, 0x11, 0x44, 0x44,
+ 0x11, 0x11, 0x44, 0x44, 0x11, 0x11, 0x44, 0x44};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/histogram.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,84 @@
+#include <math.h>
+#include "histogram.h"
+#include "tcl_interface.h"
+
+// Sets `mean` to the arithmetic mean of the first num_rows entries of
+// row_weights (integer sum, then floating-point division by num_rows).
+void Histogram::initialize_mean()
+{
+  int weight_sum = 0;
+  for (int row = 0; row < num_rows; ++row)
+    weight_sum += row_weights[row];
+  const double row_count = num_rows;
+  mean = static_cast<double>(weight_sum) / row_count;
+}
+
+inline double sqr(double x) {return(x*x);}
+
+// Sets `variance` to the sum of squared deviations of row_weights from the
+// current value of `mean`, divided by num_rows.
+void Histogram::initialize_variance()
+{
+  double squared_dev_total = 0;
+  for (int row = 0; row < num_rows; ++row)
+    squared_dev_total += sqr(row_weights[row] - mean);
+  variance = squared_dev_total / (double) num_rows;
+}
+
+void Histogram::initialize_standard_dev()
+{ // standard_dev becomes the square root of the current value of variance
+ standard_dev = sqrt(variance);
+}
+#define SKIP 5
+/* should get this skip from project.cc (!!) */
+Histogram::display() // NOTE(review): no return type is written here or in histogram.h - confirm intended type.
+{ // Currently a no-op: the docommand() drawing calls below are compiled out by #if 0.
+#if 0
+ docommand(".histogram.c delete hist");
+ docommand(".histogram.c create text 275 300 -font -adobe-helvetica-medium-o-normal--34-240-100-100-p-176-iso8859-1 -text \"%.3lf degrees, SD = %.3lf\" -tags hist", cut_angle, standard_dev);
+ for(int i = 0; i < num_rows; i++)
+ {
+ docommand(".histogram.c create line 0 %d %d %d -fill blue -tags {hist} -width %d", i*SKIP, row_weights[i], i*SKIP, SKIP);
+ }
+ docommand("update");
+#endif
+}
+
+// Stores the row count, the weight pointer (the array is not copied) and the
+// cut angle, then computes mean, variance and standard deviation in that order.
+Histogram::Histogram(int n_rows, int* r_weights, double c_angle)
+  : num_rows(n_rows), row_weights(r_weights), cut_angle(c_angle)
+{
+  initialize_mean();
+  initialize_variance();
+  initialize_standard_dev();
+}
+
+
+
+
+
+
+
+
+
+
+double find_int_sd(int* values, int num_values)
+/* Standard deviation of values[0 .. num_values-1]: square root of the
+   mean squared deviation from the arithmetic mean (divides by num_values). */
+{
+  int k;
+  int sum = 0;
+  for (k = 0; k < num_values; ++k)
+    sum += values[k];
+  const double mean_weight = (double) sum / (double) num_values;
+
+  double squared_dev = 0;
+  for (k = 0; k < num_values; ++k)
+  {
+    const double dev = values[k] - mean_weight;
+    squared_dev += sqr(dev);
+  }
+  return sqrt(squared_dev / (double) num_values);
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/histogram.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,49 @@
+#ifndef HISTOGRAM
+#define HISTOGRAM 1
+
+class Histogram
+{
+/* An array of integer row weights together with its mean, variance and */
+/* standard deviation, which the constructor computes (see histogram.cc). */
+ private:
+ double mean;
+ double variance;
+ double standard_dev;
+
+ int num_rows;
+ int* row_weights; /* consider implementing with an intarray (from hw1) instead */
+ double cut_angle;/* this doesn't really belong here, but who cares */
+ void initialize_mean();
+ void initialize_variance();
+ void initialize_standard_dev();
+ public:
+ display(); /* NOTE(review): declared without a return type - confirm intended type */
+ Histogram(int n_rows, int* r_weights, double c_angle);
+ inline int get_row_weight(int row) /* row is used as an unchecked index */
+ {
+ return row_weights[row];
+ }
+ inline int get_num_rows()
+ {
+ return num_rows;
+ }
+ inline double get_mean()
+ {
+ return mean;
+ }
+ inline double get_standard_dev()
+ {
+ return standard_dev;
+ }
+ inline double get_variance()
+ {
+ return variance;
+ }
+ inline double get_cut_angle()
+ {
+ return cut_angle;
+ }
+};
+
+#endif
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/init_small_display.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,21 @@
+# the windows in here were for debugging skew etc.
+# not all that important anymore
+toplevel .t
+wm geometry .t 600x710+0+0
+wm title .t "Image Display"
+frame .t.f -width 600 -height 710
+pack .t.f
+
+canvas .t.f.c -xscrollcommand ".t.f.xscroller set" -yscrollcommand ".t.f.yscroller set" -width 700 -height 700 -background gray -xscrollincrement 40 -yscrollincrement 40
+
+scrollbar .t.f.xscroller -command ".t.f.c xview" -orient horizontal
+scrollbar .t.f.yscroller -command ".t.f.c yview"
+pack .t.f.xscroller -side bottom -fill x
+pack .t.f.c .t.f.yscroller -side left -fill y
+# .t.f.c create bitmap 100 100 -bitmap @/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/tif/test
+toplevel .histogram
+wm title .histogram "Skew Projections"
+wm geometry .histogram 500x800+620+0
+canvas .histogram.c -width 800 -height 800
+pack .histogram.c
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/learn.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,558 @@
+/*--------------------------------------------------------------
+ Learn.cc -
+ readlearnfiles - sources the tcl file to change learn files
+ learn(char * tifffile, char * asciifile)
+ Performs character learning by reading tiff and ascii translation
+ Characters are partitioned into character groups as described
+ in system.cc. See learn function for more details
+
+ writeLearnedGroups(char * filename) Writes learned character to file
+ readLearnedGroups(char * filename) Reads saved learned characters
+ from file.
+---------------------------------------------------------------*/
+#include "tcl_interface.h"
+#include "system.h"
+#include "learn.h"
+#include "Page.h"
+#include "list.h"
+
+void readLearnFiles()
+/*--------------------------------------------------------------
+Primary Purpose: Passes the Tcl command "source learnfile.tcl" to docommand(),
+so that learn files can be specified in that script without recompiling **/
+{
+ docommand("source learnfile.tcl");
+}
+
+bool whitespace(char c)
+// TRUE when c is a newline, tab or space; FALSE for every other character.
+{
+  const bool is_space = (c == ' ') || (c == '\t') || (c == '\n');
+  if (is_space) return TRUE;
+  return FALSE;
+}
+
+bool blank(char * string)
+// TRUE when string is NULL, empty, or holds only whitespace() characters.
+{
+  if (string == NULL) return TRUE;
+
+  const char * const stop = string + strlen(string);
+  for (const char * p = string; p != stop; ++p)
+    if (!whitespace(*p))
+      return FALSE;
+  return TRUE;
+}
+
+
+void printLearnedGroups()
+// Debug dump: prints the ascii id and group number of every Component
+// stored in LearnedGroups[0 .. NumCharGroups-1].
+{
+  for (unsigned int group = 0; group < NumCharGroups; ++group)
+    for (ListElement * node = LearnedGroups[group].first; node != NULL; node = node->next)
+    {
+      Component * learned = (Component *) node->item;
+      printf("learned char %s, group %d\n", learned->fasciiId,
+             learned->charGroup);
+    }
+}
+
+int lengthNextWord(char * buffer,int offset, int buflength)
+// Counts the characters of the word starting at buffer[offset]; the word ends
+// at whitespace or at buflength. A '<'...'>' group counts as one character.
+{
+  int count = 0;   // was uninitialized, so the result was indeterminate
+
+  for (int c = offset; c < buflength && !(whitespace(buffer[c])); c++)
+    {
+      if (buffer[c] == '<')
+        {
+          // Bounds check first, so an unterminated '<' stops at buflength.
+          while ((c < buflength) && (buffer[c] != '>'))
+            c++;
+        }
+      count++;     // one per plain character or per bracketed group
+    }
+  return count;
+}
+
+
+int learn(Component * comp, char * id, Confidence threshold)
+/*--------------------------------------------------------------
+Primary Purpose: Make a copy of this component and add it to
+                 LearnedGroups. id is ascii identification.
+                 Component will only be learned if confidence
+                 is below threshold or if id and asciiid dont match
+Arguments: comp - component to learn
+           id - ascii identification
+           threshold - confidence threshold for learning
+Return Value: 1 if component was learned, 0 otherwise
+Rev: 4/25/96
+---------------------------------------------------------------*/
+{
+  // strcmp() returns 0 when the ids are equal. The old test !(strcmp(...))
+  // therefore learned on a match, the opposite of the contract above.
+  if (!(comp->confid() < threshold) && strcmp(comp->fasciiId, id) == 0)
+    return 0;
+
+  Component * newcomp = comp->copy();
+  delete [] newcomp->fasciiId;   // array form, matching the new char[] below
+  newcomp->fasciiId = new char[strlen(id)+1];
+  strcpy(newcomp->fasciiId , id);
+
+  // File the relabelled copy under its character group.
+  LearnedGroups[newcomp->charGroup].Append(newcomp);
+  return 1;
+}
+
+void learn(char * tifFile, char * asciiFile, bool synchwords)
+/*--------------------------------------------------------------
+Primary Purpose: Learns from TIFF and ascii file: builds a Page from tifFile
+ (readMap, setLines, extractComponents, extractWords) and
+ hands it to learn(Page *, char *, bool).
+Arguments: tifFile name of a tiff file to learn from
+ asciiFile name of an ascii translation file
+ synchwords passed through unchanged to learn(Page *, ...)
+Effects: Assumes a one to one correspondence between each connected
+component on a line of the tif file and each character on the corresponding
+line of the ascii file. Returns early (with a message) if readMap != VALID.
+Rev: 4/26/96
+---------------------------------------------------------------------*/
+{
+ // NOTE(review): learnPage is never deleted, including on the early return below - confirm intent.
+ Page * learnPage = new Page;
+ initCharBitsSet();
+ if(learnPage->readMap(tifFile) != VALID)
+ {
+ printf("Problem opening the learn image file (file doesn't exist?)\n");
+ return;
+ }
+ learnPage->setLines();
+ learnPage->extractComponents(MinHorizSeparation);
+ learnPage->extractWords();
+ learn(learnPage, asciiFile, synchwords);
+
+ // delete learnPage;
+
+}
+
+
+void learn(Page * learnPage, char * asciiFile, bool synchWords)
+/*--------------------------------------------------------------
+Primary Purpose: Learns from a Page and an ascii file. Used from
+                 tcl user interface under File/Learn option.
+                 Groups learned characters by baseline into
+                 LearnedGroups and updates MaxHWRatio, MinHWRatio
+                 and MinWidth from the component bounding boxes.
+Arguments: learnPage   page whose words() and line(n) are walked
+           asciiFile   name of an ascii translation file
+           synchWords  enables the per-word length check (see below)
+Effects: Assumes a one to one correspondence between each connected
+component on a line of the page and each character on the corresponding
+line of the ascii file. Text between '<' and '>' labels one component.
+Rev: 4/26/96
+---------------------------------------------------------------*/
+{
+  FILE * transFile;
+
+  transFile = fopen(asciiFile,"r");
+  if(!transFile)
+    {
+      printf("Could not open the ascii learn file");
+      return;
+    }
+  if (LearnedGroups == NULL)
+    LearnedGroups = new Components[NumCharGroups];
+
+  int maxCharsPerLine = learnPage->bmap()->imageWidth() / MinLineSize;
+  char buffer[maxCharsPerLine];
+  int i = -1;
+  int buflength=0;
+  bool instring= FALSE;
+  Components * components = NULL;
+  Words * words;
+  Component * item;
+
+  double width, height = 0.0;
+  int h;
+
+  words = learnPage->words();
+  int c = 0;
+  Word * word;
+
+  for (ListElement * ptr = words->first; ptr != NULL &&
+       (i < learnPage->numLines()) ; ptr = ptr->next)
+    {
+      word = (Word *) ptr->item;
+      // if new line get new text line
+      if (word->characters[0] == '\n' || buflength == 0)
+        {
+          char * ok;
+          do {
+            ok =fgets(buffer, maxCharsPerLine, transFile);
+          } while (ok && blank(buffer)); // skip blank lines.
+          // fgets() failed (end of file): buffer holds no fresh line, so stop
+          // rather than label components from stale or uninitialized text.
+          if (!ok) break;
+          buflength= strlen(buffer);
+          components = learnPage->line(++i);
+          c =0;
+          if (word->characters[0] == '\n') continue;
+        }
+
+      // skip over white space (bounds check first, then the read)
+      while(c < buflength && whitespace(buffer[c]))c++;
+
+      // NOTE(review): this skips the word when the counts ARE equal, although
+      // the intent reads as "skip on mismatch" - left unchanged, confirm.
+      if (synchWords &&
+          (word->charCount == lengthNextWord(buffer,c,buflength)))
+        {
+          // skip over this word
+          while(c < buflength && !(whitespace(buffer[c])))
+            c++;
+          continue;  // move on to the next word
+        }
+
+      for (int ch = 0; ch < word->charCount; ch++)
+        {
+          while(c < buflength && whitespace(buffer[c]))c++;
+          item = word->character[ch];
+          if (c >= buflength) break;
+
+          // Link string translation to component.  Characters between
+          // brackets are for one component.
+          if(buffer[c] == '<' && !instring)
+            {
+              instring = TRUE;
+              int startString = c;
+              while(c++ < buflength && buffer[c] != '>');
+              int endString = c+1;
+
+              int stringSize = endString - startString;
+              char newstring[stringSize+1];
+              strncpy(newstring, &buffer[startString],stringSize);
+              newstring[stringSize] = '\0';
+              // learn if id's don't match or below threshold
+              learn(item, newstring, ConfidenceThreshold);
+              c++;
+              instring = FALSE;
+            }
+          else
+            {
+              char newstring[2];
+              newstring[0] = buffer[c++];
+              newstring[1]= '\0';
+              learn(item, newstring, ConfidenceThreshold);
+            }
+
+          // The page's own component is filed as well, in addition to any
+          // copy that learn() made above.
+          LearnedGroups[item->charGroup].Append(item);
+          //ptr->item = NULL;  // Set to Null in page so it wont get
+                               // clobbered on delete
+          h = item->lr().y() - item->ul().y();
+          if (h > height) height = h;
+          width = item->lr().x() - item->ul().x();
+          // height is the tallest component seen so far, not this one's h.
+          if (height/width > MaxHWRatio)
+            MaxHWRatio = height/width;
+
+          if (h/width < MinHWRatio)
+            MinHWRatio = h/width;
+
+          if (width < MinWidth)
+            MinWidth = (int) width;
+        }
+    }
+
+  // Any line still readable here means the text outlasted the components.
+  if (fgets(buffer, maxCharsPerLine, transFile))
+    printf("Uh, oh. There are more characters to learn!\n");
+  /*  printf("Maximum height/width ratio = %f\n", MaxHWRatio);  */
+  /* printf("Minimum height/width ratio = %f\n", MinHWRatio);  */
+
+  // The translation file used to be left open on every call.
+  fclose(transFile);
+
+  // printLearnedGroups();
+}
+
+
+int writeLearnedGroups(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write Learned groups out to file for reading
+                 in by readLearnedGroups
+Arguments: filename to write learned chars to
+Return Value: 1 if successful 0 if not (open, any write, or close failed)
+Effects: Writes contents of LearnedGroups array out to filename
+LearnedGroups is an array of lists of components that is declared
+in system.cc and initialized by the learn() function.
+For each group writes the number of Components the group contains
+followed by the group data.
+Other learned values such as MinWidth MinHWRatio etc are written to
+the file as well.
+Constraints: LearnedGroups must be initialized and filled with learned
+chars before this function is invoked.
+Rev: 11/27 KM
+---------------------------------------------------------------*/
+{
+  assert(LearnedGroups != NULL);
+
+  FILE * outfile = fopen(filename, "w");
+  if (outfile == NULL)
+    {
+      printf("error openning %s \n", filename);
+      return 0;
+    }
+
+  // `ok` drops to 0 on the first failed write. Every loop tests it, so all
+  // failure paths reach the single fclose() below (the early returns used
+  // to leak the open FILE) and unchecked writes no longer pass silently.
+  int ok = 1;
+  // Write global information about learned characters
+  ok = ok && fwrite(&NumCharGroups, sizeof(NumCharGroups),1, outfile) == 1;
+  ok = ok && fwrite(&MaxHWRatio, sizeof(MaxHWRatio),1, outfile) == 1;
+  ok = ok && fwrite(&MinWidth, sizeof(MinWidth),1,outfile) == 1;
+  ok = ok && fwrite(&MinHWRatio, sizeof(MinHWRatio),1,outfile) == 1;
+
+  for(unsigned int i = 0; ok && i < NumCharGroups; i++)
+    {
+      unsigned int numChars = LearnedGroups[i].length;
+      // Write group number and number of characters
+      ok = fwrite(&i, sizeof(i), 1, outfile) == 1
+        && fwrite(&numChars, sizeof(numChars),1,outfile) == 1;
+
+      for(ListElement * ptr = LearnedGroups[i].first;
+          ok && ptr != NULL; ptr = ptr->next)
+        {
+          Component * comp = (Component *) ptr->item;
+
+          // Record layout: raw Component image, length of the id string
+          // (including its NUL), the id string, then numProperties values.
+          int stringSize = strlen(comp->fasciiId) +1;
+          ok = fwrite(comp, sizeof(Component),1,outfile) == 1
+            && fwrite(&stringSize, sizeof(stringSize),1,outfile) == 1
+            && fwrite(comp->fasciiId, stringSize,1,outfile) == 1;
+
+          for(int p = 0; ok && p < numProperties; p++)
+            {
+              ok = fwrite(&(comp->fproperty[p]),
+                          sizeof(Property),
+                          1, outfile) == 1;
+              if (!ok)
+                printf("Error writing properties of comp %c",
+                       comp->asciiId());
+            }
+        }
+    }
+
+  // fclose() flushes buffered data, so its failure is a write failure too.
+  if (fclose(outfile) != 0) ok = 0;
+  return ok;
+}
+
+int readLearnedGroups(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read Learned groups from file that has been
+                 created by writeLearnedGroups
+Arguments: filename to read learned chars from
+Return Value: 1 if successful 0 if not (open, any read, or close failed)
+Effects: Reads contents of filename into LearnedGroups array
+LearnedGroups is an array of lists of components that is declared
+in system.cc and initialized here or in the learn() function.
+Rev: 11/27 KM
+---------------------------------------------------------------*/
+{
+  FILE * infile;
+  unsigned int numGroups;  // # of groups stored in file.
+
+  initCharBitsSet();
+  if(LearnedGroups == NULL)
+    LearnedGroups = new Components[NumCharGroups];
+
+  infile = fopen(filename, "r");
+  if (infile == NULL)
+    {
+      printf("error openning %s \n", filename);
+      return 0;
+    }
+
+  // `ok` drops to 0 on the first short read; the loops then wind down and
+  // the file is closed on every path (the error return used to leak it).
+  // Read Globals
+  int ok = fread(&numGroups, sizeof(numGroups),1, infile) == 1;
+  if (ok) assert(numGroups == NumCharGroups);
+  ok = ok && fread(&MaxHWRatio, sizeof(MaxHWRatio),1, infile) == 1;
+  ok = ok && fread(&MinWidth, sizeof(MinWidth),1,infile) == 1;
+  ok = ok && fread(&MinHWRatio, sizeof(MinHWRatio),1,infile) == 1;
+  for(unsigned int i = 0; ok && i < NumCharGroups; i++)
+    {
+      unsigned int groupnum;
+      unsigned int numChars;
+      ok = fread(&groupnum, sizeof(groupnum), 1, infile) == 1
+        && fread(&numChars, sizeof(numChars),1,infile) == 1;
+      if (!ok) break;
+      assert(groupnum == i);
+      printf("\nReading group %d - %d characters\n",i,numChars);
+      for(unsigned int c = 0; ok && c< numChars; c++)
+        {
+          Component * comp = new Component;
+          // fread() overwrites the whole object, pointer members included,
+          // so the fproperty pointer is saved here and restored afterwards.
+          short int * savepropptr = comp->fproperty;
+          int stringSize = 0;
+          ok = fread(comp, sizeof(Component),1,infile) == 1
+            && fread(&stringSize, sizeof(stringSize),1,infile) == 1
+            && stringSize > 0;  // a corrupt length must not size new[]
+          comp->fproperty = savepropptr;
+          if (ok)
+            {
+              comp->fasciiId = new char[stringSize];
+              ok = fread(comp->fasciiId, stringSize,1,infile) == 1;
+            }
+          for(int p = 0; ok && p < numProperties; p++)
+            {
+              ok = fread(&(comp->fproperty[p]), sizeof(Property),
+                         1, infile) == 1;
+              if (!ok)
+                printf("Error reading properties");
+            }
+
+          // A failed record is dropped but deliberately not deleted: its
+          // pointer members may hold bytes that came from the file.
+          if (ok)
+            LearnedGroups[i].Append(comp);
+        }
+    }
+
+  if (fclose(infile) != 0) ok = 0;
+  return ok;
+}
+
+void testLearn()
+{
+  // The second path used to read ".../fa95class/...", missing the '/' after fa95.
+  learn("/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/train.tif",
+        "/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/train.txt");
+}
+
+/*****************************************************************
+ FUNCTIONS BEYOND THIS POINT ARE FOR AVERAGING LEARNED CHARACTERS
+ AND ARE NOT CURRENTLY USED.
+*******************************************************************/
+
+void initLearnedChars()
+/*--------------------------------------------------------------
+Primary Purpose: Allocates the 256-entry LearnedChars array and gives
+entry k the ascii id (char)k, so each entry's id is its own array offset.
+Rev: KM 11/6/95
+---------------------------------------------------------------*/
+{
+  LearnedChars = new Component[256];
+  int code = 0;
+  while (code < 256)
+    {
+      LearnedChars[code].asciiId() = (char)code;
+      ++code;
+    }
+}
+
+void oldlearn(char * tifFile, char * asciiFile)
+/*--------------------------------------------------------------
+Primary Purpose: builds property vectors for LearnedChars array
+Arguments: tiffFile name of a tiff file to learn from
+           asciiFile name of an ascii translation file
+Effects: Assumes a one to one correspondence between each connected
+component on a line of the tif file and each character on the corresponding
+line of the ascii file.  For learned characters confidence is set
+to the number of examples.
+Returns early, with a message, if asciiFile cannot be opened.
+Rev: 11/6/95
+---------------------------------------------------------------*/
+{
+  FILE * transFile = fopen(asciiFile,"r");
+  if (transFile == NULL)
+    {
+      printf("Could not open the ascii learn file");
+      return;
+    }
+  Page * learnPage = new Page;
+  initCharBitsSet();
+  learnPage->readMap(tifFile);
+  learnPage->setLines();
+  learnPage->extractComponents(MinHorizSeparation); /* why minlinesize? */
+  int maxCharsPerLine = learnPage->bmap()->imageWidth() / MinLineSize;
+  char buffer[maxCharsPerLine];
+  int i = 0;
+  int buflength;
+  Components * components;
+  Component * item;
+  int count[256];  // a count of how many of each char have been encountered
+  int prop[256][numProperties]; // Character property sums.  Need ints so that
+                                // property sum does not exceed char boundaries
+  unsigned char id;             // unsigned: it indexes count/prop/LearnedChars
+  initLearnedChars();
+  for (i = 0; i < 256; i++)
+    {
+      count[i] = 0;
+      for (int p = 0; p < numProperties; p++)
+        prop[i][p] = 0;
+    }
+  i=0;
+  int offset;
+  while (i < learnPage->numLines() &&
+         fgets(buffer, maxCharsPerLine, transFile))
+    {
+      buflength = strlen(buffer);
+      components = learnPage->line(i++);
+      int c = 0;
+      for (ListElement* ptr = components->first; ptr != NULL;
+           ptr = ptr->next)
+        {
+          item = (Component *)(ptr->item);
+          // skip over white space
+          while(c < buflength && whitespace(buffer[c]))c++;
+          if (c >= buflength)break;
+          id = buffer[c++];
+          count[id]++;  // increment character count
+          for (offset=0; offset < numProperties; offset++)
+            prop[id][offset] += (item->properties())[offset];
+          // Accumulate per character (this used LearnedChars[i], the line counter).
+          LearnedChars[id].numBits() += item->numBits();
+        }
+    }
+  fclose(transFile);
+  // now divide by count and put in Learned character
+  for(int j = 0; j < 256; j++)
+    {
+      if(count[j] > 0)
+        {
+          for (offset=0; offset < numProperties; offset++)
+            prop[j][offset] /= count[j];
+          LearnedChars[j].numBits() /= count[j];
+          LearnedChars[j].confid() = count[j];
+          for (offset=0; offset < numProperties; offset++)
+            (LearnedChars[j].properties())[offset] = prop[j][offset];
+//        printf("%d occurrences of %c\n", count[j], (char)j);
+          printVector(LearnedChars[j].properties(), numProperties);
+        }
+    }
+}
+
+void oldtestLearn()
+{
+ // Calls learn() (not oldlearn()) on train.tif/train.txt, then, if the user
+ // interface is enabled, sends the work-space "delete IMAGE_TAG" command.
+ learn("train.tif", "train.txt");
+ if (ENABLE_USER_INTERFACE)
+ docommand(".main_window.display.work_space delete IMAGE_TAG");
+}
+
+
+
+
+
+
Binary file reference/ocr-new/learn.dat has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/learn.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,27 @@
+// learn.h
+// Functions for use with LearnedcharGroups array
+class Page;
+
+void learn(Page * p, char * asciiFile, bool synchWords=FALSE);
+void learn(char * tifFile, char * asciiFile, bool synchWords=FALSE);
+int learn(Component * comp, char * id, Confidence threshold);
+void readLearnFiles(); // sources learnfile.tcl where learn files can be set
+
+int writeLearnedGroups(char * filename); // Save learned characters
+int readLearnedGroups(char * filename); // read back saved characters
+void testLearn();
+
+// oldlearn for use with LearnedChars array
+// averaging learning ***NOT CURRENTLY USED ***
+void oldlearn(char * tifFile, char * asciiFile); // averages values
+void initLearnedChars(); // sets learned charactere array to 0
+
+
+
+
+
+
+
+
+
+
Binary file reference/ocr-new/learned has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/learnfile.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,13 @@
+# one line each for files that need to be learned
+# form: LEARN <imagefile> <textfile> <synchwords bool>
+# or: LEARN_DATA <datafile>
+set DONT_SYNCH_WORDS 0
+set SYNCH_WORDS 1
+
+# LEARN TIF/tomquotes.tif tomquotes.txt
+#LEARN train.tif train.txt $DONT_SYNCH_WORDS
+#LEARN 4.header.tif 4.header.txt $SYNCH_WORDS
+#LEARN 4.col0.tif 4.col0.txt $SYNCH_WORDS
+LEARN_DATA learn.dat
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/line_info.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,41 @@
+#ifndef LINE_INFO
+#define LINE_INFO 1
+
+// A pair of row numbers (start_row, end_row) with read-only accessors.
+// The constructor is only declared here; how it fills the two fields is
+// not visible in this header.
+class LineMarker {
+  int start_row;
+  int end_row;
+
+ public:
+  LineMarker(int st, int fi);
+
+  // Accessors, defined in-class and therefore implicitly inline.
+  // Each returns the stored value unchanged.
+  int get_start_row() { return start_row; }
+  int get_end_row()   { return end_row; }
+};
+
+// A line count plus a pointer to an array of LineMarker entries.
+// No destructor is declared, so this class does not free line_marks.
+// The constructor is only declared here.
+class LineInfo {
+  int num_lines;
+  LineMarker* line_marks;
+
+ public:
+  LineInfo(LineMarker* line_m, int num_l);
+
+  // Stored line count.
+  int get_num_lines() { return num_lines; }
+
+  // Start/end row of entry n. n is used as an unchecked index into
+  // line_marks, so callers must keep it within the array.
+  int get_line_n_start(int n) { return line_marks[n].get_start_row(); }
+
+  int get_line_n_end(int n)   { return line_marks[n].get_end_row(); }
+
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/link.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,68 @@
+#include <tcl.h>
+#include "link.h"
+#include "tcl_interface.h"
+#include "system.h"
+
+/*
+
+To link tcl and C variables: (variable num)
+
+int num;
+LINK_VARIABLE(num, INT);
+
+make sure the variable is external and in system.h
+so that everyone can get at it
+
+*/
+
+
+int DISPLAY_SPELLING_MISTAKES = 0;
+extern Tcl_Interp* TCL_ip;
+
+#define LINK_VARIABLE(name, type) \
+Tcl_LinkVar(TCL_ip, #name, (void*) &name, TCL_LINK_##type)
+
+int init_link_vars()
+// Links each C global below to the Tcl variable of the same name in TCL_ip
+// (LINK_VARIABLE expands to Tcl_LinkVar). Individual results are not checked.
+{
+  LINK_VARIABLE(ENABLE_USER_INTERFACE, INT); // 0 turns the display off
+  LINK_VARIABLE(VERY_LOW_CONFIDENCE, INT); // Words that display in red
+  LINK_VARIABLE(LOW_CONFIDENCE, INT); // Words that will be displayed in blue
+  LINK_VARIABLE(DISPLAY_LINE_BOUNDARIES, INT); // Display line boundaries
+  LINK_VARIABLE(DISPLAY_BOUNDING_BOXES, INT); // Display component boundaries
+  LINK_VARIABLE(SPELLCHECK, INT);  // should the page be spellchecked
+
+  LINK_VARIABLE(DISPLAY_IMAGE, INT); // display the scanned image?
+  LINK_VARIABLE(DESKEW_METHOD, INT); // 1 for rle, 0 for bitmap
+  LINK_VARIABLE(DISPLAY_SPELLING_MISTAKES, INT);
+  LINK_VARIABLE(SCALE_FACTOR, DOUBLE);
+  LINK_VARIABLE(NoiseTolerance, INT);// Minimum num of pixels in line of text
+
+  LINK_VARIABLE(MinLineSize, INT); // Minimum number of rows in line of text
+  LINK_VARIABLE(MinVertSeparation, INT); // Minimum number of rows between lines of text
+  LINK_VARIABLE(MinHorizSeparation, INT); // Minimum number of blank pixels btween chars
+  LINK_VARIABLE(ConfidenceThreshold, INT);// Minimum confidence for some operations
+  LINK_VARIABLE(JoinTolerance, INT); // Maximum number of pixels
+                                     // joining two fused characters
+
+  // The next 3 are used in character grouping
+  LINK_VARIABLE( MaxVertSize, INT); // Max vert pixels in char
+  LINK_VARIABLE( BaseLineTolerance, INT); // How far from baseline is
+                                          // okay 1/INT * linesize
+  LINK_VARIABLE( TopLineTolerance, INT); // How far from topline
+                                         //is okay 1/INT *linesize
+
+  /* Constants for the number of horizontal and vertical divisions
+     for determining the gray scale property vector for each component */
+  LINK_VARIABLE( NumHorizDiv, INT); //Number of horizontal divisions
+  LINK_VARIABLE( NumVertDiv, INT); //Number of vertical divisions
+  LINK_VARIABLE( ZONING_SCALE_FACTOR, DOUBLE); // SCALE_FACTOR is linked above
+  // Control used to fall off the end of this int function (undefined result).
+  return TCL_OK;
+}
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/link.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,48 @@
+#ifndef link__h
+#define link__h
+
+// Declarations of the run-time configuration globals that link.cc passes
+// to LINK_VARIABLE, plus the function that issues those requests.
+
+int init_link_vars();
+extern int ENABLE_USER_INTERFACE;
+extern int DISPLAY_SPELLING_MISTAKES;
+extern double ZONING_SCALE_FACTOR;
+extern double SCALE_FACTOR;
+extern int NoiseTolerance; // Minimum number of pixels in row of text
+extern int MinLineSize; // Minimum number of rows in a line of text
+extern int MinVertSeparation; // Minimum number of rows between lines of text
+extern int MinHorizSeparation; // Minimum number of blank pixels between chars
+extern int ConfidenceThreshold;// Minimum confidence for some operations
+extern int JoinTolerance; // Maximum number of pixels in a column
+ // joining two fused characters
+
+extern double MaxHWRatio; // Max H/W ratio of learned set
+extern int MinWidth; // minimum component width in learned set
+
+
+
+// The next four are used in character grouping
+extern unsigned int NumCharGroups;
+extern int MaxVertSize; // Max vert pixels in char (used for baseline)
+extern int BaseLineTolerance; // How far from baseline is okay (link.cc: 1/INT * linesize)
+extern int TopLineTolerance; // How far from topline is okay (link.cc: 1/INT * linesize)
+
+/* Constants for the number of horizontal and vertical divisions
+ for determining the gray scale property vector for each component */
+
+extern int NumHorizDiv; //Number of horizontal divisions
+extern int NumVertDiv; //Number of vertical divisions
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/link_vars.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,37 @@
+# Default values for the configuration variables that link.cc passes to
+# LINK_VARIABLE.  The per-variable meanings below are taken from the
+# comments in link.cc.
+
+# Words that display in red / in blue
+set VERY_LOW_CONFIDENCE 120
+set LOW_CONFIDENCE 160
+# Display component boundaries
+set DISPLAY_BOUNDING_BOXES 1
+set DISPLAY_LINE_BOUNDARIES 0
+# Should the page be spellchecked
+set SPELLCHECK 0
+# 0 turns the display off
+set ENABLE_USER_INTERFACE 1
+# Display the scanned image?
+set DISPLAY_IMAGE 1
+# deskew method 0 uses bitmap rotation
+# 1 an RLEMap rotation
+# -1 no rotation at all
+set DESKEW_METHOD -1
+set SCALE_FACTOR 1
+set ZONING_SCALE_FACTOR .50
+
+# Minimum number of pixels in a line of text
+set NoiseTolerance 3
+# Minimum number of rows in a line of text
+set MinLineSize 10
+# Minimum number of rows between lines of text
+set MinVertSeparation 0
+# Minimum number of blank pixels between chars
+set MinHorizSeparation 1
+# Minimum confidence for some operations
+set ConfidenceThreshold 150
+
+# Maximum number of pixels joining two fused characters
+set JoinTolerance 6
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/list.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,348 @@
+// list.cc
+//
+// Routines to manage a doubly-linked list of "things" (each ListElement
+// carries both a "next" and a "previous" pointer).
+//
+// A "ListElement" is allocated for each item to be put on the
+// list; it is de-allocated when the item is removed. This means
+// we don't need to keep a "next" pointer in every object we
+// want to put on a list.
+//
+// NOTE: Mutual exclusion must be provided by the caller.
+// If you want a synchronized list, you must use the routines
+// in synchlist.cc.
+//
+// Copyright (c) 1992-1993 The Regents of the University of California.
+// All rights reserved. See copyright.h for copyright notice and limitation
+// of liability and disclaimer of warranty provisions.
+
+
+#include "list.h"
+
+//----------------------------------------------------------------------
+// ListElement::ListElement
+// Initialize a list element, so it can be added somewhere on a list.
+//
+// "itemPtr" is the item to be put on the list. It can be a pointer
+// to anything.
+// "sortKey" is the priority of the item, if any.
+//----------------------------------------------------------------------
+
+// Build a detached node that wraps "itemPtr" with priority "sortKey".
+// Both neighbour links start out NULL; the List routines splice the node in.
+ListElement::ListElement(void *itemPtr, int sortKey)
+    : next(NULL),
+      previous(NULL),
+      key(sortKey),
+      item(itemPtr)
+{
+}
+
+//----------------------------------------------------------------------
+// List::List
+// Initialize a list, empty to start with.
+// Elements can now be added to the list.
+//----------------------------------------------------------------------
+
+// Start with no elements: both end pointers NULL and a length of zero.
+List::List()
+    : length(0),
+      first(NULL),
+      last(NULL)
+{
+}
+
+//----------------------------------------------------------------------
+// List::~List
+// Prepare a list for deallocation. If the list still contains any
+// ListElements, de-allocate them. However, note that we do *not*
+// de-allocate the "items" on the list -- this module allocates
+// and de-allocates the ListElements to keep track of each item,
+// but a given item may be on multiple lists, so we can't
+// de-allocate them here.
+//----------------------------------------------------------------------
+
+// Release every remaining ListElement.  The items themselves are not
+// freed, because an item may be on several lists.
+List::~List()
+{
+  // Loop on emptiness, not on Remove()'s return value: Remove() returns the
+  // stored item pointer, and a stored NULL item used to end the loop early
+  // and leak every node behind it.
+  while (!IsEmpty())
+    Remove();
+}
+
+//----------------------------------------------------------------------
+// List::Append
+// Append an "item" to the end of the list.
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, put it at the end.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+//----------------------------------------------------------------------
+
+// Wrap "item" in a fresh ListElement (key 0) and link it in as the new tail.
+void
+List::Append(void *item)
+{
+  ListElement *node = new ListElement(item, 0);
+
+  if (!IsEmpty()) {
+    // Hook the new node behind the current tail.
+    node->previous = last;
+    last->next = node;
+  } else {
+    // First node: it is the head as well as the tail.
+    first = node;
+  }
+  last = node;
+  length++;
+}
+
+//----------------------------------------------------------------------
+// List::Prepend
+// Put an "item" on the front of the list.
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, put it at the beginning.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+//----------------------------------------------------------------------
+
+// Wrap "item" in a fresh ListElement (key 0) and link it in as the new head.
+void
+List::Prepend(void *item)
+{
+  ListElement *node = new ListElement(item, 0);
+
+  if (!IsEmpty()) {
+    // Hook the new node in front of the current head.
+    node->next = first;
+    first->previous = node;
+  } else {
+    // First node: it is the tail as well as the head.
+    last = node;
+  }
+  first = node;
+  length++;
+}
+
+//----------------------------------------------------------------------
+// List::Remove
+// Remove the first "item" from the front of the list.
+//
+// Returns:
+// Pointer to removed item, NULL if nothing on the list.
+//----------------------------------------------------------------------
+
+// Take the first item off the list.
+// Returns the removed item pointer, or NULL when the list is empty.
+void *
+List::Remove()
+{
+  // SortedRemove() already decrements "length"; the extra decrement that
+  // used to follow this return statement was unreachable and is removed.
+  return SortedRemove(NULL);   // same as SortedRemove, but ignore the key
+}
+
+//----------------------------------------------------------------------
+// List::Mapcar
+// Apply a function to each item on the list, by walking through
+// the list, one element at a time.
+//
+// Unlike LISP, this mapcar does not return anything!
+//
+// "func" is the procedure to apply to each element of the list.
+//----------------------------------------------------------------------
+
+// Call "func" once per element, in list order, passing the element's item
+// pointer converted to int.  Nothing is returned or collected.
+void
+List::Mapcar(VoidFunctionPtr func)
+{
+  for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next) {
+    // Convert via size_t first: a direct pointer-to-int cast is rejected by
+    // current compilers where pointers are wider than int.
+    (*func)(static_cast<int>(reinterpret_cast<size_t>(ptr->item)));
+  }
+}
+
+//----------------------------------------------------------------------
+// List::IsEmpty
+// Returns TRUE if the list is empty (has no items).
+//----------------------------------------------------------------------
+
+// TRUE when the list has no head element, FALSE otherwise.
+bool
+List::IsEmpty()
+{
+  return (first != NULL) ? FALSE : TRUE;
+}
+
+//----------------------------------------------------------------------
+// List::SortedInsert
+// Insert an "item" into a list, so that the list elements are
+// sorted in increasing order by "sortKey".
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, walk through the list, one element at a time,
+// to find where the new item should be placed.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+// "sortKey" is the priority of the item.
+//----------------------------------------------------------------------
+
+// Insert "item" so that keys stay in increasing order.  A new element is
+// placed after existing elements whose key is less than or equal to
+// "sortKey".
+//
+// "item" is the thing to put on the list; "sortKey" is its priority.
+void
+List::SortedInsert(void *item, int sortKey)
+{
+  ListElement *element = new ListElement(item, sortKey);
+
+  if (IsEmpty()) {                       // only element
+    first = element;
+    last = element;
+  } else if (sortKey < first->key) {     // new head
+    element->next = first;
+    first->previous = element;
+    first = element;
+  } else {
+    // Advance to the last node whose successor is not bigger than the item.
+    ListElement *ptr = first;
+    while (ptr->next != NULL && sortKey >= ptr->next->key)
+      ptr = ptr->next;
+
+    element->previous = ptr;
+    element->next = ptr->next;
+    if (ptr->next != NULL)
+      ptr->next->previous = element;     // spliced into the middle
+    else
+      last = element;                    // appended at the tail
+    ptr->next = element;
+  }
+  // Count every insertion.  The mid-list case used to return early and
+  // skip this increment, leaving "length" too small.
+  length++;
+}
+
+//----------------------------------------------------------------------
+// List::SortedRemove
+// Remove the first "item" from the front of a sorted list.
+//
+// Returns:
+// Pointer to removed item, NULL if nothing on the list.
+// Sets *keyPtr to the priority value of the removed item
+// (this is needed by interrupt.cc, for instance).
+//
+// "keyPtr" is a pointer to the location in which to store the
+// priority of the removed item.
+//----------------------------------------------------------------------
+
+// Detach the head element, free it, and hand back the item it carried.
+// Returns NULL when the list is empty.  When "keyPtr" is non-NULL the
+// removed element's key is stored through it.
+void *
+List::SortedRemove(int *keyPtr)
+{
+  if (IsEmpty())
+    return NULL;
+
+  ListElement *head = first;
+  void *payload = head->item;
+
+  if (head == last) {
+    // Single element: the list becomes empty.
+    first = NULL;
+    last = NULL;
+  } else {
+    first = head->next;
+    if (first != NULL)
+      first->previous = NULL;
+  }
+
+  if (keyPtr != NULL)
+    *keyPtr = head->key;
+
+  delete head;
+  length--;
+  return payload;
+}
+
+
+// Find the first element whose item pointer equals "item" and remove it
+// via removeAt().  Returns "item" on success, NULL when it is not present.
+void * List::removeElement(void * item)
+{
+  ListElement *cursor = first;
+  while (cursor != NULL)
+  {
+    if (cursor->item == item)
+    {
+      removeAt(cursor);
+      return item;
+    }
+    cursor = cursor->next;
+  }
+  return NULL;
+}
+
+// Insert a new element (key 0) carrying "item" directly after "listEl".
+void List::insertAfter(ListElement * listEl, void *item)
+{
+  ListElement *newElement = new ListElement(item, 0);
+  newElement->next = listEl->next;
+  newElement->previous = listEl;
+  // Keep the backward chain intact: the old successor must point back at
+  // the new element (this link was previously left pointing at listEl).
+  if (listEl->next != NULL)
+    listEl->next->previous = newElement;
+  listEl->next = newElement;
+  if (last == listEl)
+    last = newElement;
+  length++;
+}
+
+
+// Insert a new element (key 0) carrying "item" directly before "listEl".
+void List::insertBefore(ListElement * listEl, void *item)
+{
+  ListElement *newElement = new ListElement(item, 0);
+  newElement->next = listEl;
+  newElement->previous = listEl->previous;
+  // Keep the forward chain intact: the old predecessor must point at the
+  // new element (this link was previously left pointing at listEl, so a
+  // forward walk skipped the inserted element).
+  if (listEl->previous != NULL)
+    listEl->previous->next = newElement;
+  listEl->previous = newElement;
+  if (first == listEl)
+    first = newElement;
+  length++;
+}
+
+
+
+// Unlink "listEl" from the list and delete the ListElement itself.
+// The item the element pointed to is left untouched.
+void List::removeAt(ListElement * listEl)
+{
+  ListElement *before = listEl->previous;
+  ListElement *after = listEl->next;
+
+  // Bypass the element in the forward direction.
+  if (first == listEl)
+    first = after;
+  else
+    before->next = after;
+
+  // Bypass the element in the backward direction.
+  if (last == listEl)
+    last = before;
+  else
+    after->previous = before;
+
+  // Re-terminate both ends in case the removed element was an end node.
+  if (first != NULL)
+    first->previous = NULL;
+  if (last != NULL)
+    last->next = NULL;
+
+  delete listEl;
+  length--;
+}
+
+
+
+// Debug dump: prints the length and the head/tail pointers, then one
+// (previous, self, next) pointer triple per element, three triples per line.
+void List::printList()
+{
+  int i=0;
+  // Pointers are printed with %p; the former %u conversion is undefined
+  // for pointer arguments and truncates where pointers are wider than
+  // unsigned int.
+  printf("length= %d first = %p last = %p\n", length, (void *)first, (void *)last);
+  for(ListElement * ptr = first; ptr != NULL; ptr=ptr->next)
+  {
+    if ((i++%3) == 0) printf("\n");
+    printf(" ( %p, %p, %p ) ", (void *)ptr->previous, (void *)ptr, (void *)ptr->next);
+  }
+  printf("\n");
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/list.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,96 @@
+
+// list.h
+// Data structures to manage LISP-like lists.
+//
+// As in LISP, a list can contain any type of data structure
+// as an item on the list: thread control blocks,
+// pending interrupts, etc. That is why each item is a "void *",
+// or in other words, a "pointers to anything".
+//
+// Copyright (c) 1992-1993 The Regents of the University of California.
+// All rights reserved. See copyright.h for copyright notice and limitation
+// of liability and disclaimer of warranty provisions.
+
+#ifndef LIST_H
+#define LIST_H
+#include <bool.h>
+#include <stdlib.h>
+
+// This declares the type "VoidFunctionPtr" to be a "pointer to a
+// function taking an integer argument and returning nothing". With
+// such a function pointer (say it is "func"), we can call it like this:
+//
+// (*func) (17);
+//
+// Used by MapCar in list.h
+
+typedef void (*VoidFunctionPtr)(int arg);
+
+// The following class defines a "list element" -- which is
+// used to keep track of one item on a list.  It is comparable to a
+// LISP cell: "item" plays the role of the car (the payload) and "next"
+// the role of the cdr (the rest of the list); "previous" additionally
+// links back to the preceding element.
+//
+// Internal data structures kept public so that List operations can
+// access them directly.
+
+class ListElement {
+ public:
+  ListElement(void *itemPtr, int sortKey);   // initialize a list element
+
+  ListElement *next;        // next element on list,
+                            // NULL if this is the last
+  ListElement *previous;    // previous element on the list
+                            // NULL if this is the first element
+
+  // These three were declared without a return type, which ISO C++ rejects.
+  // "int" is spelled out because that is what the old implicit-int rule
+  // gave them.  NOTE(review): list.cc defines none of them (List has its
+  // own insertAfter/insertBefore/removeAt) -- confirm whether they can go.
+  int insertAfter(void *item);    // insert a new item after this one
+  int insertBefore(void *item);   // insert a new item before this one
+  int remove(void * item);        // remove item with this key
+
+  int key;                  // priority, for a sorted list
+  void *item;               // pointer to item on the list
+};
+
+// The following class defines a "list" -- a linked list of list
+// elements, each of which points to a single item on the list.  Elements
+// carry both "next" and "previous" pointers, so the chain can be walked
+// in either direction.
+//
+// By using the "Sorted" functions, the list can be kept sorted
+// in increasing order by "key" in ListElement.
+
+class List {
+ public:
+ List(); // initialize the list
+ ~List(); // de-allocate the list
+
+ void Prepend(void *item); // Put item at the beginning of the list
+ void Append(void *item); // Put item at the end of the list
+ void *Remove(); // Take item off the front of the list
+
+ void Mapcar(VoidFunctionPtr func); // Apply "func" to every element
+ // on the list
+ bool IsEmpty(); // is the list empty?
+ void printList(); // debug dump of the element pointers
+
+ // Positional operations on an element that is already on this list.
+ void insertAfter(ListElement * listEl, void *item);
+ void insertBefore(ListElement * listEl, void *item);
+ void removeAt(ListElement * listEl); // unlink and delete listEl
+ void * removeElement(void * item);
+ // find this item in list and remove it
+
+ // Routines to put/get items on/off list in order (sorted by key)
+ void Insert(void *item){SortedInsert(item,0);}
+ void SortedInsert(void *item, int sortKey); // Put item into list
+ void *SortedRemove(int *keyPtr); // Remove first item from list
+
+
+ int length; // Length of list
+ ListElement *first; // Head of the list, NULL if list is empty
+ ListElement *last; // Last element of list
+
+};
+
+#endif // LIST_H
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/main.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,106 @@
+#include <tcl.h>
+#include <tk.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "system.h"
+#include "Page.h"
+#include "tcl_interface.h"
+
+
+#define QUIT 0
+#define PAUSE 0
+
+/*
+
+Creates a Tcl interpreter, link-variable capabilities,
+and the "docommand" function -- thanks to Keiji Kanazawa.
+
+To execute a line in tcl: (set x 4)
+
+docommand("set x 4");
+
+also
+
+docommand("set x %d", 4);
+
+or
+
+docommand("set %s %d", "x", 4); this might not work ("x" on the stack)
+
+started 10/95 Archie Russell
+
+*/
+
+
+
+Tcl_Interp* TCL_ip;
+Tk_Window main_window;
+
+Page* global_page = new ZonedPage;
+Page* active_page= NULL;
+Page* zoned_page = NULL;
+
+// Program entry point.
+//
+// Sets up the Tcl interpreter, command procedures and linked variables.
+// With ENABLE_USER_INTERFACE set, it loads the UI and the learn files and
+// then services Tk events forever.  Otherwise it runs a batch pass over
+// the image named by argv[1]; a second argument "nolearn" makes it read
+// learnedGroups.dat instead of re-learning from the training images.
+int
+main(int argc, char** argv)
+{
+  initialize_interpreter();
+  initialize_command_procs();
+  initialize_link_vars();
+  if(ENABLE_USER_INTERFACE)
+    {
+      load_user_interface();
+    }
+
+  Page * testPage;
+  testPage = new Page;
+  if(ENABLE_USER_INTERFACE)
+    readLearnFiles();
+  else
+    {
+      // Batch mode reads argv[1] below; refuse to start without it rather
+      // than passing an out-of-range argv entry to readMap().
+      if(argc < 2)
+	{
+	  fprintf(stderr, "usage: %s image-file [nolearn]\n",
+		  (argc > 0 && argv[0] != NULL) ? argv[0] : "ocr");
+	  delete testPage;
+	  exit(1);
+	}
+      if(argc == 3 && strcmp(argv[2], "nolearn") == 0)
+	readLearnedGroups("learnedGroups.dat");
+      else
+	{
+	  learn("train.tif", "train.txt",0);
+	  learn("4.header.tif", "4.header.txt",1);
+	  learn("4.col0.tif", "4.col0.txt", 1);
+	  writeLearnedGroups("learnedGroups.dat");
+	}
+    }
+  if(!ENABLE_USER_INTERFACE) {
+    testPage->readMap(argv[1]);
+    testPage->deskew(0);
+    testPage->setLines();
+    testPage->extractComponents(MinHorizSeparation);
+    testPage->recognize();
+    testPage->extractWords();
+    testPage->printComponents();
+    testPage->writeWordPos("wordpos.out");
+    testPage->writeWordbox("wordbox.out",0,0,0);
+    testPage->writeAscii("ascii.out");
+    testPage->addEquation(2 , 200, 4, 100);
+    printf(" in eqn ? %d",testPage->inEquation(450, 105));
+    printf (" deleted? %d", testPage->deleteEquation(400, 100)); // deletes equation with this coordinate.
+    delete testPage;
+  }
+  if(ENABLE_USER_INTERFACE)
+    while(1)
+      {
+	Tk_DoOneEvent(TK_ALL_EVENTS);
+      }
+
+  exit(0);
+}
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/multi_zone.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,612 @@
+
+# Path of the zoning toplevel and the default merge distances handed to
+# AUTO_ZONE by "autozone".
+set zoneWindow .zoning_window
+set HorizMerge 70
+set VertMerge 70
+
+# Ids of the regions drawn so far, and the ids in the order arrows were
+# started on them (see start_arrow).
+set region_list [list]
+set ordered_region_list [list]
+
+# Pseudo-region 0: start_arrow draws from (x_init + 40, y_init + 20) of
+# region prev_reg_id, so with these values the first arrow starts at 0,0.
+set prev_reg_id 0
+set region_data(0,x_final) 0
+set region_data(0,y_final) 0
+set region_data(0,x_init) -40
+set region_data(0,y_init) -20
+
+set cur_xoffset 0
+set cur_yoffset 0
+
+
+# multiZone_open --
+#   Open (or re-focus) the zoning window, load "filename" as the zoned
+#   page, pick a zoning scale factor, size the window to the scaled page
+#   and display the page either through xv or on the zoning canvas.
+proc multiZone_open { filename } {
+    # "cur_yoffset" used to be misspelled "curyoffset" in this global list,
+    # so the reset below only touched a proc-local variable.
+    global HorizMerge VertMerge cur_xoffset cur_yoffset
+    global IMAGE_DISPLAY_WIN ZONING_SCALE_FACTOR DISPLAY_IMAGE XV xvprocess \
+        multiZone_display_open zoneWindow
+
+    set cur_xoffset 0
+    set cur_yoffset 0
+
+    set open 1
+
+    if { $multiZone_display_open == $open } {
+        focus .zoning_window
+    } else {
+        toplevel .zoning_window
+        init_zoning_display
+    }
+
+#   puts stdout "Opening $filename"
+    zoned_page_open $filename
+    # Scale image to display
+    init_ZONING_SCALE_FACTOR
+
+#   puts stdout "Done putting into page structure"
+    # The condition is hard-wired to 1 ("1 means success" in the original
+    # comment), so the failure popup below is currently never reached.
+    if { 1 } {
+        set display_height [expr $ZONING_SCALE_FACTOR * [get_page_height]]
+        set display_width [expr $ZONING_SCALE_FACTOR * [get_page_width]]
+        wm geometry .zoning_window [expr int($display_width) +20]x[expr int($display_height) + 150]+300+100
+
+        if { $DISPLAY_IMAGE == $XV } {
+            set xvprocess [exec xv $filename &]
+            puts stdout "xvprocess $xvprocess"
+        } else {
+            # use the canvas...
+            DESKEW
+            zone_message "Displaying Image"
+            DISPLAY_INTERVALS .zoning_window.work_space $ZONING_SCALE_FACTOR
+            zone_message "Determining Line boundaries"
+            FIND_LINES
+            zone_message " "
+        }
+    } else {
+        popup_image_failure_win
+    }
+}
+
+# init_ZONING_SCALE_FACTOR --
+#   Set ZONING_SCALE_FACTOR to the smaller of 800/page-height and
+#   800/page-width, capped at 1.  Both candidate factors are echoed to
+#   stdout before the choice is made.
+proc init_ZONING_SCALE_FACTOR { } {
+    global ZONING_SCALE_FACTOR
+
+    set ZONING_SCALE_FACTOR [expr 800.0/[get_page_height]]
+    set width_factor [expr 800.0/[get_page_width]]
+    puts $ZONING_SCALE_FACTOR
+    puts $width_factor
+
+    # Keep whichever factor shrinks the page more.
+    if { $width_factor < $ZONING_SCALE_FACTOR } {
+        set ZONING_SCALE_FACTOR $width_factor
+    }
+    # Never enlarge the page.
+    if { $ZONING_SCALE_FACTOR > 1 } {
+        set ZONING_SCALE_FACTOR 1
+    }
+}
+
+
+# init_zoning_display --
+#   Configure the zoning toplevel ($zoneWindow) and build its contents:
+#   menu bar, message bar, scrollable work canvas with two scrollbars, and
+#   the mouse bindings for selecting and drawing zones.
+proc init_zoning_display { } {
+    global ZONING_SCALE_FACTOR BACKGROUND FOREGROUND scroll_inc zoneWindow
+    global menu_bar_height button_bar_height SMALLFONT FONT
+
+    wm geometry $zoneWindow 600x900+300+100
+    wm title $zoneWindow "CalZoning"
+    wm positionfrom $zoneWindow user
+    wm minsize $zoneWindow 500 300
+    $zoneWindow configure -background $BACKGROUND
+
+    set canvas_width 1000
+    set canvas_height 1000
+
+    canvas $zoneWindow.work_space -bg white -xscrollcommand \
+        "$zoneWindow.xscroller set" -yscrollcommand \
+        "$zoneWindow.yscroller set" -xscrollincrement \
+        $scroll_inc -cursor {crosshair black gray} \
+        -width $canvas_width -height $canvas_height
+    # two scrollbars
+    scrollbar $zoneWindow.xscroller -command "$zoneWindow.work_space xview" -orient horizontal -background $BACKGROUND
+    scrollbar $zoneWindow.yscroller -command "$zoneWindow.work_space yview" -background $BACKGROUND
+
+    frame $zoneWindow.menu_bar -height $menu_bar_height -relief raised -bd 2 -background $BACKGROUND
+    init_zoning_menu_bar
+
+    frame $zoneWindow.message_bar -height $button_bar_height \
+        -background $BACKGROUND -relief raised -bd 2
+    message $zoneWindow.message_bar.m -background \
+        $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT \
+        -justify center
+
+    pack $zoneWindow.menu_bar -side top -fill x
+    pack $zoneWindow.message_bar -side top -fill x
+    # NOTE(review): "-fill x -fill y" ends up as "-fill y" (the last value
+    # wins); left unchanged here -- confirm whether "-fill both" was meant.
+    pack $zoneWindow.message_bar.m -fill x -fill y
+
+    pack $zoneWindow.xscroller -side bottom -fill x
+
+    pack $zoneWindow.yscroller -side right -fill y
+    pack $zoneWindow.work_space -side top -fill x -fill y
+    $zoneWindow.work_space configure -scrollregion { 0 0 5000 5000 }
+
+    # Double-click with button 3 activates the zone under the pointer.
+    # The window coordinates are converted with the canvas that received
+    # the event (%W).  The binding used to ask the main window's canvas
+    # (.main_window.display.work_space), whose scroll position is
+    # unrelated to this one.
+    bind $zoneWindow.work_space <Double-3> {
+        set curx [%W canvasx %x]
+        set cury [%W canvasy %y]
+        set_active_zone $curx $cury
+    }
+
+    initialize_region_grab
+}
+
+
+# max --
+#   Return whichever of the two arguments evaluates to the larger number;
+#   "b" is returned when they compare equal.  The winning argument is
+#   returned exactly as it was passed in.
+proc max { a b } {
+    if { [expr $a] <= [expr $b] } {
+        return $b
+    }
+    return $a
+}
+
+proc init_zoning_menu_bar { } {
+
+# Builds the widgets inside $zoneWindow.menu_bar: a "Zoning" menubutton
+# (AutoZone... / Destroy Zones), a "Scale Factor" button, a "Close" button
+# and, on the right-hand side, a "Help" button.
+
+ global BACKGROUND FOREGROUND FONT zoneWindow
+
+ #Zoning
+ menubutton $zoneWindow.menu_bar.zoning -text "Zoning"\
+ -menu $zoneWindow.menu_bar.zoning.menu -borderwidth\
+ 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+ menu $zoneWindow.menu_bar.zoning.menu -background $BACKGROUND \
+ -foreground $FOREGROUND -font $FONT
+ $zoneWindow.menu_bar.zoning.menu add command -label "AutoZone..." \
+ -command popup_autozone_menu
+ $zoneWindow.menu_bar.zoning.menu add command -label "Destroy Zones" \
+ -command destroy_all_regions
+
+
+ # Opens the scale-factor dialog (popup_scale_menu).
+ button $zoneWindow.menu_bar.scale -text "Scale Factor" \
+ -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND \
+ -relief flat \
+ -font $FONT -command popup_scale_menu
+
+# button $zoneWindow.menu_bar.recognize -text "Recognize All" \
+# -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND \
+# -relief flat \
+# -font $FONT -command {puts "Recognize All"}
+
+ # The path argument handed to init_zoning_help is not used by that proc.
+ button $zoneWindow.menu_bar.help -text "Help" -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT -relief flat \
+ -command { init_zoning_help .main_window.menu_bar.help }
+
+ button $zoneWindow.menu_bar.close -text "Close" \
+ -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND \
+ -relief flat \
+ -font $FONT -command close_zoned_document
+
+ # Zoning, Scale Factor and Close sit on the left; Help on the right.
+ pack $zoneWindow.menu_bar.zoning $zoneWindow.menu_bar.scale \
+ $zoneWindow.menu_bar.close -side left -padx 1m -pady 1m
+ pack $zoneWindow.menu_bar.help -side right -padx 1m -pady 1m
+
+ }
+
+
+
+proc popup_scale_menu { } {
+# A small dialog (.scale) with an entry bound to ZONING_SCALE_FACTOR.
+# Pressing <Return> in the entry or clicking OK runs scale_zoned_page;
+# Cancel destroys the dialog.  If the dialog already exists it is only
+# given the focus.
+
+ global BACKGROUND FOREGROUND SMALLFONT FONT ZONING_SCALE_FACTOR SELECT
+ if { [winfo exists .scale] } {
+ focus .scale
+ return
+ }
+
+ toplevel .scale -background $BACKGROUND
+ wm geometry .scale 200x100+350+150
+ # NOTE(review): .scale.m is created but never packed in this proc, so
+ # it looks like it is never shown -- confirm whether that is intended.
+ message .scale.m -text "Scale Factor\n" -background \
+ $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT \
+ -justify center -width 200
+
+ # Label + entry row; the frame is named after the lower-cased variable
+ # name, i.e. .scale.zoning_scale_factor.
+ set gm .scale
+ set var ZONING_SCALE_FACTOR
+ set varWindow [string tolower $var]
+ frame $gm.$varWindow
+ label $gm.$varWindow.l -text "Scale Factor" -width 13 -fg $FOREGROUND -background $BACKGROUND -font $FONT -justify left
+ entry $gm.$varWindow.set -width 4 -relief sunken -textvariable $var -fg $FOREGROUND -background $BACKGROUND -font $FONT -selectbackground $SELECT
+ # Put the cursor at the start and pre-select the current value.
+ $gm.$varWindow.set icursor 0
+ $gm.$varWindow.set select range 0 10
+ bind $gm.$varWindow.set <Return> scale_zoned_page
+ pack $gm.$varWindow.l -side left
+ pack $gm.$varWindow.set -side right
+ pack $gm.$varWindow -side top
+
+ frame .scale.buttons
+ button .scale.buttons.ok -text OK -command scale_zoned_page -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .scale.buttons.cancel -text Cancel -command {destroy .scale}\
+ -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .scale.buttons.ok .scale.buttons.cancel -side left -expand 1 \
+ -fill x
+
+ pack .scale.buttons\
+ -side top -fill x
+
+
+ focus $gm.$varWindow.set
+
+}
+
+
+
+proc popup_autozone_menu { } {
+# A small dialog (.autozone) in which the user adjusts the horizontal and
+# vertical merging parameters (HorizMerge / VertMerge, sliders 0..255) and
+# starts automatic zoning with OK.  Cancel destroys the dialog.  If the
+# dialog already exists it is only given the focus.
+
+ global BACKGROUND FOREGROUND SMALLFONT FONT HorizMerge VertMerge
+ if { [winfo exists .autozone] } {
+ focus .autozone
+ return
+ }
+
+ toplevel .autozone -background $BACKGROUND
+ wm geometry .autozone 250x225+300+150
+ message .autozone.m -text "Merging Parameters\n" -background \
+ $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT \
+ -justify center -width 250
+ # Each scale writes straight into the global named by -variable.
+ scale .autozone.hmerge -from 0 -to 255 -variable HorizMerge \
+ -orient horizontal -label "Horizontal Merge" \
+ -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+ scale .autozone.vmerge -from 0 -to 255 -variable VertMerge \
+ -orient horizontal -label "Vertical Merge" -background \
+ $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+
+
+ frame .autozone.buttons
+ # OK runs "autozone", which also destroys this dialog when done.
+ button .autozone.buttons.ok -text OK -command autozone -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .autozone.buttons.cancel -text Cancel -command {destroy .autozone}\
+ -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .autozone.buttons.ok .autozone.buttons.cancel -side left -expand 1 \
+ -fill x
+
+ pack .autozone.m .autozone.hmerge .autozone.vmerge .autozone.buttons \
+ -side top -fill x
+}
+
+
+# autozone --
+#   Announce the operation in the message bar, drop every existing region,
+#   run AUTO_ZONE with the current merge distances and close the
+#   .autozone dialog.
+proc autozone { } {
+    global HorizMerge VertMerge zoneWindow
+
+    zone_message "Zoning Document"
+    destroy_all_regions
+
+    set merge_args [list $HorizMerge $VertMerge]
+    AUTO_ZONE [lindex $merge_args 0] [lindex $merge_args 1]
+
+    destroy .autozone
+}
+
+# zone_message --
+#   Show "msg" in the zoning window's message bar (centred, width 500).
+proc zone_message { msg } {
+    global zoneWindow
+
+    set bar $zoneWindow.message_bar.m
+    $bar configure -text $msg -width 500 -justify center
+}
+
+# Corners of the rectangle being dragged: the <ButtonPress-1> binding sets
+# the *_init pair and the <ButtonRelease-1> binding sets the *_final pair.
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+
+# NOTE(review): the procs visible in this file use prev_reg_id, not
+# prev_region_id -- this variable looks unused; confirm before removing.
+set prev_region_id 0
+
+
+# started_region is 1 while a rectangle drag is in progress.
+set started_region 0
+set region_count 0
+
+# initialize_region_grab --
+#   Install the mouse bindings that let the user drag out a rectangle on
+#   the zoning canvas with mouse button 1.  The canvas subwindow must be
+#   called $zoneWindow.work_space.  The binding scripts run at global
+#   scope, so every variable they touch is a global.
+proc initialize_region_grab { } {
+    global x_init y_init x_final y_final started_region region_data region_list region_id arrow_in_progress current_arrow zoneWindow
+
+    # Press: start a new rectangle unless one is already being dragged.
+    bind $zoneWindow.work_space <ButtonPress-1> {
+        if [expr ! $started_region] {
+            puts stdout "window: $zoneWindow \n"
+            grab set $zoneWindow
+            set x_init [$zoneWindow.work_space canvasx %x]
+            set y_init [$zoneWindow.work_space canvasy %y]
+            start_region $x_init $y_init
+        }
+    }
+
+    # Release: finish the rectangle and register it as a zone.  This is
+    # skipped when no drag is active (for example after <Leave> cancelled
+    # it); the unguarded version re-finalised the previous region and
+    # called ADD_ZONE with stale coordinates.
+    bind $zoneWindow.work_space <ButtonRelease-1> {
+        if $started_region {
+            set x_final [$zoneWindow.work_space canvasx %x]
+            set y_final [$zoneWindow.work_space canvasy %y]
+            end_region $x_final $y_final
+            ADD_ZONE $x_init $y_init $x_final $y_final
+        }
+    }
+
+    # Button-2 drag: stretch the arrow being drawn, if any.
+    bind $zoneWindow.work_space <B2-Motion> {
+        if $arrow_in_progress {
+            set curx [$zoneWindow.work_space canvasx %x]
+            set cury [$zoneWindow.work_space canvasy %y]
+            $zoneWindow.work_space coords $current_arrow 0 0 $curx $cury
+        }
+    }
+
+    # Button-1 drag: rubber-band the rectangle to the pointer.
+    bind $zoneWindow.work_space <B1-Motion> {
+        if $started_region {
+            set curx [$zoneWindow.work_space canvasx %x]
+            set cury [$zoneWindow.work_space canvasy %y]
+            $zoneWindow.work_space coords region$region_id $x_init $y_init $curx $cury
+        }
+    }
+
+    bind $zoneWindow <Enter> {
+        SWITCH_TO_ZONED_PAGE
+    }
+
+    bind $zoneWindow <Leave> {
+        # on leaving the display, release control of the mouse etc.
+        # maybe make it scroll instead?
+        if $started_region {
+            grab release $zoneWindow
+            set started_region 0
+            $zoneWindow.work_space coords region$region_id 0 0 0 0
+        }
+    }
+}
+
+
+set arrow_in_progress 0
+
+# make_region_buttons --
+#   Draw the two small control rectangles at the top-left corner of region
+#   "reg_id": a blue 40x20 "next" button and, on top of its left half, a
+#   red 20x20 "kill" button.  Both carry the region's tag, so deleting
+#   region$reg_id deletes them too.
+proc make_region_buttons { reg_id } {
+ global region_data kill_button_data next_button_data arrow_in_progress current_arrow zoneWindow
+
+ set x_init $region_data($reg_id,x_init)
+ set y_init $region_data($reg_id,y_init)
+
+ set next_num [$zoneWindow.work_space create rectangle $x_init $y_init [expr $x_init + 40] [expr $y_init + 20] -fill blue -tags "region$reg_id next_button$reg_id"]
+
+ # Map the canvas item id back to the region id for the binding below,
+ # which runs at global scope and looks the region up via "current".
+ set next_button_data($next_num,reg_id) $reg_id
+ $zoneWindow.work_space bind next_button$reg_id <Double-2> {
+ set reg_id $next_button_data([$zoneWindow.work_space find withtag current],reg_id)
+ if { $arrow_in_progress } {
+ set canvas_x [$zoneWindow.work_space canvasx %x]
+ set canvas_y [$zoneWindow.work_space canvasy %y]
+ finish_arrow $reg_id $canvas_x $canvas_y
+ } else {
+ set canvas_x [$zoneWindow.work_space canvasx %x]
+ set canvas_y [$zoneWindow.work_space canvasy %y]
+
+ start_arrow $reg_id $canvas_x $canvas_y
+ puts stdout "Starting an arrow at $canvas_x $canvas_y"
+ }
+ }
+ set kill_num [$zoneWindow.work_space create rectangle [expr $x_init] $y_init [expr $x_init + 20] [expr $y_init + 20] -fill red -tags "region$reg_id kill_button$reg_id"]
+ set kill_button_data($kill_num,reg_id) $reg_id
+
+ # Double-click with button 2 on the red button destroys the region.
+ $zoneWindow.work_space bind kill_button$reg_id <Double-2> {
+ set reg_id $kill_button_data([$zoneWindow.work_space find withtag current],reg_id)
+ destroy_region $reg_id $zoneWindow.work_space
+
+ }
+}
+
+# start_region --
+#   Begin dragging out a new region at canvas position (x, y): create a
+#   zero-sized rectangle there, tag it region<id>, remember its id in
+#   region_id and mark a drag as in progress.
+proc start_region { x y} {
+    global x_init y_init x_final y_final started_region region_data region_list region_id arrow_in_progress current_arrow zoneWindow
+
+    set x_init $x
+    set y_init $y
+
+    set region_id [$zoneWindow.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3 ]
+    $zoneWindow.work_space itemconfigure $region_id -tags region$region_id
+
+    # The id is NOT appended to region_list here: end_region appends it
+    # once the region is complete.  Appending in both places listed every
+    # region twice, and a drag cancelled by <Leave> left an id in the list
+    # with no region_data entry for destroy_region to read.
+    set started_region 1
+}
+
+
+# end_region --
+#   Finish the region being dragged at canvas position (x, y): fix the
+#   rectangle's coordinates, store its corners in region_data so that
+#   *_init holds the smaller and *_final the larger coordinate, append the
+#   id to region_list, draw the region's buttons and release the grab.
+proc end_region { x y } {
+    global x_init y_init x_final y_final started_region region_data region_list region_id arrow_in_progress current_arrow zoneWindow
+
+    set x_final $x
+    set y_final $y
+
+    $zoneWindow.work_space coords region$region_id $x_init $y_init $x $y
+
+    # Horizontal extent, stored low-to-high.
+    if {$x_init > $x_final} {
+        set region_data($region_id,x_final) $x_init
+        set region_data($region_id,x_init) $x_final
+    } else {
+        set region_data($region_id,x_init) $x_init
+        set region_data($region_id,x_final) $x_final
+    }
+
+    # Vertical extent, stored low-to-high.
+    if {$y_init > $y_final} {
+        set region_data($region_id,y_init) $y_final
+        set region_data($region_id,y_final) $y_init
+    } else {
+        set region_data($region_id,y_init) $y_init
+        set region_data($region_id,y_final) $y_final
+    }
+
+    set region_data($region_id,next_region_id) 0
+    lappend region_list $region_id
+
+    make_region_buttons $region_id
+
+    set started_region 0
+    grab release $zoneWindow
+}
+
+
+
+
+# start_arrow --
+#   Draw a blue arrow, tagged arrow<reg_id>, that runs from the point
+#   (x_init + 40, y_init + 20) of the previously chosen region
+#   (prev_reg_id) to (x_start, y_start).  The arrow id is stored in
+#   region_data and current_arrow, reg_id becomes the new prev_reg_id and
+#   is appended to ordered_region_list.
+proc start_arrow { reg_id x_start y_start } {
+    global arrow_in_progress next_button_data region_data current_arrow \
+        zoneWindow prev_reg_id ordered_region_list
+
+    set path_name $zoneWindow.work_space
+
+    # Tail of the arrow, taken from the previous region's corner.
+    set tail_x [expr $region_data($prev_reg_id,x_init) + 40]
+    set tail_y [expr $region_data($prev_reg_id,y_init) + 20]
+
+    set arrow [$zoneWindow.work_space create line \
+        $tail_x $tail_y $x_start $y_start \
+        -width 3 -arrow last \
+        -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+
+    set region_data($reg_id,arrow) $arrow
+    #set arrow_in_progress 1
+    set current_arrow $arrow
+
+    set prev_reg_id $reg_id
+    lappend ordered_region_list $reg_id
+}
+
+
+# finish_arrow --
+#   Draw a blue arrow tagged arrow<reg_id> ending at (x_end, y_end), record
+#   it in region_data and current_arrow, and clear arrow_in_progress.
+#
+#   NOTE(review): last_arrow_x and last_arrow_y are neither declared global
+#   nor assigned in this proc, so the "create line" command below would
+#   fail with "no such variable" if this proc ran.  In the visible code it
+#   is only reached while arrow_in_progress is true, and the one statement
+#   that would set that flag to 1 (in start_arrow) is commented out.
+proc finish_arrow { reg_id x_end y_end} {
+ global arrow_in_progress next_button_data region_data current_arrow \
+ zoneWindow
+
+
+ # path_name is assigned but not used below.
+ set path_name $zoneWindow.work_space
+# end an arrow in the middle of the little blue button
+
+
+
+ set arrow [$zoneWindow.work_space create line $last_arrow_x $last_arrow_y $x_end $y_end -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+
+ set region_data($reg_id,arrow) $arrow
+ set arrow_in_progress 0
+ set current_arrow $arrow
+}
+
+
+
+# destroy_region --
+#   Remove region "reg_id" from canvas "path_name": REMOVE_ZONE is called
+#   with the region's centre point, every canvas item tagged region<id> is
+#   deleted and the id is dropped from region_list.  All ordering arrows
+#   are discarded as well, and arrow chaining restarts from pseudo-region 0.
+proc destroy_region { reg_id path_name } {
+    global region_list ordered_region_list region_data prev_reg_id
+
+    set centre_x [expr [expr $region_data($reg_id,x_init) \
+        + $region_data($reg_id,x_final)] /2]
+    set centre_y [expr [expr $region_data($reg_id,y_init) \
+        + $region_data($reg_id,y_final)] /2]
+    REMOVE_ZONE $centre_x $centre_y
+
+    $path_name delete region$reg_id
+    set region_list [ldelete $region_list $reg_id]
+
+    # Now we have to delete all of the ordering: erase each arrow, then
+    # empty the ordering list in one step.
+    foreach ordered $ordered_region_list {
+        $path_name delete arrow$ordered
+    }
+    set ordered_region_list [list]
+    set prev_reg_id 0
+}
+
+# destroy_all_regions --
+#   Call destroy_region on the zoning canvas for every id currently in
+#   region_list.
+proc destroy_all_regions { } {
+    global region_data region_list zoneWindow prev_reg_id
+
+    # foreach walks the list value as it was on entry, so destroy_region
+    # may shrink region_list while the loop runs.
+    foreach doomed $region_list {
+        destroy_region $doomed $zoneWindow.work_space
+    }
+}
+
+# scale_zoned_page --
+#   Redraw the zoned page at the current ZONING_SCALE_FACTOR: clear the
+#   canvas, resize the zoning window to the scaled page size, redisplay
+#   the page, recompute the line boundaries and close the .scale dialog.
+proc scale_zoned_page { } {
+    global ZONING_SCALE_FACTOR
+
+    set canvas_path .zoning_window.work_space
+    $canvas_path delete all
+
+    set scaled_h [expr $ZONING_SCALE_FACTOR * [get_page_height]]
+    set scaled_w [expr $ZONING_SCALE_FACTOR * [get_page_width]]
+    wm geometry .zoning_window [expr int($scaled_w)]x[expr int($scaled_h)]+300+100
+
+    zone_message "Displaying Image"
+    DISPLAY_INTERVALS $canvas_path $ZONING_SCALE_FACTOR
+    zone_message "Determining Line boundaries"
+    FIND_LINES
+    zone_message " "
+
+    destroy .scale
+}
+
+
+# set_active_zone --
+#   Divide the canvas position (x, y) by ZONING_SCALE_FACTOR, truncate
+#   both results to integers and pass them to SET_ACTIVE_PAGE.
+proc set_active_zone { x y } {
+    global ZONING_SCALE_FACTOR SCALE_FACTOR cur_xoffset cur_yoffset
+
+    set page_x [expr int( [expr $x / $ZONING_SCALE_FACTOR ] ) ]
+    set page_y [expr int( [expr $y / $ZONING_SCALE_FACTOR ] ) ]
+
+    SET_ACTIVE_PAGE $page_x $page_y
+}
+
+
+# close_zoned_document --
+#   Close the zoning window: reset the scroll offsets, clear the main
+#   window's text editor and display canvas, destroy the zoning toplevel,
+#   empty COLORED_WORDS and call DEALLOCATE_PAGE.
+proc close_zoned_document { } {
+    # COLORED_WORDS is declared global so that the reset below reaches the
+    # shared variable; without the declaration the "set" only created a
+    # proc-local variable that vanished on return.
+    global zoneWindow cur_xoffset cur_yoffset COLORED_WORDS
+
+    set cur_xoffset 0
+    set cur_yoffset 0
+    .main_window.edit_window.text_part delete 1.0 end
+    .main_window.display.work_space delete all
+
+    destroy $zoneWindow
+    set COLORED_WORDS {}
+    DEALLOCATE_PAGE
+}
+
+# init_zoning_help --
+#   Show the CalZoning help window (.zoning_help), or give it the focus
+#   when it is already open.  The "path" argument is accepted for
+#   compatibility with the caller and is not used.
+proc init_zoning_help { path } {
+    global FOREGROUND BACKGROUND FONT
+
+    # The window path is fixed up front.  "help" used to be read by the
+    # existence check before it was ever assigned, so every call failed
+    # with "can't read help: no such variable".
+    set help .zoning_help
+
+    if { [winfo exists $help] } {
+        focus $help
+        return
+    }
+
+    toplevel $help
+    wm title $help "CalZoning Help"
+#   wm geometry $help 400x500+600+150
+    # The "Activate a Zone" line names <Double-3>, which is the event bound
+    # on the work canvas by init_zoning_display; it used to say <Double-2>.
+    message $help.msg -background white -foreground $FOREGROUND -font $FONT\
+        -width 600 -text "\n\
+ Delete a Zone - <Double-2> on red button\n\
+ Draw arrow to Zone - <Double-2> on blue button\n\
+ Create a Zone - <Button-1> drag and release\n\
+ Activate a Zone - <Double-3> within region (not on buttons)\n"
+
+    pack $help.msg -fill x -fill y -expand true
+}
+
+
+# ldelete --
+#   Return "list" with the first element that exactly matches "value"
+#   removed; the list is returned unchanged when there is no such element.
+proc ldelete {list value} {
+    set pos [lsearch -exact $list $value]
+    if { $pos < 0 } {
+        return $list
+    }
+    return [lreplace $list $pos $pos]
+}
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/multi_zone.test.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,234 @@
+proc multiZone_open { filename } {
+# Open filename for multi-zone work: make sure the zoning window exists,
+# load the page with page_open and display it at half scale, either through
+# an external xv process or on the zoning canvas.
+# 1 means success
+    global IMAGE_DISPLAY_WIN ZONING_SCALE_FACTOR DISPLAY_IMAGE XV xvprocess \
+        multiZone_display_open
+
+    # Decide from the widget itself whether the zoning window is open.  The
+    # previous test compared multiZone_display_open with 1, but nothing in
+    # this file updates that flag, so a second open tried to create
+    # .zoning_window again and failed because the window already existed.
+    if { [winfo exists .zoning_window] } {
+        focus .zoning_window
+    } else {
+        toplevel .zoning_window
+        init_zoning_display .zoning_window
+    }
+
+# puts stdout "Opening $filename"
+    page_open $filename
+# Scale image to display
+    set ZONING_SCALE_FACTOR .50
+#[ max [expr 800.0/[get_page_height]] \
+# [expr 800/[get_page_width]] ]
+
+# puts stdout "Done putting into page structure"
+    # The condition is hard-wired to 1, so the failure branch is never taken.
+    if { 1 } {
+        set display_height [expr {$ZONING_SCALE_FACTOR * [get_page_height]}]
+        set display_width [expr {$ZONING_SCALE_FACTOR * [get_page_width]}]
+        # geometry is computed but not applied to any window here.
+        append geometry [expr {int($display_width)}] x [expr {int($display_height)}]
+# puts stdout "Displaying Image"
+        if { $DISPLAY_IMAGE == $XV } {
+            set xvprocess [exec xv $filename &]
+            puts stdout "xvprocess $xvprocess"
+        } else {
+# use the canvas...
+            DISPLAY_INTERVALS .zoning_window.work_space $ZONING_SCALE_FACTOR
+            FIND_LINES
+        }
+    } else {
+        popup_image_failure_win
+    }
+    puts stdout "Determining Line boundaries"
+
+}
+
+
+proc init_zoning_display { window } {
+# Set up the zoning toplevel: geometry, title, minimum size and background,
+# then create a scrollable canvas named $window.work_space together with a
+# horizontal and a vertical scrollbar.
+# window - path of an existing toplevel.
+    global ZONING_SCALE_FACTOR BACKGROUND FOREGROUND scroll_inc
+    wm geometry $window 800x800
+    wm title $window "Zoning Window"
+    wm minsize $window 400 300
+    $window configure -background $BACKGROUND
+
+    set canvas_width 1000
+    set canvas_height 1000
+
+    canvas $window.work_space -bg white -xscrollcommand \
+        "$window.xscroller set" -yscrollcommand \
+        "$window.yscroller set" -xscrollincrement \
+        $scroll_inc -cursor {crosshair black gray} \
+        -width $canvas_width -height $canvas_height
+# two scrollbars
+    scrollbar $window.xscroller -command "$window.work_space xview" -orient horizontal -background $BACKGROUND
+    scrollbar $window.yscroller -command "$window.work_space yview" -background $BACKGROUND
+
+    pack $window.xscroller -side bottom -fill x
+    # The vertical scrollbar and the canvas were created but never packed,
+    # so neither was ever shown.  They are packed the same way init_display
+    # packs the main display.
+    pack $window.yscroller -side right -fill y
+    pack $window.work_space -side top -fill x
+}
+
+proc max { a b } {
+    # Return whichever of a and b evaluates (through expr) to the larger
+    # value.  The argument is returned as it was passed, not its evaluated
+    # form; b wins when the two are equal.
+    set a_val [expr $a]
+    set b_val [expr $b]
+    if { $a_val > $b_val } {
+        return $a
+    }
+    return $b
+}
+
+
+
+
+
+# Corners of the rectangle being dragged out on the zoning canvas.  They are
+# written by the <ButtonPress-1> and <ButtonRelease-1> bindings installed in
+# initialize_region_grab.
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+
+# started_region is set to 1 by the <ButtonPress-1> binding and back to 0 on
+# release or when the pointer leaves the window.  region_count is not
+# referenced anywhere else in the visible part of this file.
+set started_region 0
+set region_count 0
+
+
+proc initialize_region_grab { window } {
+#
+#
+# facilitates the grabbing of a rectangle of the window
+# using mouse button 1
+# canvas subwindow must be called $window.work_space
+#
+# Installs five bindings: press, drag and release of button 1 on the canvas
+# draw a rectangle and record it in region_data/region_list; <B2-Motion>
+# moves the end point of the arrow in progress; <Leave> on the window
+# abandons a rectangle that is being dragged.
+#
+# NOTE(review): the binding scripts are brace-quoted, so "$window" inside
+# them is not replaced by this proc's argument.  Tk runs binding scripts at
+# global level, which means they need a global variable named "window" -
+# confirm that one is set before these bindings fire.
+
+ global x_init y_init x_final y_final started_region region_data region_list region_id arrow_in_progress current_arrow
+
+ # Button press: unless a drag is already under way, grab the window,
+ # remember the start corner and create a zero-size rectangle tagged
+ # region<id>.
+ bind $window.work_space <ButtonPress-1> {
+ if [expr ! $started_region] {
+ grab set $window
+ set x_init [$window.work_space canvasx %x]
+ set y_init [$window.work_space canvasy %y]
+
+ set region_id [$window.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3 ]
+ $window.work_space itemconfigure $region_id -tags region$region_id
+
+ set started_region 1
+
+ }
+ }
+ # Button release: fix the final corner, store the corners in region_data
+ # with the smaller coordinate under *_init, append the id to region_list,
+ # add the region's buttons and release the grab.
+ bind $window.work_space <ButtonRelease-1> {
+ set x_final [$window.work_space canvasx %x]
+ set y_final [$window.work_space canvasy %y]
+
+ $window.work_space coords region$region_id $x_init $y_init $x_final $y_final
+
+
+ # if finishing a rectangle, initialize its stuff in the array
+ if {$x_init <= $x_final} {
+ set region_data($region_id,x_init) $x_init
+ set region_data($region_id,x_final) $x_final
+ } else {
+ set region_data($region_id,x_final) $x_init
+ set region_data($region_id,x_init) $x_final
+ }
+ if {$y_init <= $y_final} {
+ set region_data($region_id,y_init) $y_init
+ set region_data($region_id,y_final) $y_final
+ } else {
+ set region_data($region_id,y_init) $y_final
+ set region_data($region_id,y_final) $y_init
+ }
+
+ set region_data($region_id,next_region_id) 0
+ lappend region_list $region_id
+
+ make_region_buttons $region_id
+
+ set started_region 0
+ grab release $window
+ }
+
+ # Button-2 drag: while an arrow is in progress, move its end point to the
+ # pointer.  NOTE(review): the start point is rewritten to 0 0 on every
+ # motion event - confirm that is intended.
+ bind $window.work_space <B2-Motion> {
+ if $arrow_in_progress {
+ set curx [$window.work_space canvasx %x]
+ set cury [$window.work_space canvasy %y]
+ $window.work_space coords $current_arrow 0 0 $curx $cury
+ }
+ }
+ # Button-1 drag: stretch the rectangle being drawn to the pointer.
+ bind $window.work_space <B1-Motion> {
+ if $started_region {
+
+ set curx [$window.work_space canvasx %x]
+ set cury [$window.work_space canvasy %y]
+
+ $window.work_space coords region$region_id $x_init $y_init $curx $cury
+
+
+ }
+ }
+ # Pointer leaves the window during a drag: release the grab and collapse
+ # the unfinished rectangle to 0 0 0 0.
+ bind $window <Leave> {
+ # on leaving the display, release control of the mouse etc.
+ # maybe make it scroll instead?
+ if $started_region {
+ grab release $window
+ set started_region 0
+ $window.work_space coords region$region_id 0 0 0 0
+ }
+ }
+}
+
+
+
+# 1 while an arrow between two regions is being drawn (set by start_arrow).
+set arrow_in_progress 0
+
+proc make_region_buttons {reg_id {window .zoning_window}} {
+    # Draw the two control buttons in the top-left corner of region reg_id:
+    # a 40x20 "next" rectangle and, on top of its left half, a 20x20 red
+    # "kill" rectangle.  Both carry the tag region<reg_id>, so they are
+    # deleted together with the region.
+    #
+    # reg_id - canvas id of the region; region_data(reg_id,x_init/y_init)
+    #          must already be set.
+    # window - toplevel that owns the canvas $window.work_space.  The body
+    #          used to read an undefined local "window" and failed with
+    #          "no such variable"; it is now an optional argument that
+    #          defaults to the zoning window.
+    #
+    # The item bindings are built with the canvas path substituted in, so
+    # they do not depend on a global variable called "window" when they fire.
+    global region_data kill_button_data next_button_data arrow_in_progress current_arrow
+
+    set canvas $window.work_space
+    set x_init $region_data($reg_id,x_init)
+    set y_init $region_data($reg_id,y_init)
+
+    set next_num [$canvas create rectangle $x_init $y_init [expr {$x_init + 40}] [expr {$y_init + 20}] -tags "region$reg_id next_button$reg_id"]
+#statement above had -fill blue between 20 and -tags
+    set next_button_data($next_num,reg_id) $reg_id
+    # Double-click with button 2 on the next button: finish the arrow in
+    # progress, or start a new one at the pointer position.
+    $canvas bind next_button$reg_id <Double-2> "
+        set reg_id \$next_button_data(\[$canvas find withtag current\],reg_id)
+        if { \$arrow_in_progress } {
+            finish_arrow \$reg_id
+        } else {
+            set canvas_x \[$canvas canvasx %x\]
+            set canvas_y \[$canvas canvasy %y\]
+            start_arrow \$reg_id \$canvas_x \$canvas_y
+            puts stdout \"Starting an arrow at \$canvas_x \$canvas_y\"
+        }
+    "
+    set kill_num [$canvas create rectangle [expr {$x_init}] $y_init [expr {$x_init + 20}] [expr {$y_init + 20}] -fill red -tags "region$reg_id kill_button$reg_id"]
+    set kill_button_data($kill_num,reg_id) $reg_id
+
+    # Double-click with button 2 on the kill button: delete the region.
+    $canvas bind kill_button$reg_id <Double-2> "
+        set reg_id \$kill_button_data(\[$canvas find withtag current\],reg_id)
+        destroy_region \$reg_id $canvas
+    "
+}
+
+
+
+
+proc start_arrow { reg_id x_start y_start {window .zoning_window} } {
+    # Create a zero-length blue arrow at (x_start, y_start) on the canvas
+    # $window.work_space, tag it arrow<reg_id>, record it in
+    # region_data(reg_id,arrow) and mark it as the arrow in progress.
+    #
+    # window - toplevel that owns the canvas.  The body used to read an
+    #          undefined local "window" and failed with "no such variable";
+    #          it is now an optional argument defaulting to the zoning
+    #          window, so existing three-argument calls keep working.
+    global arrow_in_progress next_button_data region_data current_arrow
+    set path_name $window.work_space
+# start an arrow in the middle of the little red button
+
+    set arrow [$path_name create line $x_start $y_start $x_start $y_start -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+
+    set region_data($reg_id,arrow) $arrow
+    set arrow_in_progress 1
+    set current_arrow $arrow
+}
+
+
+proc destroy_region { reg_id path_name } {
+    # Delete every item tagged region<reg_id> from the canvas path_name and
+    # log the removal on stdout.
+    set region_tag region$reg_id
+    $path_name delete $region_tag
+    puts stdout "Destroying $reg_id"
+}
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/new_ui.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,1086 @@
+#
+# user interface code (tcl visuals) for OCR
+# started 9/95, Archie Russell
+# Hide the root window; the interface is built in separate toplevels.
+wm withdraw .
+# Bitmap file names are built from the working directory at the time this
+# file is sourced.
+append OCRCHIE_ROOT [pwd] "/"
+append face_image $OCRCHIE_ROOT face_happy.xbm
+append eye_image $OCRCHIE_ROOT eye.xbm
+append write_image $OCRCHIE_ROOT edit2.xbm
+
+# xvprocess is overwritten with the result of "exec xv ... &" when an image
+# is displayed through xv.
+set xvprocess "0"
+set main_window_width 800
+set main_window_height 800
+set dummy 0
+# I'd like to be able to use the above parameters in here,
+# but I think tcl might get a little angry if I try
+# the size of the window, and the position of its upper left
+set main_window_geometry 800x800+200+100
+
+# Sizes of the main window's sub-frames and geometry of the small dialogs.
+set menu_bar_width $main_window_width
+set menu_bar_height 50
+set button_bar_width $main_window_width
+set button_bar_height 50
+set display_height 400
+set edit_window_height 300
+set quit_dialog_geometry 300x135+500+500
+set save_ascii_geometry 275x140+500+500
+# save a little room for scrollbars, etc.
+
+# Colours and fonts shared by all widgets.
+set BACKGROUND #CCCCCC
+set FOREGROUND #000000
+set SELECT white
+# set FONT -bitstream-*-medium-r-normal--26-171-110-110-p-150-iso8859-1
+#set FONT -bitstream-*-medium-r-normal--19-140-85-85-p-110-hp-roman8
+#set SMALLFONT -bitstream-*-medium-r-normal--19-140-85-85-p-110-hp-roman8
+#2 lines above changed to 2 lines below 5/17/96 RJF
+set FONT -bitstream-*-medium-r-normal-*-19-*-75-75-p-110-iso8859-1
+set SMALLFONT -bitstream-*-medium-r-normal-*-19-*-75-75-p-110-iso8859-1
+
+
+
+# Background of the text editor and of the tags that mark words by
+# recognition status (see init_edit_window).
+set EDIT_BACKGROUND #000000
+set LOW_PRECISION_BACKGROUND blue
+set MISPELLED_BACKGROUND SeaGreen
+set UNKNOWN_CHAR_BACKGROUND red
+
+# Passed as -xscrollincrement to the display canvas.
+set scroll_inc 30
+
+
+
+# Canvas size: the main window size less 30 pixels (room for scrollbars).
+set canvas_width [expr {$main_window_width - 30}]
+# This variable was spelled "canvas_hight", while the procs in this file
+# declare "canvas_height".  The correct name is now set; the old spelling is
+# kept as an alias in case other scripts still read it.
+set canvas_height [expr {$main_window_height - 30}]
+set canvas_hight $canvas_height
+
+# global variables for equation marking
+# (this heading was missing its "#" and was being run as a "global" command)
+set curx 0
+set cury 0
+set curline 0
+set curline_startrow 0
+set curline_endrow 0
+set in_equation 0
+set WbxEquationsOnly 0
+#Was the component selected valid in component_select
+set validComponent 0
+
+#comp_menu.tcl holds all of the component options, join, split, and learn
+source "comp_menu.tcl"
+
+#multi_zone.tcl holds the code for displaying a document for zoning
+set multiZone_display_open 0
+source "multi_zone.tcl"
+
+proc init_user_interface {} {
+# Build the main window: the toplevel itself, the menu bar, the button bar,
+# the image display and the text editor, and install the mouse/keyboard
+# bindings of the display canvas.
+
+# tcl requires declaration of global variables used in a fxn
+
+    global main_window_geometry main_window_width main_window_height menu_bar_width menu_bar_height button_bar_width button_bar_height display_height BACKGROUND FOREGROUND EDIT_BACKGROUND edit_window_height validComponent
+
+# toplevel windows are at the same level as 'xterms'
+    toplevel .main_window
+
+# $ sign means 'get the value' (otherwise uses the string)
+    wm geometry .main_window $main_window_geometry
+    wm title .main_window "OCR user interface"
+    # The minimum size used to be set on ".", the root window, which this
+    # file withdraws and never shows; it belongs on the window being built.
+    wm minsize .main_window 400 300
+    .main_window configure -background $BACKGROUND
+# frames are subwindows that are there mostly to 'hold' other windows
+    frame .main_window.menu_bar -width $menu_bar_width -height $menu_bar_height -relief raised -bd 2 -background $BACKGROUND
+    init_menu_bar
+
+    frame .main_window.button_bar -width $button_bar_width -height $button_bar_height -background $BACKGROUND
+    init_button_bar
+
+
+    init_display
+
+    frame .main_window.edit_window -width $main_window_width -height $edit_window_height -relief ridge -bd 5 -bg $EDIT_BACKGROUND
+    init_edit_window
+
+# pack puts things together: this will put the menu_bar window just above the button_bar_window above the display
+
+    pack .main_window.menu_bar -side top -fill x
+    pack .main_window.button_bar .main_window.display .main_window.edit_window -side top -anchor w -fill x
+    focus .main_window
+
+    bind .main_window.display <Enter> {
+        SWITCH_TO_ACTIVE_PAGE
+    }
+
+#Mark an equation
+    bind .main_window.display.work_space <ButtonPress-3> {
+        equation_mark %W %x %y }
+#Delete an equation
+    bind .main_window.display.work_space <Double-2> {
+        equation_delete %W %x %y }
+#Bind <Control-l> to learn
+    bind .main_window.display.work_space <Control-l> {
+        set curx [%W canvasx %x]
+        set cury [%W canvasy %y]
+        component_select %W $curx $cury
+        component_learn
+    }
+
+    #Select a component
+
+    bind .main_window.display.work_space <Double-1> {
+        if { [component_select %W [%W canvasx %x] [%W canvasy %y] ] } {
+            init_component_menu %W [%W canvasx %x] [%W canvasy %y]
+        }
+    }
+
+}
+
+
+proc init_menu_bar { } {
+# Populate the main menu bar: one menubutton each for File, Tools, Options
+# and Windows (each followed by the proc that fills its menu), packed from
+# the left, plus a flat Help button packed on the right.
+# shortcuts not working! why?
+    global BACKGROUND FOREGROUND FONT
+
+    set bar .main_window.menu_bar
+    set buttons {}
+    foreach {name label fill_menu} {
+        file    "File "    init_file_menu
+        tools   "Tools "   init_tools_menu
+        options "Options " init_options_menu
+        windows "Windows " init_windows_menu
+    } {
+        menubutton $bar.$name -text $label -menu $bar.$name.menu -borderwidth 2 \
+            -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+        $fill_menu
+        lappend buttons $bar.$name
+    }
+    eval pack $buttons -side left -padx 1m -pady 1m
+
+    button $bar.help -text "Help" -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT -relief flat \
+        -command { init_help .main_window.menu_bar.help }
+    pack $bar.help -side right
+}
+
+
+
+
+proc init_file_menu { } {
+# Create the menu that hangs off the File menubutton and add its entries.
+# Each entry runs the proc named in -command; separators are only
+# horizontal rules.
+    global FOREGROUND BACKGROUND FONT
+
+    set m .main_window.menu_bar.file.menu
+    menu $m -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+
+    $m add command -label "Open..." -command popup_open_menu
+    $m add command -label "Close" -command close_document
+    $m add separator
+# disabled: $m add command -label "Save TIFF" -command popup_save_tiff_menu
+    $m add command -label "Save ASCII" -command popup_save_ascii_menu
+# disabled: $m add command -label "Save WORD/POS" -command popup_save_word_pos_menu
+    $m add command -label "Save WORDBOX" -command popup_save_wordbox_menu
+    $m add command -label "Save EQUATION BOUNDS" -command popup_save_equations_menu
+
+    $m add command -label "Save Learned Characters" -command popup_save_learned_chars_menu
+    $m add command -label "Read Learned Characters" -command popup_read_learned_chars_menu
+
+# disabled: $m add command -label "Save Setup" -command popup_save_setup_menu
+    $m add separator
+    $m add command -label "Quit" -command popup_quit_dialog_box
+}
+
+proc close_document { } {
+    # Close the document shown in the main window: empty the text editor and
+    # the display canvas and clear the list of highlighted words.  If the
+    # zoning window is open it gets the focus and a "No active zone"
+    # message; otherwise DEALLOCATE_PAGE is called.
+    #
+    # COLORED_WORDS has to be declared global here.  Without the declaration
+    # the "set" below only created a local variable, so the global list that
+    # addword fills and the <Tab> binding consumes was never cleared.
+    global COLORED_WORDS
+
+    .main_window.edit_window.text_part delete 1.0 end
+    .main_window.display.work_space delete all
+
+    set COLORED_WORDS {}
+    if { [winfo exists .zoning_window] } {
+        focus .zoning_window
+        zone_message "No active zone"
+    } else {
+        DEALLOCATE_PAGE
+
+    }
+}
+
+
+proc init_tools_menu { } {
+# Create the menu that hangs off the Tools menubutton and add its entries.
+    global BACKGROUND FOREGROUND FONT
+
+    set m .main_window.menu_bar.tools.menu
+    menu $m -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+
+# disabled - supposed to just find the angle and tell you what it is:
+# $m add command -label "Skew Angle" -command popup_skew_angle_menu
+
+# disabled - supposed to let the user arbitrarily rotate the image:
+# $m add command -label "Rotate" -command popup_rotate_menu
+
+    $m add command -label "Extract Components" -command EXTRACT_COMP
+    $m add command -label "Recognize" -command popup_recognize_menu
+    $m add command -label "Learn" -command popup_learn_menu
+    $m add separator
+    $m add command -label "Zoom in" -command ZOOM_IN
+    $m add command -label "Zoom out" -command ZOOM_OUT
+    $m add separator
+    $m add command -label "Refresh" -command DISPLAY_INTERVALS
+    $m add command -label "Deskew" -command popup_deskew_menu
+
+# disabled - automated spell correction:
+# $m add command -label "SpellCorrect" -command popup_correct_menu
+
+# disabled - interactive learning:
+# $m add command -label "Learn Mode" -command popup_learn_mode
+}
+
+# Variables behind the font and size option menus of the learn window.
+set CURRENT_DEFAULT_FONT Helvetica
+set CURRENT_DEFAULT_SIZE 9
+proc popup_learn_mode { } {
+    # Build the .learn window: a 100x100 canvas, an entry for the ASCII
+    # value, option menus for font and size, and Learn/Skip buttons wired to
+    # learn_ok and learn_skip.
+    #
+    # SELECT is now declared global: the entry below uses $SELECT for
+    # -selectbackground, which failed with "no such variable" before.
+    global BACKGROUND FOREGROUND SMALLFONT FONT SELECT
+    toplevel .learn
+    frame .learn.c -width 100 -height 100
+    canvas .learn.c.c -background $BACKGROUND -width 100 -height 100
+    pack .learn.c.c
+    frame .learn.s
+    entry .learn.s.learned_string -width 5 -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT -selectbackground $SELECT
+    .learn.s.learned_string icursor 0
+    .learn.s.learned_string select range 0 10
+    label .learn.s.string_message -text "Ascii" -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+    pack .learn.s.learned_string .learn.s.string_message -side left -expand 1
+
+    tk_optionMenu .learn.font CURRENT_DEFAULT_FONT Helvetica Courier Times
+    .learn.font configure -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+    .learn.font.menu configure -bg $BACKGROUND -fg $FOREGROUND
+    tk_optionMenu .learn.size CURRENT_DEFAULT_SIZE 9 10 12 18
+    .learn.size configure -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+    .learn.size.menu configure -bg $BACKGROUND -fg $FOREGROUND
+    frame .learn.buttons
+    button .learn.buttons.ok -text Learn -command learn_ok -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button .learn.buttons.cancel -text Skip -command learn_skip -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack .learn.buttons.ok .learn.buttons.cancel -side left -expand 1 -fill x
+    pack .learn.c .learn.s .learn.font .learn.size .learn.buttons -side top -fill x
+}
+
+
+proc init_windows_menu { } {
+# Create the menu that hangs off the Windows menubutton.  Its two entries
+# only move the keyboard focus: to the zoning window (when it exists) or to
+# the main window.
+    global FOREGROUND BACKGROUND FONT
+
+    set m .main_window.menu_bar.windows.menu
+    menu $m -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+    $m add command -label "Zoning Window" -command { if { [winfo exists .zoning_window] } {focus .zoning_window} }
+    $m add command -label "Active Window" -command {focus .main_window}
+}
+
+proc init_help { path } {
+    # Pop up the help window for the main ("active") window, .main_help.
+    # When the window already exists it is only given the focus.
+    # path - accepted for call compatibility; not used in the body.
+    global FOREGROUND BACKGROUND FONT
+
+    # The existence test must use the fixed widget path: the local variable
+    # "help" was previously read here before it had ever been assigned,
+    # which raised "can't read help: no such variable" on every call.
+    set help .main_help
+    if { [winfo exists $help] } {
+        focus $help
+        return
+    }
+
+    toplevel $help
+    wm title $help "Active Window Help"
+# wm geometry $help 400x500+600+150
+ message $help.msg -background white -foreground $FOREGROUND -font $FONT\
+ -width 600 -text "\n\
+ Mark Equation - <Button3> at start and end of Equation \n\
+ Delete Equation- <Double-2> within the equation \n\n\
+ Component Menu - <Double-1> within the component boundaries \n\
+ \t Join - Select \[Join\] and <Double-1> on component to join\n\
+ \t Split - Select \[Horizontal Split\] or \[Vertical Split\]\n\
+ \t Learn - Select \[Learn\] and enter value. \n"
+
+    # NOTE(review): the second -fill replaces the first, so this is
+    # effectively "-fill y"; "-fill both" may have been intended - confirm.
+    pack $help.msg -fill x -fill y -expand true
+
+}
+
+proc popup_deskew_menu { } {
+    # Menu callback for Tools > Deskew: log the call, then run DESKEW.
+    puts "Calling get_skew"
+    DESKEW
+}
+
+proc popup_recognize_menu { } {
+    # Menu callback for Tools > Recognize: log the call, then run RECOGNIZE.
+    puts "Calling recognize from Tcl"
+    RECOGNIZE
+}
+proc popup_learn_menu { } {
+    # Menu callback for Tools > Learn: write the current contents of the
+    # text editor to recog.tmp and pass that file to LEARN_PAGE.
+    puts stdout "Calling interactive learn from Tcl"
+
+    set tmp_name recog.tmp
+    set out [open $tmp_name w]
+    puts $out [.main_window.edit_window.text_part get 1.0 end]
+    close $out
+
+    # last argument synchronizes words
+    LEARN_PAGE $tmp_name 1
+}
+
+# Value of DISPLAY_IMAGE that selects the external xv viewer.
+set XV 2
+proc init_options_menu { } {
+    # Create the menu that hangs off the Options menubutton: the warning
+    # level dialog, three check buttons, the display-method and
+    # deskew-method radio groups, and a "Global Vars" cascade that is filled
+    # in by init_global_menu.
+    global dummy word_certainty_value screen_view_style FOREGROUND BACKGROUND FONT XV
+
+    set m .main_window.menu_bar.options.menu
+    menu $m -foreground $FOREGROUND -background $BACKGROUND -font $FONT
+    $m add command -label "Warning Levels..." -command popup_confidence_menu
+
+# disabled: $m add command -label "Zoom Ratio" -command popup_zoom_ratio_menu
+    $m add checkbutton -label "Display Line Boundaries" -variable DISPLAY_LINE_BOUNDARIES
+    $m add checkbutton -label "Display Bounding Boxes" -variable DISPLAY_BOUNDING_BOXES
+    $m add checkbutton -label "Spellcheck" -variable SPELLCHECK
+    $m add separator
+    $m add radiobutton -label "No Display" -variable DISPLAY_IMAGE -value 0
+    $m add radiobutton -label "OCRchie Display" -variable DISPLAY_IMAGE -value 1
+    $m add radiobutton -label "xv" -variable DISPLAY_IMAGE -value $XV
+    $m add separator
+    $m add radiobutton -label "No deskew" -variable DESKEW_METHOD -value -1
+    $m add radiobutton -label "RLE rotate" -variable DESKEW_METHOD -value 1
+    $m add radiobutton -label "Bitmap rotate" -variable DESKEW_METHOD -value 0
+    $m add cascade -label "Global Vars" -menu $m.globals
+ init_global_menu { NoiseTolerance MinLineSize MinVertSeparation \
+ MinHorizSeparation ConfidenceThreshold JoinTolerance }
+}
+
+set GLOBAL_MESSAGE "<none>"
+proc init_button_bar { } {
+    # Put a single message widget, 400 units wide, into the button bar.
+    global FONT BACKGROUND FOREGROUND GLOBAL_MESSAGE
+
+    set msg .main_window.button_bar.msg
+    message $msg -font $FONT -background $BACKGROUND -foreground $FOREGROUND -width 400
+    pack $msg
+}
+
+proc init_display { } {
+    # Build the image display area of the main window: a ridged frame that
+    # holds a white canvas and its two scrollbars.  The canvas path is
+    # stored in IMAGE_DISPLAY_WIN and its scroll region is fixed at
+    # 5000x5000.
+    global display_height canvas_width canvas_height FOREGROUND BACKGROUND IMAGE_DISPLAY_WIN main_window_width scroll_inc
+
+    set holder .main_window.display
+    set canvas $holder.work_space
+
+    frame $holder -width $main_window_width -height $display_height -relief ridge -bd 5 -bg $BACKGROUND
+    canvas $canvas -bg white \
+        -xscrollcommand "$holder.xscroller set" \
+        -yscrollcommand "$holder.yscroller set" \
+        -xscrollincrement $scroll_inc -cursor {crosshair black gray} \
+        -width $canvas_width -height $display_height
+    scrollbar $holder.xscroller -command "$canvas xview" -orient horizontal -background $BACKGROUND
+    scrollbar $holder.yscroller -command "$canvas yview" -background $BACKGROUND
+
+    # Scrollbars are packed first, the canvas takes what is left.
+    pack $holder.xscroller -side bottom -fill x
+    pack $holder.yscroller -side right -fill y
+    pack $canvas -side top -fill x
+
+    set IMAGE_DISPLAY_WIN $canvas
+    $canvas configure -scrollregion { 0 0 5000 5000 }
+
+# box_init
+}
+
+# Create the text editor of the main window, define the three tags used to
+# highlight words by recognition status, and bind <Tab> to jump to the next
+# highlighted word.
+#
+# The <Tab> script runs at global level: it reads COLORED_WORDS, SCALE_FACTOR
+# and scroll_inc as globals and leaves xpos, ypos, ulx, uly, lrx, lry, x and
+# local_tags behind as global variables.
+proc init_edit_window { } {
+ global edit_window_height canvas_width EDIT_BACKGROUND COLORED_WORDS LOW_PRECISION_BACKGROUND MISPELLED_BACKGROUND UNKNOWN_CHAR_BACKGROUND SMALLFONT SCALE_FACTOR scroll_inc
+ text .main_window.edit_window.text_part -bg $EDIT_BACKGROUND -height $edit_window_height -width $canvas_width -insertbackground yellow -insertwidth 8 -font $SMALLFONT -fg white -wrap word
+ pack .main_window.edit_window.text_part -side bottom
+ # One tag per word status; addword applies them when it inserts a word.
+ .main_window.edit_window.text_part tag configure LOW_PRECISION -background $LOW_PRECISION_BACKGROUND
+ .main_window.edit_window.text_part tag configure MISPELLED -background $MISPELLED_BACKGROUND
+ .main_window.edit_window.text_part tag configure UNKNOWN_CHAR -background $UNKNOWN_CHAR_BACKGROUND
+# Tab binding for the window is supposed to advance the cursor to the
+# next uncertain word and scroll the image display to show the image
+# of that word
+#
+# Each highlighted word occupies three consecutive COLORED_WORDS entries
+# (text index, x position, y position); the script pops all three.  The
+# scroll region is set around the word and then immediately reset to the
+# full 5000x5000 area.  The final "break" stops further <Tab> bindings
+# from running.
+ bind .main_window.edit_window.text_part <Tab> {
+ if {[llength $COLORED_WORDS] == 0} {
+ puts stdout "No more words"
+ } else {
+ .main_window.edit_window.text_part mark set insert [pop_colored_words]
+ set xpos [pop_colored_words]
+ set ypos [pop_colored_words]
+# puts "xpos and ypos for this word"
+ set ulx [expr $SCALE_FACTOR * ($xpos - 300)]
+ set uly [expr $SCALE_FACTOR * ($ypos - 100)]
+ set lrx [expr $SCALE_FACTOR * ($xpos + 300)]
+ set lry [expr $SCALE_FACTOR * ($ypos + 100)]
+# I could never get this scrolling to work quite right, maybe
+# someone will figure it out someday
+ .main_window.display.work_space configure -scrollregion [list $ulx $uly $lrx $lry]
+ .main_window.display.work_space configure -scrollregion {0 0 5000 5000}
+# .main_window.display.work_space xview moveto [expr (($SCALE_FACTOR * $xpos) / $scroll_inc)]
+# .main_window.display.work_space yview moveto [expr (($SCALE_FACTOR * $ypos) / $scroll_inc)]
+
+ set x [.main_window.edit_window.text_part index insert]
+# puts "New index is $x"
+ .main_window.edit_window.text_part see insert
+ set local_tags [.main_window.edit_window.text_part tag names insert]
+# puts "Tags at this place: $local_tags"
+ }
+ break
+ }
+}
+
+proc addword { w {xpos 0} {ypos 0} {status OK}} {
+    # Append word w to the text editor.
+    #   status OK                                - plain insert, followed by a space.
+    #   LOW_PRECISION / MISPELLED / UNKNOWN_CHAR - insert with the tag of that
+    #       name and record (text index of the word start, xpos, ypos) at the
+    #       end of COLORED_WORDS.
+    #   anything else                            - log it and insert the word
+    #       followed by "UNKNOWNSTATUS? ".
+    global COLORED_WORDS LOW_PRECISION_BACKGROUND MISPELLED_BACKGROUND UNKNOWN_CHAR_BACKGROUND
+
+    set editor .main_window.edit_window.text_part
+    switch -exact -- $status {
+        OK {
+            $editor insert end "$w "
+            $editor mark set insert end
+        }
+        LOW_PRECISION -
+        MISPELLED -
+        UNKNOWN_CHAR {
+            $editor insert end "$w" $status
+            $editor insert end " "
+            # xpos and ypos could be tags too, but that really slows things
+            # down, so they are kept in COLORED_WORDS instead.
+            # Step back into the word just inserted and find its start.
+            $editor mark set insert "end -3 char"
+            $editor mark set insert "insert wordstart"
+            lappend COLORED_WORDS [$editor index insert] $xpos $ypos
+            $editor mark set insert end
+        }
+        default {
+            puts stdout "Unknown word status for $w: $status"
+            $editor insert end "$w UNKNOWNSTATUS? "
+        }
+    }
+}
+
+proc pop_colored_words { } {
+    # Remove the first element of the global list COLORED_WORDS and return
+    # it.  On an empty list the list is left alone and the empty string is
+    # returned.
+    global COLORED_WORDS
+
+    set head [lindex $COLORED_WORDS 0]
+    if {[llength $COLORED_WORDS] > 0} {
+        set COLORED_WORDS [lrange $COLORED_WORDS 1 end]
+    }
+    return $head
+}
+
+
+# Defaults for the Open dialog (popup_open_menu): its geometry, the
+# directory it starts in, the contents of its "Dir:" entry (box_entry) and
+# the glob pattern used to list files.
+set open_menu_geometry 250x300+400+400
+set current_directory [pwd]
+set box_entry $current_directory
+set open_menu_pattern *.tif
+
+# Values of the two zoning radio buttons in the Open dialog; "zoning" holds
+# the current selection and starts out as single zone.
+set singleZone "Single Zone"
+set multiZone "Multi Zone"
+set zoning "Single Zone"
+
+proc popup_open_menu { } {
+#
+# this procedure pops up an interactive box which can be used to open files
+# what a horrible mess. Writing it took forever
+#
+# The box holds a single/multi zone selector, a "Dir:" entry, a directory
+# listing with a scrollbar and a file pattern entry.  Pressing <Return> in
+# the "Dir:" entry or double-clicking a listing entry either changes into
+# the chosen directory or opens the chosen file with singleZone_open or
+# multiZone_open, depending on the zone selector.  If the box is already
+# open it is only given the focus.
+#
+    global open_menu_geometry open_menu_pattern current_directory FONT FOREGROUND BACKGROUND SMALLFONT box_entry singleZone multiZone zoning SELECT
+
+    if { [winfo exists .open_menu] } {
+        focus .open_menu
+        return
+    }
+    toplevel .open_menu
+    wm geometry .open_menu $open_menu_geometry
+    wm title .open_menu Open
+    .open_menu configure -background $BACKGROUND
+    # force the user to interact with this box
+    # grab set .open_menu
+
+    # directory listing and scrollbar
+    frame .open_menu.dirstuff
+
+    frame .open_menu.cur_dir
+    label .open_menu.cur_dir.l -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -text "Dir: "
+    entry .open_menu.cur_dir.e -relief sunken -bd 2 -textvariable box_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT -selectbackground $SELECT
+    .open_menu.cur_dir.e icursor 0
+
+
+    frame .open_menu.zone
+
+    radiobutton .open_menu.zone.single -variable zoning -value $singleZone \
+        -text $singleZone -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+
+    radiobutton .open_menu.zone.multi -variable zoning -value $multiZone \
+        -text $multiZone -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+
+    pack .open_menu.zone.single -side left
+    pack .open_menu.zone.multi -side left
+    pack .open_menu.zone -side top -fill x
+
+    .open_menu.cur_dir.e icursor end
+    # <Return> in the "Dir:" entry.  The "else" of the zoning test used to
+    # end its line with the body on the following line; Tcl ends a command
+    # at the newline, so choosing Multi Zone here failed with 'no script
+    # following "else" argument'.  The body now starts on the "else" line.
+    bind .open_menu.cur_dir.e <Return> {
+        set file_to_open $box_entry
+        if [file isdirectory $file_to_open] {
+            cd $file_to_open
+            set current_directory [pwd]
+            clear_directory_box
+            puts stdout "Changing to $current_directory"
+            fill_in_directory_box $current_directory $open_menu_pattern
+        } elseif [file exists $file_to_open] {
+            puts stdout "Opening file $file_to_open"
+            if { $zoning == $singleZone } {
+                singleZone_open $file_to_open
+            } else {
+                multiZone_open $file_to_open
+            }
+            destroy .open_menu
+        } else {
+            puts stdout "Cannot acccess that file"
+        }
+    }
+    pack .open_menu.cur_dir.l .open_menu.cur_dir.e -side left
+
+    scrollbar .open_menu.dirstuff.yscroll -command ".open_menu.dirstuff.directory yview" -background $BACKGROUND
+    listbox .open_menu.dirstuff.directory -yscrollcommand ".open_menu.dirstuff.yscroll set" -width 22 -height 11 -relief raised -font $SMALLFONT -background $BACKGROUND -foreground $FOREGROUND
+
+    fill_in_directory_box $current_directory $open_menu_pattern
+    # Double-click on a listing entry: same logic, acting on the selection.
+    bind .open_menu.dirstuff.directory <Double-Button-1> {
+        set file_to_open [selection get]
+# puts stdout "Bound button"
+        if [file isdirectory $file_to_open] {
+            cd $file_to_open
+            set current_directory [pwd]
+            clear_directory_box
+# puts stdout "Changing to $current_directory"
+            fill_in_directory_box $current_directory $open_menu_pattern
+        } elseif [file exists $file_to_open] {
+# puts stdout "Opening file $file_to_open"
+            if { $zoning == $singleZone } {
+                singleZone_open $file_to_open
+            } else { multiZone_open $file_to_open }
+            destroy .open_menu.dirstuff.directory
+            destroy .open_menu
+        } else {
+            puts stdout "Cannot access that file"
+        }
+    }
+    # pattern for listings to match
+
+
+    frame .open_menu.pattern_match -background $BACKGROUND
+    label .open_menu.pattern_match.label -text "Match files of type:" -font $SMALLFONT -background $BACKGROUND -fg $FOREGROUND
+    entry .open_menu.pattern_match.entry -width 5 -relief sunken -bd 2 -textvariable open_menu_pattern -font $SMALLFONT -background $BACKGROUND -fg $FOREGROUND
+    # refresh the directory listing after user presses return
+    bind .open_menu.pattern_match.entry <Return> {
+        set current_directory [pwd]
+        clear_directory_box
+        fill_in_directory_box $current_directory $open_menu_pattern
+    }
+
+    pack .open_menu.pattern_match.label .open_menu.pattern_match.entry -side left
+    pack .open_menu.dirstuff.directory .open_menu.dirstuff.yscroll -side left -fill y
+
+
+    pack .open_menu.pattern_match .open_menu.cur_dir .open_menu.dirstuff -side top -anchor w
+    focus .open_menu.pattern_match.entry
+}
+
+
+proc popup_confidence_menu { } {
+# a little box for the user to change the confidence
+# warning levels (words that get highlighted)
+#
+# Two horizontal scales (0-255) are tied to the global variables
+# VERY_LOW_CONFIDENCE and LOW_CONFIDENCE.
+    global BACKGROUND FOREGROUND SMALLFONT FONT
+
+    # Choosing the menu entry while the box was still open tried to create
+    # .confidence a second time and raised an error.  Reuse the open box
+    # instead, the same way popup_open_menu does.
+    if { [winfo exists .confidence] } {
+        focus .confidence
+        return
+    }
+
+    toplevel .confidence -background $BACKGROUND
+    wm geometry .confidence 250x225+500+500
+    message .confidence.m -text "Warning thresholds for the output display\n (255 = warn unless perfect)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT -justify center -width 250
+    scale .confidence.very_low -from 0 -to 255 -variable VERY_LOW_CONFIDENCE -orient horizontal -label "Poor (displayed in red)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+    scale .confidence.low -from 0 -to 255 -variable LOW_CONFIDENCE -orient horizontal -label "Fair (displayed in blue)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+    pack .confidence.m .confidence.very_low .confidence.low -side top -fill x
+}
+
+proc singleZone_open { filename } {
+# Load filename with page_open and show it in the main window, either
+# through an external xv process (when DISPLAY_IMAGE equals XV) or on the
+# main display canvas followed by FIND_LINES.
+# 1 means success
+    global IMAGE_DISPLAY_WIN SCALE_FACTOR DISPLAY_IMAGE XV xvprocess
+
+    page_open $filename
+
+    # The condition is hard-wired to 1, so the failure branch is never taken.
+    if { 1 } {
+        set scaled_h [expr {$SCALE_FACTOR * [get_page_height]}]
+        set scaled_w [expr {$SCALE_FACTOR * [get_page_width]}]
+        # Computed as <width>x<height> but not applied to any window.
+        set geometry [expr {int($scaled_w)}]x[expr {int($scaled_h)}]
+
+        if { $DISPLAY_IMAGE == $XV } {
+            set xvprocess [exec xv $filename &]
+            puts stdout "xvprocess $xvprocess"
+        } else {
+            # use the canvas...
+            DISPLAY_INTERVALS .main_window.display.work_space $SCALE_FACTOR
+            FIND_LINES
+        }
+    } else {
+        popup_image_failure_win
+    }
+    puts stdout "Determining Line boundaries"
+}
+
+set save_entry "recog.txt"
+proc popup_save_ascii_menu { } {
+#
+# Pops up a small window that asks for the file name under which the
+# recognized ASCII text is saved.  The name lives in the global save_entry
+# (reset to recog.txt each time); OK and <Return> call save_ascii, Cancel
+# calls save_ascii_cancel.  The window takes a grab while it is open.
+# Should have a general function for all the saves, but now
+# they are just cuts and pastes
+#
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT write_image SELECT
+
+    set save_entry recog.txt
+    set dlg .save_ascii
+    toplevel $dlg -background $BACKGROUND
+    wm geometry $dlg $save_ascii_geometry
+    wm title $dlg "Save ASCII Text"
+    grab set $dlg
+
+    label $dlg.image -bitmap @$write_image -foreground $FOREGROUND -background $BACKGROUND
+
+    # Prompt and file name entry.
+    frame $dlg.s -background $BACKGROUND
+    label $dlg.s.txt -text "Save ascii text as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry $dlg.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT -selectbackground $SELECT
+    $dlg.s.ent icursor 0
+    $dlg.s.ent select range 0 12
+    pack $dlg.s.txt $dlg.s.ent -side top
+
+    # OK / Cancel row.
+    frame $dlg.buttons
+    button $dlg.buttons.ok -text OK -command save_ascii -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button $dlg.buttons.cancel -text Cancel -command save_ascii_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack $dlg.buttons.ok $dlg.buttons.cancel -side left -expand 1 -fill x
+
+    pack $dlg.image $dlg.s $dlg.buttons -side top
+
+    bind $dlg.s.ent <Return> {
+        save_ascii
+    }
+}
+
+proc save_ascii_cancel { } {
+ destroy .save_ascii ;# Cancel handler of the Save ASCII dialog: close it without writing anything
+}
+
+proc save_ascii { } {
+    # Writes the whole text of the edit window to the file named by save_entry and closes the dialog; no error checking yet, so a failing open leaves the dialog up.
+    global save_entry
+    set out [open $save_entry w]
+    puts $out [.main_window.edit_window.text_part get 1.0 end]
+    close $out
+    destroy .save_ascii
+}
+
+proc popup_save_word_pos_menu { } {
+    # Modal dialog asking for the file that receives the word/pos output; OK and <Return> run save_word_pos, Cancel runs save_word_pos_cancel.
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT face_image
+    set w .save_word_pos
+    set save_entry recog.wps
+    toplevel $w -background $BACKGROUND
+    wm geometry $w $save_ascii_geometry
+    wm title $w "Save in word/pos format"
+    grab set $w
+
+    label $w.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+    frame $w.s -background $BACKGROUND
+    label $w.s.txt -text "Save word_pos text as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry $w.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    pack $w.s.txt $w.s.ent -side top
+    frame $w.buttons
+    button $w.buttons.ok -text OK -command save_word_pos -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button $w.buttons.cancel -text Cancel -command save_word_pos_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack $w.buttons.ok $w.buttons.cancel -side left -expand 1 -fill x
+    pack $w.image $w.s $w.buttons -side top
+    bind $w.s.ent <Return> {
+        save_word_pos
+    }
+}
+
+proc save_word_pos_cancel { } {
+ destroy .save_word_pos ;# Cancel handler of the word/pos dialog: close it without writing anything
+}
+
+proc save_word_pos { } {
+ # OK handler of the word/pos dialog: hands the chosen file name to WRITE_WORD_POS, then closes the dialog.  TODO: still needs error checking.
+ global save_entry
+ WRITE_WORD_POS $save_entry
+ destroy .save_word_pos
+}
+
+proc popup_save_wordbox_menu { } {
+    # Modal dialog asking for the file that receives the wordbox output; OK and <Return> run save_wordbox, Cancel runs save_wordbox_cancel.
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT face_image WbxEquationsOnly
+    set save_entry recog.wbx
+    toplevel .save_wordbox -background $BACKGROUND
+    wm geometry .save_wordbox 275x200+500+500
+    wm title .save_wordbox "Save in wordbox format"
+    grab set .save_wordbox
+
+    label .save_wordbox.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+    frame .save_wordbox.s -background $BACKGROUND
+    label .save_wordbox.s.txt -text "Save wordbox text as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry .save_wordbox.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    pack .save_wordbox.s.txt .save_wordbox.s.ent -side top
+    frame .save_wordbox.buttons
+    checkbutton .save_wordbox.buttons.eqn \
+        -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT \
+        -text "Equations only" -variable WbxEquationsOnly
+    # The checkbutton drives the global WbxEquationsOnly, which save_wordbox passes on to WRITE_WORDBOX.
+    button .save_wordbox.buttons.ok -text OK -command save_wordbox -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button .save_wordbox.buttons.cancel -text Cancel -command save_wordbox_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack .save_wordbox.buttons.eqn -side top -expand 1 -fill x
+    pack .save_wordbox.buttons.ok .save_wordbox.buttons.cancel -side left \
+        -expand 1 -fill x
+
+    pack .save_wordbox.image .save_wordbox.s .save_wordbox.buttons -side top
+
+    bind .save_wordbox.s.ent <Return> {
+        save_wordbox
+    }
+}
+
+proc save_wordbox_cancel { } {
+ destroy .save_wordbox ;# Cancel handler of the wordbox dialog: close it without writing anything
+}
+
+proc save_wordbox { } {
+ # OK handler of the wordbox dialog.  TODO: still needs error checking.
+ global save_entry cur_xoffset cur_yoffset WbxEquationsOnly
+ # Passes the file name, the current x/y offsets and the "Equations only" flag to WRITE_WORDBOX, then closes the dialog.
+ WRITE_WORDBOX $save_entry $cur_xoffset $cur_yoffset $WbxEquationsOnly
+ destroy .save_wordbox
+}
+
+proc popup_save_equations_menu { } {
+    # Modal dialog asking for the file that receives the equation boundaries; OK and <Return> run save_equations, Cancel runs save_equations_cancel.
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT face_image cur_xoffset cur_yoffset
+    set save_entry recog.eqn
+    toplevel .save_equations -background $BACKGROUND
+    wm geometry .save_equations $save_ascii_geometry
+    wm title .save_equations "Save Equation boundaries"
+    grab set .save_equations
+
+    label .save_equations.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+    frame .save_equations.s -background $BACKGROUND
+    label .save_equations.s.txt -text "Save equation boundaries as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry .save_equations.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    pack .save_equations.s.txt .save_equations.s.ent -side top
+    frame .save_equations.buttons
+    button .save_equations.buttons.ok -text OK -command save_equations -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button .save_equations.buttons.cancel -text Cancel -command save_equations_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack .save_equations.buttons.ok .save_equations.buttons.cancel -side left -expand 1 -fill x
+    pack .save_equations.image .save_equations.s .save_equations.buttons -side top
+    # save_equations takes no arguments, so <Return> has to call it bare, exactly like the OK button does.
+    bind .save_equations.s.ent <Return> {
+        save_equations
+    }
+}
+
+proc save_equations_cancel { } {
+ destroy .save_equations ;# Cancel handler of the equation-boundaries dialog: close it without writing anything
+}
+
+proc save_equations { } {
+ # OK handler of the equation-boundaries dialog.  TODO: still needs error checking.
+ global save_entry
+ # The second argument of WRITE_EQUATIONS is the line number offset (always 0 here).
+ WRITE_EQUATIONS $save_entry 0
+ destroy .save_equations
+}
+
+
+proc popup_save_learned_chars_menu { } {
+    # Modal dialog asking for the file that receives the learned characters; OK and <Return> run save_learned_chars, Cancel runs save_learned_chars_cancel.
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry face_image
+    set w .save_learned_chars
+    set save_entry learn.dat
+    toplevel $w -background $BACKGROUND
+    wm geometry $w $save_ascii_geometry
+    wm title $w "Write Learned Characters"
+    grab set $w
+    label $w.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+    frame $w.s -background $BACKGROUND
+    label $w.s.txt -text "Save learned characters as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry $w.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    pack $w.s.txt $w.s.ent -side top
+    frame $w.buttons
+    button $w.buttons.ok -text OK -command save_learned_chars -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button $w.buttons.cancel -text Cancel -command save_learned_chars_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack $w.buttons.ok $w.buttons.cancel -side left -expand 1 -fill x
+    pack $w.image $w.s $w.buttons -side top
+    bind $w.s.ent <Return> {
+        save_learned_chars
+    }
+}
+
+proc save_learned_chars_cancel { } {
+ destroy .save_learned_chars ;# Cancel handler of the Write Learned Characters dialog: close it without writing anything
+}
+
+proc save_learned_chars { } {
+ # OK handler of the Write Learned Characters dialog: hands the chosen file name to WRITE_LEARNED_CHARS, then closes the dialog.  TODO: error checking?
+ global save_entry
+ WRITE_LEARNED_CHARS $save_entry
+ destroy .save_learned_chars
+}
+
+proc popup_read_learned_chars_menu { } {
+    # Modal dialog asking for the file to read learned characters from; OK and <Return> run read_learned_chars, Cancel runs read_learned_chars_cancel.
+    global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry eye_image
+    set w .read_learned_chars
+    set save_entry learn.dat
+    toplevel $w -background $BACKGROUND
+    wm geometry $w $save_ascii_geometry
+    wm title $w "Read Learned Characters"
+    grab set $w
+    label $w.image -bitmap @$eye_image -foreground $FOREGROUND -background $BACKGROUND
+    frame $w.s -background $BACKGROUND
+    label $w.s.txt -text "Read learned characters from:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    entry $w.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+    pack $w.s.txt $w.s.ent -side top
+    frame $w.buttons
+    button $w.buttons.ok -text OK -command read_learned_chars -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    button $w.buttons.cancel -text Cancel -command read_learned_chars_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+    pack $w.buttons.ok $w.buttons.cancel -side left -expand 1 -fill x
+    pack $w.image $w.s $w.buttons -side top
+    bind $w.s.ent <Return> {
+        read_learned_chars
+    }
+}
+
+proc read_learned_chars_cancel { } {
+ destroy .read_learned_chars ;# Cancel handler of the Read Learned Characters dialog: close it without reading anything
+}
+
+proc read_learned_chars { } {
+ # OK handler of the Read Learned Characters dialog: hands the chosen file name to LEARN_DATA, then closes the dialog.  TODO: still needs error checking.
+ global save_entry
+ LEARN_DATA $save_entry
+ destroy .read_learned_chars
+}
+
+
+proc PAGE_OPEN { filename } {
+# Unused stub: ignores filename and always returns 1.
+ return 1
+}
+
+proc clear_directory_box { } {
+ .open_menu.dirstuff.directory delete 0 end ;# remove every entry from the Open dialog's listbox
+}
+
+proc fill_in_directory_box { dirname {pattern *} } {
+    # Fills the Open dialog's listbox with every directory found in dirname plus every entry whose name matches pattern.
+    # Entries are classified relative to dirname rather than the current directory, so a directory other than the cwd lists correctly.
+    # The names come from treating the output of ls -aF as a list, so a name containing white space is split into several entries.
+    foreach i [exec ls -aF $dirname] {
+        if { [file isdirectory $dirname/$i] || [string match $pattern $i] } {
+            .open_menu.dirstuff.directory insert end $i
+        }
+    }
+}
+
+
+proc popup_quit_dialog_box { } {
+    # Modal confirmation shown before quitting: a caution bitmap, a warning
+    # message and OK / Cancel buttons wired to quit_ok and quit_cancel.
+    global quit_dialog_geometry BACKGROUND FOREGROUND FONT OCRCHIE_ROOT
+    set w .quit_dialog
+    toplevel $w
+    wm geometry $w $quit_dialog_geometry
+    wm title $w Quit
+    grab set $w
+
+    set caution_image_name ${OCRCHIE_ROOT}caution.xbm
+    label $w.image -bitmap @$caution_image_name -foreground $FOREGROUND -background $BACKGROUND
+    message $w.msg -text "You are about to quit OCRchie. All changes you have made will be lost." -font $FONT -background $BACKGROUND -fg $FOREGROUND -width 275 -justify center
+    frame $w.buttons
+    button $w.buttons.ok -text OK -command quit_ok -fg $FOREGROUND -background $BACKGROUND -font $FONT -width 5
+    button $w.buttons.cancel -text Cancel -command quit_cancel -fg $FOREGROUND -background $BACKGROUND -font $FONT -width 5
+    pack $w.buttons.ok $w.buttons.cancel -side left -expand 1 -fill x
+    pack $w.image $w.msg $w.buttons -side top -fill x
+}
+
+proc quit_ok { } {
+# OK handler of the quit dialog: destroys the main window and the dialog, then calls QUIT.
+# Destroying .t and .histogram as well is currently disabled.
+ destroy .main_window
+ destroy .quit_dialog
+ QUIT
+}
+
+proc quit_cancel { } {
+ global command_not_in_progress
+ set command_not_in_progress 1 ;# NOTE(review): presumably tells the rest of the UI that no command is pending -- confirm where this flag is read
+ destroy .quit_dialog ;# Cancel handler of the quit dialog: just close it
+}
+
+proc clear_canvas { } {
+ destroy .main_window.display.work_space ;# tear down the work-space widget, ...
+ destroy .main_window.display.xscroller ;# ... both scroller widgets ...
+ destroy .main_window.display.yscroller
+ destroy .main_window.display ;# ... and finally the frame that held them
+ init_display ;# NOTE(review): defined outside this part of the file; presumably rebuilds the widgets -- confirm it recreates the frame too
+}
+
+proc spellcheck { word } {
+    # Pipes word through "ispell -a" and returns SPELLED_CORRECTLY when the output contains a * or + character, MISPELLED otherwise.
+    # NOTE(review): word is handed to exec unfiltered, so a word starting with <, > or | would be read as a redirection -- confirm callers only pass plain words.
+    global x
+    set x [exec echo $word | ispell -a]
+    if { ([string last * $x] != -1) || ([string last + $x] != -1) } {
+        return SPELLED_CORRECTLY
+    } else {
+        return MISPELLED
+    }
+}
+
+
+# Not used right now: box_begin and box_drag rubber-band a yellow rectangle on the widget w.
+proc box_begin { w x y } {
+ global box
+ set box(anchor) [list $x $y] ;# remember where the drag started
+ catch {unset box(last)} ;# forget the rectangle of any earlier drag and ignore the error when there was none
+}
+
+proc box_drag { w x y } {
+ global box
+ catch { $w delete $box(last) } ;# remove the rectangle drawn by the previous call, if any
+ set box(last) [eval {$w create rect -fill yellow} $box(anchor) {$x $y -tag box} ] ;# redraw from the anchor to x,y and remember the new item
+}
+
+
+
+proc equation_mark { w x y } {
+    # Mouse handler for marking equations.  Marks come in pairs: the first
+    # one only arms in_equation, the second one paints the highlight between
+    # the two marks and registers the equation through ADD_EQUATION.
+    global curline curline_startrow curline_endrow curline_startcol \
+        curline_endcol curx cury in_equation
+    global prevline prevline_startrow prevline_endrow prevx prevy
+    # Translate window coordinates for the scrolled canvas.
+    set canvas_x [.main_window.display.work_space canvasx $x]
+    set canvas_y [.main_window.display.work_space canvasy $y]
+
+    # Keep the previous mark before the cur* globals are overwritten.
+    set prevline $curline
+    set prevline_startrow $curline_startrow
+    set prevline_endrow $curline_endrow
+    set prevx $curx
+    set prevy $cury
+
+    # Per the original note, GET_LINENUM (Page::get_linenum) fills in the
+    # line variables for the clicked position.
+    GET_LINENUM $canvas_x $canvas_y
+    set curx $canvas_x
+    set cury $canvas_y
+
+    # Debug trace: the new mark, then the previous one.
+    puts stdout "$curx $cury $curline $curline_startrow $curline_endrow"
+    puts stdout "$curx $cury $prevline $prevline_startrow $prevline_endrow"
+
+    if { $in_equation == 0 } {
+        # First mark of a pair: nothing to draw yet.
+        set in_equation 1
+        return
+    }
+
+    # Second mark: close the pair and paint the region.
+    set in_equation 0
+    if { $curline != $prevline } {
+        # Marks on different lines: 8-corner polygon (9 points to close it).
+        .main_window.display.work_space create polygon \
+            $prevx $prevline_startrow \
+            $curline_endcol $prevline_startrow \
+            $curline_endcol $curline_startrow \
+            $curx $curline_startrow \
+            $curx $curline_endrow \
+            $curline_startcol $curline_endrow \
+            $curline_startcol $prevline_endrow \
+            $prevx $prevline_endrow \
+            $prevx $prevline_startrow \
+            -fill yellow -outline black -stipple @grey.25 -tags IMAGE_TAG
+    } else {
+        # Both marks on the same line: a plain rectangle is enough.
+        .main_window.display.work_space create rectangle $prevx $prevline_startrow $curx $curline_endrow -fill yellow -outline black -stipple @grey.25 -tags IMAGE_TAG
+    }
+    # Arguments: starting line, starting column, ending line, ending column.
+    ADD_EQUATION $prevline $prevx $curline $curx
+}
+
+
+proc equation_delete { w x y } {
+    # Mouse handler that deletes the equation at the pointer and paints
+    # white over its highlight.  Returns 0 when nothing was deleted.
+    global curline curline_startrow curline_endrow curline_startcol \
+        curline_endcol curx cury in_equation
+    global prevline prevline_startrow prevline_endrow prevx prevy
+    global deleted
+    set deleted 0
+
+    # Translate window coordinates for the scrolled canvas.
+    set canvas_x [.main_window.display.work_space canvasx $x]
+    set canvas_y [.main_window.display.work_space canvasy $y]
+
+    # Per the original note, DELETE_EQUATION sets the prevline and curline
+    # variables for the deletion and sets deleted to 1 on success.
+    DELETE_EQUATION $canvas_x $canvas_y
+
+    puts stdout "$curx $cury $curline $curline_startrow $curline_endrow"
+    puts stdout "$curx $cury $prevline $prevline_startrow $prevline_endrow"
+    if { $deleted == 0 } { return 0 }
+
+    if { $curline != $prevline } {
+        # Marks on different lines: 8-corner polygon (9 points to close it).
+        .main_window.display.work_space create polygon \
+            $prevx $prevline_startrow \
+            $curline_endcol $prevline_startrow \
+            $curline_endcol $curline_startrow \
+            $curx $curline_startrow \
+            $curx $curline_endrow \
+            $curline_startcol $curline_endrow \
+            $curline_startcol $prevline_endrow \
+            $prevx $prevline_endrow \
+            $prevx $prevline_startrow \
+            -fill white -outline white -stipple @grey.25 -tags IMAGE_TAG
+    } else {
+        # Same line at both ends: a plain rectangle is enough.
+        .main_window.display.work_space create rectangle $prevx $prevline_startrow $curx $curline_endrow -fill white -outline white -stipple @grey.25 -tags IMAGE_TAG
+    }
+}
+
+
+
+proc component_select { w x y } {
+    # Passes the canvas coordinates of the pointer to SELECT_COMP and returns
+    # validComponent, which per the original note is set by SELECT_COMP.
+    global curCompId validComponent
+    SELECT_COMP \
+        [.main_window.display.work_space canvasx $x] \
+        [.main_window.display.work_space canvasy $y]
+    return $validComponent
+}
+
+proc init_global_menu { lst } {
+    # Creates the "globals" menu under Options and adds one label/entry row
+    # per variable name in lst; each entry is tied to that variable.
+    global FOREGROUND BACKGROUND FONT
+    puts stdout "Entering init_global_menu \n"
+    set gm [menu .main_window.menu_bar.options.menu.globals]
+    puts stdout "This is our frame "
+    puts stdout $gm
+    foreach var $lst {
+        set row $gm.[string tolower $var]
+        frame $row
+        label $row.l -text $var -width 20 -fg $FOREGROUND -background $BACKGROUND -font $FONT
+        entry $row.set -width 4 -relief sunken -textvariable $var -fg $FOREGROUND -background $BACKGROUND -font $FONT
+        pack $row.l -side left
+        pack $row.set -side right
+        pack $row -side top
+    }
+}
+
+
+init_user_interface ;# top-level call: runs as soon as this script is read
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/ocr-ui.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,403 @@
+#!/usr/sww/bin/wish -f
+#
+# user interface code (tcl visuals) for OCR
+# started 9/95, Archie Russell
+
+
+set main_window_width 800
+set main_window_height 800
+set dummy 0
+# The geometry string below (size, then position of the upper-left corner)
+# repeats the width and height above as literals instead of using the
+# variables, so the two have to be kept in sync by hand.
+set main_window_geometry 800x800+200+100
+
+set menu_bar_width $main_window_width
+set menu_bar_height 50
+set button_bar_width $main_window_width
+set button_bar_height 100
+set display_height 700
+# The canvas is made a little narrower than the window to save room for scrollbars, etc.
+
+set canvas_width [expr $main_window_width - 30]
+proc init_user_interface {} {
+    # Builds the top-level OCR window: a menu bar, a button bar and the
+    # display area, stacked from top to bottom.  Each frame is created here
+    # and then filled in by its init_* helper.
+
+    # Globals have to be declared before a proc can read them.
+    global main_window_geometry main_window_width main_window_height menu_bar_width menu_bar_height button_bar_width button_bar_height display_height
+
+    set top .main_window
+    toplevel $top
+    wm geometry $top $main_window_geometry
+    wm title $top "OCR user interface"
+
+    # Frames mostly exist to hold other widgets.
+    frame $top.menu_bar -width $menu_bar_width -height $menu_bar_height -relief raised -bd 2
+    init_menu_bar
+
+    frame $top.button_bar -width $button_bar_width -height $button_bar_height -relief ridge -bd 5
+    init_button_bar
+
+    frame $top.display -width $main_window_width -height $display_height -relief ridge -bd 5
+    init_display
+
+    # Pack the three frames one above the other, anchored to the left edge.
+    pack $top.menu_bar $top.button_bar $top.display -side top -anchor w
+
+    focus $top
+}
+
+proc init_menu_bar { } {
+    # Creates the File, Edit and Options menubuttons of the main menu bar and
+    # lets the matching init_*_menu proc build each menu.  (Shortcuts are known not to work.)
+    set bar .main_window.menu_bar
+
+    menubutton $bar.file -text "File " -underline 0 -menu $bar.file.menu -borderwidth 2
+    init_file_menu
+
+    menubutton $bar.edit -text "Edit " -underline 0 -menu $bar.edit.menu -borderwidth 2
+    init_edit_menu
+
+    menubutton $bar.options -text "Options " -underline 0 -menu $bar.options.menu -borderwidth 2
+    init_options_menu
+
+    pack $bar.file $bar.edit $bar.options -side left -padx 1m -pady 1m -fill x
+}
+
+proc init_file_menu { } {
+    # Builds the menu behind the File menubutton; every entry just names the command to run.
+    set m [menu .main_window.menu_bar.file.menu]
+    # e.g. "Open..." is linked to popup_open_menu
+    $m add command -label "Open..." -command popup_open_menu
+    $m add command -label "Close" -command popup_close_menu
+    # separators are purely visual horizontal lines
+    $m add separator
+    $m add command -label "Save" -command default_save
+    $m add command -label "Save As..." -command default_save
+    $m add separator
+    $m add command -label "Quit" -command popup_quit_dialog_box
+}
+
+proc init_edit_menu { } {
+    # Placeholder Edit menu: three radiobuttons (values 0, 1, 2) that all share the global variable dummy.
+    global dummy
+    set m [menu .main_window.menu_bar.edit.menu]
+    set value -1
+    foreach label {Nothing Yet Here} { $m add radiobutton -label $label -variable dummy -value [incr value] }
+}
+
+set word_certainty_value normal
+set screen_view_style facing_page
+proc init_options_menu { } {
+    # Builds the Options menu with two cascades of radiobuttons: "Word Certainty" (word_certainty_value) and "Screen View" (screen_view_style).
+    global dummy word_certainty_value screen_view_style
+    set opts .main_window.menu_bar.options.menu
+    menu $opts
+
+    $opts add cascade -label "Word Certainty" -menu $opts.word_certainty
+    menu $opts.word_certainty
+    $opts.word_certainty add radiobutton -label "Stringent" -variable word_certainty_value -value stringent
+    $opts.word_certainty add radiobutton -label "Normal" -variable word_certainty_value -value normal
+    $opts.word_certainty add radiobutton -label "Lenient" -variable word_certainty_value -value lenient
+
+    $opts add cascade -label "Screen View" -menu $opts.screen_view
+    menu $opts.screen_view
+    $opts.screen_view add radiobutton -label "facing page" -variable screen_view_style -value facing_page
+    $opts.screen_view add radiobutton -label "interleave lines" -variable screen_view_style -value interleave_lines
+    $opts.screen_view add radiobutton -label "translation only" -variable screen_view_style -value translation_only
+}
+
+proc init_button_bar { } {
+} ;# empty on purpose for now: the button bar frame gets no contents yet
+
+proc init_display { } {
+    # Fills the display frame with the scrollable work-space canvas and its two scrollbars, then installs the bindings and the demo content.
+    global display_height canvas_width
+    set d .main_window.display
+    canvas $d.work_space -bg white -height $display_height -width $canvas_width -xscrollcommand "$d.xscroller set" -yscrollcommand "$d.yscroller set" -scrollincrement 30 -cursor {crosshair black gray}
+    scrollbar $d.xscroller -command "$d.work_space xview" -orient horizontal
+    scrollbar $d.yscroller -command "$d.work_space yview"
+    pack $d.xscroller -side bottom -fill x
+    pack $d.work_space $d.yscroller -side left -fill y
+    $d.work_space configure -scrollregion { -5000 -5000 5000 5000 }
+    initialize_bindings
+    test_canvas
+}
+
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+set mouse_mode NONE
+set started_region 0
+set region_count 0
+proc initialize_bindings { } {
+    # Installs the mouse bindings used to rubber-band a rectangular region
+    # on the work-space canvas with button 1.  Binding scripts are evaluated
+    # at global level, so the names they use are global variables.
+
+    global region_data regions next_button_data next_buttons kill_button_data kill_buttons arrow_data arrows mouse_mode current_object
+
+# bind .main_window.display.work_space <ButtonPress-1> {
+# if [expr ! [string compare $mouse_mode NONE]] {
+# set current_object [find withtag current]
+# if [expr ($current_object == "") || ((expr ! [lsearch $next_buttons $current_object]) && (expr ! [lsearch $kill_buttons $current_object]))] {
+# set mouse_mode making_region
+# # start creating the region
+# grab set .main_window.display
+# set x_init [.main_window.display.work_space canvasx %x]
+# set y_init [.main_window.display.work_space canvasy %y]
+# set region_id [.main_window.display.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3]
+# set region_data($region_id,x_init) $x_init
+# set region_data($region_id,y_init) $y_init
+# .main_window.display.work_space itemconfigure $region_id -tags region$region_id
+# lappend regions $region_id
+
+# } elseif {[lsearch $next_buttons $current_object] != -1} {
+# set mouse_mode making_arrow
+# grab set .main_window.display
+# set arrow_id [.main_window.display.work_space create line 0 0 1 1]
+# set arrow_data($arrow_id,x_init) $next_button_data($current_object,x_center)
+# set arrow_data($arrow_id,y_init) $next_button_data($current_object,y_center)
+# set arrow_data($arrow_id,start_region) $next_button_data($current_object,region_id)
+# .main_window.display.work_space coords $arrow_id $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init) $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init)
+# .main_window.display.work_space itemconfigure $arrow_id -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$arrow_id
+
+# lappend arrows $arrow_id
+# } elseif {[lsearch $kill_buttons $current_object] != -1} {
+# set mouse_mode killing_region
+# } elseif {[search $prev_buttons $current_object] != -1} {
+# set moude_mode moving_arrow
+# } else {
+# puts stdout unknown-mode
+# }
+# } else {
+# puts stdout "strange: looks like you are in some unknown state. Sorry"
+# }
+# }
+    bind .main_window.display.work_space <ButtonRelease-1> {
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_final [.main_window.display.work_space canvasx %x]
+            set y_final [.main_window.display.work_space canvasy %y]
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            .main_window.display.work_space coords region$region_id $x_init $y_init $x_final $y_final
+            # if finishing a rectangle, initialize its stuff in the array
+            if {$x_init <= $x_final} {
+                set region_data($region_id,x_init) $x_init
+                set region_data($region_id,x_final) $x_final
+            } else {
+                set region_data($region_id,x_final) $x_init
+                set region_data($region_id,x_init) $x_final
+            }
+            if {$y_init <= $y_final} {
+                set region_data($region_id,y_init) $y_init
+                set region_data($region_id,y_final) $y_final
+            } else {
+                set region_data($region_id,y_init) $y_final
+                set region_data($region_id,y_final) $y_init
+            }
+
+            set region_data($region_id,next_region_id) NONE
+
+            make_region_buttons $region_id
+            grab release .main_window.display
+            set mouse_mode NONE
+            set current_object NONE
+        }
+    }
+
+    # NOTE(review): the press binding that would set mouse_mode to
+    # making_region is commented out above, so the release and motion
+    # handlers currently find nothing to do.
+
+    bind .main_window.display.work_space <B1-Motion> {
+        # The brace has to open on the same line as the if: Tcl ends a command at the newline.
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            set curx [.main_window.display.work_space canvasx %x]
+            set cury [.main_window.display.work_space canvasy %y]
+            .main_window.display.work_space coords region$region_id $x_init $y_init $curx $cury
+        }
+
+    }
+    bind .main_window.display <Leave> {
+        # on leaving the display, release control of the mouse etc.
+        # maybe make it scroll instead?
+        if $started_region {
+            grab release .main_window.display
+            set started_region 0
+            .main_window.display.work_space coords region$region_id 0 0 0 0
+        }
+    }
+}
+
+set arrow_in_progress 0
+proc make_region_buttons { reg_id } {
+    # Adds two 20x20 buttons at the region's x_init/y_init corner: a blue
+    # "next" button that starts or finishes an arrow and a red "kill" button
+    # that deletes the region.  Both react to a double click of button 2.
+    global region_data kill_button_data next_button_data arrow_in_progress current_arrow
+    set c .main_window.display.work_space
+    set x0 $region_data($reg_id,x_init)
+    set y0 $region_data($reg_id,y_init)
+    set next_num [$c create rectangle $x0 $y0 [expr $x0 + 20] [expr $y0 + 20] -fill blue -tags "region$reg_id next_button$reg_id"]
+    set next_button_data($next_num,reg_id) $reg_id
+    $c bind next_button$reg_id <Double-2> {
+        set reg_id $next_button_data([.main_window.display.work_space find withtag current],reg_id)
+        if { ! $arrow_in_progress } {
+            set canvas_x [.main_window.display.work_space canvasx %x]
+            set canvas_y [.main_window.display.work_space canvasy %y]
+            start_arrow $reg_id $canvas_x $canvas_y
+            puts stdout "Starting an arrow at $canvas_x $canvas_y"
+        } else {
+            finish_arrow $reg_id
+        }
+    }
+    set kill_num [$c create rectangle [expr $x0 + 20] $y0 [expr $x0 + 40] [expr $y0 + 20] -fill red -tags "region$reg_id kill_button$reg_id"]
+    set kill_button_data($kill_num,reg_id) $reg_id
+    $c bind kill_button$reg_id <Double-2> {
+        set reg_id $kill_button_data([.main_window.display.work_space find withtag current],reg_id)
+        destroy_region $reg_id .main_window.display.work_space
+    }
+}
+
+proc start_arrow { reg_id x_start y_start } {
+    # Creates a zero-length blue arrow at (x_start, y_start) for region
+    # reg_id, records it in region_data and flags an arrow as in progress.
+    global arrow_in_progress next_button_data region_data current_arrow
+    set path_name .main_window.display.work_space
+
+    set arrow_id [$path_name create line $x_start $y_start $x_start $y_start -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+
+    set region_data($reg_id,arrow) $arrow_id
+    set arrow_in_progress 1
+
+    set current_arrow $arrow_id
+}
+
+proc destroy_region { reg_id path_name } {
+ $path_name delete region$reg_id ;# deletes what is tagged region<reg_id> on the widget path_name
+ puts stdout "Destroying $reg_id" ;# progress message on stdout
+}
+
+proc test_canvas { } {
+# Placeholder content: draws five sample text items on the work space; the last one is a four-line string with literal newlines.
+ .main_window.display.work_space create text 400 200 -text "Document and text will be displayed here" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill black
+ .main_window.display.work_space create text 400 250 -text "Can be displayed in multiple colors etc." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill red
+ .main_window.display.work_space create text 400 300 -text "Can grab rectangles of stuff here." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill green
+ .main_window.display.work_space create text 400 350 -text "other things semi-working: quit and open (under file)" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill blue
+ .main_window.display.work_space create text 200 200 -font *-times-*-*-*--10-*-*-*-*-*-*-* -fill black -text "If I hit return
+Will it make any difference
+return
+return"
+}
+
+set open_menu_geometry 600x300+300+400
+set current_directory [pwd]
+set open_menu_pattern *
+
+proc popup_open_menu { } {
+    # Pops up the interactive Open box: a pattern entry above a scrollable directory listing.
+    # Known bug: the box cannot be left without selecting a file.
+    global open_menu_geometry open_menu_pattern current_directory
+    set w .open_menu
+    toplevel $w
+    wm geometry $w $open_menu_geometry
+    wm title $w Open
+
+    # the grab forces the user to deal with this box first
+    grab set $w
+
+    # directory listing with its scrollbar
+    frame $w.dirstuff
+    scrollbar $w.dirstuff.yscroll -command "$w.dirstuff.directory yview"
+    listbox $w.dirstuff.directory -yscrollcommand "$w.dirstuff.yscroll set" -geometry 25x12 -relief raised
+    fill_in_directory_box $current_directory $open_menu_pattern
+
+    bind $w.dirstuff.directory <Double-Button-1> {
+        set file_to_open [selection get]
+        if { ! [file isdirectory $file_to_open] } {
+            puts stdout "Opening file $file_to_open"
+            destroy .open_menu
+        } else {
+            cd $file_to_open
+            set current_directory [pwd]
+            clear_directory_box
+            puts stdout "Changing to $current_directory"
+            fill_in_directory_box $current_directory $open_menu_pattern
+        }
+    }
+    # glob pattern the listed files have to match
+    frame $w.pattern_match
+    label $w.pattern_match.label -text "Match files of type:"
+    entry $w.pattern_match.entry -width 5 -relief sunken -bd 2 -textvariable open_menu_pattern
+    # pressing return in the entry refreshes the listing
+    bind $w.pattern_match.entry <Return> {
+        set current_directory [pwd]
+        clear_directory_box
+        fill_in_directory_box $current_directory $open_menu_pattern
+    }
+
+    pack $w.pattern_match.label $w.pattern_match.entry -side left
+    pack $w.dirstuff.directory $w.dirstuff.yscroll -side left -fill y
+    pack $w.pattern_match $w.dirstuff -side top
+    focus $w.pattern_match.entry
+}
+
+
+
+
+
+
+
+proc clear_directory_box { } {
+ .open_menu.dirstuff.directory delete 0 end ;# remove every entry from the Open dialog's listbox
+}
+
+proc fill_in_directory_box { dirname {pattern *} } {
+    # Fills the Open dialog's listbox with every directory found in dirname plus every entry whose name matches pattern.
+    # Entries are classified relative to dirname, not the cwd.  Names come from the output of ls -aF read as a list, so names with white space get split.
+    foreach i [exec ls -aF $dirname] {
+        if { [file isdirectory $dirname/$i] || [string match $pattern $i] } {
+            .open_menu.dirstuff.directory insert end $i
+        }
+    }
+}
+
+set quit_dialog_geometry 300x150+500+500
+proc popup_quit_dialog_box { } {
+    # Modal confirmation shown before quitting: a warning message above OK / Cancel buttons wired to quit_ok and quit_cancel.
+    global quit_dialog_geometry
+    set w .quit_dialog
+    toplevel $w
+    wm geometry $w $quit_dialog_geometry
+    wm title $w Quit
+    grab set $w
+
+    message $w.msg -text "You are about to quit OCR-orama. All changes you have made will be lost."
+    frame $w.buttons
+    button $w.buttons.ok -text OK -command quit_ok
+    button $w.buttons.cancel -text Cancel -command quit_cancel
+    pack $w.buttons.ok $w.buttons.cancel -side left -expand 1 -fill x
+    pack $w.msg $w.buttons -side top -fill x
+
+}
+
+proc quit_ok { } {
+ destroy .main_window ;# OK handler of the quit dialog: tear down the main window ...
+ destroy .quit_dialog ;# ... and then the dialog itself
+}
+
+proc quit_cancel { } {
+ global command_not_in_progress
+ set command_not_in_progress 1 ;# NOTE(review): presumably tells the rest of the UI that no command is pending -- confirm where this flag is read
+ destroy .quit_dialog ;# Cancel handler of the quit dialog: just close it
+}
+
+init_user_interface ;# top-level call: builds the whole interface as soon as this script is read
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/project.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,124 @@
+#include <malloc.h>
+#include <stdarg.h>
+#include "tcl_interface.h"
+#include "project.h"
+#include "histogram.h"
+#include "bitmap.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "status_message.h"
+
+int docommand(char* fmt, ...);
+
+
+/* Create a bitmap item from file xbm_file at (x, y) on canvas .t.f.c, then run Tcl "update". */
+void draw_bitmap(int x, int y, char* xbm_file)
+{
+ docommand(".t.f.c create bitmap %d %d -bitmap @%s", x, y, xbm_file);
+ docommand("update");
+}
+
+#define LINE_SKIP 5
+#define DRAW_RAYS 0
+#define DEBUG_PROJECT_HISTOGRAM 1
+#define DRAW_PIXELS 0
+#define PRINT_STATUS 1
+/* Build a new Histogram for r at cut_angle (degrees): one ray weight for every LINE_SKIP-th row. Caller receives the new'd object. */
+Histogram* project_histogram(RLEMap* r, double cut_angle)
+{
+ if (ENABLE_USER_INTERFACE)
+ {
+ docommand("update");
+ if(DRAW_RAYS)
+ {
+ docommand(".main_window.display.work_space delete project_ray"); /* remove items tagged project_ray */
+ }
+ if(DRAW_PIXELS)
+ {
+ docommand(".t.f.c delete project_pixels"); /* remove items tagged project_pixels */
+ }
+ }
+ int num_values = r->imageLength() / LINE_SKIP; /* number of sampled rows */
+ int* ret_array = (int*) malloc (sizeof(int) * num_values); /* NOTE(review): result is not checked; presumably owned by the Histogram afterwards -- confirm */
+ for(int current_value = 0; current_value < num_values; current_value++)
+ {
+ ret_array[current_value] = calculate_one_rle_ray_weight(r, cut_angle, current_value*LINE_SKIP); /* the real work */
+ }
+ Histogram* h = new Histogram(num_values - 1, ret_array, cut_angle); /* NOTE(review): num_values - 1 looks like a last index rather than a count -- confirm against Histogram */
+ if(PRINT_STATUS && ENABLE_USER_INTERFACE)
+ {
+ set_status("Deskewing: @%.3lf degrees, standard deviation = %.3lf", cut_angle, h->get_standard_dev());
+ }
+ if(0) /* debugging display, compiled out */
+ h->display();
+ return h;
+}
+/* Convert an angle given in degrees to radians. */
+inline double deg_to_rad(double deg)
+{
+ return deg * (M_PI / 180.00);
+}
+
+int PRINT_RAYS = 0;
+
+int calculate_one_rle_ray_weight(RLEMap* rlemap, double cut_angle, int row_num)
+{
+ /* Returns the summed pixel weight along one ray that starts at column 0 */
+ /* of row row_num and cuts through rlemap at cut_angle (in DEGREES). */
+ /* RLEMap methodology: Translate the angular slope into rise/run slope,
+ find out how many bits (how much run) to read horizontally before
+ stepping to the next row. Make use of RLEMap::pixels_between(st,fi,row) */
+ /* possible optimizations: calculate this initial stuff "run_bits"
+ once for each histogram, pass as args to this */
+
+
+ double float_run_bits;
+ int y_update;
+ double new_x;
+ double rad_cut_angle = deg_to_rad(cut_angle);
+ double sin_cut = sin(rad_cut_angle);
+
+ /* columns covered per row step = |cot(angle)|; a flat ray (sin == 0) never leaves its row, so no division by zero */
+ float_run_bits = (sin_cut == 0.0) ? HUGE_VAL : fabs((((double)1)/sin_cut) * cos(rad_cut_angle));
+
+ int ray_weight = 0;
+
+ double cur_x = 0;
+ int cur_y = row_num;
+
+ if(cut_angle < 0) /* negative angles walk towards row 0, others towards the last row */
+ y_update = -1;
+ else
+ y_update = 1;
+
+
+ int image_height = rlemap->imageLength();
+ int image_width = rlemap->imageWidth();
+
+ while(((new_x = cur_x + float_run_bits) < image_width) &&
+ (cur_y >= 0) && (cur_y < image_height))
+ {
+ /* watch out about going past rlemap bounds */
+ ray_weight += rlemap->pixels_between((int)cur_x, (int)new_x, cur_y);
+ cur_x = new_x + 1;
+ cur_y = cur_y + y_update;
+ }
+
+ /* last, partial run: from cur_x to the right edge, if the ray is still inside the image */
+ if((cur_y >= 0) && (cur_y < image_height) && (cur_x < image_width))
+ ray_weight += rlemap->pixels_between((int)cur_x, image_width, cur_y);
+
+ if(1)
+ if(DRAW_RAYS)
+ {
+ docommand(".main_window.display.work_space create line %d %d %d %d -fill blue -tags {project_ray IMAGE_TAG}", 0, row_num, (int)cur_x, (int)cur_y);
+ }
+ if(PRINT_RAYS)
+ printf("---Ray weight for cut angle %lf = %d\n", cut_angle, ray_weight);
+ return ray_weight;
+}
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/project.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,15 @@
+#ifndef PROJECT
+#define PROJECT 1
+/* Declarations for the projection-histogram routines used while deskewing. */
+#include "RLEPair.h"
+#include "RLEMap.h"
+#include "bitmap.h"
+#include "histogram.h"
+/* NOTE(review): project.cc defines only the RLEMap variants; draw_sample, sample_to_2d and the Bitmap overloads presumably live elsewhere -- confirm. */
+void draw_sample(int x, int y);
+Bitmap* sample_to_2d();
+Histogram* project_histogram(Bitmap* b, double cut_angle);
+Histogram* project_histogram(RLEMap* r, double cut_angle);
+int calculate_one_ray_weight(Bitmap* b, double cut_angle, int row_num);
+int calculate_one_rle_ray_weight(RLEMap* rlemap, double cut_angle, int row_num);
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/prop.doc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,50 @@
+
+Here are my current property and distance settings. Properties
+27 and 28 are weighted at 3. I am not sure why the disjoint property
+had to be so heavily weighted.
+
+
+void Component::setProperties(BitMap * map) // was BitMap
+/*--------------------------------------------------------------
+Primary Purpose: Set the property vector for this component
+Arguments: The BitMap to which this component belongs
+Return Value:
+Effects: Property 0-24
+ The component is divided into a 5 by 5 grid. A gray
+ scale (0 - 255) for each section is determined. The gray scale
+ is 0 for all white, 255 for all black, but normally will be somewhere
+ between the two. The gray scales are represented in properties
+ 0-24.
+ Property 25 is the grayscale across the top.
+ Property 26 is the grayscale across the bottom.
+ Property 27 is the width/height ratio, again scaled to (0-255).
+ Actually the formula for property 27 is
+ (width/height)/2 * 255 if height >= width
+ (1 - (height/width)/2) * 255 if width > height
+ This way near 0 is very tall and thin
+ near 128 height near width
+ near 255 very wide
+ Property 28 is Indicator of a vertically disjoint character
+ like i and j. 0 - not disjoint. 255 - disjoint
+
+
+
+
+
+Distance Component::distance(Component * comp)
+/*--------------------------------------------------------------
+Primary Purpose: Determines heuristic distance between two components
+Arguments: Another component to compare
+Return Value: integer value which represents the distance between two
+ components. Distance = sum over i of
+ weight *square (this->fproperty[i] - comp->fproperty[i])
+ weight for i == 27, 28 is 3 weight is 1 for all other
+ properties
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/properties.doc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,474 @@
+#include "system.h"
+#include "Component.h"
+#include "BitMap.h"
+#include <assert.h>
+#include "list.h"
+#include "tcl_interface.h"
+
+/*** Component.cc
+ Member functions for Components
+ Component functions defined in Component.h
+ rev 12/9/95 KM
+***/
+
+// Default constructor: does nothing beyond running List's default constructor.
+Components::Components()
+:List()
+{
+}
+
+// Destructor: deletes the Component held by each list element, then removes the elements.
+Components::~Components()
+{
+ for (ListElement *ptr = first; ptr != NULL && ptr->item!=NULL; // walk stops at the first element whose item is NULL
+ ptr = ptr->next) {
+ if (ptr->item != NULL) // always true here: already checked by the loop condition
+ delete (Component *) (ptr->item);
+ }
+ while(!IsEmpty())
+ Remove(); // NOTE(review): presumably unlinks one element per call -- confirm in List
+
+}
+
+
+int Component::AddToComponent(ListElement* intrvl, RLEMap* rlemap)
+// Starting at intrvl, pulls every run connected to it through adjacent rows out of rlemap,
+// widens the bounding box ful/flr over those runs, frees them, and returns how many were consumed.
+{
+ assert(intrvl != NULL);
+ List* list = new List(); //work queue of runs still to be examined
+ ListElement* current;
+ ListElement* nextelt;
+ int counter = 0;
+ int currentRow;
+
+ if (intrvl->previous != NULL) //unlink the starting run from its row in the map
+ intrvl->previous->next = intrvl->next;
+ else rlemap->fMapData[((RLEPair *) intrvl->item)->row]->first = intrvl->next;
+ if (intrvl->next != NULL)
+ intrvl->next->previous = intrvl->previous;
+ list->first = intrvl; //put starting interval on queue
+ list->last = intrvl;
+ list->length = 1;
+ intrvl->next = NULL;
+ intrvl->previous = NULL;
+ currentRow = 0;
+ while ((intrvl = list->first) != NULL &&
+ currentRow < rlemap->imageLength()) //Take an interval off queue
+ {
+ currentRow = ((RLEPair *) intrvl->item)->row;
+ for (int i=-1; i < 2; i+=2) { //row above (i == -1), then row below (i == 1)
+ if (currentRow+i < 0 || currentRow+i >= rlemap->imageLength())
+ continue; //no neighbour row past the page edge (assumes fMapData holds imageLength() rows)
+ current = rlemap->fMapData[currentRow+i]->first;
+ while ((current != NULL)
+ && (((RLEPair *) current->item)->start <=
+ ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
+// printf("Looking at an interval on row %d that goes from %d to %d\n",
+// currentRow, ((RLEPair *) intrvl->item)->start,
+// ((RLEPair *) intrvl->item)->end);
+ if ((((RLEPair *) current->item)->end
+ >= ((RLEPair *) intrvl->item)->start-1)
+ && (((RLEPair *) current->item)->start <=
+ ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
+// printf("Adding connection for interval on row %d that goes from %d to %d\n", currentRow+i,
+// ((RLEPair *) current->item)->start,
+// ((RLEPair *) current->item)->end);
+
+ if (current->previous != NULL)
+ current->previous->next = current->next; //take off RLEMap
+ else
+ rlemap->fMapData[currentRow+i]->first = current->next;
+ if (current->next != NULL)
+ current->next->previous = current->previous;
+ nextelt = current->next;
+ list->last->next = current; //add to queue
+ current->previous = list->last;
+ list->last = current;
+ current->next = NULL;
+ current = nextelt;
+ list->length++;
+ } else
+ current = current->next;
+ }
+ }
+
+ if ((((RLEPair *) intrvl->item)->start < ful.x()) || (ful.x()==-1)) {
+ ful.x() = ((RLEPair *) intrvl->item)->start;
+// printf("Changed ful.x to %d\n", ful.x());
+ }
+ if ((((RLEPair *) intrvl->item)->end > flr.x()) || (flr.x()==-1)) {
+ flr.x() = ((RLEPair *) intrvl->item)->end;
+// printf("Changed flr.x to %d\n", flr.x());
+ }
+ if ((((RLEPair *) intrvl->item)->row < ful.y()) || (ful.y()==-1)) {
+ ful.y() = ((RLEPair *) intrvl->item)->row;
+// printf("Changed ful.y to %d\n", ful.y());
+ }
+ if ((((RLEPair *) intrvl->item)->row > flr.y()) || (flr.y()==-1)) {
+ flr.y() = ((RLEPair *) intrvl->item)->row;
+// printf("Changed flr.y to %d\n", flr.y());
+ }
+ list->first = intrvl->next;
+ if (intrvl->next != NULL)
+ intrvl->next->previous = NULL;
+ delete ((RLEPair *) (intrvl->item));
+ delete intrvl; //so the letter O won't go forever
+ counter++;
+ list->length--;
+ }
+
+ delete list;
+ return counter;
+
+}
+
+void Component::setProperties(BitMap * map) // was BitMap
+/*--------------------------------------------------------------
+Primary Purpose: Set the property vector for this component
+Arguments: The BitMap to which this component belongs
+Return Value: none
+Effects: The component is divided into a NumHorizDiv by NumVertDiv
+ (by default 5 by 5) grid. A gray scale (0 - 255) for each section
+ is determined. The gray scale is 0 for all white, 255 for all black,
+ but normally will be somewhere between the two. The gray scales
+ are represented in properties 0-24. A grid section that is empty
+ (component smaller than the grid) gets gray scale 0.
+ Property 25 is the grayscale across the top.
+ Property 26 is the grayscale across the bottom.
+ Property 27 is the width/height ratio again scaled to (0-255)
+ Actually the formula for property 27 is
+ (width/height)/2 * 255 if height >= width
+ (1 - (height/width)/2) * 255 if width > height
+ This way near 0 is very tall and thin
+ near 128 height near width
+ near 255 very wide
+ Property 28 is Indicator of a vertically disjoint character
+ like i and j: 255 if some row of the bounding box other than
+ the last one holds no pixels, otherwise 0.
+ Property 29 is always set to 0.
+ Also the total number of black pixels is set in fnumBits.
+ // This is not used at this time.
+Constraints: The data fields ful and flr must already be set
+ before calling this function. These fields specify a bounding
+ box for the character within the BitMap.
+Rev: 12/9 KM
+---------------------------------------------------------------*/
+{
+ if (ful > flr)
+ printf("Problem\n");
+ assert (ful <= flr);
+ // hflag/vflag are variable-length arrays (a compiler extension): the grid size is a runtime global
+ short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+ short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+ float height, width;
+ int propNum;
+
+ Point sectionLr, sectionUl;
+ // Set Number of bits
+ fnumBits = map->pixelsInRegion(ful, flr);
+
+ setSectionFlags(hflag, vflag);
+ for (int r = 0; r < NumVertDiv; r++)
+ for (int c = 0; c < NumHorizDiv; c++)
+ {
+ propNum = (r * NumHorizDiv) + c;
+ sectionUl = Point(hflag[c], vflag[r]);
+ sectionLr = Point(hflag[c+1]-1, vflag[r+1]-1);
+ // an empty section is stored as all white instead of being left unset before the assert
+ fproperty[propNum] = (sectionUl <= sectionLr) ? map->grayScale(sectionUl, sectionLr) : 0;
+ assert(fproperty[propNum] >= 0 && fproperty[propNum] < 256);
+ }
+
+ // set the height/width ratio
+ // 0 is very thin 128 is even 255 is very wide.
+ width = flr.x() - ful.x() + 1;
+ height = flr.y() - ful.y() + 1;
+
+ // Grayscale across the top - Indicator of top bar
+ sectionUl = Point(ful.x(), ful.y());
+ sectionLr = Point(flr.x(), ful.y() + (int)(height/(NumVertDiv*2)));
+ fproperty[25] = map->grayScale(sectionUl, sectionLr);
+
+ // Grayscale across bottom - Indicator of a foot for l opposed to 1
+ sectionUl = Point(ful.x(), flr.y() - (int)(height/(NumVertDiv*2)));
+ sectionLr = Point(flr.x(), flr.y());
+ fproperty[26] = map->grayScale(sectionUl, sectionLr);
+
+ float hdivw = (float)height/width;
+ float wdivh = (float) width/height;
+ if (width > height)
+ fproperty[27]= (short int) ((1- hdivw/2)*255);
+ else
+ fproperty[27] = (short int)((wdivh/2)* 255);
+
+ // is this a disjoint character like i or j 255 = yes 0 = no
+ fproperty[28]=0;
+ // any row without pixels between ful.x() and flr.x() marks the component as disjoint
+ for(int row = ful.y(); row < flr.y(); row++)
+ {
+ int pixelsThisRow = pixelsBetween(map->row(row), ful.x(), flr.x());
+ if(!(pixelsThisRow))
+ fproperty[28]=255;
+ }
+
+ fproperty[29]= 0;
+ for(int p = 0; p < numProperties; p++)
+ assert(fproperty[p] >= 0 && fproperty[p] < 256);
+
+
+}
+
+void Component::setSectionFlags(short int hflag[], short int vflag[])
+/*--------------------------------------------------------------
+Primary Purpose: Breaks this component into a grid NumHorizDiv X NumVertDiv
+ for determining grayscale property vectors.
+Arguments: hflag[] is an empty array to be filled by this procedure with
+ the starting columns of each horizontal subdivision. vflag[] will
+ be filled with the starting rows of each vertical subdivision.
+Effects: fills hflag[] with the starting column for each subdivision.
+ The last element of the array is actually the pixel immediately
+ following the last subdivision. Pixels that do not divide evenly
+ are handed out one each to the first subdivisions.
+ vflag[NumVertDiv] is comparable for vertical subdivisions.
+ Example ful = (0,25) flr = (52,46) NumHorizDiv = NumVertDiv = 5
+ hflag[6] = { 0,11,22,33,43,53 }
+ vflag[6] = {25,30,35,39,43,47 }
+Constraints: ful and flr must be set to mark the bounding box before
+ calling this procedure.
+Rev: 10/27 KM
+---------------------------------------------------------------*/
+{
+ int ulx = ful.x(); int uly = ful.y();
+ int lrx = flr.x(); int lry = flr.y();
+
+ int width = lrx - ulx+1;
+ int height = lry - uly+1;
+
+ int horizDiv = width/NumHorizDiv;
+ int vertDiv = height/NumVertDiv;
+
+ int horizExtra = width - horizDiv*NumHorizDiv;
+ int vertExtra = height - vertDiv*NumVertDiv;
+
+ int i, add;
+ for (i = 0; i < NumHorizDiv; i++)
+ {
+ if(horizExtra - i > 0) add = i; else add = horizExtra; // extra columns already given to earlier divisions
+ hflag[i] = ulx + (i*horizDiv)+ add;
+ }
+ hflag[i] = lrx + 1; // Closes off last division
+
+ int j;
+ for(j = 0; j < NumVertDiv; j ++)
+ {
+ if(vertExtra - j > 0) add = j; else add = vertExtra; // extra rows already given to earlier divisions
+ vflag[j] = uly + (j*vertDiv)+ add;
+ }
+ vflag[j] = lry + 1; // Closes off last division
+
+
+
+
+}
+
+
+Distance Component::distance(Component * comp)
+/*--------------------------------------------------------------
+Primary Purpose: Determines heuristic distance between two components
+Arguments: Another component to compare
+Return Value: integer value which represents the distance between two
+ components. Distance = sum over i of
+ weight *square (this->fproperty[i] - comp->fproperty[i])
+ weight for i == 27, 28 is 3 weight is 1 for all other
+ properties
+Constraints: setProperties must have been run on both components
+
+Rev: 11/1 KM
+---------------------------------------------------------------*/
+{
+ Property * a = fproperty;
+ Property * b = comp->properties();
+ Distance dist=0;
+
+ // Weighted sum of squared differences over the first numProperties
+ // entries of both property vectors.
+ for(int i= 0; i < numProperties; i++)
+ {
+ // Properties 27 and 28 count three times as much as the others.
+ const int weight = (i == 27 || i == 28) ? 3 : 1;
+
+ // Plain int difference of the two property values.
+ const int dif = (a[i] - b[i]);
+
+ // Accumulated in int arithmetic, then added to the Distance total.
+ dist += weight*dif*dif;
+ }
+
+ return dist;
+}
+
+
+
+
+
+
+
+
+// Print the first size entries of vector to cout on one line, each followed by a space.
+void printVector(short int vector[], int size)
+{
+ for (int i = 0; i < size; i++)
+ cout << vector[i] << " " ;
+ cout << endl;
+
+}
+
+
+void testProperties(Component* c, BitMap * map)
+{
+ short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+ short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+ // Debug helper: dumps the grid dividers, the property vector and the self-distance of c to cout.
+ cout << "First test subDivisions " << endl;
+ c->setSectionFlags(hflag, vflag);
+ cout << "Horizontal flags" <<endl;
+ printVector(hflag, NumHorizDiv + 1);
+ cout << "Vertical flags" <<endl;
+ printVector(vflag, NumVertDiv + 1); // vflag has NumVertDiv + 1 entries, not NumHorizDiv + 1
+
+ cout << "Now lets look at the properties " << endl;
+ // setSectionFlags will actually get called again within setProperties
+ c->setProperties(map);
+ printVector(c->properties(), NumHorizDiv*NumVertDiv + 1); // grid properties plus the one after them
+ cout << endl << " The distance of this component from itself: " << " ";
+ cout << c->distance(c) << endl;
+
+}
+// Draw this component's bounding box (ul() to lr()) as a blue rectangle tagged IMAGE_TAG on the work-space canvas.
+void Component::display_bounding_box()
+{
+ int ulx = (ul()).x();
+ int uly = (ul()).y();
+ int lrx = (lr()).x();
+ int lry = (lr()).y();
+ scale(ulx); scale(uly); scale(lrx); scale(lry); // NOTE(review): presumably rescales each value in place to display coordinates -- confirm
+
+ docommand(".main_window.display.work_space create rectangle %d %d %d %d -outline blue -tags IMAGE_TAG", ulx, uly, lrx, lry);
+}
+
+Distance Component::recognize(Component * learnedchars)
+// This is out of date. Current recognize is below
+{
+ Distance d, nextd;
+ char id = 0;
+ int matched = 0; // becomes 1 once some learned character beats the starting bound
+// printf("Another call to recognize\n");
+ d = (256*256)*numProperties; // starting bound for the search
+
+ // Compare against each of the 256 table entries that has a non-zero
+ // confidence and remember the nearest one. fasciiId is only written when
+ // a match was found, so no uninitialised id is ever stored.
+ for (int i = 0; i < 256; i++)
+ {
+ if(learnedchars[i].confid() != 0)
+ {
+ nextd = distance(&learnedchars[i]);
+ if (nextd < d)
+ {
+ d = nextd;
+ id = (char) i;
+ matched = 1;
+ }
+ }
+
+ }
+ if (matched)
+ fasciiId = id;
+ /* printf("Recognized a Component: %c\n", id); */
+ return d;
+}
+
+
+Distance Component::recognize(Components * learnedgroups, bool allGroups)
+{
+ Distance d, worstDistance,nextd;
+ char id = 0;
+ short int fontid = 0;
+ float tempd;
+ worstDistance = 150000;
+ int matched = 0; // becomes 1 once some learned component beats the starting bound
+// printf("Another call to new recognize\n");
+ d = (65536)*numProperties; // starting bound; a weighted distance can exceed it
+
+ // Finds the nearest learned component, sets fconfid, returns the best distance.
+ fconfid = 0;
+
+ for(int g = 0; g < NumCharGroups &&
+ ((fconfid < ConfidenceThreshold) || allGroups); g++)
+ {
+ int offset = (charGroup+g) % NumCharGroups; // start with this component's own group
+ if (offset == 4 && charGroup != 4) continue; // group 4 is only searched for group-4 components
+ for (ListElement* ptr = learnedgroups[offset].first; ptr != NULL;
+ ptr = ptr->next)
+ {
+ Component * item = (Component *) ptr->item;
+ nextd = distance(item);
+// printf("Distance = %d, character = %c\n", nextd, i);
+ if (nextd < d)
+ {
+ d = nextd;
+ id = item->fasciiId;
+ fontid = item->ffontId;
+ matched = 1;
+ }
+ }
+
+ if (d >= worstDistance)
+ tempd = worstDistance - 1;
+ else tempd = d;
+
+ fconfid = (unsigned short int)
+ (255 - (tempd/worstDistance)*256);
+ if(charGroup == 4) break; // dont check other groups for floaters
+ }
+ // Only overwrite the identification when a learned component actually matched.
+ if (matched)
+ {
+ fasciiId = id;
+ ffontId = fontid;
+ }
+// printf("Recognized a Component: %c in font#%d %d %u\n",
+// id, fontid, fconfid, d);
+
+
+
+ return d;
+}
+
+
+// Move ful.y() down and flr.y() up to the nearest rows that hold pixels; returns 1 if either scan found such a row, else 0.
+int Component::vertShrink(BitMap * bitmap)
+{
+ int r;
+ int shrunk = 0;
+ for(r = ful.y(); r < flr.y(); r++) // top edge: first row with pixels between ful.x() and flr.x()
+ if (pixelsBetween(bitmap->row(r), ful.x(), flr.x()))
+ {
+ ful.y() = r;
+ shrunk = 1; // set even when r equals the old ful.y()
+ break;
+ }
+ for(r = flr.y(); r > ful.y(); r--) // bottom edge: last row with pixels, scanning upwards
+ if (pixelsBetween(bitmap->row(r), ful.x(), flr.x()))
+ {
+ flr.y() = r;
+ shrunk = 1; // set even when r equals the old flr.y()
+ break;
+ }
+
+ return shrunk;
+ }
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/recog.eqn Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,5 @@
+ 52 708 53 278
+ 54 593 54 812
+ 56 404 56 560
+ 56 402 56 560
+ 57 247 58 720
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/recog.tmp Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,3 @@
+)-P
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/status_message.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,115 @@
+#include "tcl_interface.h"
+#include "status_message.h"
+
+/* these are ugly, but who cares? */
+
+/* you must reset this to 0 if you want to use these fxns */
+double last_status = 0.0;
+/* int overload: converts both counts to double and calls the double version. */
+void set_string_status(char* s, int partial, int full)
+{
+ set_string_status(s, (double)partial, (double)full);
+}
+/* Show "<s>: NN%..." through set_status when partial/full first passes a 10% step above last_status; records that step in last_status. */
+void set_string_status(char* s, double partial, double full)
+{
+ double fraction = (double)partial / (double)full; /* NOTE(review): full == 0 is not guarded */
+ if(fraction > 0.9 && last_status < 0.9) /* highest step is tested first, so a big jump reports only the top step reached */
+ {
+ set_status("%s: 90%%...", s);
+ last_status = 0.9;
+ }
+ else if(fraction > 0.8 && last_status < 0.8)
+ {
+ set_status("%s: 80%%...", s);
+ last_status = 0.8;
+ }
+ else if(fraction > 0.7 && last_status < 0.7)
+ {
+ set_status("%s: 70%%...", s);
+ last_status = 0.7;
+ }
+ else if(fraction > 0.6 && last_status < 0.6)
+ {
+ set_status("%s: 60%%...", s);
+ last_status = 0.6;
+ }
+ else if(fraction > 0.5 && last_status < 0.5)
+ {
+ set_status("%s: 50%%...", s);
+ last_status = 0.5;
+ }
+ else if(fraction > 0.4 && last_status < 0.4)
+ {
+ set_status("%s: 40%%...", s);
+ last_status = 0.4;
+ }
+ else if(fraction > 0.3 && last_status < 0.3)
+ {
+ set_status("%s: 30%%...", s);
+ last_status = 0.3;
+ }
+ else if(fraction > 0.2 && last_status < 0.2)
+ {
+ set_status("%s: 20%%...", s);
+ last_status = 0.2;
+ }
+ else if(fraction > 0.1 && last_status < 0.1)
+ {
+ set_status("%s: 10%%...", s);
+ last_status = 0.1;
+ }
+}
+/* Thin wrappers: each one calls set_string_status with a fixed label for one phase of the program. */
+void set_display_status(double partial, double full)
+{
+ set_string_status("Displaying Image", partial, full);
+}
+/* int overload of the display wrapper */
+void set_display_status(int partial, int full)
+{
+ set_string_status("Displaying Image", partial, full);
+}
+/* image reading, double and int overloads */
+void set_read_status(double partial, double full)
+{
+ set_string_status("Reading Image", partial, full);
+}
+
+void set_read_status(int partial, int full)
+{
+ set_string_status("Reading Image", partial, full);
+}
+/* image rotation, double and int overloads */
+void set_rotation_status(double partial, double full)
+{
+ set_string_status("Rotating Image", partial, full);
+}
+
+void set_rotation_status(int partial, int full)
+{
+ set_string_status("Rotating Image", partial, full);
+}
+/* the remaining wrappers exist with int arguments only */
+void set_recognize_status(int p, int f)
+{
+ set_string_status("Recognizing Characters", p, f);
+}
+void set_extract_status(int p, int f)
+{
+ set_string_status("Extracting Words", p, f);
+}
+void set_component_status(int p, int f)
+{
+ set_string_status("Extracting Characters", p, f);
+}
+
+void set_text_display_status(int p, int f)
+{
+ set_string_status("Displaying text", p, f);
+}
+
+void set_spellcheck_status(int p, int f)
+{
+ set_string_status("Spellchecking", p, f);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/status_message.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,25 @@
+#ifndef STATUS_MESSAGE_H
+#define STATUS_MESSAGE_H 1
+#include "tcl_interface.h"
+/* Progress reporting: set_string_status shows "<label>: NN%..." in 10% steps. */
+/* last_status holds the last step reported; reset it to 0 before starting a new operation. */
+extern double last_status;
+/* s is the label; partial/full is the fraction of work done so far. */
+void set_string_status(char* s, int partial, int full);
+void set_string_status(char* s, double partial, double full);
+void set_display_status(int partial, int full);
+void set_rotation_status(int partial, int full);
+void set_read_status(int partial, int full);
+void set_display_status(double partial, double full);
+void set_rotation_status(double partial, double full);
+void set_read_status(double partial, double full);
+void set_recognize_status(int p, int f);
+void set_extract_status(int p, int f);
+void set_component_status(int p, int f);
+void set_spellcheck_status(int p, int f);
+void set_text_display_status(int p, int f);
+
+#endif
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/status_messsage.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,212 @@
+unknown mode: new
+.login: No such file or directory
+% ocrchie
+ocrchie
+
Done initializing new tcl commands
+Done initializing link variables
+Learning from train.tif and train.txt sych 0
+open succeeded on file train.tif. length = 300. width = 2400
+Extracting Components
+
+@çÒ@ ÷
+@
+Learning from 4.header.tif and 4.header.txt sych 1
+open succeeded on file 4.header.tif. length = 129. width = 1825
+Extracting Components
+
+Opening train.tif
+open succeeded on file train.tif. length = 300. width = 2400
+SCALE_FACTOR = 1.000000 Skip = 1.000000
+Starting deskew process: time = 0.000000
+Determining Line boundaries
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+abcdefghijklmnopqrstuvwxyz: ;\
+ABCDEFGHIJKLMNOPQRSTUVWXYZ
+0 1 234567890~ ! @#$%%^&* ()+=-,.<<>>/?'
+Error: Huh?Opening 4.col0.tif
+open succeeded on file 4.col0.tif. length = 2461. width = 877
+SCALE_FACTOR = 1.000000 Skip = 1.000000
+Starting deskew process: time = 0.000000
+Determining Line boundaries
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+dispersion and models of temporal price dis-
+persion. Most models of spatial price disper-
+sion, such as the Salop-Stiglitz model or the
+Wilde-Schwartz model, have equilibria with
+specific prices being charged with positive
+probability mass. The above argument shows
+that such strategies cannot be profit-max-
+iMzing Nash behavior in a temporal ran-
+domizing model.
+Since there are no point masses in the
+equilibrium density, the cumulative distribuM
+tion function will be a continuous function
+On (P*, T ). Let E(P ) be the cumuIative dis-
+tribution function for y( P ); thus y(P) =
+E^(P ) almost eveWhere.
+We can now construct the expected profit
+function for a representative store. When a
+store charges price P, exactIy two events are
+relevant. lt may be that P is the smallest
+price heing charged, in which case, the given
+store gets all of the informed customers.
+nis event happens only if all the other
+8tores charge prices higher than P, an event
+which has probability (l - F( P ))*- ' . On the
+other hand, there may be some store with a
+lower price, in which case the store in ques-
+tion only gets its share of the uninformed
+customers. This event happens with proba-
+bility l - (l - F( P))^ - '. (By Proposition 3
+we can neglect the prohahiIity of any ties.)
+Hence the expected profit of a representa-
+.
+tIve store Is
+/: ( N( P )( l - E( P ))^- '
+
++ @( P ) ( l - ( l - F( P ))^- ' ) 1/( P ) @
+where N( P ) =P( C+ I ) - C( C+ I )
+HI( P ) -P C- %( C )
+The maximization problem of the firm is
+to choose the density function y( P) so as to
+maximize expected profits su%ect to the
+constraInts:
+/V
+1( P ) ? 0 ; .y( P ) @ - l
+It is clear that all prices that are charged
+with positive density must yield the same
+Error: Huh?Opening hal4eqn.tif
+open succeeded on file hal4eqn.tif. length = 1349. width = 1753
+SCALE_FACTOR = 1.000000 Skip = 1.000000
+Starting deskew process: time = 0.000000
+Determining Line boundaries
+109.0 449.0 3 354 538
+109.0 449.0 0 61 89
+1622.0 719.0 4 641 780
+1622.0 719.0 3 354 538
+390.0 913.0 5 878 961
+390.0 913.0 4 641 780
+1450.0 919.0 5 878 961
+1450.0 919.0 5 878 961
+401.0 1108.0 6 1059 1154
+401.0 1108.0 5 878 961
+1095.0 1104.0 6 1059 1154
+1095.0 1104.0 6 1059 1154
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+we can neglect the probability ot any ties.)
+Hence the expected profit of a representa-
+tive store is
+/ P ^. ' ^ * ' ^ * ' ' - ^ ' ^ * * ^ - '
++ w 1 ( P ) ( l - ( l - E ( P ) ) ^ - ' ) ) y ( P ) d T
+where w S ( P ) - - P ( C + I ) - C ( C + I )
+E 1 ( P ) - - P G - C ( C )
+The maximization problem of the firm is
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+we can neglect the probability ot any ties.)
+Hence the expected profit of a representa-
+tive store is
+/ P ^. ' ^ * ' ^ * ' ' - ^ ' ^ * * ^ - '
++ w 1 ( P ) ( l - ( l - E ( P ) ) ^ - ' ) ) y ( P ) d T
+where w S ( P ) - - P ( C + I ) - C ( C + I )
+E 1 ( P ) - - P G - C ( C )
+The maximization problem of the firm is
+Calling interactive learn from Tcl
+Learning from recog.tmp and 1 sych 0
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+we can neglect the probability of any ties.)
+Hence the expected profit of a representa-
+tive store is
+<Int> P r* { <pi> s ( p ) ( 1 - F ( p ) ) n - 1
++ <pi> f ( p ) [ l - ( l - F ( P ) ) n - 1 ] } f ( p ) d p
+where <pi> s ( P ) = - P ( + + I ) - C ( + + I )
+<pi> f ( p ) = - U - c C ( ) )
+The maximization problem of the firm is
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+we can neglect the probability of any ties.)
+Hence the expected profit of a representa-
+tive store is
+<Int> P r* { <pi> s ( p ) ( 1 - F ( p ) ) n - 1
++ <pi> f ( p ) [ l - ( l - F ( P ) ) n - 1 ] } f ( p ) d p
+where <pi> s ( P ) = - P ( + + I ) - C ( + + I )
+<pi> f ( p ) = - U - c C ( ) )
+The maximization problem of the firm is
+436.9u 302.5s 14:08 87%
+% ocrchie
+ocrchie
+
Done initializing new tcl commands
+Done initializing link variables
+Learning from train.tif and train.txt sych 0
+open succeeded on file train.tif. length = 300. width = 2400
+Extracting Components
+
+@çÒ@ øˆ
+@
+Learning from 4.header.tif and 4.header.txt sych 1
+open succeeded on file 4.header.tif. length = 129. width = 1825
+Extracting Components
+@ @ƒ ÅÍ ¢¥ •˜
+Opening train.tif
+open succeeded on file train.tif. length = 300. width = 2400
+SCALE_FACTOR = 1.000000 Skip = 1.000000
+Starting deskew process: time = 0.000000
+Determining Line boundaries
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+Segmentation fault (core dumped)
+11.5u 4.7s 0:33 48%
+% Segmentation fault (core dumped)
+Segmentation fault (core dumped)
+
Badly placed ()'s.
+% rm core
+rm core
+
% run
+run
+
run: Command not found.
+% ocrchie
+ocrchie
+
Done initializing new tcl commands
+Done initializing link variables
+Learning from train.tif and train.txt sych 0
+open succeeded on file train.tif. length = 300. width = 2400
+Extracting Components
+
+@çÒ@ øˆ
+@
+Learning from 4.header.tif and 4.header.txt sych 1
+open succeeded on file 4.header.tif. length = 129. width = 1825
+Extracting Components
+
+Opening int.tif
+open succeeded on file int.tif. length = 227. width = 277
+SCALE_FACTOR = 1.000000 Skip = 1.000000
+Starting deskew process: time = 0.000000
+Determining Line boundaries
+Calling recognize from Tcl
+Extracting Components
+Recognizing document
+/: ' ^
+Thank you for using OCRchie.
+16.0u 12.4s 0:35 81%
+% grep Extracting Components {*.cc *.tcl}
+grep Extracting Components {*.cc *.tcl}
+
Missing }.
+% grep Extracting Components *.cc
+grep Extracting Components *.cc
+
grep: can't open Components
+Page.cc: printf("Extracting Components\n");
+status_message.cc: set_string_status("Extracting Words", p, f);
+status_message.cc: set_string_status("Extracting Characters", p, f);
+%
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/system.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,242 @@
+#include "system.h"
+#include "Point.h"
+
+
+// Global Variables
+Point NOPNT(-1,-1); // Used for default entries
+/* Global variables used to fine tune OCR. These can be adjusted
+ without recompiling by setting them in link_vars.tcl */
+int NoiseTolerance = 1; // Minimum number of pixels in a row of text
+int MinLineSize = 5; // Minimum number of rows in a line of text
+int MinVertSeparation = 0; // Minimum number of rows between lines of text
+int MinHorizSeparation = 1; // Minimum number of cols between characters
+int ConfidenceThreshold = 150; //Minimum confidence for some operations
+int JoinTolerance = 6; // Max number of pixels joining fused chars.
+
+
+
+/* Number of properties in property vector for Components **/
+int numProperties = 30;
+
+/* Grid size for gray scale analysis */
+int NumHorizDiv = 5;
+int NumVertDiv = 5;
+
+// The next four are used in character grouping set in Page::extractComponents
+/* Group 0 - amo
+ Group 1 - Descenders yjp
+ Group 2 - Ascenders JPK
+ Group 3 - Both descenders and Ascenders ()
+ Group 4 - floaters * - `
+*/
+unsigned int NumCharGroups=5;
+int MaxVertSize = 50; // Max vert pixels in char (used for baseline)
+int BaseLineTolerance = 10; // How far in 1/x of line size from base is okay
+int TopLineTolerance = 10; // How far in 1/x of line size from top is okay
+ // 20 = 5%, 10 = 10%
+int MinComponentSize = 16; // Minimum number of pixels in smallest character
+
+uchar CharBitsSet[256]; // Table of the number of bits set in each value 0-255
+ // Used for determining gray scale and pixel counts
+
+/** Some globals set in learn() or readLearnedChars(). These are just starting
+ values **/
+
+double MaxHWRatio = 0.0;
+double MinHWRatio = 1000;
+int MinWidth = 1000; // Min component width in learned set
+
+
+
+
+Component * LearnedChars; // Learned character averages /** NOT USED **/
+Components * LearnedGroups=NULL; //Learned character list array by group type
+
+
+/*** Some values for TCL/TK interface. These variables can be
+ set in the file link_vars.tcl without recompiling ***/
+
+int ENABLE_USER_INTERFACE = 0;
+int VERY_LOW_CONFIDENCE = 150;
+int LOW_CONFIDENCE = 200;
+int DISPLAY_LINE_BOUNDARIES = 0;
+int DISPLAY_BOUNDING_BOXES = 0; // boxes around components
+int SPELLCHECK = 0;
+int DISPLAY_IMAGE = 1;
+int DESKEW_METHOD = BITMAP_DESKEW;
+double ZONING_SCALE_FACTOR = .50;
+double SCALE_FACTOR = 0.5;
+
+TclMode mode = REGULAR;
+
+void initCharBitsSet()
+// Fills CharBitsSet[v] with the number of 1 bits in v for every v in 0..255.
+{
+  for (int value = 0; value < 256; ++value)
+    {
+      int ones = 0;
+      // Peel bits off the low end until none are left, counting the 1s.
+      for (int rest = value; rest != 0; rest >>= 1)
+        ones += rest & 1;
+      CharBitsSet[value] = ones;
+    }
+}
+
+char* backslashify(char* w)
+/* Returns a newly malloc'ed copy of w in which each occurrence of one of
+   the characters  $ [ ] \ { } ( ) ;  is preceded by a backslash.  Note
+   that a double quote is NOT escaped.
+   The caller owns the returned string and must free() it.
+   Returns NULL when the allocation fails. */
+{
+  static const char special[] = "$[]\\{}();";
+  const size_t length = strlen(w);
+
+  // Worst case: every character gets a backslash (2 bytes each), plus
+  // one byte for the terminating NUL.  The previous length*2 buffer had
+  // no room for that NUL when every character needed escaping, and was
+  // zero bytes long for an empty string.
+  char* new_word = (char*)malloc(length*2 + 1);
+  if (new_word == NULL)
+    return NULL;
+
+  size_t new_word_pos = 0;
+  for (size_t i = 0; i < length; i++)
+    {
+      // w[i] is never NUL inside the loop, so strchr cannot match the
+      // terminator of special[].
+      if (strchr(special, w[i]) != NULL)
+        new_word[new_word_pos++] = '\\';
+      new_word[new_word_pos++] = w[i];
+    }
+
+  new_word[new_word_pos] = '\0';
+  return new_word;
+}
+
+void invertBitsInBuffer(uchar * buf, int size)
+// Flips every bit of the first `size` bytes of buf, in place.
+{
+  for (int left = size; left > 0; --left)
+    *buf++ ^= 0xFF;   // equals ~ once truncated back to a uchar
+}
+
+void clearBitsInBuffer(uchar * buf, int size)
+// Sets the first `size` bytes of buf to zero; a size <= 0 changes nothing.
+{
+  int index = 0;
+  while (index < size)
+    buf[index++] = 0;
+}
+
+short int countBitsSet(uchar c)
+// Returns the number of 1 bits in c, using the CharBitsSet lookup table.
+{
+  // CharBitsSet is a global without an initializer, so it starts out all
+  // zero.  'f' (0x66 in ASCII) has four bits set, so a zero entry there
+  // means the table has not been built yet: build it on first use.
+  if (CharBitsSet['f'] == 0)
+    initCharBitsSet();
+
+  return CharBitsSet[c];
+}
+
+int pixelsBetween(uchar * ar, int start, int end)
+{
+ // Counts the set (black) bits of the packed array ar from bit start through bit end, inclusive
+ int startCharNum = start / 8;
+ int endCharNum = end / 8 ;
+ int pixCount=0, startOffset, endOffset;
+ uchar nextChar;
+ // Bit positions count from the most significant bit of each byte (see the shifts below).
+ startOffset = start - startCharNum*8; // first bit of range in first char
+ endOffset = end- endCharNum*8 + 1 ; // first bit after end in last char
+ // NOTE(review): presumably 0 <= start <= end; negative offsets would make the shifts invalid -- confirm
+ // count the whole characters strictly between the first and the last byte
+ for (int i = startCharNum + 1; i < endCharNum; i++)
+ {
+ nextChar = ar[i];
+ pixCount += countBitsSet(nextChar);
+ }
+ // Now add in end pieces
+ // Get our part of the starting character:
+ // the left shift (truncated to a uchar) drops the startOffset high bits before the range
+ nextChar = ar[startCharNum] << startOffset;
+ if (startCharNum != endCharNum )
+ {
+ pixCount += countBitsSet(nextChar);
+ // Get our part of the ending character,
+ // Add in just the first endOffset bits (get rid of lo bits)
+ nextChar = ar[endCharNum] >> (8 - endOffset);
+ pixCount += countBitsSet(nextChar);
+ }
+ else
+ {
+ // range lies in one byte: shift the adjusted starting char right so only bits start..end remain
+ int shift = (8-endOffset)+startOffset;
+ pixCount += countBitsSet(nextChar >> shift);
+ }
+ return pixCount;
+
+}
+
+
+void setRange(uchar ar[], int start, int end)
+// Sets every bit from position start through position end (inclusive) in
+// the packed bit array ar.  Bit 0 is the most significant bit of ar[0].
+// NOTE(review): presumably callers pass 0 <= start <= end -- confirm.
+{
+  const int startCharNum = start / 8;
+  const int endCharNum = end / 8;
+  const int startOffset = start % 8;   // first bit of range in first char
+  const int endOffset = end % 8 + 1;   // first bit after end in last char
+
+  // Build the masks as uchar.  The old `char mask = 255 >> startOffset`
+  // depended on an implementation-defined conversion wherever plain
+  // char is signed.
+  const uchar headMask = (uchar)(255 >> startOffset);      // start bit .. end of byte
+  const uchar tailMask = (uchar)(255 << (8 - endOffset));  // top of byte .. end bit
+
+  if (startCharNum == endCharNum)
+    {
+      // The whole range lies inside one byte.
+      ar[startCharNum] |= (uchar)(headMask & tailMask);
+      return;
+    }
+
+  // Partial first byte, whole bytes in between, partial last byte.
+  ar[startCharNum] |= headMask;
+  for (int i = startCharNum + 1; i < endCharNum; i++)
+    ar[i] = 255;
+  ar[endCharNum] |= tailMask;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/system.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,185 @@
+#ifndef _SYSTEM_H
+#define _SYSTEM_H
+#include <stdio.h>
+#include <stdlib.h>
+#include "/usr/sww/share/include/tiffio.h"
+#include <assert.h>
+#include <bool.h>
+#include <iostream.h>
+#include <stream.h>
+#include <stdlib.h>
+
+/* system.h - typedefs and globals for OCRchie system **/
+/** enums and typedefs for OCRSystem include .h files */
+
+
+enum MapStatus { EMPTY, VALID, OPENERROR, READERROR, WRITEERROR,
+ OTHERERROR };
+
+enum TclMode {REGULAR, ZONING};
+
+typedef double Angle;
+typedef unsigned char uchar;
+typedef unsigned int Confidence;
+
+typedef unsigned long int Distance;
+
+typedef short int Property;
+typedef char Character;
+
+
+/** The number of properties in the property vector for components */
+extern int numProperties;
+
+/** Some variables for fine tuning OCR */
+/** These variables can be changed without recompiling in link_vars.tcl */
+
+extern int NoiseTolerance; // Minimum number of pixels in row of text
+extern int MinLineSize; // Minimum number of rows in a line of text
+extern int MinVertSeparation; // Minimum number of rows between lines of text
+extern int MinHorizSeparation; // Minimum number of blank pixels between chars
+extern int ConfidenceThreshold;// Minimum confidence for some operations
+extern int JoinTolerance; // Maximum number of pixels in a column
+ // joining two fused characters
+extern int MinComponentSize; //Minimum size in pixels of smallest char
+
+// The next four are used in character grouping
+
+extern int MaxVertSize; // Max vert pixels in char (used for baseline)
+extern int BaseLineTolerance; // How far from baseline is okay 1/%linesize
+extern int TopLineTolerance; // How far from topline is okay 1/%linesize
+
+
+/** Variables for user interface can be set in the file link_vars.tcl **/
+
+extern int ENABLE_USER_INTERFACE;
+extern int VERY_LOW_CONFIDENCE;
+extern int LOW_CONFIDENCE;
+extern int DISPLAY_LINE_BOUNDARIES;
+extern int DISPLAY_BOUNDING_BOXES;
+extern int SPELLCHECK;
+extern int DISPLAY_IMAGE;
+#define RLE_DESKEW 1 /* RLE_DESKEW / BITMAP_DESKEW: values for DESKEW_METHOD */
+#define BITMAP_DESKEW 0 /* no trailing ';' so the macro also works inside expressions */
+#define MINIMUM_SKEW_ANGLE 0.25
+extern int DESKEW_METHOD;
+extern double SCALE_FACTOR;
+extern double ZONING_SCALE_FACTOR;
+extern TclMode mode;
+
+/***** end link_vars.tcl section ****/
+#include "Point.h"
+#include "list.h"
+#include "BitMap.h"
+#include "Component.h"
+#include "Word.h"
+#include "learn.h"
+#include "RLEPair.h"
+#include "RLEMap.h"
+#include "EqnMarker.h"
+#include "Zone.h"
+
+
+// Define C++ mode for tiff library
+#ifndef __cplusplus
+#define __cplusplus
+#endif
+
+
+
+extern Point NOPNT; // Just a convenient empty point.
+
+
+/* Constants for the number of horizontal and vertical divisions
+ for determining the gray scale property vector for each component */
+
+extern int NumHorizDiv; //Number of horizontal divisions
+extern int NumVertDiv; //Number of vertical divisions
+
+extern Component * LearnedChars; /** Averaged learned chars NOT USED */
+
+extern unsigned int NumCharGroups;
+extern Components * LearnedGroups; // An array of 5 learned characters
+ //group lists
+
+/** The next 3 are set during learning ***/
+extern double MaxHWRatio; // Max H/W ratio of learned set
+extern double MinHWRatio;
+extern int MinWidth; // minimum component width in learned set
+
+extern uchar CharBitsSet[]; // a table of the number of bits set in a character,
+ // initialized in initCharBitsSet()
+
+
+
+
+
+
+// *** Global function declaration ***
+// functions in RLEMap.cc
+
+void testocr(int argc, char **argv);
+void testRLEMap(char * filename); // Right now in RLEMap.cc
+void testBitMap(char * filename); // Right now in BitMap.cc
+void testpixelsBetween(RLEMap * map); // in RLEMap.cc tests center row
+void printMap(RLEMap * map); // just an ascii "X" display
+
+
+// functions in BitMap.cc
+void testPixelsInRegion(BitMap * bmap, RLEMap * rmap);
+
+// functions in convertMap.cc
+void testConvertMap(char * filename);
+
+// functions in Component.cc
+void printVector(short int vector[], int size); // just prints contents
+
+// prints properties of component c using grayscales from map.
+void testProperties(Component* c, BitMap * map);
+
+
+ // functions in learn.cc
+Components * readLearnedChars(char * tiffFile, char * transFile);
+ /** Read Learned characters uses a tiffFile and a
+ ASCII translation file to read in a component list for
+ for comparison ***/
+
+// in system.cc
+void initCharBitsSet();
+void invertBitsInBuffer(uchar * buf, int size);
+void clearBitsInBuffer(uchar * buf, int size);
+
+short int countBitsSet(uchar c);
+int pixelsBetween(uchar * ar, int start, int end);
+void setRange(uchar ar[], int start, int end);
+char* backslashify(char*);
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/tcl_interface.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,619 @@
+#include <tcl.h>
+#include <tk.h>
+#include <stdlib.h>
+#include <string.h>
+#include "link.h"
+#include "tcl_interface.h"
+#include "stdio.h"
+#include "Page.h"
+
+extern Page* global_page;
+extern Page* active_page;
+extern Page* zoned_page;
+
+extern Tcl_Interp* TCL_ip;
+extern Tk_Window main_window;
+extern double SCALE_FACTOR;
+extern int DISPLAY_SPELLING_MISTAKES;
+extern TclMode mode;
+static int page_currently_open = 0;
+
+Component * curCompSelected;
+Component * prevCompSelected;
+
+
+
+void scale(int& coordinate)
+{ // Multiplies coordinate in place by the global SCALE_FACTOR; the (int) cast drops the fraction.
+ coordinate = (int)(coordinate * SCALE_FACTOR);
+}
+
+void scale(int& coordinate, double scaleFactor)
+{ // Multiplies coordinate in place by scaleFactor; the (int) cast drops the fraction.
+ coordinate = (int)(coordinate * scaleFactor);
+}
+
+int error(char* s)
+{ /* Prints "Error: " followed by s to stdout (no newline is added).  TODO: take var num args. */
+  printf("Error: %s", s);
+  return 0; /* was missing: flowing off the end of a non-void function is undefined behavior */
+}
+
+int quit_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "QUIT": prints a farewell message and ends the process.
+ printf("Thank you for using OCRchie.\n");
+ exit(0); // never returns, so no status is ever handed back to Tcl
+}
+
+int get_linenum_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "GET_LINENUM": calls global_page->get_linenum(x, y).
+ // argv[1] - x coord argv[2] - y coord (parsed with atoi; ac is not checked first)
+ global_page->get_linenum(atoi (argv[1]), atoi (argv[2])); // the value returned by get_linenum() is discarded here
+ return TCL_OK;
+}
+
+int add_equation_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "ADD_EQUATION": forwards four integer arguments to global_page->addEquation().
+ // argv[1] starting line of equation, [2] startcol [3] endline [4] endcol
+ global_page->addEquation(atoi (argv[1]), atoi (argv[2]), atoi(argv[3]),
+ atoi(argv[4]));
+ return TCL_OK; // always TCL_OK; ac is not checked before argv is read
+}
+
+int delete_equation_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "DELETE_EQUATION": passes atoi(argv[1]) and atoi(argv[2]) to global_page->deleteEquation().
+ global_page->deleteEquation(atoi(argv[1]), atoi(argv[2]));
+ return TCL_OK;
+ // NOTE(review): the meaning of the two integers is not visible here -- confirm against Page::deleteEquation.
+}
+
+int auto_zone_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "AUTO_ZONE": calls autoZone() on global_page with two integer arguments.
+ ((ZonedPage *) global_page)->autoZone(atoi(argv[1]), atoi(argv[2])); // unchecked cast; both open commands allocate a ZonedPage
+ return TCL_OK;
+}
+
+int add_zone_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "ADD_ZONE": adds a zone with corners (argv[1],argv[2]) and (argv[3],argv[4]).
+ // first we need to unscale from the scaled image (coordinates arrive multiplied by ZONING_SCALE_FACTOR)
+ int ulx = (int)(atof(argv[1])/ZONING_SCALE_FACTOR);
+ int uly = (int)(atof(argv[2])/ZONING_SCALE_FACTOR);
+ int lrx = (int)(atof(argv[3])/ZONING_SCALE_FACTOR);
+ int lry = (int)(atof(argv[4])/ZONING_SCALE_FACTOR);
+
+ Point ul = Point(ulx,uly);
+ Point lr = Point(lrx, lry);
+ // a zone whose two corners coincide is ignored
+ if ( ul != lr)
+ {
+ Zone * newzone = new Zone(ul,lr);
+ newzone->buildPage((ZonedPage *) global_page);
+ ((ZonedPage *)global_page)->zones()->Append(newzone); // the zone list keeps the pointer; it is not deleted here
+ }
+
+ return TCL_OK;
+
+}
+
+
+int remove_zone_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "REMOVE_ZONE": removes the zone that findZone() reports for point (argv[1], argv[2]).
+ int x = (int)(atof(argv[1])/ZONING_SCALE_FACTOR);
+ int y = (int)(atof(argv[2])/ZONING_SCALE_FACTOR);
+ // x and y are now unscaled, as in add_zone_cmd
+ Zones * z = ((ZonedPage *) global_page)->zones();
+ // NOTE(review): findZone()'s result is passed on unchecked -- confirm removeElement() copes with "not found"
+ z->removeElement(z->findZone(x,y));
+ return TCL_OK;
+}
+
+int select_comp_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "SELECT_COMP": makes the component at (argv[1], argv[2]) the current selection.
+ prevCompSelected = curCompSelected; // keep the previous selection; join_comp_cmd uses both
+ //argv[1] is x coord argv[2] is y
+ curCompSelected = global_page->compAt(Point(atoi(argv[1]), atoi(argv[2])));
+ if (curCompSelected == NULL)
+ {
+ docommand("set validComponent 0"); // Tcl variable validComponent = 0: no component at that point
+ }
+ else
+ {
+ docommand("set validComponent 1");
+ docommand("set curCompId %s",curCompSelected->fasciiId); // publish the component's fasciiId to Tcl
+ docommand("puts \"curCompId: $curCompId\"");
+ }
+ return TCL_OK;
+}
+
+int join_comp_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+// Tcl command "JOIN_COMP": joins the previous and current selections when
+// both exist, differ, and global_page->get_linenum() gives the same value
+// for each.  The selection is cleared whether or not a join happened.
+{
+  const bool joinable =
+    prevCompSelected != NULL && curCompSelected != NULL &&
+    prevCompSelected != curCompSelected &&
+    global_page->get_linenum(prevCompSelected) ==
+    global_page->get_linenum(curCompSelected);
+
+  if (joinable)
+    global_page->join(prevCompSelected, curCompSelected);
+
+  // %lu matches the unsigned long arguments; the old "%u" with a long
+  // argument was a format/argument mismatch (wrong output on LP64).
+  printf(joinable ? "Joined. cur %lu prev %lu \n"
+                  : "not Joined. cur %lu prev %lu\n",
+         (unsigned long) curCompSelected, (unsigned long) prevCompSelected);
+
+  prevCompSelected = NULL;
+  curCompSelected = NULL;
+  return TCL_OK;
+}
+
+int learn_comp_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+// Tcl command "LEARN_COMP": calls learn() with the currently selected component, argv[1] and 256.
+{
+ learn(curCompSelected, argv[1], 256); // NOTE(review): curCompSelected may be NULL here -- confirm learn() tolerates that
+ return TCL_OK;
+}
+int split_comp_horiz_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "SPLIT_COMP_HORIZ": calls thinnestHorizontalSplit() on the current selection.
+ global_page->thinnestHorizontalSplit(curCompSelected);
+ // NOTE(review): curCompSelected is not checked for NULL before the call -- confirm.
+ return TCL_OK;
+}
+
+int get_skew_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "GET_SKEW": calls get_skew() on global_page's rmap().
+ get_skew(global_page->rmap()); // nothing is placed in the interpreter result here
+ return TCL_OK;
+}
+
+int deskew_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+// Tcl command "DESKEW": unless DESKEW_METHOD is -1, calls deskew() on
+// global_page with that method and, when it returns nonzero, calls
+// display_intervals("black") on the page's rmap().  Always returns TCL_OK.
+{
+  if (DESKEW_METHOD == -1)
+    return TCL_OK;
+  if (global_page->deskew(DESKEW_METHOD))
+    global_page->rmap()->display_intervals("black");
+  return TCL_OK;
+}
+
+int display_intervals_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "DISPLAY_INTERVALS [window scale]": draws global_page's intervals in black.
+ if (ac == 1)
+ global_page->rmap()->display_intervals("black");
+ else // NOTE(review): with exactly one argument argv[2] is read without a count check -- confirm callers
+ global_page->rmap()->display_intervals(argv[1], atof(argv[2]), "black");
+ return TCL_OK;
+}
+
+int page_open_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "page_open <file>": loads a page; the interpreter result is "1" if readMap() returns VALID, else "0".
+ mode = REGULAR; // note: set even when the argument count check below fails
+ if(ac != 2)
+ return TCL_ERROR;
+ printf("Opening %s\n", argv[1]);
+ if(page_currently_open == 1)
+ {
+ /* should print some message about closing the current one first */
+ return TCL_OK; // a page is already open: the request is silently ignored
+ }
+ global_page = new ZonedPage;
+ if(global_page->readMap(argv[1]) != VALID)
+ interp->result = "0";
+ else
+ {
+ interp->result = "1";
+ }
+ active_page = global_page;
+ page_currently_open = 1; // marked open even when readMap() did not return VALID
+ return TCL_OK;
+}
+
+int zoned_page_open_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "zoned_page_open <file>": like page_open_cmd, but sets ZONING mode and records the page in zoned_page.
+ mode = ZONING; // note: set even when the argument count check below fails
+ if(ac != 2)
+ return TCL_ERROR;
+ printf("Opening %s\n", argv[1]);
+ if(page_currently_open == 1)
+ {
+ /* should print some message about closing the current one first */
+ return TCL_OK; // a page is already open: the request is silently ignored
+ }
+ global_page = new ZonedPage;
+ if(global_page->readMap(argv[1]) != VALID)
+ interp->result = "0";
+ else
+ {
+ interp->result = "1";
+ }
+ zoned_page = global_page;
+ page_currently_open = 1; // marked open even when readMap() did not return VALID
+ return TCL_OK;
+}
+
+
+int extract_comp_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "EXTRACT_COMP": calls extractComponents() on global_page.
+ global_page->extractComponents(MinHorizSeparation); // the global tuning value is passed as the argument
+ return TCL_OK;
+
+}
+
+int get_page_height_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "get_page_height": writes global_page->get_height() as a decimal string into interp->result.
+ sprintf(interp->result, "%d", global_page->get_height());
+ /* printf("Interpreter height = %s\n", interp->result); */
+ return TCL_OK;
+}
+
+int get_page_width_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "get_page_width": writes global_page->get_width() as a decimal string into interp->result.
+ sprintf(interp->result, "%d", global_page->get_width());
+ /* printf("Interpreter width = %s\n", interp->result); */
+ return TCL_OK;
+}
+
+int zoom_in_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "ZOOM_IN [window scale]": redraws global_page's intervals in black.
+ if(ac == 1)
+ {
+ SCALE_FACTOR = SCALE_FACTOR * 2; // no arguments: double the global scale, then redraw
+ global_page->rmap()->display_intervals("black");
+ }
+ else
+ {
+ //argv[1] window argv[2] scalefactor (SCALE_FACTOR itself is left unchanged on this path)
+ global_page->rmap()->display_intervals(argv[1], atof(argv[2]),"black");
+ }
+ return TCL_OK;
+}
+
+int zoom_out_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "ZOOM_OUT [window scale]": redraws global_page's intervals in black.
+ if (ac == 1)
+ {
+ SCALE_FACTOR = SCALE_FACTOR * 0.5; // no arguments: halve the global scale, then redraw
+ global_page->rmap()->display_intervals("black");
+ }
+ else
+ global_page->rmap()->display_intervals(argv[1], atof(argv[2]),"black");
+ // (with arguments: argv[1] is the window, argv[2] the scale; SCALE_FACTOR is left unchanged)
+ return TCL_OK;
+}
+
+int deallocate_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ /* Tcl command "DEALLOCATE_PAGE": frees global_page and clears every global pointer to it. */
+  if(page_currently_open == 0)
+    return TCL_OK; /* don't do anything if there isn't anything open */
+  if (active_page == global_page) active_page = NULL; /* no dangling aliases: the   */
+  if (zoned_page == global_page) zoned_page = NULL;   /* switch_to_* commands only test for NULL */
+  delete global_page; global_page = NULL;
+  page_currently_open = 0;
+  return TCL_OK; }
+int learn_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "LEARN": announces the request on stdout, then calls learn(tif, txt, sync).
+ // argv[1] is the tif file, argv[2] the txt file and argv[3] a bool for word synchronization
+ printf("Learning from %s and %s sych %d\n", argv[1], argv[2], atoi(argv[3]));
+ learn(argv[1], argv[2], atoi(argv[3]));
+ return TCL_OK;
+}
+
+int learn_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "LEARN_PAGE": learns from the already loaded global_page.
+  // argv[1] = txt file, argv[2] = word-sync flag; the old printf (copied from learn_cmd) also read a nonexistent argv[3]
+  printf("Learning from current page and %s sych %d\n", argv[1], atoi(argv[2]));
+  learn(global_page, argv[1], atoi(argv[2]));
+  return TCL_OK;
+}
+
+int learn_data_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "LEARN_DATA <file>": announces the file on stdout and passes it to readLearnedGroups().
+ printf("Learning data from %s\n", argv[1]);
+ readLearnedGroups(argv[1]);
+ return TCL_OK;
+}
+
+int write_word_pos_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "WRITE_WORD_POS": passes argv[1] to global_page->writeWordPos().
+ global_page->writeWordPos(argv[1]);
+ return TCL_OK;
+}
+
+int write_wordbox_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "WRITE_WORDBOX": forwards its four arguments to global_page->writeWordbox().
+ // arguments are output file, x offset, y offset and WbxEquationsOnly variable
+ global_page->writeWordbox(argv[1], atoi(argv[2]), atoi(argv[3]), atoi(argv[4]));
+ return TCL_OK;
+}
+
+int write_equations_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "WRITE_EQUATIONS": forwards its two arguments to global_page->writeEquations().
+ // arguments are output file, linenumber offset
+ global_page->writeEquations(argv[1], atoi(argv[2]));
+ return TCL_OK;
+}
+
+int write_learned_chars_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "WRITE_LEARNED_CHARS": passes argv[1] to writeLearnedGroups().
+ writeLearnedGroups(argv[1]);
+ return TCL_OK;
+}
+
+int find_lines_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "FIND_LINES": runs deskew_cmd, then setLines() on global_page.
+ deskew_cmd(clientData, interp, ac, argv); // its status is ignored (deskew_cmd always returns TCL_OK)
+ /* printf("Calling setlines\n"); */
+ global_page->setLines();
+ return TCL_OK;
+}
+
+
+int recognize_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+// Tcl command "RECOGNIZE": recognizes global_page and sends the resulting words to Tcl.
+{
+ /* just in case someone has left something sitting around */
+ docommand(".main_window.edit_window.text_part delete 1.0 end");
+ docommand("set COLORED_WORDS {}");
+ /* components are extracted only if the page does not have them yet */
+ if(global_page->components() == NULL)
+ global_page->extractComponents(MinHorizSeparation);
+ /* printf("Calling recognize\n"); */
+ global_page->recognize();
+ /* printf("Calling extractwords\n"); */
+ global_page->extractWords();
+ if(SPELLCHECK)
+ {
+ /* printf("Spellchecking\n"); */
+ global_page->spellcheck();
+ }
+ /* printf("Calling sendwordstotcl\n"); */
+ global_page->send_words_to_tcl();
+ return TCL_OK;
+}
+
+int find_lines_and_recognize_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+// Tcl command "FIND_LINES_AND_RECOGNIZE": deskew, find lines, extract components, recognize, send words to Tcl.
+{
+ /* just in case someone has left something sitting around */
+ docommand(".main_window.edit_window.text_part delete 1.0 end");
+ docommand("set COLORED_WORDS {}");
+ /* printf("Calling deskew\n"); */
+ deskew_cmd(clientData, interp, ac, argv);
+ /* printf("Calling setlines\n"); */
+ global_page->setLines();
+ /* unlike recognize_cmd, components are always extracted here */
+ global_page->extractComponents(MinHorizSeparation);
+ /* printf("Calling recognize\n"); */
+ global_page->recognize();
+ /* printf("Calling extractwords\n"); */
+ global_page->extractWords();
+ if(SPELLCHECK)
+ {
+ /* printf("Spellchecking\n"); */
+ global_page->spellcheck();
+ }
+ /* printf("Calling sendwordstotcl\n"); */
+ global_page->send_words_to_tcl();
+ return TCL_OK;
+}
+
+
+int switch_to_active_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "SWITCH_TO_ACTIVE_PAGE": makes active_page the page all other commands operate on.
+ if (active_page != NULL) // with no active page, global_page is left as it is
+ global_page = active_page;
+ return TCL_OK;
+}
+
+int switch_to_zoned_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{ // Tcl command "SWITCH_TO_ZONED_PAGE": makes zoned_page the page all other commands operate on.
+ if (zoned_page != NULL) // with no zoned page, global_page is left as it is
+ global_page = zoned_page;
+ return TCL_OK;
+}
+
+int set_active_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+{
+ // Tcl command "SET_ACTIVE_PAGE x y": asks zoned_page to activate() at (x, y) and works on the page it returns.
+ int x = atoi(argv[1]);
+ int y = atoi(argv[2]);
+ active_page = ((ZonedPage *) zoned_page)->activate(x,y); // NOTE(review): zoned_page is not checked for NULL -- confirm
+ if (active_page != NULL)
+ {
+ docommand(".main_window.edit_window.text_part delete 1.0 end");
+ docommand("set COLORED_WORDS {}");
+ docommand("focus .main_window.display");
+ docommand("grab current .main_window.display");
+ global_page = active_page; // the DISPLAY_INTERVALS / FIND_LINES commands below act on global_page
+ docommand("DISPLAY_INTERVALS .main_window.display.work_space $SCALE_FACTOR");
+ docommand("FIND_LINES");
+ docommand("grab release .main_window.display");
+ docommand("zone_message \"Active Zone at (%d,%d)\"", x, y);
+ }
+ else
+ {
+ docommand("zone_message \"No zone found here\"");
+ }
+ return TCL_OK;
+
+}
+
+
+static int
+vdocommand1(char* s)
+{
+/* final function called to do a tcl docommand: evaluates s in TCL_ip and returns the Tcl status code */
+ int code;
+
+ code = Tcl_Eval(TCL_ip, s);
+ if (code == TCL_ERROR)
+ error(TCL_ip->result); /* on failure, print the interpreter's error message */
+ return code;
+}
+
+void update()
+{ // Hands control to Tk for one event via Tk_DoOneEvent(TK_ALL_EVENTS).
+ Tk_DoOneEvent(TK_ALL_EVENTS);
+}
+
+static int
+vdocommand(int record, char* fmt, va_list args)
+{
+/* helper for docommand: formats the command, evaluates it, returns the Tcl status; `record` is unused */
+  char buf[4097];
+
+  /* vsnprintf cannot overrun buf the way vsprintf could.  A command that
+     does not fit (or a formatting error, which yields a negative count
+     that the cast turns into a huge value) is rejected, not run truncated. */
+  if ((size_t) vsnprintf(buf, sizeof buf, fmt, args) >= sizeof buf)
+    return TCL_ERROR;
+  if (strchr(buf, '\?')) error("Huh?");
+  return vdocommand1(buf); }
+
+
+int
+docommand(char* fmt, ...)
+{
+/* do a tcl command, var number of args; returns its Tcl status (no value was returned before) */
+  va_list args;
+
+  va_start(args, fmt);
+  const int code = vdocommand(0, fmt, args);
+  va_end(args);
+  return code; }
+
+static int
+vset_status1(char* s)
+{
+/* final function called by vset_status: evaluates s in TCL_ip and returns the Tcl status code */
+ int code;
+
+ code = Tcl_Eval(TCL_ip, s);
+ if (code == TCL_ERROR)
+ error(TCL_ip->result); /* on failure, print the interpreter's error message */
+ return code;
+}
+
+static int
+vset_status(int record, char* fmt, va_list args)
+{
+/* Helper for set_status: formats the message and sets it as the -text of
+   .main_window.button_bar.msg.  `record` is unused. */
+  char buf[4097];
+  char newbuf[4097];
+  /* Bounded writes: newbuf is no larger than buf, so the old sprintf
+     overflowed it whenever the message neared the size of buf.  Text that
+     does not fit is cut off (a cut-off command fails in Tcl_Eval). */
+  vsnprintf(buf, sizeof buf, fmt, args);
+  snprintf(newbuf, sizeof newbuf, ".main_window.button_bar.msg configure -text \"%s\"", buf);
+  if (strchr(buf, '\?')) error("Huh?");
+  return vset_status1(newbuf);
+}
+
+int set_status(char* fmt, ...)
+{ /* Formats a printf-style message, hands it to vset_status, then runs the Tcl command "update". */
+  va_list args;
+  va_start(args, fmt);
+  const int code = vset_status(0, fmt, args);
+  va_end(args);
+  docommand("update");
+  return code; /* status from vset_status; previously nothing was returned */
+}
+
+int mispelled(char* word)
+/* Runs the Tcl command "spellcheck <word>" and returns 1 when the
+   interpreter result is exactly "MISPELLED", 0 otherwise.
+   NOTE(review): word is pasted into the command unescaped -- confirm
+   callers never pass Tcl special characters. */
+{
+  docommand("spellcheck %s", word);
+  return strcmp("MISPELLED", TCL_ip->result) == 0 ? 1 : 0;
+}
+
+
+
+int initialize_interpreter()
+{ /* Creates the global interpreter TCL_ip and runs Tcl_Init on it. */
+  TCL_ip = Tcl_CreateInterp();
+  /* Hand back Tcl_Init's status; this int function previously returned nothing. */
+  return Tcl_Init(TCL_ip);
+}
+
+
+int load_user_interface()
+{ /* Creates and maps the Tk main window, then sources new_ui.tcl. */
+  main_window = Tk_CreateMainWindow(TCL_ip, NULL, "OCRchie", "OCRchie");
+  Tk_Init(TCL_ip);
+  Tk_MapWindow(main_window);
+  docommand("source new_ui.tcl");
+  return TCL_OK; /* was missing: flowing off the end of an int function is undefined */
+}
+int initialize_command_procs()
+{ /* Registers every OCRchie command procedure with the interpreter TCL_ip; returns TCL_OK. */
+  Tcl_CreateCommand(TCL_ip, "page_open", page_open_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "zoned_page_open", zoned_page_open_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "EXTRACT_COMP", extract_comp_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "get_page_height", get_page_height_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "get_page_width", get_page_width_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN", learn_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN_PAGE", learn_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN_DATA", learn_data_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "WRITE_WORD_POS", write_word_pos_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "WRITE_WORDBOX", write_wordbox_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "WRITE_EQUATIONS", write_equations_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+
+  Tcl_CreateCommand(TCL_ip, "WRITE_LEARNED_CHARS", write_learned_chars_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "FIND_LINES_AND_RECOGNIZE", find_lines_and_recognize_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "FIND_LINES", find_lines_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "RECOGNIZE", recognize_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "SELECT_COMP", select_comp_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "JOIN_COMP", join_comp_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN_COMP", learn_comp_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+
+  Tcl_CreateCommand(TCL_ip, "SPLIT_COMP_HORIZ", split_comp_horiz_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "GET_SKEW", get_skew_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DESKEW", deskew_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DISPLAY_INTERVALS", display_intervals_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "QUIT", quit_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ZOOM_IN", zoom_in_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ZOOM_OUT", zoom_out_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DEALLOCATE_PAGE", deallocate_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "GET_LINENUM",get_linenum_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ADD_EQUATION",add_equation_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DELETE_EQUATION",delete_equation_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "AUTO_ZONE",auto_zone_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ADD_ZONE",add_zone_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "REMOVE_ZONE",remove_zone_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+
+  Tcl_CreateCommand(TCL_ip, "SWITCH_TO_ACTIVE_PAGE",switch_to_active_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "SWITCH_TO_ZONED_PAGE",switch_to_zoned_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+
+  Tcl_CreateCommand(TCL_ip, "SET_ACTIVE_PAGE",set_active_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+
+  printf("Done initializing new tcl commands\n");
+  return TCL_OK; /* was missing: flowing off the end of an int function is undefined */
+}
+
+int initialize_link_vars()
+{ /* Runs init_link_vars(), then sources link_vars.tcl. */
+  init_link_vars(); /* what a nice name */
+  docommand("source link_vars.tcl");
+  printf("Done initializing link variables\n");
+  return TCL_OK; /* was missing: flowing off the end of an int function is undefined */
+}
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/tcl_interface.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,44 @@
+#ifndef TCL_INTERFACE
+#define TCL_INTERFACE 1
+#include <stdarg.h>
+#include <stdio.h>
+extern double SCALE_FACTOR;
+int mispelled(char* s);
+int error(char* s);
+int docommand(char* fmt, ...);
+int set_status(char* fmt, ...);
+int initialize_interpreter();
+int load_user_interface();
+int initialize_command_procs();
+int initialize_link_vars();
+void update();
+void scale(int& coordinate);
+void scale(int& coordinate, double scaleFactor);
+#endif
+/*
+These commands can be called from Tcl
+(things in quotes are the tcl names, others are c++ functions)
+
+ "page_open", page_open_cmd,
+ "get_page_height", get_page_height_cmd,
+ "get_page_width", get_page_width_cmd,
+
+ "FIND_LINES_AND_RECOGNIZE", find_lines_and_recognize_cmd,
+ "GET_SKEW", get_skew_cmd,
+ "DESKEW", deskew_cmd,
+ "DISPLAY_INTERVALS", display_intervals_cmd,
+ "QUIT", quit_cmd,
+ "ZOOM_IN", zoom_in_cmd,
+ "ZOOM_OUT", zoom_out_cmd,
+ "DEALLOCATE_PAGE", deallocate_page_cmd,
+
+
+*/
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/testocr.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,110 @@
+#include "system.h"
+#include "Page.h"
+#include "stdlib.h"
+#include <iostream.h>
+#include "tcl_interface.h"
+
+void draw_bitmap(int x, int y, char * xbmfile);
+/* int docommand(char* fmt, ...); */
+
+void testocr(int argc, char ** argv)
+/*** Ad-hoc test driver: exercises the bitmap/RLE code on hello.tif, then runs the whole pipeline on a large page ***/
+{
+ Page * hello = new Page;
+ hello->readMap("hello.tif");
+
+ cout << "Test PixelsInRegion Functions"<< endl;
+ // This test has ranges appropriate for hello.tif
+ testPixelsInRegion(hello->bmap(), hello->rmap());
+ cout << endl;
+
+ cout << "Now test grayscale" << endl;
+ cout << "NumPixels in Region (3,5) .. (52,19) ";
+ cout << hello->bmap()->pixelsInRegion(Point(3,5), Point(52,19)) << endl;
+ cout << " Area: " << (52 -3 +1)*(19-5+1) << endl;
+ cout << " GrayScale: ";
+ cout << hello->bmap()->grayScale(Point(3, 5), Point(52,19)) << endl;
+
+ cout << endl << "Now lets look at the property vector for this region" << endl;
+ Component* c = new Component(Point(3, 5),Point(52,19)); // note: c is never deleted in this function
+ testProperties(c, hello->bmap());
+ hello->bmap()->writeTclMap("hello",Point(0,0),Point(0,0),0);
+
+ cout << "Now some testing with pagebw.tif " << endl;
+ testLearn();
+ Page * testPage;
+ testPage = new Page;
+ cout << "reading map" << endl;
+ testPage->readMap("/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/bigtiff.tif");
+ NoiseTolerance = 10; // overrides the global tuning value for the rest of the run
+ cout << "Finding the lines." << endl;
+ testPage->setLines();
+ // The drawing below is skipped when argv[2] is "-ugly" (the if has an empty body on purpose).
+ if ((argc > 2) && !(strcmp(argv[2],"-ugly")));
+ else
+ {
+ docommand(".t.f.c create bitmap 637 825 -bitmap @/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/tif/pagebw");
+ docommand("update");
+ // Draw a blue band in the gap between each text line and the next one.
+ int centerline, width;
+ for(int j=0; j < testPage->fnumLines; j++)
+ {
+ centerline = (testPage->flineinfo[j].fendrow + testPage->flineinfo[j + 1].fstartrow) / 2; // NOTE(review): reads flineinfo[fnumLines] on the last pass -- confirm the array has that entry
+ width = testPage->flineinfo[j + 1].fstartrow - testPage->flineinfo[j].fendrow;
+ docommand(".t.f.c create line %d %d %d %d -width %d -fill blue -tags project_ray -stipple @/usr/sww/share/tclX-7.3a/tkX/3.6a/demos/bitmaps/grey.25", 0, centerline, testPage->bmap()->imageWidth(), centerline, width);
+ update();
+ }
+ for(int i= 0; i < 500; i++) // process up to 500 more Tk events
+ {
+ update();
+ }
+ }
+
+ testPage->extractComponents(MinHorizSeparation);
+ cout << "avgSpacing - " << testPage->avgSpacing() << endl;
+
+// testPage->printComponents();
+
+
+ testPage->recognize();
+ testPage->extractWords();
+ testPage->printWords();
+ delete testPage;
+ delete hello;
+
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+void testExtractAndMerge()
+{
+  // Test driver: moves the pairs covering columns 504..520 of row 144 of
+  // train.tif into row 143, printing rows 142..145 before and after.
+  Page * testPage = new Page;
+  testPage->readMap("train.tif");
+  testPage->rmap()->printPairs(142,145);
+  RLEPairs * p = testPage->rmap()->row(144)->extract(504, 520);
+  testPage->rmap()->row(143)->merge(p);
+  testPage->rmap()->printPairs(142,145);
+  testPage->setLines();
+  testPage->extractComponents(MinHorizSeparation);
+  testPage->printComponents();
+  delete testPage;  // was leaked, as was an RLEMap that was allocated but never used
+}
+
+
+
+
+
Binary file reference/ocr-new/train.tif has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/train.txt Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,14 @@
+a b c d e f g h i j k l m n o p q r s t u v w x y z : ; \
+A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+0 1 2 3 4 5 6 7 8 9 0 ~ ! @ # $ % % ^ & * ( ) + = - , . <<> > / ? '
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-new/tt.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,63 @@
+#!/usr/sww/bin/wish4.0-b4
+# Text indices recorded by addword for words whose status is not OK.
+set COLORED_WORDS {}
+# Text widget that receives the words.
+text .t -background white -foreground black
+pack .t
+# Background colour for each non-OK status.
+set LOW_PRECISION_BACKGROUND green
+set MISPELLED_BACKGROUND blue
+set UNKNOWN_CHAR_BACKGROUND red
+# One text tag per status; the tag name is the status name.
+.t tag configure LOW_PRECISION -background $LOW_PRECISION_BACKGROUND
+.t tag configure MISPELLED -background $MISPELLED_BACKGROUND
+.t tag configure UNKNOWN_CHAR -background $UNKNOWN_CHAR_BACKGROUND
+# addword w ?xpos? ?ypos? ?status?
+#   Appends "$w " to the text widget .t and tags the word with the
+#   status name and with the tags x$xpos and y$ypos.
+#   status OK: the word is only inserted and tagged.
+#   status LOW_PRECISION, MISPELLED or UNKNOWN_CHAR: the index of the
+#   insert mark is also appended to the global COLORED_WORDS list.
+#   Any other status: a message is written to stdout and the word is
+#   inserted followed by the text "UNKNOWNSTATUS? ".
+#   The status is always echoed with puts first.
+proc addword { w {xpos 0} {ypos 0} {status OK}} {
+ global COLORED_WORDS
+ puts $status
+ if { ![string compare $status OK] } {
+ .t insert end "$w "
+ .t mark set insert end
+ .t mark set insert "end -2 char"
+ .t tag add $status "insert wordstart" "insert wordend"
+ .t tag add x$xpos "insert wordstart" "insert wordend"
+ .t tag add y$ypos "insert wordstart" "insert wordend"
+ .t mark set insert end
+ } elseif { ![string compare $status LOW_PRECISION] || ![string compare $status MISPELLED] || ![string compare $status UNKNOWN_CHAR] } {
+ .t insert end "$w "
+ .t mark set insert end
+ .t mark set insert "end -3 char"
+ .t tag add $status "insert wordstart" "insert wordend"
+ .t tag add x$xpos "insert wordstart" "insert wordend"
+ .t tag add y$ypos "insert wordstart" "insert wordend"
+ lappend COLORED_WORDS [.t index insert]
+ .t mark set insert end
+ } else {
+ puts stdout "Unknown word status for $w: $status"
+ .t insert end "$w UNKNOWNSTATUS? "
+ }
+}
+
+# pop_colored_words
+#   Removes the first entry of the global COLORED_WORDS list and
+#   returns it.  Returns an empty string when the list is empty.
+proc pop_colored_words { } {
+    global COLORED_WORDS
+    set head [lindex $COLORED_WORDS 0]
+    # lrange yields an empty list when there is nothing after index 0
+    set COLORED_WORDS [lrange $COLORED_WORDS 1 end]
+    return $head
+}
+
+# Tab key in .t: when COLORED_WORDS is empty print "No more words";
+# otherwise pop the next recorded index, move the insert mark to the
+# start of the word there, scroll it into view and print the new index
+# and the tags found at that position.  The trailing "break" stops any
+# further bindings for the event from running.
+bind .t <Tab> {
+ if {[llength $COLORED_WORDS] == 0} {
+ puts stdout "No more words"
+ } else {
+ .t mark set insert [pop_colored_words]
+ .t mark set insert "insert wordstart"
+ set x [.t index insert]
+ puts "New index is $x"
+ .t see insert
+ set local_tags [.t tag names insert]
+ puts "Tags at this place: $local_tags"
+ }
+ break
+}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/BitMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,433 @@
+ /* *****************************************************************
+ * BitMap.cc - Member functions for a BitMap *
+ * because of the complexity of the many Bit functions, they are *
+ * not all in this file. In this file are only the following *
+ * functions:
+ *
+ * BitMap() - Constructor
+ * ~BitMap() - Destructor
+ *
+ * int imageLength();
+ * int imageWidth();
+ * MapStatus & status;
+ * MapStatus readMap(char * filename) ;
+ * MapStatus writeMap(char * filename);
+ *
+ * // Data Access and low level manipulation functions
+ * uchar * row(int i) - Returns a pointer to row i
+ * MapStatus setBit(Point point, Color clr);
+ * Color readBit(Point point);
+ *
+ *
+ ***************************************************************/
+
+#include "BitMap.h"
+#include <iostream.h>
+#include <stdio.h>
+#include "status_message.h"
+
+/*--------------------------------------------------------------
+Primary Purpose: Set or clear one pixel in a packed bit array
+Arguments: new_data - array of row pointers, eight pixels per uchar,
+ bit 7 of a byte is the leftmost pixel of that byte
+ y, x - row and column of the pixel
+ new_val - nonzero sets the bit, zero clears it
+Return Value: 1 if the bit was set, 0 if it was cleared
+Constraints: new_data[y] must hold at least x/8 + 1 bytes
+---------------------------------------------------------------*/
+inline int set_pixel_value(uchar** new_data, int y, int x, int new_val)
+{
+  const uchar mask = (uchar)(1 << (7-(x%8)));
+
+  if (new_val)
+    new_data[y][x/8] |= mask;
+  else
+    new_data[y][x/8] &= (uchar)~mask;
+
+  // The function is declared int, so it has to return something.
+  return new_val ? 1 : 0;
+}
+
+/*--------------------------------------------------------------
+Primary Purpose: Read one pixel from a packed bit array
+Arguments: data - array of row pointers, eight pixels per uchar,
+ bit 7 of a byte is the leftmost pixel of that byte
+ y, x - row and column of the pixel
+Return Value: 1 if the bit is set, otherwise 0
+---------------------------------------------------------------*/
+inline int get_pixel_value(uchar** data, int y, int x)
+{
+  const int bit = 1 << (7 - (x%8));
+  return ((data[y][x/8]) & bit) ? 1 : 0;
+}
+
+
+BitMap::BitMap()
+:fImageWidth(0), fImageLength(0), fStatus(EMPTY), fMapData(NULL)
+/*--------------------------------------------------------------
+Primary Purpose: Constructor
+Effects: Sets the status to EMPTY, width and length to zero and the
+ row array pointer to NULL.  No map data is allocated.
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{ };
+
+
+
+ BitMap::~BitMap()
+/*--------------------------------------------------------------
+Primary Purpose: destructor
+Effects: Deletes each row of map data, then the array of row
+ pointers.  Does nothing when no map data has been allocated.
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+  if (fMapData != NULL)
+  {
+    int i;
+
+    // Rows are allocated with new uchar[...], so they must be
+    // released with the array form of delete.
+    for (i=0; i< fImageLength; i++)
+      delete [] fMapData[i];
+
+    // The array of row pointers is also an array allocation.
+    delete [] fMapData;
+  }
+};
+
+
+
+uchar * BitMap::row(int i)
+/*--------------------------------------------------------------
+Primary Purpose: Return the packed pixel data of one row
+Arguments: i is the index of the row
+Return Value: pointer to the uchar array stored for row i
+Constraints: i < fImageLength (not checked here)
+Rev: KM 10/15
+---------------------------------------------------------------*/
+{
+  uchar * rowData = fMapData[i];
+  return rowData;
+};
+
+
+
+MapStatus BitMap::readMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read a BitMap from a TIFF file
+Arguments: filename of TIFF file
+Return Value: OPENERROR if the file cannot be opened, otherwise VALID
+Effects: Sets the private fields
+ fImageWidth - the pixel width of the image
+ fImageLength - the vertical pixel length of the image
+ fStatus - OPENERROR or VALID
+ fMapData - an array of pointers to uchar arrays, one per row,
+ each fImageWidth/8 + 1 bytes long
+ Rows are passed through invertBitsInBuffer unless the file is
+ min-is-white.  Map data left from an earlier read is released
+ before the new data is allocated.
+Constraints: filename must be a two level TIFF file
+Rev: 10/15/95 KM Portions Borrowed from Assignment 1
+---------------------------------------------------------------*/
+{
+  TIFF *tif;
+  // -1 is neither photometric value tested below, so a file without
+  // the tag is reported as unknown instead of reading garbage.
+  short photometric = -1;
+
+  // Open File - Read length and width
+
+  tif = TIFFOpen (filename, "r");
+  if(tif == NULL)
+  { fStatus= OPENERROR;
+    return OPENERROR;
+  }
+
+  // Release the previous map (if any) while fImageLength still
+  // describes it, so that reading twice does not leak.
+  if (fMapData != NULL)
+  {
+    for (int old = 0; old < fImageLength; ++old)
+      delete [] fMapData[old];
+    delete [] fMapData;
+    fMapData = NULL;
+  }
+
+  TIFFGetField (tif, TIFFTAG_IMAGELENGTH, &fImageLength);
+  TIFFGetField (tif, TIFFTAG_IMAGEWIDTH, &fImageWidth);
+  TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photometric);
+
+  // "new (uchar *)[n]" is not a valid array new-expression.
+  fMapData = new uchar *[fImageLength];
+
+  printf("open succeeded on file %s. length = %d. width = %d\n",
+         filename, fImageLength, fImageWidth);
+  /* if(photometric == PHOTOMETRIC_MINISWHITE)
+     printf("min-is-white format\n");
+     else if(photometric == PHOTOMETRIC_MINISBLACK )
+     printf("min-is-black format\n"); */
+
+  if((photometric != PHOTOMETRIC_MINISWHITE) &&
+     (photometric != PHOTOMETRIC_MINISBLACK))
+    printf("with an unknown(!) photometric: %d\n", photometric);
+
+  // Calculate number of chars in a row
+  int numChars = (fImageWidth / 8 ) +1 ;
+  if(ENABLE_USER_INTERFACE)
+    set_status("Reading %s...", filename);
+  last_status = 0.0;
+
+  for (int row = 0; row < fImageLength; ++row)
+  {
+    if(ENABLE_USER_INTERFACE)
+      set_read_status(row, fImageLength);
+    fMapData[row] = new uchar[numChars];
+    // The last byte may not be filled by the scanline read.
+    fMapData[row][numChars - 1] = 0;
+    // NOTE(review): the result of TIFFReadScanline is not checked;
+    // a failed read leaves the row contents unspecified.
+    TIFFReadScanline(tif,fMapData[row],row,0);
+    if(photometric != PHOTOMETRIC_MINISWHITE)
+      invertBitsInBuffer(fMapData[row], numChars);
+
+  }
+  last_status = 0.0;
+  if(ENABLE_USER_INTERFACE)
+    set_status("Done reading %s", filename);
+  TIFFClose(tif);
+  fStatus = VALID;
+  return VALID;
+
+};
+
+
+MapStatus BitMap::writeTclMap(char * filename,
+                              Point & ul, Point & lr, int scaledown)
+/*--------------------------------------------------------------
+Primary Purpose: Dump the map as a text list of hexadecimal bytes
+Arguments: filename - file to create; also used as the name prefix
+ in the header lines
+ ul, lr, scaledown - currently unused
+Return Value: OPENERROR if the file cannot be created, else VALID
+Effects: Writes "<name>_width", "<name>_height" and a
+ "static char <name>_bits[] {" list holding every byte of every
+ row, separated by commas.
+ NOTE(review): the header lines lack "#define" and "=", so the
+ output is not a complete X bitmap file - confirm the format the
+ Tcl side expects.
+---------------------------------------------------------------*/
+{
+  FILE * outfile;
+  int numChars= fImageWidth /8 + 1;
+  outfile = fopen(filename, "w");
+
+  if(!outfile)
+  {
+    cout << " Could not open " << filename << endl;
+    // Writing through a NULL FILE* would crash, so stop here.
+    return OPENERROR;
+  }
+
+  fprintf(outfile, "%s_width %d\n",filename,fImageWidth);
+  fprintf(outfile, "%s_height %d\n",filename,fImageLength);
+  fprintf(outfile, "static char %s_bits[] {\n",filename);
+
+
+  for (int r = 0; r < fImageLength; r++)
+  {
+    for(int col=0; col < numChars; col++)
+    {
+      // printf flags must precede the field width: "%#4x", not "%4#x".
+      fprintf(outfile, "%#4x", fMapData[r][col]);
+
+      // One separator between elements, none after the last one.
+      if (!(r == fImageLength-1 && col == numChars-1))
+        fprintf(outfile, ", ");
+
+      if (((r*numChars + col) % 15)==0)
+        fprintf(outfile,"\n");
+    }
+
+  }
+  fprintf(outfile,"}\n");
+
+  fclose(outfile);
+
+  return VALID;
+}
+
+
+
+short int BitMap::grayScale(Point & ul, Point & lr)
+/*--------------------------------------------------------------
+Primary Purpose: Fraction of set pixels in a box, scaled to 0-255
+Arguments: ul, lr - upper left and lower right corner (inclusive)
+Return Value: (pixelsInRegion(ul, lr) / box area) * 255 truncated
+ to short int; 0 when the box area is zero
+Effects: Prints a message and asserts when more pixels are counted
+ than the box can hold.
+---------------------------------------------------------------*/
+{
+  int numPixels = pixelsInRegion( ul, lr);
+  int area = (lr.x() - ul.x()+1) * (lr.y() - ul.y()+1);
+  if (area < numPixels) {
+    printf("Uh oh! Area = %d and pixels = %d\n", area, numPixels);
+    assert(area >= numPixels);
+  }
+  // A zero area would give 0/0 below; converting NaN to short int is
+  // undefined, so report an empty box as all white.
+  if (area == 0)
+    return 0;
+
+  short int gscale =(short int)(((float)numPixels/area) * 255);
+
+  return gscale;
+};
+
+
+
+int BitMap::pixelsInRegion( Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose: Count the pixels found by pixelsBetween in a box
+Arguments: ul, lr - upper left and lower right corner (inclusive)
+Return Value: sum over rows ul.y()..lr.y() of
+ pixelsBetween(row, ul.x(), lr.x()); 0 when ul > lr
+Constraints: asserts ul >= Point(0,0) and
+ lr <= Point(fImageWidth, fImageLength); prints "problem" first
+ when the second condition fails.
+---------------------------------------------------------------*/
+{
+  assert (ul >= Point(0,0)); /* relies on the Point comparison operators */
+  if (!(lr <= Point(fImageWidth, fImageLength)))
+    printf("problem\n");
+  assert (lr <= Point(fImageWidth, fImageLength));
+
+  if(ul > lr)
+    return 0;
+
+  int left = ul.x();
+  int top = ul.y();
+  int right = lr.x();
+  int bottom = lr.y();
+  int total = 0;
+
+  for(int line = top; line <= bottom; line++)
+    total += pixelsBetween(row(line), left, right);
+
+  return total;
+};
+
+
+void testBitMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read a BitMap from filename and print it
+Effects: Every byte of every row is printed with byteprint, one
+ text line per image row.  The BitMap is released afterwards.
+---------------------------------------------------------------*/
+{
+  BitMap * m = new BitMap;
+
+  m->readMap(filename);
+  int numChars = (m->imageWidth() / 8 )+ 1;
+
+  for (int r = 0; r < m->imageLength(); r++){
+    for (int c =0; c < numChars; c++) byteprint(m->row(r)[c]);
+    printf( "\n");
+  }
+
+  delete m;
+};
+
+
+// Print the eight bits of d to stdout, high bit first:
+// "X" for a set bit, " " for a clear bit.
+void byteprint(char d)
+{
+  int shift = 7;
+  while (shift >= 0)
+  {
+    const int bitSet = (d>>shift)&1;
+    printf("%s", bitSet ? "X" : " ");
+    --shift;
+  }
+};
+
+// Print bit x of d to cout, counting from the high bit (x == 0):
+// "X" for a set bit, " " for a clear bit.
+void bitprint(char d, int x)
+{
+  const int bitSet = (d>>(7-x))&1;
+  cout << (bitSet ? "X" : " ");
+};
+
+
+class Page;
+
+void testPixelsInRegion(BitMap * bmap, RLEMap * rmap)
+/*--------------------------------------------------------------
+Primary Purpose: Print BitMap and RLEMap pixelsInRegion side by side
+Arguments: bmap, rmap - the two maps to compare
+Effects: For the whole page and for five fixed boxes prints the
+ count returned by each map.  Nothing is asserted; the numbers
+ are meant to be compared by eye.
+---------------------------------------------------------------*/
+{
+
+  int bmapcnt, rmapcnt;
+
+  cout << "Testing pixelsInRegion " << endl;
+
+  bmapcnt = bmap->pixelsInRegion(Point(0,0),
+                                 Point(bmap->imageWidth()-1, bmap->imageLength()-1));
+
+  rmapcnt = rmap->pixelsInRegion(Point(0,0),
+                                 Point(rmap->imageWidth()-1, rmap->imageLength()-1));
+
+  cout << "For whole page:";
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start on char edge end on edge (8,8) (16,21) ";
+  bmapcnt = bmap->pixelsInRegion(Point(8,8), Point(16,21));
+  rmapcnt = rmap->pixelsInRegion(Point(8,8), Point(16,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start on char edge, end mid char (0,8) (50,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(0,8), Point(50,21));
+  rmapcnt = rmap->pixelsInRegion(Point(0,8), Point(50,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start mid char, end on edge (2,8) (7,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(7,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(7,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  cout << "Start mid char, end mid char (2,8) (9,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(9,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(9,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+  // The box now matches the printed label and the rows used by the
+  // other cases; it previously started at row 4.
+  cout << "Start and, end same char (2,8) (4,21)";
+  bmapcnt = bmap->pixelsInRegion(Point(2,8), Point(4,21));
+  rmapcnt = rmap->pixelsInRegion(Point(2,8), Point(4,21));
+  cout <<" Bitmap-" << bmapcnt << " RLEMap-" << rmapcnt << endl;
+
+};
+
+MapStatus BitMap::rotateMap(Angle angle)
+/*
+ Thanks to Clint Staley and S. Jacques @calpoly
+ for this bitmap rotation alg.
+
+ copied and slightly modified since it was a pain getting
+ the RLE rotate to work and I think this might be decently
+ fast -AR
+
+ Primary Purpose: Replace the map by a copy rotated by angle degrees
+ Return Value: the (unchanged) status of the map
+ Effects: Allocates a zeroed map large enough for the rotated
+ image, and for every destination pixel looks up the source
+ pixel it maps back to (about the image centres).  Destination
+ pixels whose source falls outside the old image stay 0.  The
+ old rows are released and fMapData, fImageLength and
+ fImageWidth are replaced.
+*/
+{
+  int nx,ny,newheight,newwidth,oldheight,oldwidth,i,j,halfnewheight,halfnewwidth;
+  int halfoldheight,halfoldwidth;
+  double radians;
+  double cosval,sinval;
+  uchar** newMapData;
+
+  fprintf(stderr,"Rotating Image %lf Degrees\n",angle);
+  radians = -(angle) / ((180 / 3.142));
+  cosval = cos(radians);
+  sinval = sin(radians);
+
+  oldheight = fImageLength;
+  oldwidth = fImageWidth;
+
+  newwidth = (int)abs((int)(oldwidth*cosval)) + (int)abs((int)(oldheight*sinval));
+  newheight = (int)abs((int)(-oldwidth*sinval)) + (int)abs((int)(oldheight*cosval));
+
+  halfnewheight = newheight / 2;
+  halfnewwidth = newwidth / 2;
+  halfoldwidth = oldwidth /2;
+  halfoldheight = oldheight /2 ;
+
+  // "new (uchar*) [n]" is not a valid array new-expression.
+  newMapData = new uchar*[newheight];
+
+  int num_chars = (newwidth / 8) + 1;
+
+  for (int row = 0; row < newheight; ++row)
+  {
+    newMapData[row] = new uchar[num_chars];
+    for (int k = 0; k < num_chars; k++)
+      newMapData[row][k] = '\0';
+  }
+
+  last_status = 0.0;
+  for(i=0;i < newheight;i++)
+  {
+    if(ENABLE_USER_INTERFACE)
+      set_rotation_status((int)i, (int)newheight);
+    for(j=0;j < newwidth;j++)
+    {
+      nx =(int)( (j - halfnewwidth)*cosval + (i-halfnewheight)*sinval);
+      ny =(int)( -((j - halfnewwidth)*sinval) + (i - halfnewheight)*cosval);
+      nx = nx + halfoldwidth;
+      ny = ny + halfoldheight;
+      // Row 0 and column 0 are valid source positions (the test used
+      // to be "> 0").  The new map starts out all zero, so only set
+      // pixels have to be written.
+      if ((nx >= 0) && (ny >= 0) && (nx < oldwidth) && (ny < oldheight)
+          && get_pixel_value(fMapData, ny, nx))
+        set_pixel_value(newMapData, i, j, 1);
+    }
+  }
+  if(ENABLE_USER_INTERFACE)
+    set_status("Rotating Image: Done");
+  last_status = 0.0;
+
+/* free up the old storage; it was allocated with new[], so it must
+   be released with delete[] and not with free() */
+  for(i = 0; i < fImageLength; i++)
+  {
+    delete [] fMapData[i];
+  }
+  delete [] fMapData;
+
+/* assign pointer, etc to the new stuff */
+  fMapData = newMapData;
+  fImageLength = newheight;
+  fImageWidth = newwidth;
+
+  // The function is declared MapStatus, so it has to return one.
+  return fStatus;
+}
+
+/*
+
+int set_pixel_value(uchar** new_data, int y, int x, int new_val)
+{
+ new_data[y][x/8] |= (uchar)(new_val << (7-(x%8)));
+}
+
+int get_pixel_value(uchar** data, int y, int x)
+{
+ if((data[y][x/8]) & (1 << (7 - (x%8))))
+ return 1;
+ else
+ return 0;
+}
+
+*/
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/BitMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,70 @@
+#ifndef _BITMAP_H
+#define _BITMAP_H
+#include "system.h"
+
+/** A BitMap representation stores image in an array of unsigned character
+ arrays. There is one uchar array per row. Each bit of the uchar
+ represents a pixel.
+***/
+
+
+class RLEMap;
+class Point;
+class BitMap;
+
+extern void byteprint(char d);
+extern void bitprint(char d, int x);
+
+// One uchar array per image row, eight pixels per uchar.
+// NOTE(review): the class owns fMapData but declares no copy
+// constructor or assignment operator, so copying a BitMap would make
+// two objects release the same rows - confirm no caller copies one.
+class BitMap{
+ public:
+  friend MapStatus convertMap(RLEMap *,BitMap*,Point,Point);
+  BitMap();
+
+  ~BitMap();
+
+  // Read/write access to the number of rows
+  int & imageLength()
+    {return fImageLength;};
+
+  // Read/write access to the number of pixels per row
+  int & imageWidth()
+    {return fImageWidth;};
+
+  // Read/write access to the map status
+  MapStatus & status()
+    {return fStatus; };
+
+  uchar * row(int i); // returns a pointer to row i
+
+
+  // I/O operations. readMap and writeMap are from/to 2 level TIFF files
+
+  MapStatus readMap(char * filename);
+  MapStatus writeMap(char * filename); // not done
+
+  // Write out BitMap format for TCL/TK display
+  MapStatus writeTclMap(char * filename, Point & ul, Point & lr, int scaledown);
+  // Detect skew Angle
+  Angle skewAngle();
+
+  //Rotate the map designated angle.
+  MapStatus rotateMap(Angle angl);
+
+
+  // Return the share of black pixels in the box,
+  // scaled to 255 0 = all white 255=all black
+  short int grayScale(Point & ul, Point & lr);
+
+  // number of black pixels in bounding box
+  // (a member declared inside its class must not be qualified
+  // with "BitMap::")
+  int pixelsInRegion(Point ul, Point lr);
+
+private:
+
+
+  int fImageWidth;     // pixels per row
+  int fImageLength;    // number of rows
+  MapStatus fStatus;
+  uchar ** fMapData;   // fImageLength row pointers, NULL when empty
+
+} ;
+
+
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Boolean.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,25 @@
+// Copyright David Wolfe, Rob Meyers, Doug Young, Edouard Serban-Schreiber
+// DO NOT DISTRIBUTE WITHOUT PERMISSION OF AN AUTHOR
+// See README
+
+#ifndef BOOLEAN_H
+#define BOOLEAN_H
+
+
+#include "assert.h"
+// Boolean and Direction are plain ints.
+typedef int Boolean;
+typedef Boolean Direction;
+
+#define TRUE 1
+#define FALSE 0
+// true and false are keywords in C++ (the project already uses the
+// bool type), and a keyword must not be redefined as a macro once
+// standard headers are included.  Supply them only when this header
+// is compiled as C.
+#ifndef __cplusplus
+#define true 1
+#define false 0
+#endif
+
+
+// Direction values and the first-play flag
+#define HORIZONTAL true
+#define VERTICAL false
+#define FIRSTPLAY true
+
+// In C++ the built-in true/false convert to Boolean (int) as 1/0.
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Component.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,476 @@
+#include "system.h"
+#include "Component.h"
+#include "BitMap.h"
+#include <assert.h>
+#include "list.h"
+#include "tcl_interface.h"
+
+/*** Component.cc
+ Member functions for Components
+ Component functions defined in Component.h
+ rev 12/9/95 KM
+***/
+
+
+// Creates an empty list of components.
+Components::Components() : List() { }
+
+
+// Destructor: deletes every non-NULL item as a Component, then calls
+// Remove() until the list reports it is empty.
+// NOTE(review): the items are already deleted when Remove() runs;
+// this presumes Remove() only unlinks elements - confirm in list.cc.
+Components::~Components()
+{
+ for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next) {
+ if (ptr->item != NULL)
+ delete (Component *) (ptr->item);
+ }
+ while(!IsEmpty())
+ Remove();
+
+}
+
+
+int Component::AddToComponent(ListElement* intrvl, RLEMap* rlemap)
+/*--------------------------------------------------------------
+Primary Purpose: Grow this component from one starting interval
+Arguments: intrvl - list element holding the starting RLEPair;
+ must not be NULL
+ rlemap - the map the interval belongs to
+Return Value: number of intervals consumed
+Effects: The starting interval is unlinked from its row and put on
+ a work queue.  For each queued interval the rows directly above
+ and below are searched for intervals that overlap it (allowing
+ MinHorizSeparation on the right and one pixel on the left);
+ those are unlinked from the map and queued too.  ful and flr
+ are widened to cover every processed interval, and each
+ processed interval and its RLEPair are deleted.
+ Rows outside the map are skipped.
+ NOTE(review): assumes rlemap->fMapData holds
+ rlemap->imageLength() rows - confirm in RLEMap.
+---------------------------------------------------------------*/
+{
+  assert(intrvl != NULL);
+  List* list = new List(); //make a new queue
+  ListElement* current;
+  ListElement* nextelt;
+  int counter = 0;
+  int currentRow;
+  int neighborRow;
+
+  // Unlink the starting interval from its row in the map
+  if (intrvl->previous != NULL)
+    intrvl->previous->next = intrvl->next;
+  else rlemap->fMapData[((RLEPair *) intrvl->item)->row]->first = intrvl->next;
+  if (intrvl->next != NULL)
+    intrvl->next->previous = intrvl->previous;
+  list->first = intrvl; //put starting interval on queue
+  list->last = intrvl;
+  list->length = 1;
+  intrvl->next = NULL;
+  intrvl->previous = NULL;
+
+  while ((intrvl = list->first) != NULL) //Take an interval off queue
+  {
+    currentRow = ((RLEPair *) intrvl->item)->row;
+    for (int i=-1; i < 2; i+=2) {
+      neighborRow = currentRow+i;
+      // The first and last row have no neighbour on one side;
+      // indexing fMapData there would run off the array.
+      if (neighborRow < 0 || neighborRow >= rlemap->imageLength())
+        continue;
+
+      current = rlemap->fMapData[neighborRow]->first;
+      while ((current != NULL)
+             && (((RLEPair *) current->item)->start <=
+                 ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
+
+        if ((((RLEPair *) current->item)->end
+             >= ((RLEPair *) intrvl->item)->start-1)
+            && (((RLEPair *) current->item)->start <=
+                ((RLEPair *) intrvl->item)->end+MinHorizSeparation)) {
+
+          if (current->previous != NULL)
+            current->previous->next = current->next; //take off RLEMap
+          else
+            rlemap->fMapData[neighborRow]->first = current->next;
+          if (current->next != NULL)
+            current->next->previous = current->previous;
+          nextelt = current->next;
+          list->last->next = current; //add to queue
+          current->previous = list->last;
+          list->last = current;
+          current->next = NULL;
+          current = nextelt;
+          list->length++;
+        } else
+          current = current->next;
+      }
+    }
+
+    // Widen the bounding box; -1 marks a corner that is not set yet
+    if ((((RLEPair *) intrvl->item)->start < ful.x()) || (ful.x()==-1)) {
+      ful.x() = ((RLEPair *) intrvl->item)->start;
+    }
+    if ((((RLEPair *) intrvl->item)->end > flr.x()) || (flr.x()==-1)) {
+      flr.x() = ((RLEPair *) intrvl->item)->end;
+    }
+    if ((((RLEPair *) intrvl->item)->row < ful.y()) || (ful.y()==-1)) {
+      ful.y() = ((RLEPair *) intrvl->item)->row;
+    }
+    if ((((RLEPair *) intrvl->item)->row > flr.y()) || (flr.y()==-1)) {
+      flr.y() = ((RLEPair *) intrvl->item)->row;
+    }
+    list->first = intrvl->next;
+    if (intrvl->next != NULL)
+      intrvl->next->previous = NULL;
+    delete ((RLEPair *) (intrvl->item));
+    delete intrvl; //so the letter O won't go forever
+    counter++;
+    list->length--;
+  }
+
+  delete list;
+  return counter;
+
+}
+
+void Component::setProperties(BitMap * map) // was BitMap
+/*--------------------------------------------------------------
+Primary Purpose: Set the property vector for this component
+Arguments: The BitMap to which this component belongs
+Return Value: none
+Effects: The bounding box is divided into a NumHorizDiv by
+ NumVertDiv grid (see setSectionFlags) and the gray scale of each
+ non-empty section is stored row by row starting at property 0.
+ A gray scale is 0 for all white and 255 for all black.
+ (The fixed indices below presume the grid fills properties
+ 0-24, i.e. a 5 by 5 grid - confirm NumHorizDiv/NumVertDiv.)
+ Property 25 is the gray scale of a strip across the top of the
+ box, height/(NumVertDiv*2) rows below the top row.
+ Property 26 is the gray scale of the same strip at the bottom.
+ Property 27 is the width/height ratio:
+ (width/height / 2) * 255 if width <= height
+ (1 - (height/width) / 2) * 255 if width > height
+ so near 0 is very tall and thin, about 127 is square and
+ near 255 is very wide.
+ Property 28 is 255 if some row of the box other than the last
+ contains no pixels (vertically disjoint, like i or ;), else 0.
+ Property 29 is set to 0.
+ fnumBits is set to the number of black pixels in the box.
+Constraints: The data fields ful and flr must already be set
+ before calling this function. These fields specify a bounding
+ box for the character within the BitMap.
+Rev: 12/9 KM
+---------------------------------------------------------------*/
+{
+ if (ful > flr)
+ printf("Problem\n");
+ assert (ful <= flr);
+ short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+ short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+ float height, width;
+ int propNum;
+ float darkest = 0; // not used below
+ float lightest; // assigned below but never read
+ int darkrow = 0; // not used below
+ int lightrow = 0; // not used below
+
+ Point sectionLr, sectionUl;
+ // Set Number of bits
+ fnumBits = map->pixelsInRegion(ful, flr);
+
+ // Gray scale of each grid section, stored row by row
+ setSectionFlags(hflag, vflag);
+ for (int r = 0; r < NumVertDiv; r++)
+ for (int c = 0; c < NumHorizDiv; c++)
+ {
+ propNum = (r * NumHorizDiv) + c;
+ sectionUl = Point(hflag[c], vflag[r]);
+ sectionLr = Point(hflag[c+1]-1, vflag[r+1]-1);
+ // a section that fails this test keeps its previous value
+ if (sectionUl <= sectionLr)
+ fproperty[propNum] = map->grayScale(sectionUl, sectionLr);
+ assert(fproperty[propNum] >= 0 && fproperty[propNum] < 256);
+ }
+
+ // width and height of the bounding box, used by the strips
+ // and by the ratio below
+ width = flr.x() - ful.x() + 1;
+ height = flr.y() - ful.y() + 1;
+
+ // Grayscale across the top - Indicator of top bar
+ sectionUl = Point(ful.x(), ful.y());
+ sectionLr = Point(flr.x(), ful.y() + (int)(height/(NumVertDiv*2)));
+ fproperty[25] = map->grayScale(sectionUl, sectionLr);
+
+ // Grayscale across bottom - Indicator of a foot for l opposed to 1
+ sectionUl = Point(ful.x(), flr.y() - (int)(height/(NumVertDiv*2)));
+ sectionLr = Point(flr.x(), flr.y());
+ fproperty[26] = map->grayScale(sectionUl, sectionLr);
+
+ // width/height ratio: near 0 is very thin, about 127 is square,
+ // near 255 is very wide
+ float hdivw = (float)height/width;
+ float wdivh = (float) width/height;
+ if (width > height)
+ fproperty[27]= (short int) ((1- hdivw/2)*255);
+ else
+ fproperty[27] = (short int)((wdivh/2)* 255);
+
+ // is this a disjoint character like i or j 255 = yes 0 = no
+ fproperty[28]=0;
+ lightest = width;
+ for(int row = ful.y(); row < flr.y(); row++)
+ {
+ int pixelsThisRow = pixelsBetween(map->row(row), ful.x(), flr.x());
+ if(!(pixelsThisRow))
+ fproperty[28]=255;
+ }
+
+ fproperty[29]= 0;
+ // every property must fit the 0-255 scale
+ for(int p = 0; p < numProperties; p++)
+ assert(fproperty[p] >= 0 && fproperty[p] < 256);
+
+
+}
+
+void Component::setSectionFlags(short int hflag[], short int vflag[])
+/*--------------------------------------------------------------
+Primary Purpose: Breaks this component into a grid NumHorizDiv X NumVertDiv
+ for determining grayscale property vectors.
+Arguments: hflag[] receives NumHorizDiv + 1 entries, vflag[]
+ receives NumVertDiv + 1 entries.
+Effects: hflag[i] is the first column of horizontal division i and
+ hflag[NumHorizDiv] is the column just past the bounding box.
+ Pixels that do not divide evenly are handed out one each to the
+ leading divisions.  vflag[] is filled the same way with rows.
+ Example ful = (0,25) flr = (52,46) NumHorizDiv = NumVertDiv = 5
+ hflag[6] = { 0,11,22,33,43,53 }
+ vflag[6] = {25,30,35,39,43,47 }
+Constraints: ful and flr must be set to mark the bounding box before
+ calling this procedure.
+Rev: 10/27 KM
+---------------------------------------------------------------*/
+{
+  const int left = ful.x();
+  const int top = ful.y();
+  const int right = flr.x();
+  const int bottom = flr.y();
+
+  const int boxWidth = right - left + 1;
+  const int boxHeight = bottom - top + 1;
+
+  const int colStep = boxWidth/NumHorizDiv;
+  const int rowStep = boxHeight/NumVertDiv;
+
+  // pixels left over after an even split
+  const int colExtra = boxWidth - colStep*NumHorizDiv;
+  const int rowExtra = boxHeight - rowStep*NumVertDiv;
+
+  int c;
+  for (c = 0; c < NumHorizDiv; c++)
+  {
+    // shift by the number of extra pixels already handed out
+    const int shift = (c < colExtra) ? c : colExtra;
+    hflag[c] = left + (c*colStep) + shift;
+  }
+  hflag[c] = right + 1; // Closes off last division
+
+  int r;
+  for (r = 0; r < NumVertDiv; r++)
+  {
+    const int shift = (r < rowExtra) ? r : rowExtra;
+    vflag[r] = top + (r*rowStep) + shift;
+  }
+  vflag[r] = bottom + 1;
+}
+
+
+Distance Component::distance(Component * comp)
+/*--------------------------------------------------------------
+Primary Purpose: Heuristic distance between two property vectors
+Arguments: comp - the component to compare with
+Return Value: sum over all numProperties properties of
+ weight * (difference of the two property values) squared,
+ where weight is 3 for properties 27 and 28 and 1 otherwise
+Constraints: setProperties must have been run on both components
+
+Rev: 11/1 KM
+---------------------------------------------------------------*/
+{
+  Property * mine = fproperty;
+  Property * theirs = comp->properties();
+  Distance total=0;
+
+  for(int p= 0; p < numProperties; p++)
+  {
+    const int weight = (p == 27 || p == 28) ? 3 : 1;
+    const int dif = (mine[p] - theirs[p]);
+    total += weight*dif*dif;
+  }
+
+  return total;
+}
+
+
+
+
+
+
+
+
+
+// Print the first size entries of vector to cout, each followed by
+// one space, then end the line.
+void printVector(short int vector[], int size)
+{
+  int idx = 0;
+  while (idx < size)
+  {
+    cout << vector[idx] << " " ;
+    ++idx;
+  }
+  cout << endl;
+}
+
+
+// Manual test: prints the grid dividers computed by setSectionFlags,
+// then runs setProperties on c and prints the first
+// NumHorizDiv*NumVertDiv + 1 properties and the distance of c from
+// itself.
+void testProperties(Component* c, BitMap * map)
+{
+  short int hflag[NumHorizDiv + 1]; // flags horizontal section dividers
+  short int vflag[NumVertDiv + 1]; // flags vertical section dividers
+
+  cout << "First test subDivisions " << endl;
+  c->setSectionFlags(hflag, vflag);
+  cout << "Horizontal flags" <<endl;
+  printVector(hflag, NumHorizDiv + 1);
+  cout << "Vertical flags" <<endl;
+  // vflag holds NumVertDiv + 1 entries, not NumHorizDiv + 1
+  printVector(vflag, NumVertDiv + 1);
+
+  cout << "Now lets look at the properties " << endl;
+  // setSectionFlags will actually get called again within setProperties
+  c->setProperties(map);
+  printVector(c->properties(), NumHorizDiv*NumVertDiv + 1);
+  cout << endl << " The distance of this component from itself: " << " ";
+  cout << c->distance(c) << endl;
+
+}
+
+// Passes the four corner coordinates through scale() and asks the Tcl
+// side to draw a blue rectangle outline with them on the work space
+// canvas.
+void Component::display_bounding_box()
+{
+  int left = ul().x();
+  int top = ul().y();
+  int right = lr().x();
+  int bottom = lr().y();
+
+  scale(left);
+  scale(top);
+  scale(right);
+  scale(bottom);
+
+  docommand(".main_window.display.work_space create rectangle %d %d %d %d -outline blue -tags IMAGE_TAG", left, top, right, bottom);
+}
+
+Distance Component::recognize(Component * learnedchars)
+/*--------------------------------------------------------------
+Primary Purpose: Find the closest learned character
+ (This is out of date. The current recognize takes Components *.)
+Arguments: learnedchars - array of 256 components indexed by
+ character code; entries with confid() == 0 are skipped
+Return Value: distance to the best match, or
+ (256*256)*numProperties when no entry was closer than that
+Effects: Sets fasciiId to the index of the best match.  fasciiId is
+ left unchanged when nothing matched.
+---------------------------------------------------------------*/
+{
+  Distance d, nextd;
+  char id = 0;
+  bool found = false; // id is only meaningful once this is set
+
+  d = (256*256)*numProperties; // this is the biggest distance
+
+  for (int i = 0; i < 256; i++)
+  {
+    if(learnedchars[i].confid() != 0)
+    {
+      nextd = distance(&learnedchars[i]);
+      if (nextd < d)
+      {
+        d = nextd;
+        id = (char) i;
+        found = true;
+      }
+    }
+
+  }
+
+  // Without a match id was never assigned; storing it would copy an
+  // indeterminate value.
+  if (found)
+    fasciiId = id;
+  return d;
+}
+
+
+Distance Component::recognize(Components * learnedgroups, bool allGroups)
+/*--------------------------------------------------------------
+Primary Purpose: Find the closest learned component by group
+Arguments: learnedgroups - array of NumCharGroups lists of learned
+ components
+ allGroups - when set, every group is searched even after
+ the confidence has reached ConfidenceThreshold
+Return Value: distance to the best match, or 65536*numProperties
+ when no learned component was closer than that
+Effects: Groups are searched starting at this component's
+ charGroup and wrapping around.  Group 4 is only searched when
+ charGroup is 4, and then no other group is searched.  After each
+ group fconfid is recomputed from the best distance so far.
+ fasciiId and ffontId are set from the best match; they are left
+ unchanged when nothing matched.
+---------------------------------------------------------------*/
+{
+  Distance d, worstDistance,nextd;
+  char id = 0;
+  short int fontid = 0;
+  bool found = false; // id and fontid are only meaningful once set
+  float tempd;
+  worstDistance = 150000;
+
+  d = (65536)*numProperties; // this is the biggest distance
+
+
+  fconfid = 0;
+
+  for(int g = 0; g < NumCharGroups &&
+      ((fconfid < ConfidenceThreshold) || allGroups); g++)
+  {
+    int offset = (charGroup+g) % NumCharGroups;
+    if (offset == 4 && charGroup != 4) continue;
+    for (ListElement* ptr = learnedgroups[offset].first; ptr != NULL;
+         ptr = ptr->next)
+    {
+      Component * item = (Component *) ptr->item;
+      nextd = distance(item);
+      if (nextd < d)
+      {
+        d = nextd;
+        id = item->fasciiId;
+        fontid = item->ffontId;
+        found = true;
+      }
+
+    }
+
+    // Clamp so that the confidence below does not go negative
+    if (d >= worstDistance)
+      tempd = worstDistance - 1;
+    else tempd = d;
+
+    fconfid = (unsigned short int)
+      (255 - (tempd/worstDistance)*256);
+    if(charGroup == 4) break; // dont check other groups for floaters
+  }
+
+  // Without a match id and fontid were never assigned; storing them
+  // would copy indeterminate values.
+  if (found)
+  {
+    fasciiId = id;
+    ffontId = fontid;
+  }
+
+  return d;
+}
+
+
+
+// Moves the top of the bounding box down to the first row that
+// contains pixels and the bottom up to the last such row, looking
+// only at columns ful.x()..flr.x().
+// Returns 1 if either scan found a row with pixels, otherwise 0.
+int Component::vertShrink(BitMap * bitmap)
+{
+  int found = 0;
+
+  // scan downwards from the top edge
+  for (int top = ful.y(); top < flr.y(); top++)
+  {
+    if (!pixelsBetween(bitmap->row(top), ful.x(), flr.x()))
+      continue;
+    ful.y() = top;
+    found = 1;
+    break;
+  }
+
+  // scan upwards from the bottom edge
+  for (int bottom = flr.y(); bottom > ful.y(); bottom--)
+  {
+    if (!pixelsBetween(bitmap->row(bottom), ful.x(), flr.x()))
+      continue;
+    flr.y() = bottom;
+    found = 1;
+    break;
+  }
+
+  return found;
+ }
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Component.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,122 @@
+#ifndef _COMPONENT_H
+#define _COMPONENT_H
+#include "system.h"
+#include "list.h"
+#include "RLEMap.h"
+
+class BitMap;
+
+// A List whose items are Component objects; its destructor deletes
+// the items as Component *.
+class Components: public List
+{
+ public:
+ Components();
+ ~Components();
+
+
+};
+
+// One connected component of a page: a bounding box plus the
+// property vector used to recognise it.
+// NOTE(review): the class owns fproperty but declares no copy
+// constructor or assignment operator, so copying a Component would
+// make two objects release the same array - confirm no caller copies.
+class Component{
+ public:
+
+  // Component with the given bounding box and all properties zero
+  Component(Point ul, Point lr)
+    :ful(ul), flr(lr), fnumBits(0),fasciiId(0), fconfid(0),
+     charGroup(0), ffontId(0)
+    { fproperty = new Property[numProperties];
+      for (int i = 0; i < numProperties; i++)
+        fproperty[i] = 0;};
+
+  // Component with both corners built from -1 (not set yet) and all
+  // properties zero
+  Component()
+    :ful(-1), flr(-1), fnumBits(0),fasciiId(0),fconfid(0),
+     charGroup(0), ffontId(0)
+    { fproperty = new Property[numProperties];
+      for (int i = 0; i < numProperties; i++)
+        fproperty[i] = 0;};
+
+  // fproperty comes from new Property[...], so it must be released
+  // with the array form of delete.
+  ~Component(){
+    if (fproperty != NULL) delete [] fproperty;}
+
+
+  inline Point & ul() {return ful;};
+  inline Point & lr() {return flr;};
+
+  inline double width() {return (double)(flr.x() - ful.x() + 1); };
+  inline double height() {return (double)(flr.y() - ful.y() + 1); };
+  inline int & numBits(){return fnumBits;};
+  short int charGroup; // values 0-3: 0=acemno, 1=gpqy, 2=dfhikl, 3=j([}
+  int vertShrink(BitMap * bitmap);
+  int AddToComponent(ListElement* intrvl, RLEMap* rlemap);
+  /*--------------------------------------------------------------
+    Primary Purpose: Extend boundaries of component by connected intervals.
+    Arguments: intrvl is interval to start from
+    Return Value: Number of intervals added.
+    Effects: Updates component's LR and UL. Deletes all added intervals.
+    ----------------------------------------------------------------*/
+
+  // Access entire property vector
+  inline Property * properties()
+    {return fproperty;}
+
+  // Set or read one value in property vector
+  inline Property & property(int p) // Possibly use overload []
+    {return fproperty[p];};
+
+  // Set or read ascii id;
+  inline Character & asciiId(){ return fasciiId;};
+
+  // Set or read font id;
+  inline short int & fontId() {return ffontId;};
+
+  // Set or read confidence
+  inline Confidence & confid(){ return fconfid; };
+  // Determine heuristic distance between this and comp
+  Distance distance(Component * comp);
+
+  // Use the map to set the property vector for
+  // this component
+
+  void setProperties(RLEMap * map);
+  void setProperties(BitMap * map);
+  void setSectionFlags(short int hflag[], short int vflag[]);
+
+  void display_bounding_box();
+
+  Distance recognize(Component * learnedchars);
+  // find best match in learned characters. Set ascii value
+  // and return distance.
+
+  Distance recognize(Components * learnedgroups, bool allGroups=FALSE);
+  // find best match from learned character groups. Set ascii value
+  // and return distance.
+
+  Property * fproperty; // numProperties values, owned by this object
+ private:
+  Point ful;            // upper left corner of the bounding box
+  Point flr;            // lower right corner of the bounding box
+  int fnumBits;         // number of black pixels in the box
+
+
+  Character fasciiId;
+  short int ffontId;
+  Confidence fconfid;
+};
+
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/LineMarker.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,19 @@
+#ifndef _LINEMARKER_H
+#define _LINEMARKER_H
+/** LineMarker.h
+ There is a LineMarker array member of Page, which records the
+ starting and ending row of each line of text.
+**/
+
+class LineMarker {
+// One entry per line of text in a Page: the row on which the line
+// starts and the row on which it ends.
+
+  public:
+    int fstartrow;      // starting row of the text line
+    int fendrow;        // ending row of the text line
+
+    friend class Page;  // Page keeps an array of these markers
+};
+
+#endif
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Makefile Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,115 @@
+#Makefile for older OCRchie
+
+CC = g++
+
+DEBUG = -fpermissive -g -v
+OPTIMIZE = -O
+CCFLAGS = $(DEBUG)
+EXECUTABLE = ocrchie
+
+# TCL root directory
+TCLBASE = /usr/sww/tcl
+TKBASE = /usr/sww/tcl
+# X11 root directory
+X11BASE = /usr/sww/X11
+
+# math library directory
+MATHLIB = /lib/pa1.1
+
+TIFFLIB = /usr/sww/lib
+
+# Place where object files need to be stored
+ODIR = /tmp/rjf-ocrchie/
+
+# That's it. Now just do a gmake.
+
+INCLUDE = -I${TCLBASE}/include -I${X11BASE}/include
+LIBDIRS = -L${TCLBASE}/lib -L${X11BASE}/lib -L${MATHLIB} -L${TIFFLIB} -L${TKBASE}
+#LIBDIRS = -L${TCLBASE}/lib -L${X11BASE}/lib -L${MATHLIB}
+LIBS = ${LIBDIRS} -ltcl8.0 -lX11 -lm -ldl -lnsl -lsocket -ltiff -ltk8.0
+# Headers most objects depend on (Component.h was listed twice).
+HEADERS = system.h list.h Component.h \
+          BitMap.h RLEPair.h RLEMap.h LineMarker.h Page.h Point.h \
+          convertMap.h Word.h tcl_interface.h
+
+
+OFILES = $(ODIR)main.o $(ODIR)project.o $(ODIR)histogram.o \
+ $(ODIR)get_skew.o $(ODIR)tcl_interface.o \
+ $(ODIR)RLEMap.o $(ODIR)RLEPair.o $(ODIR)list.o \
+ $(ODIR)convertMap.o $(ODIR)Component.o $(ODIR)Page.o\
+ $(ODIR)BitMap.o $(ODIR)system.o $(ODIR)testocr.o \
+ $(ODIR)Word.o $(ODIR)learn.o $(ODIR)status_message.o \
+ $(ODIR)link.o
+
+#The Executable project
+ocrchie: $(OFILES)
+ $(CC) -fpermissive -g -v -o $(EXECUTABLE) ${INCLUDE} $(OFILES) $(LIBS)
+# The .o files compiled in ODIR because of space considerations
+
+$(ODIR)link.o: link.cc link.h
+ $(CC) $(CCFLAGS) ${INCLUDE} -o $(ODIR)link.o -c link.cc
+$(ODIR)status_message.o: status_message.cc status_message.h
+ $(CC) $(CCFLAGS) -o $(ODIR)status_message.o -c status_message.cc
+
+$(ODIR)system.o: system.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)system.o -c system.cc
+
+$(ODIR)Page.o: $(HEADERS) Page.cc RLEMap.h RLEPair.h BitMap.h
+ $(CC) $(CCFLAGS) -o $(ODIR)Page.o -c Page.cc
+
+$(ODIR)RLEMap.o: $(HEADERS) system.cc\
+ RLEMap.cc RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEMap.o -c RLEMap.cc
+
+$(ODIR)BitMap.o: $(HEADERS) system.cc BitMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)BitMap.o -c BitMap.cc
+
+$(ODIR)RLEPair.o: $(HEADERS) RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEPair.o -c RLEPair.cc
+
+$(ODIR)Component.o: $(HEADERS) Component.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Component.o -c Component.cc
+
+$(ODIR)list.o: list.h list.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)list.o -c list.cc
+
+# Word.cc is the file this rule compiles, so it must be a prerequisite;
+# otherwise editing Word.cc does not trigger a rebuild.
+$(ODIR)Word.o: Word.cc Word.h Component.h list.h list.cc
+	$(CC) $(CCFLAGS) -o $(ODIR)Word.o -c Word.cc
+
+$(ODIR)convertMap.o: $(HEADERS) convertMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)convertMap.o -c convertMap.cc
+
+$(ODIR)learn.o: system.h learn.h learn.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)learn.o -c learn.cc
+
+$(ODIR)main.o: main.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)main.o -c ${INCLUDE} main.cc
+
+$(ODIR)testocr.o: testocr.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)testocr.o -c testocr.cc
+
+$(ODIR)project.o: project.cc project.h bitmap.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)project.o -c project.cc
+
+
+$(ODIR)histogram.o: histogram.cc histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)histogram.o -c histogram.cc
+
+$(ODIR)get_skew.o: get_skew.cc get_skew.h project.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)get_skew.o -c get_skew.cc
+
+$(ODIR)tcl_interface.o: tcl_interface.cc tcl_interface.h
+ $(CC) $(CCFLAGS) -o $(ODIR)tcl_interface.o -c ${INCLUDE} tcl_interface.cc
+
+#$(ODIR)deskew.o: deskew.cc deskew.h
+# $(CC) $(CCFLAGS) -c deskew.cc
+
+#$(ODIR)rotate.o: rotate.cc
+# $(CC) $(CCFLAGS) -c rotate.cc
+
+
+
+# Object files are written to $(ODIR), not the source directory, so they
+# have to be removed explicitly in addition to any local leftovers.
+clean:
+	rm -f core *.o *~ $(OFILES)
+#
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Makefile-orig Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,119 @@
+#Makefile for OCRchie
+
+CC = g++
+#CC = purify -cache-dir=/tmp/ocrchie/cache -collector=/usr/sww/lib/gcc-lib/hppa1.1-hp-hpux9.03/2.6.3/ld g++
+#CC = quantify -cache-dir=/tmp/ocrchie/cache -collector=/usr/sww/lib/gcc-lib/hppa1.1-hp-hpux9.03/2.6.3/ld g++
+
+DEBUG = -g
+OPTIMIZE = -O
+CCFLAGS = $(DEBUG)
+EXECUTABLE = ocrchie
+
+# TCL root directory
+# on .cs machines
+TCLBASE = /usr/sww/tcl-7.4b4
+# on .eecs machines
+#TCLBASE = /usr/sww/tcl-7.4
+
+# X11 root directory
+X11BASE = /usr/sww/X11
+
+# math library directory
+MATHLIB = /lib/pa1.1
+
+# Place where object files need to be stored
+ODIR = /tmp/ocrchie/
+
+# That's it. Now just do a gmake.
+
+INCLUDE = -I${TCLBASE}/include -I${X11BASE}/include
+LIBDIRS = -L${TCLBASE}/lib -L${X11BASE}/lib -L${MATHLIB}
+LIBS = ${LIBDIRS} -ltk -ltcl -lX11 -lM -ltiff
+HEADERS = system.h list.h Component.h \
+ BitMap.h RLEPair.h RLEMap.h LineMarker.h Page.h Point.h \
+ convertMap.h Component.h Word.h tcl_interface.h
+
+
+OFILES = $(ODIR)main.o $(ODIR)project.o $(ODIR)histogram.o \
+ $(ODIR)get_skew.o $(ODIR)tcl_interface.o \
+ $(ODIR)RLEMap.o $(ODIR)RLEPair.o $(ODIR)list.o \
+ $(ODIR)convertMap.o $(ODIR)Component.o $(ODIR)Page.o\
+ $(ODIR)BitMap.o $(ODIR)system.o $(ODIR)testocr.o \
+ $(ODIR)Word.o $(ODIR)learn.o $(ODIR)status_message.o \
+ $(ODIR)link.o
+
+#The Executable project
+ocrchie: $(OFILES)
+ $(CC) -g -o $(EXECUTABLE) ${INCLUDE} $(OFILES) $(LIBS)
+
+
+# The .o files compiled in ODIR because of space considerations
+
+$(ODIR)link.o: link.cc link.h
+ $(CC) $(CCFLAGS) ${INCLUDE} -o $(ODIR)link.o -c link.cc
+$(ODIR)status_message.o: status_message.cc status_message.h
+ $(CC) $(CCFLAGS) -o $(ODIR)status_message.o -c status_message.cc
+
+$(ODIR)system.o: system.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)system.o -c system.cc
+
+$(ODIR)Page.o: $(HEADERS) Page.cc RLEMap.h RLEPair.h BitMap.h
+ $(CC) $(CCFLAGS) -o $(ODIR)Page.o -c Page.cc
+
+$(ODIR)RLEMap.o: $(HEADERS) system.cc\
+ RLEMap.cc RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEMap.o -c RLEMap.cc
+
+$(ODIR)BitMap.o: $(HEADERS) system.cc BitMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)BitMap.o -c BitMap.cc
+
+$(ODIR)RLEPair.o: $(HEADERS) RLEPair.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)RLEPair.o -c RLEPair.cc
+
+$(ODIR)Component.o: $(HEADERS) Component.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Component.o -c Component.cc
+
+$(ODIR)list.o: list.h list.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)list.o -c list.cc
+
+$(ODIR)Word.o: Word.h Component.h list.h list.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)Word.o -c Word.cc
+
+$(ODIR)convertMap.o: $(HEADERS) convertMap.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)convertMap.o -c convertMap.cc
+
+$(ODIR)learn.o: system.h learn.h learn.cc
+ $(CC) $(CCFLAGS) -o $(ODIR)learn.o -c learn.cc
+
+$(ODIR)main.o: main.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)main.o -c ${INCLUDE} main.cc
+
+$(ODIR)testocr.o: testocr.cc $(HEADERS)
+ $(CC) $(CCFLAGS) -o $(ODIR)testocr.o -c testocr.cc
+
+$(ODIR)project.o: project.cc project.h bitmap.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)project.o -c project.cc
+
+
+$(ODIR)histogram.o: histogram.cc histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)histogram.o -c histogram.cc
+
+$(ODIR)get_skew.o: get_skew.cc get_skew.h project.h histogram.h
+ $(CC) $(CCFLAGS) -o $(ODIR)get_skew.o -c get_skew.cc
+
+$(ODIR)tcl_interface.o: tcl_interface.cc tcl_interface.h
+ $(CC) $(CCFLAGS) -o $(ODIR)tcl_interface.o -c ${INCLUDE} tcl_interface.cc
+
+#$(ODIR)deskew.o: deskew.cc deskew.h
+# $(CC) $(CCFLAGS) -c deskew.cc
+
+#$(ODIR)rotate.o: rotate.cc
+# $(CC) $(CCFLAGS) -c rotate.cc
+
+
+
+clean:
+ rm -f core *.o *~
+#
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Page.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,873 @@
+/** Page.cc contains the member functions for the primary OCR class Page */
+#include "system.h"
+#include "Page.h"
+#include "convertMap.h"
+#include "get_skew.h"
+#include "Component.h"
+#include "status_message.h"
+
+/*** Member functions of class Page. ***/
+
+int Page::get_height()
+/*--------------------------------------------------------------
+Primary Purpose: Report the image length recorded in the page's RLEMap
+Return Value: fRLEMap->imageLength()
+---------------------------------------------------------------*/
+{
+    const int rows = fRLEMap->imageLength();
+    return rows;
+}
+
+int Page::get_width()
+/*--------------------------------------------------------------
+Primary Purpose: Report the image width recorded in the page's RLEMap
+Return Value: fRLEMap->imageWidth()
+---------------------------------------------------------------*/
+{
+    const int columns = fRLEMap->imageWidth();
+    return columns;
+}
+
+int Page::send_words_to_tcl()
+/*--------------------------------------------------------------
+Primary Purpose: Display words in tcl
+Return Value: Number of words sent.  (Control used to fall off the
+    end of this int function, which is undefined behaviour.)
+Effects: Issues one "addword" command per word in words(), tagged
+    UNKNOWN_CHAR, LOW_PRECISION, MISPELLED or OK depending on the
+    word's confidence and mispelled flag, then reports the apparent
+    word accuracy through set_status.
+Constraints: fWordList must already exist (extractWords creates it).
+Rev - AR
+---------------------------------------------------------------*/
+{
+    int word_count = 0;
+    int unknown_char_count = 0;
+    int low_precision_count = 0;
+    int mispelled_count = 0;
+
+    if(ENABLE_USER_INTERFACE) set_status("Displaying text");
+    for(ListElement* ptr = (words())->first; ptr != NULL; ptr = ptr->next)
+    {
+        word_count++;
+        set_text_display_status(word_count, fWordList->num_words);
+        Word* temp_word = (Word*)ptr->item;
+        // NOTE(review): ownership of the buffer returned by backslashify
+        // is not visible from here -- confirm whether it must be freed.
+        char* send_chars = backslashify(temp_word->characters);
+
+        // The tests are ordered: the lowest confidence class wins, and
+        // spelling is only considered once both confidence checks pass.
+        const char* tag = "OK";
+        if(temp_word->confid < VERY_LOW_CONFIDENCE)
+        {
+            tag = "UNKNOWN_CHAR";
+            unknown_char_count++;
+        }
+        else if(temp_word->confid < LOW_CONFIDENCE)
+        {
+            tag = "LOW_PRECISION";
+            low_precision_count++;
+        }
+        else if((temp_word->mispelled) && SPELLCHECK)
+        {
+            tag = "MISPELLED";
+            mispelled_count++;
+        }
+
+        // Same command text as before; only the tag is substituted.
+        docommand("addword \"%s\" %d %d %s", send_chars,
+                  temp_word->ul.x(), temp_word->ul.y(), tag);
+        update();
+    }
+    if(ENABLE_USER_INTERFACE)
+    {
+        set_status("Done displaying text");
+        if(word_count > 0)      // an empty word list would give 0/0
+        {
+            const int flagged =
+                mispelled_count + unknown_char_count + low_precision_count;
+            set_status("Apparent word accuracy: %.3lf%%",
+                       (100 - (100 * ((double)flagged / (double)word_count))));
+        }
+    }
+    return word_count;
+}
+
+
+int Page::deskew(int deskew_method)
+/*--------------------------------------------------------------
+Primary Purpose: Straighten the page.
+Arguments: deskew_method - RLE_DESKEW selects rotation of the RLEMap,
+    any other value selects rotation of the BitMap.
+Return Value: 1 if the page was rotated, 0 if it was left alone.
+Effects: Whichever map was rotated is converted into the other one,
+    so the bitmap and the rlemap stay in step.
+Constraints: The header of the original notes that RLEMap rotation
+    is not currently reliable and probably should not be used.
+Rev: AR
+---------------------------------------------------------------*/
+{
+    if(deskew_method == RLE_DESKEW)
+    {
+        // RLEMap::deskew reports whether it rotated anything.
+        if(!fRLEMap->deskew())
+            return 0;
+        convertMap(fRLEMap, fBitMap);
+        return 1;
+    }
+
+    // BitMap path: rotate only when the measured skew reaches the
+    // minimum angle in either direction.
+    double skew = get_skew(fRLEMap);
+    const bool needsRotation =
+        (skew >= MINIMUM_SKEW_ANGLE) || (skew <= - MINIMUM_SKEW_ANGLE);
+    if(!needsRotation)
+        return 0;
+
+    fBitMap->rotateMap(skew);
+    convertMap(fBitMap, fRLEMap);
+    return 1;
+}
+
+Page::Page()
+/*--------------------------------------------------------------
+Primary Purpose: Constructor allocates the bitmap and the rlemap.
+Effects: Every member the destructor or later stages read is given
+    a defined value.  flineinfo and fnumLines used to be left
+    uninitialised, so destroying a Page on which setLines had never
+    run deleted a garbage pointer and looped over a garbage count.
+---------------------------------------------------------------*/
+{
+    fBitMap = new BitMap;
+    fRLEMap = new RLEMap;
+    flineinfo = NULL;          // allocated by setLines
+    fnumLines = 0;             // set by setLines
+    fLineComponents = NULL;    // allocated by setLines
+    fWordList = NULL;          // allocated by extractWords
+    favgSpacing = 1;           // same fallback extractComponents uses
+}
+
+Page::~Page()
+/*--------------------------------------------------------------
+Primary Purpose: Destructor deallocates private fields that
+have been created.
+Effects: flineinfo and fLineComponents are allocated with new[] in
+    setLines, so they are released with delete[] (plain delete on an
+    array is undefined behaviour).  The per-line component lists are
+    only walked when the array holding them exists.
+Rev:
+---------------------------------------------------------------*/
+{
+    delete [] flineinfo;
+
+    if (fLineComponents != NULL)
+    {
+        for (int i = 0; i < fnumLines; i++)
+            delete fLineComponents[i];      // delete of NULL is a no-op
+        delete [] fLineComponents;
+    }
+
+    delete fBitMap;
+    delete fRLEMap;
+    delete fWordList;
+}
+
+Angle Page::skewAngle()
+/*--------------------------------------------------------------
+Primary Purpose: Estimate the angle of rotation of this page.
+Arguments: none; the page's own RLEMap is examined.
+Return Value: the angle reported by get_skew (code in get_skew.cc)
+Rev: AR
+---------------------------------------------------------------*/
+{
+    Angle detected = get_skew(fRLEMap);
+    return detected;
+}
+
+
+MapStatus Page::readMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Load the page image.
+Arguments: filename is passed straight to BitMap::readMap
+Return Value: the status reported by BitMap::readMap
+Effects: The RLEMap is rebuilt from the bitmap after the read,
+    whatever status the read returned.
+---------------------------------------------------------------*/
+{
+    const MapStatus readResult = fBitMap->readMap(filename);
+    convertMap(fBitMap, fRLEMap);
+    return readResult;
+}
+
+
+
+MapStatus Page::setLines()
+/*--------------------------------------------------------------
+Primary Purpose: Set flineinfo array in Page class with the
+    starting and ending rows of each line of text.
+    Also sets fnumLines to the number of lines
+Arguments: none
+Return Value: A MapStatus: VALID, EMPTY if there is no data in the
+    RLEMap, or OTHERERROR if more lines are found than the page can
+    hold (which includes a page shorter than MinLineSize rows).
+Effects: Allocates flineinfo and fills with starting and ending row
+    of each line, then allocates fLineComponents with every slot set
+    to NULL.  The following global variables are used as parameters
+    in this function. These are defined in system.cc
+    NoiseTolerance - Rows whose number of pixels is less than this value
+        will be considered empty (current val 6).
+    MinVertSeparation - The minimum number of rows separating lines of
+        text.  Lines will be merged if actual separation is less than
+        this value. (current val 3)
+    MinLineSize - The minimum number of rows in a line of text.
+        Any smaller lines are discarded (current val 5)
+
+Constraints: Page::readMap() must be run first to fill fRLEMap
+Rev: 10/26 KM
+---------------------------------------------------------------*/
+{
+    const int maxrow = fRLEMap->imageLength() - 1;  // maximum row number
+
+    // A map with zero rows gives maxrow == -1; treat it as empty too.
+    if (maxrow <= 0) return EMPTY;
+
+    int actualSeparation = MinVertSeparation + 1;   // must be bigger than
+                                                    // min for line 0
+    int linenum = 0;                                // current line number
+    int prvlinenum = 0;
+    const int maxLines = maxrow / MinLineSize;      // max # of lines of text
+
+    // One spare slot: the loop below always writes flineinfo[linenum]
+    // before checking linenum against maxLines, and maxLines is 0 when
+    // the page has fewer than MinLineSize rows.
+    flineinfo = new LineMarker[maxLines + 1];
+
+    for (int i = 0; i < maxrow;)
+    {
+        LineMarker & thisLine = flineinfo[linenum];
+        LineMarker & prevLine = flineinfo[prvlinenum];
+
+        // Skip rows that count as empty ...
+        while (i < maxrow && fRLEMap->row(i)->numPixels < NoiseTolerance)
+            i++;
+        thisLine.fstartrow = i++;
+        // ... then consume rows that count as text.  ">=" keeps this in
+        // step with the "<" test above; the original ">" ended a line on
+        // a row holding exactly NoiseTolerance pixels.
+        while (i < maxrow && fRLEMap->row(i)->numPixels >= NoiseTolerance)
+            i++;
+
+        const int lineSize = i - thisLine.fstartrow + 1;  // # rows in line
+
+        // If this line is less than MinVertSeparation away
+        // from the last line, join the two together.
+        if (linenum > 0)
+        {
+            actualSeparation = thisLine.fstartrow - prevLine.fendrow;
+        }
+        if (actualSeparation < MinVertSeparation)
+        {
+            // If too small of a separation, add into prev row
+            prevLine.fendrow = i;
+        }
+        else if (lineSize >= MinLineSize)
+        {
+            thisLine.fendrow = i;
+            prvlinenum = linenum;
+            linenum++;
+        }
+        // NOTE(review): i can be maxrow + 1 here when the scan ran off the
+        // bottom of the page, so fendrow may exceed the last row -- confirm
+        // that the consumers of fendrow tolerate that.
+        if (linenum >= maxLines) return OTHERERROR;
+    }
+
+    fnumLines = linenum;        // Set number of lines in page class
+
+    // Start every slot at NULL so the destructor can run safely even if
+    // extractComponents never fills the array in.
+    fLineComponents = new Components*[fnumLines];
+    for (int k = 0; k < fnumLines; k++)
+        fLineComponents[k] = NULL;
+
+    if((ENABLE_USER_INTERFACE) && DISPLAY_LINE_BOUNDARIES)
+    {
+        display_line_boundaries();
+    }
+    /* printf("Setlines found a total of %d lines.\n", fnumLines); */
+    if(ENABLE_USER_INTERFACE)
+        update();
+    return VALID;
+}
+
+void Page::display_line_boundaries()
+/*--------------------------------------------------------------
+Primary Purpose: Display line boundaries in TCL/TK. Called from
+setLines if ENABLE_USER_INTERFACE and DISPLAY_LINE_BOUNDARIES are
+set to TRUE
+Effects: Draws a blue line in the gap between each pair of
+consecutive text lines.  fnumLines lines have fnumLines - 1 gaps;
+the original loop ran once more and read flineinfo[fnumLines],
+which is not a detected line.
+Rev: AR
+---------------------------------------------------------------*/
+{
+    int centerline, width;
+    for(int j = 0; j + 1 < fnumLines; j++)
+    {
+        // Middle and thickness of the gap between line j and line j+1.
+        centerline = (flineinfo[j].fendrow + flineinfo[j + 1].fstartrow) / 2;
+        width = flineinfo[j + 1].fstartrow - flineinfo[j].fendrow;
+
+        scale(centerline);
+        scale(width);
+        /* having this pathname here is probably not such a good idea...*/
+
+        docommand(".main_window.display.work_space create line %d %d %d %d -width %d -fill blue -tags {project_ray IMAGE_TAG} -stipple @/usr/sww/share/tclX-7.3a/tkX/3.6a/demos/bitmaps/grey.25", 0, centerline, bmap()->imageWidth(), centerline, width);
+    }
+}
+
+
+int test_rlemap_lines(RLEMap* rmap)
+/*--------------------------------------------------------------
+Primary Purpose: Debugging aid that prints the pixel count of
+every row of an RLEMap.
+Arguments: rmap is the map to dump
+Return Value: the number of rows printed.  (The original had no
+return statement although it is declared int.)
+---------------------------------------------------------------*/
+{
+    int length = rmap->imageLength();
+    for(int i = 0; i < length; i++)
+        printf("On line %d, numpixels = %d\n", i, rmap->fMapData[i]->numPixels);
+    return length;
+}
+
+
+MapStatus Page::extractComponents()
+/*--------------------------------------------------------------
+ Component extraction routines.
+*
+* Given the top and bottom line of a row we want to generate a list of
+* components.  The general method is to find the closest dot, trace its
+* connected dots, then project upwards and downwards and add anything we
+* find there to the component.  We erase the component from the RLEMap
+* as it is added to the component list.  By projecting up and down
+* from the piece we first find we should be able
+* to completely encompass characters like :;i?|!  The only problems are
+* italic or ligatured characters where we may pick up two or more
+* characters at a time (which would be bad) or characters fragmented
+* with a vertical gap.
+
+Primary Purpose: Main extraction routine.
+Return Value: VALID.  (The original had no return statement.)
+Effects: Makes new components and puts them in a list per text line.
+    Deletes components from the RLE map.  Fills in component
+    boundaries and calls Component::setProperties to set the property
+    vector.  Assigns each component a charGroup from its position
+    relative to the top of the line and the detected baseline.
+    Sets favgSpacing.  Lastly convertMap is run to rebuild the RLEMap.
+Constraints: Page::setLines() must be run first
+Rev: 11/2 JMH
+     11/8 KM add set properties and avgSpacing
+Fixes relative to the original:
+    - a stray ';' after the if in the second upward loop made its
+      break unconditional, so that loop never ran more than once;
+    - AddToComponent was called inside assert(), so a build with
+      NDEBUG would never seed the component;
+    - size and baseline could be read before being assigned.
+---------------------------------------------------------------*/
+{
+    int currentCol, startRow, endRow, rowHeight;
+    ListElement* intrvl;
+    ListElement* tempintrvl;
+    /* printf("fnumLines = %d\n", fnumLines); */
+    Component* comp;
+    int totalSpacing = 0;  // total blank horizontal pixels between components
+    int baselines[MaxVertSize];    // array for finding the baseline
+    last_status = 0.0;
+    int compCounter = 0;
+    int i;
+    int j;
+    printf("Extracting Components\n");
+    for (i = 0; i < fnumLines; i++) {
+        if(ENABLE_USER_INTERFACE)
+            set_component_status(i, fnumLines);
+        currentCol = 0;
+        startRow = flineinfo[i].fstartrow;
+        endRow = flineinfo[i].fendrow;
+        rowHeight = endRow - startRow;
+        assert(rowHeight > 0);
+
+        for (j = 0; j < MaxVertSize; j++)
+            baselines[j] = 0;
+        fLineComponents[i] = new Components();
+
+        while (currentCol <= fRLEMap->imageWidth()) { // until end of page
+
+            // Build component starting with closest black dot
+            intrvl = fRLEMap->FindNearHorizDot(currentCol, startRow, endRow);
+            if (intrvl == NULL) {
+                // printf("Reached end of line\n");
+                break;
+            }
+            comp = new Component();     // Make a new component named comp
+
+            // Seed the component.  The call is kept outside assert() so
+            // that it still happens when assertions are compiled out.
+            const int seeded = comp->AddToComponent(intrvl, fRLEMap);
+            assert(seeded);
+            (void) seeded;
+
+            // Now we want to extend upwards
+            // First check if there is a blank space to the right
+            tempintrvl = fRLEMap->FindNearHorizDot(comp->lr().x(),
+                                                   startRow, endRow);
+            if (tempintrvl != NULL && ((RLEPair*) tempintrvl->item)->start >
+                comp->lr().x()+MinHorizSeparation+1) {
+                while (comp->ul().y() < endRow) {
+                    intrvl = fRLEMap->FindNearVertDot(comp->ul().x(),
+                                                      comp->lr().x(),
+                                                      comp->lr().y(),
+                                                      startRow);
+                    if (intrvl == NULL) break;
+                    if (!comp->AddToComponent(intrvl, fRLEMap)) break;
+                }
+            } else {
+                while (comp->ul().y() < endRow) {
+                    intrvl = fRLEMap->FindNearVertDot(comp->ul().x(),
+                                                      comp->lr().x(),
+                                                      comp->ul().y(),
+                                                      startRow);
+                    if (intrvl == NULL) break;
+                    // No stray ';' here: keep extending until nothing
+                    // more can be added, like the sibling loops.
+                    if (!comp->AddToComponent(intrvl, fRLEMap)) break;
+                }
+            }
+
+            // Now we want to extend downwards
+            while (comp->lr().y() > startRow) {
+                intrvl = fRLEMap->FindNearVertDot(comp->ul().x(),
+                                                  comp->lr().x(),
+                                                  comp->lr().y(), endRow);
+                if (intrvl == NULL) break;
+                if (!comp->AddToComponent(intrvl, fRLEMap)) break;
+            }
+
+            // Now we toss out the noise.  size starts at 0 so that a
+            // component with a bad upper-left corner is discarded rather
+            // than judged on an uninitialised value.
+            int size = 0;
+            if (comp != NULL) {
+                if (comp->ul() < Point(0,0))
+                    printf("Here's a problem. %d, %d\n", comp->ul().x(), comp->ul().y());
+                else
+                    size = fBitMap->pixelsInRegion(comp->ul(), comp->lr());
+            }
+            if (size < MinComponentSize) {
+                // printf("Deleting some noise of size %d\n", size);
+                // printComponent(comp);
+                delete comp;
+                comp = NULL;
+            }
+            else
+            {
+                compCounter++;
+                // display a rectangle around the component
+                if(ENABLE_USER_INTERFACE)
+                {
+                    if(DISPLAY_BOUNDING_BOXES)
+                        comp->display_bounding_box();
+                }
+
+                // JMH - make an array of frequency of the y coord of
+                // bottom of comp
+                int vertOffset = endRow - comp->lr().y();
+                if(vertOffset < MaxVertSize && vertOffset >= 0)
+                    baselines[vertOffset]++;
+
+                comp->setProperties(fBitMap);
+                if(fLineComponents[i]->last != NULL)
+                    totalSpacing +=
+                        comp->ul().x() -
+                        ((Component *) (fLineComponents[i]->last->item))->lr().x();
+
+                fLineComponents[i]->Append(comp);  // add component to list
+                currentCol = (comp->lr()).x() + 1; // update position on page
+            }
+        }
+
+        // find most popular bottom of comp and call it the baseline.
+        // Falls back to endRow (offset 0) when no component bottom was
+        // counted, instead of leaving baseline uninitialised.
+        int counter = 0;
+        int baseline = endRow;
+        for (j = 0; j < MaxVertSize; j++) {
+            if (counter < baselines[j]) {
+                counter = baselines[j];
+                baseline = endRow - j;
+            }
+        }
+        // printf("For row %d to %d baseline = %d\n", startRow, endRow, baseline);
+        // Now assign each character a group based on its location
+        for (ListElement* ptr = fLineComponents[i]->first; ptr != NULL;
+             ptr = ptr->next) {
+            comp = (Component*) ptr->item;
+            comp->charGroup = 0;
+
+            // if top of char is higher than top - tolerance
+            if (comp->ul().y() < startRow + (rowHeight/TopLineTolerance)) {
+                comp->charGroup += 2;   // tall like a T
+            }
+
+            // if bottom of char is lower than base - tolerance
+            if (comp->lr().y() > baseline + (rowHeight/BaseLineTolerance)) {
+                comp->charGroup += 1;   // has a tail like a y
+            } else
+            if (comp->lr().y() < (baseline - (2*rowHeight/BaseLineTolerance))) {
+                comp->charGroup = 4;    // floating like a '
+                /* printf("bottom at %d < %d\n", comp->lr().y(),
+                   baseline - (2*rowHeight/BaseLineTolerance)); */
+            }
+            // printf("added character in group %d\n", comp->charGroup);
+        }
+    }
+    /* printf("Found %d components on this page.\n", compCounter); */
+    // printComponents();
+    last_status = 0.0;
+    if(ENABLE_USER_INTERFACE)
+        set_status("Done extracting characters");
+    if((compCounter - fnumLines) > 0)   /* don't want divide by zero */
+    {
+        favgSpacing = totalSpacing / (compCounter - fnumLines);
+    }
+    else
+    {
+        favgSpacing = 1;
+    }
+
+    // The extraction erased intervals from the RLEMap; rebuild it.
+    delete fRLEMap;
+    fRLEMap = new RLEMap;
+    convertMap(fBitMap, fRLEMap);
+    return VALID;
+}
+
+void Page::printComponents()
+/*--------------------------------------------------------------
+Primary Purpose: Debugging routine.  Walks every component of every
+line and prints a small bitmap, the property vector and the
+recognition result for each one that was identified as 'n' with a
+confidence below ConfidenceThreshold - 20.
+---------------------------------------------------------------*/
+{
+    int seen = 0;       // running component number across the whole page
+    for (int lineIdx = 0; lineIdx < fnumLines; lineIdx++) {
+        ListElement* node = fLineComponents[lineIdx]->first;
+        for (; node != NULL; node = node->next) {
+            seen++;
+            Component* candidate = (Component *) node->item;
+
+            const bool worthPrinting =
+                candidate->confid() < (ConfidenceThreshold-20)
+                && candidate->asciiId() == 'n';
+            if (!worthPrinting)
+                continue;
+
+            printf("Here's a poorly recognized component ul=%d,%d, lr=%d,%d.\n\n",
+                   (candidate->ul()).x(), (candidate->ul()).y(),
+                   (candidate->lr()).x(), (candidate->lr()).y());
+            printComponent(candidate);
+            printf("properties: ");
+            printVector(candidate->properties(), numProperties);
+            printf("I think it's a -> %c <- confidence: %d line: %d group: %d Comp#%d\n",
+                   candidate->asciiId(),
+                   candidate->confid(), lineIdx+1, candidate->charGroup, seen);
+            printf("\n*******************************************************\n");
+        }
+    }
+}
+
+void Page::printComponent(Component* comp)
+/*--------------------------------------------------------------
+Primary Purpose: Print the bitmap region of a single component.
+Arguments: comp is the component whose bounding box is printed
+Effects: One output line per bitmap row; at most the 79 leftmost
+columns of the bounding box are printed.
+---------------------------------------------------------------*/
+{
+    const int left = comp->ul().x();
+    const int top = comp->ul().y();
+    const int bottom = comp->lr().y();
+    // Clip the right edge to left + 78.
+    const int right = (comp->lr().x() < left + 78) ? comp->lr().x()
+                                                   : left + 78;
+
+    for (int row = top; row <= bottom; row++) {
+        // The bitmap packs eight pixels per byte.
+        for (int col = left; col <= right; col++)
+            bitprint(fBitMap->row(row)[col/8], col%8);
+        printf( "\n");
+    }
+}
+
+int spacing(ListElement * compa, ListElement * compb);
+// Prototype for the spacing helper used by extractWords.
+// NOTE(review): the only definition in this file is the member function
+// Page::spacing further down; no free function with this signature is
+// defined here, so this declaration looks like a leftover -- confirm.
+
+MapStatus Page::extractWords()
+/*--------------------------------------------------------------
+Primary Purpose: Group each line's components into words.
+Return Value: VALID
+Effects: Creates fWordList and appends one Word per run of
+components; a run ends where the gap to the next component exceeds
+1.25 times favgSpacing.  A "\n" word is appended after every line.
+Each word is also echoed to stdout.  Sets fWordList->num_words.
+Constraints: extractComponents must be run first
+Rev: KM 11/7/95
+---------------------------------------------------------------*/
+{
+    int word_count = 0;
+    const int spacingThreshold = (int) (1.25 * ((float) (favgSpacing)));
+
+    fWordList = new Words;
+    last_status = 0.0;
+    for (int lineIdx = 0; lineIdx < fnumLines; lineIdx++)
+    {
+        if(ENABLE_USER_INTERFACE)
+            set_extract_status(lineIdx, fnumLines);
+
+        ListElement * wordStart = NULL;  // first component of current word
+        int charsInWord = 0;             // components collected so far
+        bool inWord = FALSE;
+
+        for(ListElement *node = line(lineIdx)->first; node != NULL;
+            node = node->next)
+        {
+            if(!inWord)
+            {
+                wordStart = node;
+                charsInWord = 1;
+                inWord = TRUE;
+            }
+
+            // A small gap means the next component belongs to this word.
+            if(spacing(node, node->next) <= spacingThreshold)
+            {
+                charsInWord++;
+                continue;
+            }
+
+            // Gap is wide enough: close the word off.
+            Word * finished = new Word(wordStart, charsInWord);
+            (words())->Append(finished);
+            printf("%s ", finished->characters);
+            inWord = FALSE;
+            word_count++;
+        }
+
+        // Add in a separate word for new line
+        Word * lineBreak = new Word("\n",2);
+        (words())->Append(lineBreak);
+        printf("%s", lineBreak->characters);
+        word_count++;
+    }
+    last_status = 0.0;
+    fWordList->num_words = word_count;
+    if(ENABLE_USER_INTERFACE)
+        set_status("Done extracting words");
+    return VALID;
+}
+
+void Page::spellcheck()
+/*--------------------------------------------------------------
+Primary Purpose: Run the spell checker over the word list.
+Effects: Sets the mispelled field to TRUE on every word whose text
+the mispelled() check rejects; other words are left as they are.
+Constraints: extractWords must be run first
+Rev: AR
+---------------------------------------------------------------*/
+{
+    int checked = 0;
+    ListElement* node = (words())->first;
+    while(node != NULL)
+    {
+        checked++;
+        if(ENABLE_USER_INTERFACE)
+            set_spellcheck_status(checked, fWordList->num_words);
+
+        Word* current = (Word*)node->item;
+        /* printf("Spellchecking word %s\n", current->characters); */
+        if(mispelled(current->characters))
+            current->mispelled = TRUE;
+
+        node = node->next;
+    }
+}
+
+int Page::spacing(ListElement * compa, ListElement * compb)
+/*--------------------------------------------------------------
+Primary Purpose: Horizontal gap from the end of compa to the
+beginning of compb.
+Arguments: compa, compb are list elements holding Components;
+    compb may be NULL to indicate the end of a line.
+Return Value: 1000 when compb is NULL, 0 when the components
+    overlap horizontally, otherwise the gap in pixels.
+(The unused local and the assert that could never fire have been
+removed.)
+---------------------------------------------------------------*/
+{
+    if (compb == NULL) return 1000;     // end of line
+
+    Component * a = ((Component *) (compa)->item);
+    Component * b = ((Component *) (compb)->item);
+    const int gap = (b->ul().x() - a->lr().x());
+    return (gap < 0) ? 0 : gap;
+}
+
+
+void Page::printWords()
+/*--------------------------------------------------------------
+Primary Purpose: Debugging routine.  Prints every component of
+every word: bounding box, bitmap, property vector and recognition
+result.  This can take a very long time.
+---------------------------------------------------------------*/
+{
+    for (ListElement * node = words()->first; node != NULL; node = node->next)
+    {
+        Word * word = (Word *) node->item;
+        printf("!!!!!! NEW WORD %s confid : %d !!!!!\n", word->characters, word->confid);
+
+        for(int idx = 0; idx < word->charCount; idx++)
+        {
+            Component * glyph = word->character[idx];
+            if (glyph == NULL) continue;
+
+            const int left = glyph->ul().x();
+            const int top = glyph->ul().y();
+            const int right = glyph->lr().x();
+            const int bottom = glyph->lr().y();
+
+            printf("Printing a component ul=%d,%d, lr=%d,%d.\n\n",
+                   left, top, right, bottom);
+
+            // Dump the bounding box, eight pixels per bitmap byte.
+            for (int row = top; row <= bottom; row++) {
+                for (int col = left; col <= right; col++)
+                    bitprint(fBitMap->row(row)[col/8], col%8);
+                printf( "\n");
+            }
+
+            printf("properties: ");
+            printVector(glyph->properties(), numProperties);
+            printf("Identification: %c distance: %d confidence %d\n",
+                   glyph->asciiId(),
+                   glyph->distance(&LearnedChars[glyph->asciiId()]),
+                   glyph->confid());
+            printf("\n***********************************************\n");
+        }
+    }
+}
+
+MapStatus Page::recognize()
+/*--------------------------------------------------------------
+Primary Purpose: Recognize the entire page by running
+recognize(line) on every text line in turn.
+Return Value: VALID
+Constraints: extractComponents must be run first.
+See recognize(int linenum) for more detailed info
+Rev: KM
+---------------------------------------------------------------*/
+{
+    printf("Recognizing document\n");
+    last_status = 0.0;
+
+    int lineIdx = 0;
+    while (lineIdx < fnumLines)
+    {
+        if(ENABLE_USER_INTERFACE)
+            set_recognize_status(lineIdx, fnumLines);
+        recognize(lineIdx);
+        lineIdx++;
+    }
+
+    last_status = 0.0;
+    return VALID;
+}
+
+
+MapStatus Page::recognize(int linenum)
+/*--------------------------------------------------------------
+Primary Purpose: Recognize one line of connected components
+Arguments: linenum is the line number to recognize
+Return Value: VALID
+Effects: Runs Component::recognize against LearnedGroups for every
+component on the line.  When the resulting confidence is below
+ConfidenceThreshold and the component is wider than 2*MinWidth,
+divideAndRecognize is called to try splitting it.
+Constraints: extractComponents must be run first
+Rev: KM 11/9/95
+---------------------------------------------------------------*/
+{
+    ListElement * node = line(linenum)->first;
+    while (node != NULL)
+    {
+        Component * glyph = (Component *) node->item;
+        Distance dist = glyph->recognize(LearnedGroups);
+
+        // Poorly matched and really wide: probably merged characters.
+        if (glyph->confid() < ConfidenceThreshold &&
+            glyph->width() > 2*MinWidth)
+        {
+            divideAndRecognize(line(linenum), node, dist);
+        }
+
+        node = node->next;
+    }
+
+    return VALID;
+}
+
+
+
+void Page::divideAndRecognize (Components *list, ListElement * ptr, Distance d)
+/*--------------------------------------------------------------
+Primary Purpose: Identify and separate merged characters
+Arguments: list is the component list that contains ptr
+    ptr is a pointer to a list element containing a component
+    d is the current recognition distance on the component
+Effects: Subdivides component into two parts.  Division is made at
+    the column with the fewest pixels, searched between MinWidth
+    columns from either edge.  If that minimum is >= JoinTolerance
+    no division will be made.  (JoinTolerance is a global var that
+    determines the maximum number of merged pixels that are allowed
+    in a column for a division to be made.)
+    When a division improves the distance of the left part, the
+    component's boundaries are adjusted accordingly and a new
+    component for the right part is inserted into the list after
+    ptr.  Otherwise the component is restored and re-recognized.
+
+    Returns without change if confidence is above
+    ConfidenceThreshold or the width of the component is
+    < MinWidth*2.
+Rev: KM 11/24/95
+(Locals that were declared but never used have been removed.)
+---------------------------------------------------------------*/
+{
+    Component * comp = (Component *) ptr->item;
+    bool allGroups = TRUE;
+
+    // Save the original component boundaries just in case we cant improve
+    Point oldlr = comp->lr();
+    Point oldul = comp->ul();
+    int oldwidth = (int) comp->width();
+
+    // Some easy access x,y coordinates
+    int ulx = comp->ul().x();
+    int uly = comp->ul().y();
+    int lrx = comp->lr().x();
+    int lry = comp->lr().y();
+
+    if (comp->confid() > ConfidenceThreshold)
+        return;
+
+    if (oldwidth < MinWidth*2)      // cant be split in two
+        return;
+
+    // Determine where to split.  Split at the thinnest point
+    // within JoinTolerance (maximum number of pixels that might be fused)
+    int minHeight = (int)comp->height();
+    int bestlrx = lrx;
+    for(int i = MinWidth; i < oldwidth - MinWidth; i++)
+    {
+        // Pixel count of the single column ulx+i over the full height.
+        int newHeight =
+            fBitMap->pixelsInRegion(Point(ulx+i,uly), Point(ulx+i,lry));
+        if (newHeight < minHeight)
+        {
+            minHeight = newHeight;
+            bestlrx = ulx+i;
+        }
+    }
+    // printf("bestlrx = %d, minHeight = %d\n", bestlrx, minHeight);
+
+    if (!(bestlrx < lrx && minHeight < JoinTolerance))
+        return;                     // no acceptable split column
+
+    // Shrink comp to the left part and re-recognize it.
+    comp->lr().x() = bestlrx;
+    int shrunk = comp->vertShrink(fBitMap);
+    comp->setProperties(fBitMap);
+    Distance newdist;
+    if (shrunk)     // ignore group if we had to shrink down
+        newdist = comp->recognize(LearnedGroups, allGroups);
+    else
+        newdist = comp->recognize(LearnedGroups);
+
+    // printf("Distance = %u asciiid = %c \n", newdist, comp->asciiId());
+
+    // Build and recognize the right part.
+    Component * newcomp = new Component(Point(bestlrx+1, oldul.y()), oldlr);
+    newcomp->vertShrink(fBitMap);
+    newcomp->setProperties(fBitMap);
+    newcomp->recognize(LearnedGroups, allGroups);  // sets ids on newcomp
+
+    if (newdist < d)
+        list->insertAfter(ptr, newcomp);
+    else
+    {
+        // No improvement: put the original boundaries back.
+        comp->ul() = oldul;
+        comp->lr() = oldlr;
+        comp->setProperties(fBitMap);
+        comp->recognize(LearnedGroups);
+        delete newcomp;
+    }
+}
+
+
+void Page::uniteAndRecognize (Components *list, ListElement * ptr, Distance d)
+/*--------------------------------------------------------------
+Primary Purpose: Identify and merge a separated character
+Arguments: list is the component list that contains ptr
+    ptr is a pointer to a list element containing a component; the
+        component is merged with the one in ptr->previous
+    d is the current recognition distance on the component
+Effects: Builds a component spanning from the upper left of the
+    previous component to the lower right of this one.  If it is
+    recognized with a distance below d, both parts are removed from
+    the list and the united component takes their place; otherwise
+    the list is left alone.
+    Does nothing when ptr is NULL or has no predecessor (the
+    original dereferenced ptr->previous unconditionally).
+
+Rev: JMH 12/10/95
+---------------------------------------------------------------*/
+{
+    if (ptr == NULL || ptr->previous == NULL)
+        return;                     // nothing to unite with
+
+    Component * part1 = (Component *) ptr->previous->item;
+    Component * part2 = (Component *) ptr->item;
+    Point ul, lr;
+    ul = part1->ul();
+    lr = part2->lr();
+    if (ul.y() > lr.y() || ul.x() > lr.x())
+        return;                     // corners would not form a box
+    Component * newcomp = new Component(part1->ul(), part2->lr());
+
+    newcomp->setProperties(fBitMap);
+
+    // Combine the position groups of the two parts; 4 is the highest
+    // group value assigned in this file, so clamp to it.
+    if (part1->charGroup <= 3 && part2->charGroup <= 3)
+        newcomp->charGroup = (part1->charGroup | part2->charGroup);
+    else if (part1->charGroup == 4)
+        newcomp->charGroup = (part2->charGroup | 2);
+    else
+        newcomp->charGroup = (part1->charGroup | 2);
+    if (newcomp->charGroup > 4) newcomp->charGroup = 4;
+
+    int newdist = newcomp->recognize(LearnedGroups);
+
+    if (newdist < d) {
+        // NOTE(review): whether removeAt also frees the removed
+        // components is not visible here -- confirm there is no leak,
+        // and that callers do not keep using ptr afterwards.
+        list->removeAt(ptr->previous);
+        list->insertAfter(ptr, newcomp);
+        list->removeAt(ptr);
+    } else
+        delete newcomp;
+}
+
+
+int Page::writeWordPos(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Writes word position, confidence, length and string to file
+Arguments: output file name
+Return Value: 1 if successful. 0 if an error occurred, which now
+    includes the case where no word list exists yet (fWordList is
+    NULL until extractWords has run; it used to be dereferenced
+    regardless).
+Effects: Calls fWordList->writeWordPos
+    // Output format for each word
+    "%6d %6d %6d %6d %s\n", word->ul.x(), word->ul.y(),
+    word->confid, word->charCount, word->characters
+Rev: 11/25/95
+---------------------------------------------------------------*/
+{
+    if (fWordList == NULL)
+        return 0;
+    return fWordList->writeWordPos(filename);
+}
+
+int Page::writeAscii(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word list to ascii file
+Arguments: filename to write to
+Return Value: 1 if successful, 0 if unsuccessful, which now
+    includes the case where no word list exists yet (fWordList is
+    NULL until extractWords has run; it used to be dereferenced
+    regardless).
+Effects: Calls fWordList->writeAscii(filename)
+Writes words to file in text format using MinLineSize
+to differentiate lines.
+Rev: 11/25 KM
+---------------------------------------------------------------*/
+{
+    if (fWordList == NULL)
+        return 0;
+    return fWordList->writeAscii(filename);
+}
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Page.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,131 @@
+/* Page.h
+ The Page class is the primary class used for the OCR system
+ It has two data representations of a page of text, an RLEMap
+ and a BitMap.
+
+*/
+
+#ifndef _PAGE_H
+#define _PAGE_H
+#include "system.h"
+#include "Component.h"
+#include "RLEMap.h"
+#include "BitMap.h"
+#include "LineMarker.h"
+#include "Word.h"
+#include "tcl_interface.h"
+
+class Page {
+public:
+ // Constructor, Destructor
+ friend int main(int argc, char** argv);
+ friend void testocr(int argc, char ** argv);
+ Page();
+ ~Page();
+
+
+ // Reading is from 2 level TIFF files.
+ // Calls the BitMap read function, then converts to RLE
+
+ MapStatus readMap(char * filename); // Calls BitMap::readMap
+
+ Angle skewAngle(); // returns skew estimate
+ int deskew(int deskew_method); /* one for rle, 0 for bitmap rot */
+
+ MapStatus setLines();
+ /* Sets fnumLines to the # of text lines
+ and dimensions and sets flineinfo for start and end row
+ for each line. */
+ void display_line_boundaries();
+ /* highlights the space between lines of text in TCL/TK */
+
+ MapStatus extractComponents();
+ /* Extract Component information for each line of text
+ Does connected component analysis then projects up and
+ down to catch circumflexes. A component list is created
+ for each line of text. Assumes image has already been deskewed
+ using deskew and that setLines has been run to determine
+ text line boundaries ***/
+
+ MapStatus recognize();
+ /* Recognize whole page. Run after extractComponents.
+ learn() or readLearnedGroups() must also be run
+ before this function **/
+
+ MapStatus recognize(int linenum); // just one line
+ /* Recognize characters
+ Perform Character Recognition on a line of components.
+ Use the global variable LearnedGroups for comparison.
+ ***/
+
+ MapStatus extractWords();
+ /* Find the start and end of words using avgSpacing and
+ add to word list fWordList */
+
+ void spellcheck();
+ /* spellcheck the list of words (set the mispelled field
+ in each word) */
+
+ int send_words_to_tcl();
+ /* Send words to user interface */
+ int writeWordPos(char * filename);
+ /* Write upper left point coordinates, confidence and translation to
+ file */
+ int writeAscii(char * filename);
+ /** Write words out to ascii file **/
+
+
+ void printComponents();
+ /* Prints out a little bitmap for each bad component in the list.
+ Uses ConfidenceThreshold as a cutoff for printing characters.
+ Just used for debugging*/
+ void printComponent(Component* comp);
+
+ void printWords();
+ /* prints out bitmap for each component delimiting between words.*/
+
+
+ int get_height();
+ int get_width();
+ inline BitMap * bmap() {return fBitMap;};
+ inline RLEMap * rmap() {return fRLEMap;};
+ inline Words * words() {return fWordList;};
+ inline Components * line(int i) {return fLineComponents[i];};
+
+ int numLines() {return fnumLines;};
+ LineMarker * lineinfo() {return flineinfo;};
+ int avgSpacing() {return favgSpacing;};
+
+private:
+ int fnumLines; // Number of lines
+ LineMarker * flineinfo; // for each line - start and end row
+ // in RLEMap
+ int favgSpacing ; // Avg spacing between comp (in pixels)
+ Components ** fLineComponents; // A list of components for each line
+
+ Words * fWordList; // A list of words in the document
+ RLEMap * fRLEMap; // Pointer to an RLEMap representation
+ BitMap * fBitMap; // Pointer to BitMap representation
+
+ int spacing(ListElement * compa, ListElement * compb);
+ // helper function for extractWords
+ // Returns # of horizontal blank pixels between 2 components
+ void divideAndRecognize (Components * list, ListElement * ptr, Distance d);
+ void uniteAndRecognize(Components * list, ListElement * ptr, Distance d);
+};
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Point.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,53 @@
+#ifndef _POINT_H
+#define _POINT_H
+
+class Point{
+ public:
+  // A 2-D integer coordinate; both components default to -1.
+  Point(int xCoord = -1, int yCoord = -1)
+    : fx(xCoord), fy(yCoord) {}
+
+  // Writable access: the returned reference allows p.x() = 3.
+  inline int & x() { return fx; }
+  inline int & y() { return fy; }
+  // Read-only access, so that const Points can be inspected as well.
+  inline int x() const { return fx; }
+  inline int y() const { return fy; }
+
+  // Relational operators.  == and != compare both coordinates exactly.
+  // <, >, <= and >= hold only when BOTH coordinates satisfy the relation,
+  // so two points can be unordered: (1,5) < (2,3) and (2,3) < (1,5) are both false.
+  inline bool operator ==(const Point & p) const { return (fx == p.fx && fy == p.fy); }
+  inline bool operator !=(const Point & p) const { return (fx != p.fx || fy != p.fy); }
+  inline bool operator < (const Point & p) const { return (fx < p.fx) && (fy < p.fy); }
+  inline bool operator > (const Point & p) const { return (fx > p.fx) && (fy > p.fy); }
+  inline bool operator <=(const Point & p) const { return (fx <= p.fx) && (fy <= p.fy); }
+  inline bool operator >=(const Point & p) const { return (fx >= p.fx) && (fy >= p.fy); }
+
+ private:
+  int fx;
+  int fy;
+};
+
+
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/RLEMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,564 @@
+#include "system.h"
+#include "RLEMap.h"
+#include "RLEPair.h"
+#include "tcl_interface.h"
+#include "status_message.h"
+
+ /* *****************************************************************
+ * RLEMap.cc - Member functions for an RLEMap *
+ *
+ * RLEMap() - Constructor
+ * ~RLEMap() - Destructor
+ *
+ * int & imageLength();
+ * int & imageWidth();
+ * MapStatus & status();
+ *
+ * Below is an index of the other functions and the files where they
+ * appear.
+ *
+ * MapStatus readMap(char * filename) - defined further down in this file
+ * MapStatus writeMap(char * filename);
+ *
+ * // Data Access and low level manipulation functions
+ * RLEPairs * row(int i) - Returns a pointer to the list of RLEPairs
+ * for row i.
+ * MapStatus setBit(Point point, Color clr);
+ * Color readBit(Point point);
+ *
+ *
+ ***************************************************************/
+
+RLEMap::RLEMap()
+/*--------------------------------------------------------------
+Constructor: creates an empty map - no row array, zero length and
+width, status EMPTY.
+---------------------------------------------------------------*/
+  : fMapData(NULL),
+    fImageLength(0), fImageWidth(0),
+    fStatus(EMPTY)
+{ }
+
+
+
+ RLEMap::~RLEMap()
+/*--------------------------------------------------------------
+Primary Purpose: destructor
+Effects: Deletes the RLEPairs list of every row, then the array of
+         row pointers.  That array is created with new[] (in readMap()
+         and tilt()), so it has to be released with delete[]; using
+         plain delete on it is undefined behaviour.
+Constraints: fImageLength must equal the number of entries in fMapData
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+  if (fMapData == NULL)
+    return;                       // nothing was ever allocated
+
+  // each row is a separately allocated RLEPairs object
+  for (int i = 0; i < fImageLength; i++)
+    delete fMapData[i];
+
+  delete[] fMapData;              // array of rows
+  fMapData = NULL;
+}
+
+int & RLEMap::imageLength()
+/*--------------------------------------------------------------
+Return Value: reference to the vertical length of the image in pixels
+              (the number of rows); assignable because it is a reference
+Constraints: meaningful once readMap() has loaded an image
+Rev: 10/6 KM
+---------------------------------------------------------------*/
+{
+  return this->fImageLength;
+}
+
+
+int & RLEMap::imageWidth()
+/*--------------------------------------------------------------
+Return Value: reference to the horizontal width of the image in pixels;
+              assignable because it is a reference
+Constraints: meaningful once readMap() has loaded an image
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+  return this->fImageWidth;
+}
+
+
+MapStatus & RLEMap::status()
+/*--------------------------------------------------------------
+Return Value: reference to the current status (EMPTY, VALID etc.);
+              callers may also assign through it
+Rev: 10/6/95 KM
+---------------------------------------------------------------*/
+{
+  return this->fStatus;
+}
+
+
+
+RLEPairs * RLEMap::operator [](int i)
+/*--------------------------------------------------------------
+Arguments: i - the row # of the RLEPair list wanted (not range checked)
+Return Value: pointer to the list of RLEPairs stored for row i
+Rev: 10/20/95 KM
+---------------------------------------------------------------*/
+{
+  RLEPairs ** const rowSlot = fMapData + i;
+  return *rowSlot;
+}
+
+
+RLEPairs * RLEMap::row(int i)
+// Named twin of operator []: the run list stored for row i (i is not range checked)
+{
+  RLEPairs * const rowList = fMapData[i]; return rowList;
+}
+
+
+
+MapStatus RLEMap::readMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read an RLEMap from a TIFF file
+Arguments: filename of TIFF file
+Return Value: A MapStatus, either VALID or READERROR
+Effects: reads a two level TIFF file row by row.  Fields set:
+     fImageWidth  - the pixel width of the image
+     fImageLength - the vertical pixel length of the image
+     fStatus      - VALID or READERROR (same as the return value)
+     fMapData     - an array of pointers to lists of RLEPairs
+  Rows are inverted unless the file is min-is-white, so that a set
+  bit always means black.  The scanline buffer is freed on return.
+Constraints: filename must be a two level TIFF file; a file whose
+  scanlines do not fit one bit per pixel is rejected with READERROR.
+  Rows loaded by an earlier call are not freed here.
+Rev: 10/20/95 Portions Borrowed from Assignment 1
+---------------------------------------------------------------*/
+{
+  short photometric = -1;  // stays "unknown" if the file has no such tag
+  // Open File - Read length and width
+  TIFF *tif = TIFFOpen (filename, "r");
+  if(tif == NULL)
+    return (fStatus = READERROR);
+
+  TIFFGetField (tif, TIFFTAG_IMAGELENGTH, &fImageLength);
+  TIFFGetField (tif, TIFFTAG_IMAGEWIDTH, &fImageWidth);
+  TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photometric);
+
+  printf("open succeeded on file %s. length = %d. width = %d ",
+         filename, fImageLength, fImageWidth);
+  if(photometric == PHOTOMETRIC_MINISWHITE)
+    printf("min-is-white format\n");
+  else if(photometric == PHOTOMETRIC_MINISBLACK )
+    printf("min-is-black format\n");
+  else
+    printf("with an unknown photometric: %d\n", photometric);
+
+  // One bit per pixel; refuse anything wider instead of overrunning buf
+  int numCharsInBuf = fImageWidth / 8 +1 ;
+  if ((int) TIFFScanlineSize(tif) > numCharsInBuf)
+  {
+    TIFFClose(tif);
+    return (fStatus = READERROR);
+  }
+  unsigned char * buf = new unsigned char[numCharsInBuf];
+  fMapData = new RLEPairs*[fImageLength];
+  for (int row = 0; row < fImageLength; ++row)
+  {
+    TIFFReadScanline(tif,buf,row,0);
+    if(photometric != PHOTOMETRIC_MINISWHITE) /* invert anything except white */
+      invertBitsInBuffer(buf, numCharsInBuf);
+    // Create a list of RLEPairs for this row and fill with buffer data
+    fMapData[row] = new RLEPairs(row);
+    fMapData[row]->fill(buf, numCharsInBuf, row);
+  }
+  TIFFClose(tif);
+  delete[] buf;      // the scanline buffer used to be leaked
+  fStatus = VALID;
+  return VALID;
+}
+
+short int RLEMap::grayScale(Point ul, Point lr)
+// Share of black pixels in the rectangle ul..lr, scaled to 0..255
+{
+  const int black = pixelsInRegion(ul, lr);
+  // both corners are inclusive, hence the +1 on each side
+  const int area = (lr.x() - ul.x() + 1) * (lr.y() - ul.y() + 1);
+  if (black > area) {
+    // more black pixels than pixels: the row data must be inconsistent
+    printf("Uh oh! Area = %d and pixels = %d\n", area, black);
+    assert(area >= black);
+  }
+  return (short int)(((float)black / area) * 255);
+}
+
+int RLEMap::pixelsInRegion(Point ul, Point lr)
+// Number of black pixels inside the rectangle whose upper left corner is
+// ul and whose lower right corner is lr; both corners are included.
+{
+  // sanity checks on the rectangle (active only when asserts are compiled in)
+  assert (ul >= Point(0,0));
+  assert (ul <= lr);
+  assert (lr <= Point(fImageWidth, fImageLength));
+  const int leftCol = ul.x();
+  const int rightCol = lr.x();
+  const int lastRow = lr.y();
+
+  // add up the black pixels of each row's slice [leftCol, rightCol]
+  int total = 0;
+  int r = ul.y();
+  while (r <= lastRow)
+  {
+    total += row(r)->pixelsBetween(leftCol, rightCol);
+    r++;
+  }
+  return total;
+}
+
+ListElement*
+RLEMap::FindNearVertDot(int startCol, int endCol, int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Find the run of black pixels that is vertically nearest
+           to startRow, looking towards endRow, and that overlaps the
+           column range startCol..endCol.
+Arguments: startRow is the row to start from; startCol and endCol are
+           the left and right boundaries of the search.  The search
+           moves down when startRow < endRow and up otherwise.
+Return Value: An RLE interval - pointer to a list element in RLEPairs -
+           or NULL if no run in the searched rows overlaps the columns.
+Effects:   none on the map.  The search begins two rows away from
+           startRow and includes endRow.
+Constraints: the rows visited must exist; no range check is made.
+---------------------------------------------------------------*/
+{
+  // +1 walks down the page, -1 walks up
+  const int step = (startRow < endRow) ? 1 : -1;
+
+  // the row next to startRow is skipped: start two rows away
+  for (int r = startRow + 2 * step;
+       (step > 0) ? (r <= endRow) : (r >= endRow);
+       r += step)
+  {
+    ListElement* run = fMapData[r]->first;
+    for ( ; run != NULL; run = run->next)
+    {
+      const RLEPair * pair = (RLEPair *) run->item;
+
+      // overlap test: the run starts before the range ends and ends
+      // after the range starts
+      if ((pair->start <= endCol) && (pair->end >= startCol))
+        return run;
+    }
+  }
+
+  return NULL;
+}
+
+
+
+
+
+ListElement*
+RLEMap::FindNearHorizDot(int startCol, int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Among rows startRow..endRow, find the run that is
+           horizontally nearest to startCol: in each row take the first
+           run ending at or after startCol, keep the smallest start.
+Arguments: startCol is column to start from, startRow and endRow are
+           the first and last row searched (both included)
+Return Value: An RLE interval - pointer to a list element in RLEPairs -
+           or NULL when no such run starts left of fImageWidth.
+Constraints: rows must exist (not checked); ties go to the earlier row
+---------------------------------------------------------------*/
+{
+  ListElement* best = NULL;
+  int bestStart = fImageWidth;   // anything found must start left of this
+  for (int r = startRow; r <= endRow; r++) {
+    // skip the runs that end before startCol
+    ListElement* run = fMapData[r]->first;
+    while (run != NULL && ((RLEPair *) run->item)->end < startCol)
+      run = run->next;
+    if (run == NULL)
+      continue;
+    const int runStart = ((RLEPair *) run->item)->start;
+    if (runStart < bestStart) {
+      best = run;
+      bestStart = runStart;
+    }
+  }
+  return best;
+}
+
+
+
+
+void testRLEMap(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Test the reading of tiff files into RLE format
+Effects: Reads filename into an RLEMap, prints the map if it has fewer
+ than 100 rows, then runs testpixelsBetween on it.  If the file cannot
+ be read the checks are skipped: there would be no rows to examine.
+---------------------------------------------------------------*/
+{
+  RLEMap m;
+  if (m.readMap(filename) != VALID) return;   // bad file, nothing to test
+  if (m.imageLength() < 100) printMap(&m);
+  testpixelsBetween(&m);   // checks pixelsBetween row by row
+}
+
+
+void printMap(RLEMap * map)
+// Draws the map on cout, one text line per image row: an "X" for every
+// black pixel and a blank for every white pixel up to the last run.
+{
+  RLEMap & m = *map;
+  int c;
+
+  for (int r = 0; r < m.imageLength(); r++)
+  {
+    // first column that has not been written yet on this line
+    int nextCol = 0;
+    RLEPairs * runs = m[r];
+
+    ListElement * node = runs->first;
+    while (node != NULL)
+    {
+      const RLEPair * run = (RLEPair *)(node->item);
+
+      // white gap in front of the run, then the run itself
+      for (c = nextCol; c < run->start; c++)
+        cout << " ";
+      for (c = run->start; c <= run->end; c++)
+        cout << "X";
+
+      nextCol = run->end + 1;
+      node = node->next;
+    }
+    cout << "" << endl;
+  }
+}
+
+void RLEMap::printPairs(int startRow, int endRow)
+/*--------------------------------------------------------------
+Primary Purpose: Prints the RLE pairs of rows startRow..endRow (both
+  included) to cout, one line per row, each pair as (start,end)
+Constraints: both rows must exist in the map (not checked)
+Rev:11/2 KM
+---------------------------------------------------------------*/
+{
+  cout << "printing rows " << startRow << " to " << endRow << endl;
+
+  int r = startRow;
+  while (r <= endRow)
+  {
+    cout << "row " << r << " ";
+
+    // every pair of the row, in list order
+    RLEPairs * runs = fMapData[r];
+    for (ListElement * node = runs->first; node != NULL; node = node->next)
+    {
+      const RLEPair * run = (RLEPair *)(node->item);
+      cout << "(" << run->start << "," << run->end <<")";
+    }
+
+    cout << endl;
+    r++;
+  }
+
+}
+
+void testpixelsBetween(RLEMap * map)
+// Self test for RLEPairs::pixelsBetween, run on every row of the map.
+// The black pixels counted over consecutive windows of 29 columns
+// (0-28, 29-57, ...) must add up to the row's numPixels.  The two
+// totals are printed for each row whose sum is not zero, and an
+// assert checks that they agree.
+//
+// The rows are only borrowed from the map, which deletes them in its
+// destructor, so nothing is deleted here.  (Deleting the last row at
+// the end of this function made the map free it a second time, and
+// deleted an uninitialised pointer when the map had no rows.)
+{
+  const int window = 29;
+
+  for (int row = 0; row < map->imageLength(); row++)
+  {
+    RLEPairs * pairs = (*map)[row];
+    int sum = 0;
+
+    // window bounds are inclusive: [start, start + 28]
+    for (int start = 0; start <= map->imageWidth(); start += window)
+    {
+      const int end = start + window - 1;
+      // printf("row %d col %d to %d\n", row, start, end);
+      sum += pairs->pixelsBetween(start, end);
+    }
+
+    // report the totals of non-empty rows, then verify them
+    if (sum !=0)
+      printf("row %d sum was %d , should be %d\n", row, sum, pairs->numPixels);
+    assert(sum == pairs->numPixels);
+
+  }
+}
+
+int RLEMap::deskew()
+/* going to be a (near-blind) steal from fateman */
+/*--------------------------------------------------------------
+Primary Purpose: deskewing an RLEMap
+Arguments: none
+Return Value: 1 if the page is altered, 0 if not
+Effects: if the negated get_skew() value reaches MINIMUM_SKEW_ANGLE in
+   either direction, tilt_and_slant() is called with step 1/|tan|.
+Constraints: RLE shouldn't be tilted too much (< 10deg)
+NOTE(review): the old comment called the angle radians, yet it is scaled
+   by pi/180 before tan() like degrees - confirm get_skew()'s unit.
+Rev: AR 11/1/95
+---------------------------------------------------------------*/
+{
+  const double skew = -get_skew(this);
+
+  // too small to bother with, in both directions
+  if (!((skew >= MINIMUM_SKEW_ANGLE) || (skew <= - MINIMUM_SKEW_ANGLE)))
+    return 0;
+
+  const double h = tan(skew / (180 / M_PI));
+  if (h > 0) {
+    tilt_and_slant(1/h, 1); /* clockwise */
+    return 1;
+  }
+  if (h < 0) {
+    tilt_and_slant(-(1/h), -1); /* counter clockwise */
+    return 1;
+  }
+  return 0;   // tangent is exactly zero: nothing to do
+}
+
+#define DEBUG_TILT_AND_SLANT 1
+void RLEMap::tilt_and_slant(double step, int direction)
+/*--------------------------------------------------------------
+Primary Purpose: approximate a rotation of the RLEMap by combining a
+           tilt (tilt()) with a slant (slant())
+Arguments: step - passed unchanged to tilt() and slant()
+           direction - if > 0: tilt first, then slant; otherwise slant
+           first (with the sign flipped), then tilt
+Return Value: none
+Effects: the map is tilted and slanted in place.  Not an exact rotation.
+Rev: AR 11/1/95
+---------------------------------------------------------------*/
+{
+  if(DEBUG_TILT_AND_SLANT)
+    printf("Call to tilt_and_slant: step = %lf, direction = %d\n ", step, direction);
+
+  if (direction <= 0)
+  {
+    slant(step, -direction);
+    tilt(step, direction);
+    return;
+  }
+
+  tilt(step, direction);
+  slant(step, direction);
+}
+
+#define DEBUG_SLANT 1
+/* "slant a picture by shifting lines horizontally 1 bit every step rows"
+ ;; dir 1 means shift to right as row number increases
+ ;; dir -1 means shift by left
+ ;; this does not rotate the picture, since rows are each unchanged.
+ ;; the effect of a positive direction, say (slantpic pic 3 3 1)
+ ;; is to "italicize".
+*/
+void RLEMap::slant(double step, int direction)
+{
+  // Shears the map: each row is shifted sideways by a whole number of
+  // columns, and that number grows by direction every (int)step rows.
+  if (DEBUG_SLANT)
+    printf("Slant called, step = %lf, dir = %d\n", step, direction);
+  fImageWidth += (int)((double)fImageLength / (double)step);
+  const int rowsPerStep = (int)step;
+  int offset = direction;       // shift applied to the current row
+  int stepsDone = 1;
+  for (int r = 0; r < fImageLength; r++)
+  {
+    if (r > stepsDone * rowsPerStep) {   // past another block of rows
+      offset += direction;
+      stepsDone++;
+    }
+    fMapData[r]->shift(offset);
+  }
+}
+
+
+
+void RLEMap::display_intervals(char* color)
+{ /* Redraws the map on the Tk work space in the given color. */
+ if(!DISPLAY_IMAGE)
+ return; /* display is switched off: do nothing */
+ double skip;
+ last_status = 0.0;
+ printf("SCALE_FACTOR = %lf ", SCALE_FACTOR);
+ skip = 1.0 / SCALE_FACTOR; /* map rows advanced per drawn line */
+ printf("Skip = %lf\n", skip);
+ /* NOTE(review): assumes SCALE_FACTOR is not zero - confirm */
+ /* delete any garbage hanging around */
+ docommand(".main_window.display.work_space delete all");
+ /* drawn line i shows map row (int)(i*skip); stop past the last row */
+ set_status("Displaying Image: 0%...");
+ for(int i= 0; (int)(i*skip) < ((double)fImageLength); i++)
+ {
+ set_display_status((int)(i*skip), fImageLength);
+ fMapData[(int)(i*skip)]->draw_pairs(i, color, 1.0/skip); /* y = i, line width = 1/skip */
+ }
+ last_status = 0.0;
+ update();
+ set_status("Displaying Image: Done");
+}
+
+void RLEMap::tilt(double step, int direction)
+{
+/* Rebuilds the map with fImageWidth/step extra rows; tilt_row() copies
+   each old row into the new rows in slices.  Then redraws the map. */
+  int old_height = fImageLength;
+  int new_height = /* ceiling */ (int)(((double)fImageWidth) / step) + old_height;
+  int delta = old_height - new_height;
+  RLEPairs ** new_data = new RLEPairs*[new_height];
+  for(int i = 0; i < new_height; i++)
+    new_data[i] = new RLEPairs(i);
+  for(int j = 0; j < old_height; j++)
+    tilt_row(j, delta, new_data, step, direction);
+  // tilt_row() only copies out of the old rows, so they can go now (they used to be leaked)
+  for(int k = 0; k < old_height; k++)
+    delete fMapData[k];
+  delete[] fMapData;
+  fMapData = new_data;
+  fImageLength = new_height;
+  display_intervals("black");
+}
+
+
+void RLEMap::tilt_row(int old_row_index, int old_new_row_diff, RLEPairs** new_data, double step, int direction)
+{
+/* Cuts row old_row_index into slices about step columns wide and merges each slice into
+   new_data, moving one row in direction per slice, while the target row is in 0..fImageLength-1. */
+  double cur_x = 0; /* I don't know what will happen with negative rows */
+  double new_x;
+  int cur_y = old_row_index + (old_new_row_diff * direction);
+
+  while(((new_x = cur_x + step) < fImageWidth) &&
+        (cur_y >= 0) && (cur_y < fImageLength))
+  {
+    RLEPairs* new_pairs = (fMapData[old_row_index])->extract((int) cur_x, (int)new_x);
+    new_data[cur_y]->merge(new_pairs);
+    delete new_pairs;   // merge() only reads its argument; this copy used to be leaked
+    cur_x = new_x + 1;
+    cur_y += direction;
+  }
+}
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/RLEMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,103 @@
+#ifndef _RLEMap_H
+#define _RLEMap_H
+#include "system.h"
+#include "RLEPair.h"
+#include <math.h>
+
+
+
+/* The RLEMap Class is a run length encoded representation of a bitmap
+ The I/O functions defined for this class read and write data from
+ TIFF format files using the Silicon Graphics TIFF library.
+
+*/
+
+class RLEMap{
+ public:
+ // NOTE(review): no copy constructor or assignment operator is declared,
+ // so copies share fMapData with the original - confirm none are made.
+ RLEMap();
+ ~RLEMap();
+
+ // Selector and Modifier functions.  imageLength(), imageWidth() and
+ // status() return references, so callers can assign through them.
+ int & imageLength();
+ int & imageWidth();
+ inline int pixels_between(int start, int finish, int row_num){
+ return fMapData[row_num]->pixelsBetween(start, finish);
+ };
+
+ MapStatus & status();
+
+ void display_intervals(char* color);
+ // Searches for the run nearest to a starting position; NULL if none
+ ListElement* FindNearHorizDot(int startCol, int startRow, int endRow);
+ ListElement* FindNearVertDot(int startCol, int endCol,
+ int startRow, int endRow);
+
+ // I/O operations. Read and Write are from/to 2 level TIFF files
+ // NOTE(review): writeMap is only declared; RLEMap.cc has no definition
+ MapStatus readMap(char * filename);
+ MapStatus writeMap(char * filename);
+
+ // Data Access and low level manipulation functions
+ // Access a row of the Map
+ // usage: rmap[i] returns row i of the RLEMap.
+ // row performs the same function
+ RLEPairs * operator [](int i);
+ RLEPairs * row(int i);
+ // Black pixel count / density (0..255) of the rectangle ul..lr
+ int pixelsInRegion(Point ul, Point lr);
+ short int grayScale(Point ul, Point lr);
+
+ // Deskewing: deskew() drives tilt_and_slant(), which uses the rest
+ void tilt_and_slant(double, int);
+ int deskew();
+ void tilt(double , int);
+ void slant(double, int);
+ void tilt_row(int, int, RLEPairs** , double, int);
+ RLEPairs ** fMapData;
+ // Array length = fImageLength One list
+ // for each row
+
+
+ // prints RLEMap representation for row range
+ void printPairs(int startRow, int endRow);
+
+private:
+ // Size and status info
+ int fImageLength;
+ int fImageWidth;
+
+ MapStatus fStatus;
+};
+
+void printMap(RLEMap * map);
+void testpixelsBetween(RLEMap * map);
+double get_skew(RLEMap*);
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/RLEPair.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,357 @@
+#include "system.h"
+#include "tcl_interface.h"
+
+
+
+
+/**** RLEPair.cc
+ Member functions for RLEPairs
+ RLEPair functions defined in the function header
+ rev 10/20 Kathey Marsden
+*/
+
+
+
+
+RLEPairs::RLEPairs(int rownum)
+// Creates an empty run list for image row rownum; the black pixel
+// count starts at zero.
+  : List(), numPixels(0), row(rownum)
+{
+}
+
+
+RLEPairs::~RLEPairs()
+{
+  // first free the RLEPair stored in every element ...
+  ListElement *node = first;
+  while (node != NULL) {
+    if (node->item != NULL) delete (RLEPair *) (node->item);
+    node = node->next;
+  }
+  while (!IsEmpty()) Remove();   // ... then take the elements off the list
+}
+
+void RLEPairs::print_pairs()
+// Prints every pair of the row as (start->end) on one line of stdout
+{
+  for (ListElement *node = first; node != NULL; node = node->next) {
+    const RLEPair* run = (RLEPair *)(node->item);
+    printf("(%d->%d)", run->start, run->end);
+  }
+  printf("\n");
+}
+
+void RLEPairs::draw_pairs(int y_coord, char* color, double width)
+// Sends one "create line" command per pair, after scale(); -width is added only when width > 1
+{
+  scale(y_coord);
+  for (ListElement *node = first; node != NULL; node = node->next) {
+    RLEPair* run = (RLEPair *)(node->item);
+    int x_from = run->start;
+    int x_to = run->end;
+    scale(x_from);
+    scale(x_to);
+    if (!(width > 1))
+      docommand(".main_window.display.work_space create line %d %d %d %d -fill %s", x_from, y_coord, x_to, y_coord, color);
+    else
+      docommand(".main_window.display.work_space create line %d %d %d %d -fill %s -width %d", x_from, y_coord, x_to, y_coord, color, (int)width);
+  }
+}
+
+void RLEPairs::shift(int bits)
+/*--------------------------------------------------------------
+Primary Purpose: move the whole row sideways by bits columns
+Arguments: bits - added to the start and the end of every pair
+           (a negative value moves the row to the left)
+Return Value: none
+Effects: every RLEPair in the list is changed in place; nothing is
+         clipped and numPixels is not touched
+Rev: 11/1 AR
+---------------------------------------------------------------*/
+{
+  ListElement *node = first;
+  while (node != NULL)
+  {
+    RLEPair* run = (RLEPair *)(node->item);
+    run->start += bits; run->end += bits;
+    node = node->next;
+  }
+}
+
+void RLEPairs::fill(unsigned char * contents, int contentsLength,
+ int contentsRow)
+/*--------------------------------------------------------------
+Primary Purpose: Take the contents of a line scanned from a
+TIFF file and put it into this list of RLEPairs
+Arguments: contents is the result of a TIFFReadScanline function
+ (one bit per pixel, high bit = leftmost column, set bit = black)
+ contentsLength is the number of unsigned chars in contents
+ contentsRow is the Row that this interval belongs to
+Effects: Unlinks the pairs the list held before, resets numPixels,
+ then scans contents and for each range of black pixels adds
+ an RLEPair to the list and its length to numPixels.
+ Prints a warning if contentsRow differs from this list's row.
+Constraints: a run is only added when a white pixel follows it, so a
+ run that reaches the very last bit of contents is NOT added.
+ Callers need at least one white bit at the end of the buffer.
+NOTE(review): removeAt() unlinks the old elements; whether the
+ RLEPair items they pointed to are freed is not visible here.
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+ if(contentsRow != row)
+ printf("Warning: merging to %d what appears to belong at %d\n", row, contentsRow);
+
+ bool inPair = FALSE; // Flag set to TRUE when Run of black starts
+ short int startCol =0;
+ short int endCol = 0;
+ short int curCol= 0;
+ unsigned char nextChar;
+
+// Remove old pairs.  The successor is fetched before removeAt(),
+// because the element may no longer be valid once it has been removed.
+ ListElement *ptr = first;
+ while (ptr != NULL)
+ {
+ ListElement *following = ptr->next;
+ removeAt(ptr);
+ ptr = following;
+ }
+ numPixels = 0; // recount from zero: fill also rebuilds rows (see merge)
+
+ for (int c = 0; c < contentsLength; c++)
+ {
+ nextChar = contents[c];
+ if (nextChar == 255)
+ {
+ endCol = c*8+7;
+ // If this is a new pair we also have to set start col
+ if (!inPair)
+ {
+ startCol = c*8;
+ inPair = TRUE;
+ }
+ }
+ else if(nextChar == 0)
+ {
+ if (inPair)
+ {
+ RLEPair * pair = new RLEPair(startCol,endCol, contentsRow);
+ Append(pair);
+ inPair = FALSE;
+ numPixels += endCol - startCol +1;
+ }
+ }
+ else { // Start Shifting and look at each bit
+ // high bit on left
+ for (int i = 7; i >=0; i--)
+ {
+ curCol = 7 + c * 8 - i;
+ if ((nextChar>>i)&1) // black pixel: start or extend a Pair
+ {
+ if (!inPair) { inPair = TRUE; startCol = curCol; }
+ endCol = curCol;
+ }
+ else if (inPair) // white pixel: Close off the Pair add to the list
+ {
+ RLEPair * pair = new RLEPair(startCol,endCol, contentsRow);
+ Append(pair);
+ inPair = FALSE;
+ numPixels += endCol - startCol+1;
+ }
+ }
+ }
+ }
+}
+
+
+#define DEBUG_PIXELS_BETWEEN 0
+int RLEPairs::pixelsBetween(int startCol, int endCol)
+/*--------------------------------------------------------------
+Return Value: Returns the number of black pixels in columns startCol
+              to endCol, both included.
+Effects: If startCol is not less than endCol a warning is printed and
+         startCol is replaced by endCol, so only column endCol is
+         counted.  Note that the warning also appears when the two
+         are equal.  With DEBUG_PIXELS_BETWEEN set every call is traced.
+Constraints: 0 <= startCol < endCol; endCol < imageWidth of RLEMap.
+             The pairs must be in ascending column order: the scan
+             stops at the first pair that starts beyond endCol.
+Rev: 10/20 KM
+---------------------------------------------------------------*/
+{
+  if (DEBUG_PIXELS_BETWEEN)
+    printf("Call to pixels between: start = %d, finish = %d\n", startCol, endCol);
+
+  /* assert(startCol < endCol); */
+  if (startCol >= endCol)
+  {
+    printf("Warning, startcol %d not less then endcol %d (setting startcol = endcol)\n", startCol, endCol);
+    startCol = endCol;
+  }
+
+  int total = 0;
+  ListElement *node = first;
+
+  while (node != NULL)
+  {
+    const RLEPair * run = (RLEPair *)(node->item);
+    const int runStart = run->start;
+    const int runEnd = run->end;
+
+    // Don't loop anymore if past endCol
+    if (runStart > endCol)
+      break;
+
+    // Pairs that end before the range contribute nothing.  Any other
+    // pair overlaps the range, and the overlap is the pair clipped to
+    // [startCol, endCol].
+    if (runEnd >= startCol)
+    {
+      const int from = (runStart <= startCol) ? startCol : runStart;
+      const int to = (runEnd >= endCol) ? endCol : runEnd;
+
+      total += to - from + 1;
+    }
+
+    node = node->next;
+  }
+
+  return total;
+
+}
+
+
+RLEPairs * RLEPairs::extract(int startCol, int endCol)
+/*--------------------------------------------------------------
+Primary Purpose: Makes a copy of a section of this row
+Arguments: startCol - starting column to extract
+           endCol - ending column to extract
+Return Value: a pointer to a new RLEPairs, allocated with new, that
+           holds a copy of every pair overlapping startCol..endCol,
+           clipped to that range.  It is empty if nothing overlaps.
+           Its numPixels is left at 0.
+Effects:   none on this list.  The row recorded in each source pair
+           is copied into the new pair through a local variable; it
+           used to be written into this list's own row member, so
+           that making a copy could silently renumber the source row.
+Constraints: the pairs must be in ascending column order - the scan
+           stops at the first pair that starts beyond endCol.
+Rev: KM 11/16
+---------------------------------------------------------------*/
+{
+  RLEPair * item;
+  RLEPairs * returnPairs = new RLEPairs(row);
+
+  for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next)
+  {
+    item = (RLEPair *)(ptr->item);
+    const int pairStart = item->start;
+    const int pairEnd = item->end;
+    // a local on purpose: the member "row" of this list must stay as it is
+    const int pairRow = item->row;
+
+    // Don't loop anymore if past endCol
+    if (pairStart > endCol) break;
+
+    // range starts after this pair
+    if (pairEnd < startCol) ; // do nothing
+
+    // range starts and ends in this pair
+    else if (pairStart <= startCol && pairEnd >= endCol)
+    {
+      RLEPair * addpair = new RLEPair(startCol,endCol, pairRow);
+      returnPairs->Append(addpair);
+    }
+    // range starts in this pair but ends later
+    else if (pairStart <= startCol && pairEnd <= endCol)
+    {
+      RLEPair * addpair = new RLEPair(startCol,pairEnd, pairRow);
+      returnPairs->Append(addpair);
+    }
+
+    // range includes this whole pair
+    else if (pairStart >= startCol && pairEnd <= endCol)
+    {
+      RLEPair * addpair = new RLEPair(pairStart,pairEnd, pairRow);
+      returnPairs->Append(addpair);
+    }
+    // range ends in the middle of this pair
+    else
+    {
+      RLEPair * addpair = new RLEPair(pairStart,endCol, pairRow);
+      returnPairs->Append(addpair);
+    }
+  }
+
+  return returnPairs;
+}
+
+
+
+void RLEPairs::merge(RLEPairs * pairs)
+/*--------------------------------------------------------------
+Primary Purpose: Inserts the black regions of pairs into this.
+Arguments: pairs - RLEPair list to be combined with this one.
+Return Value: none
+Effects: Modifies this so that it covers the union of both rows: the
+ pairs of both lists are painted into a scratch bitmap, which fill()
+ turns back into pairs.  Does nothing if pairs is NULL or empty.
+ pairs itself is only read and is NOT deallocated here.
+Constraints: both lists in ascending column order (the last element
+ is taken to hold the highest column); columns must not be negative.
+Rev: 11/16/95
+---------------------------------------------------------------*/
+{
+ if((!pairs) || (pairs->length == 0))
+ return;
+
+ // Highest column used by either list.  This list may still be empty
+ // (tilt() merges into freshly created rows): then it has no last element.
+ int lastCol = ((RLEPair *) (pairs->last->item))->end;
+ if (length != 0 && last != NULL)
+ {
+ int ownLastCol = ((RLEPair *) (last->item))->end;
+ if (lastCol < ownLastCol)
+ lastCol = ownLastCol;
+ }
+
+ // One spare byte of white at the end: fill() only closes a run when it
+ // meets a white pixel, so a run ending on a byte boundary would be lost.
+ int numChars = lastCol/8 + 2;
+ uchar * buffer = new uchar[numChars]; // heap; variable length arrays are not standard C++
+ for(int i = 0; i < numChars; i++) buffer[i] = 0;
+
+ for (ListElement * ptr2 = pairs->first; ptr2 != NULL; ptr2 = ptr2->next)
+ {
+ RLEPair * item = (RLEPair *)(ptr2->item);
+ setRange(buffer, item->start, item->end);
+ }
+ for (ListElement * ptr=first; ptr != NULL; ptr = ptr->next)
+ {
+ RLEPair * item = (RLEPair *)(ptr->item);
+ setRange(buffer, item->start, item->end);
+ }
+ fill(buffer, numChars, row);
+ delete[] buffer;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/RLEPair.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,68 @@
+#ifndef _RLEPair_H
+#define _RLEPair_H
+
+#include "list.h"
+
+
+// Indicates a series of black pixels in a row.
+class RLEPair {
+public:
+  // Empty pair at column 0.  row is -1 (unknown), as in the two argument form.
+  RLEPair()
+    :start(0), end(0), row(-1) {}
+  RLEPair(int first, int last, int thisRow)
+    :start(first), end(last), row(thisRow) {}
+  // Row not known: recorded as -1
+  RLEPair(int first, int last)
+    :start(first), end(last), row(-1) {}
+  ~RLEPair() {}
+
+  short int start;  // first black column of the run
+  short int end;    // last black column of the run (included in the run)
+  short int row;    // image row the run belongs to, -1 if unknown
+};
+
+
+
+class RLEPairs :public List {
+ public:
+ // The black runs (RLEPair items) of one image row, kept in a List.
+ RLEPairs(int row_num);
+ ~RLEPairs();
+
+ // Create RLEPair Representation of contents read from TIFF file
+ void fill(unsigned char * contents, int contentsLength, int contentsRow);
+ int pixelsBetween(int start, int end); // black pixels in columns start..end
+ int numPixels; // black pixel total, counted by fill()
+ void shift(int); // adds the argument to every pair's start and end
+ void draw_pairs(int y_coord, char* color, double width);
+ void print_pairs(); // prints the pairs as (start->end)
+
+ RLEPairs * extract(int startcol, int endcol);
+ // create a copy of this from startcol to endcol.
+ // and return pointer to RLEPairs
+
+ void merge(RLEPairs * pairs);
+ // Merges pairs into this.
+
+ private:
+ int row;
+ // row: the row number given to the constructor
+
+};
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Word.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,150 @@
+#include "list.h"
+#include "system.h"
+#include "stdio.h"
+
+Word::Word(char * word, int length)
+/*--------------------------------------------------------------
+Primary Purpose: Create a word without pointers to components
+Arguments: word is the character string; length is its length
+Words created with this constructor have a null component pointer
+Rev: 12/5/95
+Copies at most length characters; the copy is always NUL-terminated.
+---------------------------------------------------------------*/
+{
+    charCount = 1;   // NOTE(review): always 1, whatever length is - confirm intent
+    characters = new char[length + 1]();  // zero-filled, with room for the terminator
+    strncpy(characters, word, length);    // cannot run past the buffer
+    confid = 255;
+    ul = NOPNT;
+    lr = NOPNT;
+    character = NULL;
+    mispelled = 0;
+}
+
+
+Word::Word(ListElement * first, int length)
+/*--------------------------------------------------------------
+Primary Purpose: Build a word from a run of consecutive components
+Arguments: first is a pointer to a ListElement that contains
+  the first Component in the word. length is the number
+  of components in the word.
+Effects: Sets all data members of the word class
+Rev: 11/6/95
+---------------------------------------------------------------*/
+{
+    mispelled = 0;
+    Component * firstComp = (Component *) first->item;
+    Component * item;
+    int i;
+    ListElement * ptr;
+
+    characters = new char[length+1];
+    // "new (Component *)[length]" is not valid standard C++ (an array bound
+    // may not follow a parenthesized type-id); this form is portable.
+    character = new Component *[length];
+    charCount = length;
+    ul = firstComp->ul();
+
+    confid = 256;  // NOTE(review): the string constructor uses 255 - confirm 256 fits Confidence
+
+    for (i = 0, ptr = first; i < length; ptr = ptr->next, i++)
+    {
+        item = (Component *)(ptr->item);
+        characters[i] = item->asciiId();
+        character[i] = item;
+        if (item->confid() < confid)   // keep the lowest component confidence
+            confid = item->confid();
+        if(item->ul().y() < ul.y())    // keep the smallest upper-left y
+            ul.y() = item->ul().y();
+        if(i == length-1) // this is the last character
+            lr = item->lr();
+    }
+
+    characters[length] = '\0';
+    if(0)
+        printf("Identified a word: %s\n", characters);
+
+}
+
+
+Word::~Word()
+{
+    delete[] characters;  // allocated with new[]; delete[] on NULL is a no-op
+    delete[] character;   // frees only the pointer array, not the Components
+}
+
+
+Words::~Words()
+{
+    ListElement * node = first;   // delete each non-null Word stored in the list
+    while (node != NULL) {
+        if (node->item) delete (Word *) (node->item);
+        node = node->next;
+    }
+}
+
+int Words::writeWordPos(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Dump position, confidence, length and text of each word
+Arguments: filename - output file, opened with mode "w"
+Return Value: 1 on success, 0 if the file could not be opened
+Effects: writes one line per word in the layout below
+(each numeric field is right-justified in a 7 character column)
+ upper left x pos
+ upper left y pos
+ word confidence
+ character count
+ string
+ newline
+---------------------------------------------------------------*/
+{
+    FILE * out = fopen(filename, "w");
+    if (!out)
+    {
+        printf("Error openning %s", filename);
+        return 0;
+    }
+    ListElement * node = first;
+    while (node != NULL)
+    {
+        Word * w = (Word *) node->item;
+        node = node->next;
+        if (w->ul.x() != -1)   // x of -1 marks new-line entries, which are not printed
+            fprintf(out, " %6d %6d %6d %6d %s\n", w->ul.x(), w->ul.y(),
+                    w->confid, w->charCount, w->characters );
+    }
+    fclose(out);
+    return 1;
+}
+// Rev: KM 11/25
+
+
+int Words::writeAscii(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write word list to ascii file
+Arguments: filename to write to
+Return Value: 1 if successful 0 if the file could not be opened
+Effects: Writes each word's text to the file, followed by one space
+
+Rev: 11/25 KM
+---------------------------------------------------------------*/
+{
+    FILE * outfile;
+    outfile = fopen(filename, "w");
+    if (outfile == NULL)
+    {
+        printf("Error openning %s", filename);
+        return 0;
+    }
+
+    // An empty list (first == NULL) simply produces an empty file.
+    for (ListElement * ptr = first; ptr !=NULL; ptr = ptr->next)
+    {
+        Word * word = (Word *) ptr->item;
+        if (word == NULL) continue;  // nothing to write for an empty slot
+        fprintf(outfile, "%s ", word->characters );
+    }
+    fclose(outfile);
+    return 1;
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/Word.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,43 @@
+
+#ifndef _WORD_H_
+#define _WORD_H_
+
+#include "list.h"
+#include "Component.h"
+
+class Word {
+ public:
+ // One recognized word: its text plus position and confidence data.
+ // Constructor scans through length components starting at first
+ // to set data fields
+ Word(ListElement * first, int length);
+ Word(char * word, int length);
+ ~Word();
+
+ char * characters; // text string
+ int charCount; // Number of components; the string constructor always stores 1
+
+ short mispelled;
+ // spelled correctly? only valid if the word
+ // has been spellchecked
+
+ Confidence confid; // Lowest confidence among the word's characters
+ Point ul; // upper left point of word
+ Point lr; // lower right point of word
+
+ Component ** character; // an array of components that
+ // make up this word (NULL for the string constructor)
+};
+
+
+class Words:public List{
+    // List of Word objects; the destructor deletes every non-null item.
+ public:
+    int num_words;   // starts at 0; was left uninitialized before
+    Words():List(), num_words(0){};
+    ~Words();
+    int writeWordPos(char * filename);
+    int writeAscii(char * filename);
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/ascii.out Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,4 @@
+abcdefghijklmnopqrstuvwxyz: ;N
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ 0 1 234567890~ ! @#$%%^&* ()+=-,.<>/?'
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/bitmap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,47 @@
+#include "bitmap.h"
+#include "stdio.h"
+
+// Stores the caller's bit buffer b (the pointer only, no copy) with its size.
+Bitmap::Bitmap(char* b, int h, int w)
+    : bits(b),
+      height(h),
+      width(w)
+{}
+
+// Not implemented: filename is ignored and an empty 0x0 bitmap is produced.
+Bitmap::Bitmap(char* filename)
+    : bits(0), height(0), width(0) {}
+
+void Bitmap::WriteToFile(char* filename)
+/* Writes the bitmap as XBM-style C source; exits the process if the file cannot be opened. */
+{
+    FILE* fileout = fopen(filename, "w+");
+    if(! fileout)
+    {
+        printf("Error while trying to write bitmap to %s\n", filename);
+        exit(1);
+    }
+    /* Bytes per row; same as PixelAt()'s width / 8 when width is a multiple of 8 (confirm padding otherwise). */
+    const int bytes_per_row = (width + 7) / 8;
+    fprintf(fileout, "#define %s_width %d\n", filename, width);
+    fprintf(fileout, "#define %s_height %d\n", filename, height);
+    fprintf(fileout, "static char %s_bits[] = {\n", filename);
+    for(int row_counter = 0; row_counter < height; row_counter++)
+    {
+        for(int column_counter = 0; column_counter < bytes_per_row; column_counter++)
+        {   /* a raw %c byte is not valid C source, so each byte is written as a hex literal */
+            fprintf(fileout, "0x%02x,", (unsigned char) bits[row_counter * bytes_per_row + column_counter]);
+            if((column_counter % 16) == 0)
+                fprintf(fileout, "\n"); /* put a newline once in a while */
+        }
+    }
+    fprintf(fileout, "};\n");
+    fclose(fileout);
+}
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/bitmap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,34 @@
+#ifndef BITMAP
+#define BITMAP 1
+
+class Bitmap
+{
+ private:
+ char* bits; // packed pixel data, 8 pixels per char; the constructor stores the pointer without copying
+ int height;
+ int width;
+ public:
+ Bitmap(char* b, int h, int w);
+ Bitmap(char* filename);
+ void WriteToFile(char* filename);
+ inline int Height()
+ {
+ return height;
+ }
+ inline int Width()
+ {
+ return width;
+ }
+ inline int PixelAt(int x, int y) // nonzero when the bit for pixel (x, y) is set
+ {
+ int offset = (y * (width / 8)) + (x / 8); // row stride is width / 8 chars (integer division)
+ char mask = (char) (1 << (x % 8)); // bit x % 8, counting from the least significant bit
+ return (bits[offset] & mask);
+ }
+ // NOTE(review): PixelAt does no bounds checking; row padding when width % 8 != 0 is unconfirmed.
+};
+
+#endif
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/browser.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,35 @@
+#!/usr/sww/bin/wish -f
+
+scrollbar .scroll -command ".list yview"
+listbox .list -yscrollcommand ".scroll set" -relief raised -geometry 20x20
+pack append . .scroll {right filly} .list {left expand fill}
+
+if {$argc > 0} {
+ set dir [lindex $argv 0]
+} else {
+ set dir .
+}
+foreach i [exec ls -aF $dir] {
+ .list insert end $i
+}
+proc browse {dir file} {
+ if {$dir != "."} {set file $dir/$file}
+ if [file isdirectory $file] {
+ exec browser.tcl $file &
+ } else {
+ if [file isfile $file] {
+ exec mx $file &
+ } else {
+ puts stdout "This is neither dir nor regular file"
+ }
+ }
+}
+
+bind .list <Control-c> {destroy .}
+bind .list <Double-Button-1> {
+ foreach i [selection get] {browse $dir $i}
+}
+focus .list
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/caution.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define WarningBits_width 32
+#define WarningBits_height 32
+static char WarningBits_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x10,0x01,
+ 0x00,0x00,0x08,0x07,0x00,0x00,0x08,0x0e,0x00,0x00,0x04,0x0e,0x00,0x00,0x04,
+ 0x1c,0x00,0x00,0x02,0x1c,0x00,0x00,0xe2,0x38,0x00,0x00,0xf1,0x39,0x00,0x00,
+ 0xf1,0x71,0x00,0x80,0xf0,0x71,0x00,0x80,0xf0,0xe1,0x00,0x40,0xf0,0xe1,0x00,
+ 0x40,0xf0,0xc1,0x01,0x20,0xf0,0xc1,0x01,0x20,0xf0,0x81,0x03,0x10,0xe0,0x80,
+ 0x03,0x10,0xe0,0x00,0x07,0x08,0xe0,0x00,0x07,0x08,0xe0,0x00,0x0e,0x04,0x00,
+ 0x00,0x0e,0x04,0xe0,0x00,0x1c,0x02,0xf0,0x01,0x1c,0x02,0xf0,0x01,0x38,0x01,
+ 0xe0,0x00,0x38,0x01,0x00,0x00,0x70,0x01,0x00,0x00,0x70,0xff,0xff,0xff,0x7f,
+ 0xf8,0xff,0xff,0x3f,0x00,0x00,0x00,0x00};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/convertMap.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,183 @@
+/** convertMap.cc
+
+Functions for converting from one Map form to another
+and extracting smaller maps. There are four polymorphic
+functions. The general format is
+
+convertMap(MapType * source,MapType * target, Point ul, Point lr)
+
+MapTypes can be - BitMap or RLEMap,
+source - the map to be converted from
+target - The map to be converted to. Memory will be allocated
+ for target's contents.
+ul and lr mark a range to copy. If both are the global var NOPNT
+the entire map will be copied.
+
+Functions return a MapStatus which will be VALID or OTHERERROR
+if there was an error in the conversion. An error might be due
+to a user specifying an out of range ul and lr
+
+**************************************************************/
+#include "system.h"
+#include "Point.h"
+#include "BitMap.h"
+#include "RLEMap.h"
+#include "convertMap.h"
+
+/**** BitMap Conversion ***/
+
+MapStatus convertMap(BitMap * source, RLEMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose: Convert a BitMap to an RLEMap
+Arguments: source is the map to read, target the map to fill in.
+  ul and lr are accepted but ignored: the whole map is always converted.
+Return Value: Always VALID.
+Effects: Copies width, length and status to target and allocates one
+  RLEPairs list per row, filled from the matching row of source.
+Constraints: source and target must both be non-NULL (asserted).
+Rev: 10/24/95
+---------------------------------------------------------------*/
+{
+    assert(target != NULL);   // target must be allocated with new RLEMap
+                              // before being passed to this function.
+    assert(source != NULL);
+
+    target->imageWidth() = source->imageWidth();
+    target->imageLength() = source->imageLength();
+    target->status() = source->status();
+
+    const int numRows = source->imageLength();
+    // number of char entries per row; the same for every row
+    const int numChars = source->imageWidth() / 8 + 1;
+
+    // "new (RLEPairs*)[n]" is not valid standard C++ (an array bound may
+    // not follow a parenthesized type-id); this form is portable.
+    target->fMapData = new RLEPairs*[numRows];
+
+    for (int i = 0; i < numRows; i++)
+    {
+        // Create a list of RLEPairs for this row and fill with buffer data
+        uchar * rowdata = source->row(i);
+        target->fMapData[i] = new RLEPairs(i);
+        target->fMapData[i]->fill(rowdata, numChars, i);
+    }
+
+    return VALID;
+}
+
+
+
+/**** RLEMap Conversion ***/
+MapStatus convertMap(RLEMap * source, BitMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Purpose: Converts an RLEMap to a BitMap.
+Arguments: ul and lr are ignored; the full map is always converted.
+Return Value: Always VALID.
+Effects: Allocates one cleared char row per source row in target and sets
+  the bit range of every RLEPair in it. Also writes debug text to cout.
+Constraints: target must point to a BitMap previously allocated with
+  new BitMap; source and target must be non-NULL (asserted).
+Rev: 10/24/95
+---------------------------------------------------------------*/
+{
+    assert(target != NULL);   // target must be allocated with new BitMap
+                              // before being passed to this function.
+    assert(source != NULL);
+
+    target->imageWidth() = source->imageWidth();
+    target->imageLength() = source->imageLength();
+    target->status() = source->status();
+
+    // "new (uchar *)[n]" is not valid standard C++ (an array bound may
+    // not follow a parenthesized type-id); this form is portable.
+    target->fMapData = new uchar *[target->imageLength()];
+
+    // number of char entries per row; identical for every row
+    const int numChars = source->imageWidth() / 8 + 1;
+
+    for (int i = 0; i < source->imageLength(); i++)
+    {
+        RLEPairs * rmapRowData = source->row(i);
+
+        // Start from a row with every bit cleared.
+        target->fMapData[i] = new uchar[numChars];
+        for (int j = 0; j < numChars; j++) target->fMapData[i][j] = 0;
+
+        // convert this row from RLE to uchars
+        ListElement * ptr = rmapRowData->first;
+
+        // NOTE(review): the cout output in this loop looks like leftover
+        // debugging; it is kept so the observable output does not change.
+        if (ptr == NULL) cout << "0" << endl;
+        for (; ptr != NULL; ptr = ptr->next)
+        {
+            RLEPair * item = (RLEPair *)(ptr->item);
+            int startX = item->start;
+            int endX = item->end;
+            setRange(target->fMapData[i], startX, endX);
+        }
+        cout << endl << endl;
+
+    }
+
+    return VALID;
+
+
+}
+
+
+//MapStatus convertMap(RLEMap * source, RLEMap * target, Point ul, Point lr)
+/*--------------------------------------------------------------
+Primary Purpose:
+Arguments:
+Return Value:
+Effects:
+Constraints:
+Rev:
+---------------------------------------------------------------*/
+//{
+
+//return EMPTY;
+//}
+
+
+
+void testConvertMap(char * filename)
+// Reads in BitMap and converts to RLEMap then prints on screen
+// Next converts back to BitMap, back to RLEMap and prints
+{
+    BitMap * m = new BitMap;
+    RLEMap * rm = new RLEMap;
+
+    m->readMap(filename);
+    convertMap(m,rm);
+
+    cout << "length chk " << m->imageLength()<< "==" << rm->imageLength()<< endl;
+    cout << "width chk " << m->imageWidth()<< "==" << rm->imageWidth() << endl;
+    cout << "status chk"<<m->status() << "==" << rm->status() << endl;
+
+    printMap(rm);
+
+    // Now test conversion the other way
+    delete m;
+    m = new BitMap;
+    convertMap(rm,m);
+    delete rm;
+    rm = new RLEMap;
+    convertMap(m,rm);
+    printMap(rm);
+    // Release both maps; they used to leak when the function returned.
+    delete rm;
+    delete m;
+}
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/convertMap.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,53 @@
+#ifndef _CONVERTMAP_H
+#define _CONVERTMAP_H
+
+/** convertMap.h
+Functions for converting from one Map form to another
+and extracting smaller maps. There are four polymorphic
+functions. The general format is
+
+convertMap(MapType * source,MapType * target, Point ul, Point lr)
+
+MapTypes can be - BitMap, RLEMap, or VRLEMap
+source - the map to be converted from
+target - The map to be converted to. Memory will be allocated
+ for target's contents.
+ul and lr mark a range to copy. If either are the global var NOPNT,
+the entire map will be copied.
+
+Functions return a MapStatus which will be VALID or OTHERERROR
+if there was an error in the conversion. An error might be due
+to a user specifying an out of range ul and lr.
+
+**************************************************************/
+#include "system.h"
+#include "Point.h"
+#include "BitMap.h"
+#include "RLEPair.h"
+#include "RLEMap.h"
+#include "Point.h"
+
+
+
+/**** BitMap Conversion ***/
+MapStatus convertMap(BitMap * source, BitMap * target, Point ul = 0,
+ Point lr= 0);
+MapStatus convertMap(BitMap * source, RLEMap * target, Point ul= 0,
+ Point lr= 0);
+
+
+/**** RLEMap Conversion ***/
+MapStatus convertMap(RLEMap * source, BitMap * target, Point ul = 0,
+ Point lr = 0);
+MapStatus convertMap(RLEMap * source, RLEMap * target, Point ul= 0,
+ Point lr= 0);
+#endif
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/deskew.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,14 @@
+#include "deskew.h"
+#include "get_skew.h"
+#include "rotate.h"
+#include "bitmap.h"
+#include "RLEMap.h"
+
+// Measures the skew of r with get_skew() and rotates r by that angle.
+void deskew(RLEMap* r)
+{
+    r->rotate(get_skew(r));
+}
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/deskew.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,7 @@
+#ifndef DESKEW
+#define DESKEW 1
+#include "bitmap.h"
+class RLEMap;  // deskew.cc defines deskew(RLEMap*), so declare that overload here
+void deskew(RLEMap* r);
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/edit2.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define editor_width 32
+#define editor_height 32
+static char editor_bits[] = {
+ 0xe0,0xff,0x0f,0x00,0x30,0x00,0x08,0x00,0x28,0x70,0x09,0x00,0x24,0x00,0x08,
+ 0x00,0x22,0xd0,0x09,0x00,0x3f,0x00,0x08,0x00,0x01,0x00,0x08,0x00,0x71,0x01,
+ 0x08,0x00,0x01,0x00,0x08,0x00,0x01,0x00,0x08,0x00,0x61,0xb7,0x08,0x00,0x01,
+ 0x00,0x08,0x00,0xa1,0xdd,0x08,0x00,0x01,0x00,0x08,0x00,0x61,0xdb,0x08,0x00,
+ 0x01,0x00,0x08,0x00,0xa1,0xbb,0xe0,0x01,0x01,0x00,0x18,0x06,0x61,0xbb,0x04,
+ 0x08,0x01,0x00,0x62,0x10,0xe1,0xd6,0x99,0x20,0x01,0xf8,0xff,0x23,0x61,0x07,
+ 0x72,0x40,0x01,0x00,0x04,0xc0,0x01,0x00,0x18,0xa0,0x01,0x00,0xe8,0xd0,0x01,
+ 0x00,0x08,0xeb,0xff,0xff,0x0f,0xf4,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0xf0,
+ 0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0xc0};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/eye.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define eye_width 32
+#define eye_height 32
+static char eye_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0x07,0x00,0xff,0xff,0x3f,
+ 0x00,0x00,0xfc,0xff,0x00,0x00,0xc0,0xff,0x03,0x00,0x00,0xfe,0x0f,0x00,0x00,
+ 0xf0,0x1f,0x00,0x00,0xc0,0x3f,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0xfc,0x55,
+ 0x05,0x00,0xf0,0xaa,0x2a,0x00,0xc0,0x15,0xc0,0x00,0x00,0xaa,0x02,0x00,0x00,
+ 0x55,0x15,0x00,0x00,0xff,0xbf,0x00,0x00,0xff,0xff,0x01,0x00,0xff,0xff,0x07,
+ 0x00,0xff,0x3f,0x1f,0x00,0xff,0xff,0x70,0x00,0xff,0xff,0xc3,0x00,0xbf,0xff,
+ 0x87,0x01,0xd7,0xff,0x1f,0x02,0x97,0x9f,0x3c,0x04,0x0e,0x4f,0x78,0x04,0x38,
+ 0x20,0xe0,0x08,0xf0,0x00,0xe0,0x01,0x80,0x0f,0x20,0x03,0x00,0xf8,0x07,0x01,
+ 0x00,0x00,0x38,0x00,0x00,0x00,0x40,0x02};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/face_happy.xbm Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,12 @@
+#define noname_width 32
+#define noname_height 32
+static char noname_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x0f,0x00,0x00,0x1c,0x70,
+ 0x00,0x00,0x02,0x80,0x00,0x80,0x01,0x00,0x03,0x40,0x00,0x00,0x04,0x40,0x00,
+ 0x00,0x04,0x20,0x00,0x00,0x08,0x10,0x20,0x08,0x10,0x10,0x20,0x08,0x10,0x10,
+ 0x20,0x08,0x10,0x08,0x20,0x08,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,
+ 0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,0x20,0x08,0x00,0x00,
+ 0x20,0x10,0x00,0x00,0x10,0x10,0x08,0x20,0x10,0x10,0x04,0x40,0x10,0x20,0x0a,
+ 0xa0,0x08,0x40,0x30,0x18,0x04,0x40,0xc0,0x07,0x04,0x80,0x01,0x00,0x03,0x00,
+ 0x02,0x80,0x00,0x00,0x1c,0x70,0x00,0x00,0xe0,0x0f,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/get_skew.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,97 @@
+#include "get_skew.h"
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+#include "tcl_interface.h"
+
+int docommand(char* fmt, ...);
+
+struct angle_and_sd
+{
+ double angle;
+ double sd;
+};
+
+#define DEBUG_GET_SKEW 1
+
+double get_skew(RLEMap* r)
+/*--------------------------------------------------------------
+Primary Purpose: Determine the angle of rotation of the RLEMap r by searching for the projection angle with the largest histogram standard deviation
+Arguments: pointer to an RLEMap
+Return Value: the tested angle whose histogram had the largest standard deviation
+Rev: AR
+---------------------------------------------------------------*/
+{
+ // Two passes (i = 0, 1): step 1 from angle 0, then step 0.1 from the best angle of pass one.
+ int i,j,max_index;
+ double max_sd = 0; // never read in this function
+ double test_angle, angular_skip, angle_increment;
+ Histogram* temp_hist;
+ angle_and_sd Angles_and_Sds[20]; // only indices 0..9 are written below
+
+ struct timeval tv;
+ double start_time;
+ gettimeofday(&tv, NULL);
+ start_time = (double)tv.tv_sec;
+ printf("Starting deskew process: time = %lf\n", ((double) tv.tv_sec) - start_time);
+ if(ENABLE_USER_INTERFACE)
+ set_status("Finding Skew. . .");
+/* first run, angular spacing = 1 degree */
+
+ test_angle = -1;
+ angular_skip = 1; // assigned but never read in this function
+ double loop_start_angle = 0;
+ double loop_step = 1;
+ for(i = 0; i < 2; i++)
+ {
+ // NOTE(review): histograms returned by project_histogram are never freed here - confirm ownership.
+ temp_hist = project_histogram(r, loop_start_angle);
+ Angles_and_Sds[0].angle = loop_start_angle;
+ Angles_and_Sds[0].sd = temp_hist->get_standard_dev();
+
+ temp_hist = project_histogram(r, loop_start_angle + loop_step);
+ Angles_and_Sds[1].angle = loop_start_angle + loop_step;
+ Angles_and_Sds[1].sd = temp_hist->get_standard_dev();
+
+ if(Angles_and_Sds[1].sd > Angles_and_Sds[0].sd) // search towards the larger deviation
+ {
+ angle_increment = loop_step;
+ max_index = 1;
+ test_angle = Angles_and_Sds[1].angle + angle_increment;
+ }
+ else
+ {
+ max_index = 0;
+ angle_increment = -loop_step;
+ test_angle = Angles_and_Sds[0].angle + angle_increment;
+ }
+ for(j = 2; j < 10; j++) // at most 8 further steps in the chosen direction
+ {
+ temp_hist = project_histogram(r, test_angle);
+ Angles_and_Sds[j].angle = test_angle;
+ Angles_and_Sds[j].sd = temp_hist->get_standard_dev();
+ if(Angles_and_Sds[j].sd > Angles_and_Sds[max_index].sd)
+ {
+ max_index = j;
+ test_angle += angle_increment;
+ }
+ else
+ break; // the deviation stopped growing, so keep the best angle so far
+ }
+ loop_start_angle = Angles_and_Sds[max_index].angle;
+ loop_step = 0.1;
+ }
+ gettimeofday(&tv, NULL);
+ /* printf("Done findinge skew. Elapsed time = %lf\n", ((double) tv.tv_sec) - start_time); */
+ if(ENABLE_USER_INTERFACE)
+ {
+ set_status("Skew angle = %.3lf", Angles_and_Sds[max_index].angle);
+ update();
+ }
+ return Angles_and_Sds[max_index].angle;
+}
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/get_skew.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,8 @@
+#ifndef GETSKEW
+#define GETSKEW 1
+#include "project.h"
+#include "histogram.h"
+
+double get_skew(RLEMap* r);
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/histogram.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,84 @@
+#include <math.h>
+#include "histogram.h"
+#include "tcl_interface.h"
+
+void Histogram::initialize_mean()
+{
+    // Mean row weight: the integer sum of all row weights
+    // divided by the number of rows.
+    int sum = 0;
+    int row = 0;
+    while (row < num_rows)
+        sum += row_weights[row++];
+    mean = (double) sum / (double) num_rows;
+}
+
+inline double sqr(double x) { return x * x; }  // x squared
+
+void Histogram::initialize_variance()
+{
+    // Variance: the mean of the squared deviations of each row weight from `mean`.
+    double squares = 0;
+    int row = 0;
+    while (row < num_rows)
+        squares += sqr(row_weights[row++] - mean);
+    variance = (double) squares / (double) num_rows;
+}
+
+void Histogram::initialize_standard_dev()
+{
+ standard_dev = sqrt(variance); // uses the value stored by initialize_variance()
+}
+#define SKIP 5
+/* Spacing and line width used when drawing; should get this skip from project.cc (!!) */
+Histogram::display() // NOTE(review): no return type is declared here or in histogram.h
+{
+#if 0 /* the drawing code below is compiled out, so display() currently does nothing */
+ docommand(".histogram.c delete hist");
+ docommand(".histogram.c create text 275 300 -font -adobe-helvetica-medium-o-normal--34-240-100-100-p-176-iso8859-1 -text \"%.3lf degrees, SD = %.3lf\" -tags hist", cut_angle, standard_dev);
+ for(int i = 0; i < num_rows; i++)
+ {
+ docommand(".histogram.c create line 0 %d %d %d -fill blue -tags {hist} -width %d", i*SKIP, row_weights[i], i*SKIP, SKIP);
+ }
+ docommand("update");
+#endif
+}
+
+Histogram::Histogram(int n_rows, int* r_weights, double c_angle)
+    : num_rows(n_rows), row_weights(r_weights), cut_angle(c_angle)
+{
+    // The statistics build on each other, so the order matters:
+    // mean first, then variance, then standard deviation.
+    initialize_mean();
+    initialize_variance();
+    initialize_standard_dev();
+}
+
+
+
+
+
+
+
+
+
+
+double find_int_sd(int* values, int num_values)
+/* Standard deviation (dividing by num_values) of an array of ints.
+   Returns 0 for an empty or negative-length array instead of NaN. */
+{
+    if (num_values <= 0)
+        return 0.0;
+    double total = 0;   /* a double sum cannot overflow the way an int sum can */
+    for(int i = 0; i < num_values; i++)
+        total += values[i];
+    const double mean_weight = total / (double) num_values;
+    double standard_sum = 0;
+    for(int i = 0; i < num_values; i++)
+    {
+        const double diff = values[i] - mean_weight;
+        standard_sum += diff * diff;
+    }
+    return sqrt(standard_sum / (double) num_values);
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/histogram.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,49 @@
+#ifndef HISTOGRAM
+#define HISTOGRAM 1
+
+class Histogram
+{
+/* just an array of integers with some statistical information */
+/* consider implementing with an intarray (from hw1) instead */
+ private:
+ double mean; // set by initialize_mean()
+ double variance; // set by initialize_variance()
+ double standard_dev; // square root of variance
+
+ int num_rows; // number of entries in row_weights
+ int* row_weights; // caller's array; the constructor stores the pointer without copying
+ double cut_angle;/* this doesn't really belong here, but who cares */
+ void initialize_mean();
+ void initialize_variance();
+ void initialize_standard_dev();
+ public:
+ display(); // NOTE(review): declared without a return type
+ Histogram(int n_rows, int* r_weights, double c_angle);
+ inline int get_row_weight(int row) // no bounds check on row
+ {
+ return row_weights[row];
+ }
+ inline int get_num_rows()
+ {
+ return num_rows;
+ }
+ inline double get_mean()
+ {
+ return mean;
+ }
+ inline double get_standard_dev()
+ {
+ return standard_dev;
+ }
+ inline double get_variance()
+ {
+ return variance;
+ }
+ inline double get_cut_angle()
+ {
+ return cut_angle;
+ }
+};
+
+#endif
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/init_small_display.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,21 @@
+# the windows in here were for debugging skew etc.
+# not all that important anymore
+toplevel .t
+wm geometry .t 600x710+0+0
+wm title .t "Image Display"
+frame .t.f -width 600 -height 710
+pack .t.f
+
+canvas .t.f.c -xscrollcommand ".t.f.xscroller set" -yscrollcommand ".t.f.yscroller set" -width 700 -height 700 -background gray -xscrollincrement 40 -yscrollincrement 40
+
+scrollbar .t.f.xscroller -command ".t.f.c xview" -orient horizontal
+scrollbar .t.f.yscroller -command ".t.f.c yview"
+pack .t.f.xscroller -side bottom -fill x
+pack .t.f.c .t.f.yscroller -side left -fill y
+# .t.f.c create bitmap 100 100 -bitmap @/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/tif/test
+toplevel .histogram
+wm title .histogram "Skew Projections"
+wm geometry .histogram 500x800+620+0
+canvas .histogram.c -width 800 -height 800
+pack .histogram.c
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/learn.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,395 @@
+/*--------------------------------------------------------------
+ Learn.cc -
+ readlearnfiles - sources the tcl file to change learn files
+ learn(char * tifffile, char * asciifile)
+ Performs character learning by reading tiff and ascii translation
+ Characters are partitioned into character groups as described
+ in system.cc. See learn function for more details
+
+ writeLearnedGroups(char * filename) Writes learned character to file
+ readLearnedGroups(char * filename) Reads saved learned characters
+ from file.
+---------------------------------------------------------------*/
+#include "tcl_interface.h"
+#include "system.h"
+#include "learn.h"
+#include "Page.h"
+#include "list.h"
+
+void readLearnFiles()
+/*--------------------------------------------------------------
+Primary Purpose: Sources learnfile.tcl (via docommand), where new learn
+files can be specified without recompiling **/
+{
+ docommand("source learnfile.tcl");
+}
+
+bool whitespace(char c)
+// Tells learn() whether c is a separator: TRUE for newline, tab or
+// space, FALSE for anything else.
+{
+    const bool blank = (c == ' ' || c == '\t' || c == '\n');
+    return blank ? TRUE : FALSE;
+}
+
+
+void learn(char * tifFile, char * asciiFile)
+/*--------------------------------------------------------------
+Primary Purpose: Learns from TIFF and ascii file. Groups learned
+    characters by baseline into LearnedGroups and
+    sets properties.
+Arguments: tifFile   name of a tiff file to learn from
+    asciiFile name of an ascii translation file
+Effects: Assumes a one to one correspondence between each connected
+component on a line of the tif file and each character on the corresponding
+line of the ascii file. Updates MaxHWRatio, MinHWRatio and MinWidth.
+The ascii file is closed and the page freed on every path past its creation.
+Rev: 11/20/95
+---------------------------------------------------------------*/
+{
+    FILE * transFile = fopen(asciiFile,"r");
+    if(!transFile)
+    {
+        printf("Could not open the ascii learn file");
+        return;
+    }
+    if (LearnedGroups == NULL)
+        LearnedGroups = new Components[NumCharGroups];
+
+    Page * learnPage = new Page;
+    initCharBitsSet();
+    if(learnPage->readMap(tifFile) != VALID)
+    {
+        printf("Problem opening the learn image file (file doesn't exist?)\n");
+        delete learnPage;    // was leaked on this path
+        fclose(transFile);   // was left open on this path
+        return;
+    }
+    learnPage->setLines();
+    learnPage->extractComponents();
+    int maxCharsPerLine = learnPage->bmap()->imageWidth() / MinLineSize;
+    char buffer[maxCharsPerLine];
+    int i = 0;
+    int buflength;
+    Components * components;
+    Component * item;
+
+    double width, height = 0.0;  // height keeps the largest h seen so far
+    int h;
+
+    while (i < learnPage->numLines() &&
+           fgets(buffer, maxCharsPerLine, transFile))
+    {
+        buflength = strlen(buffer);
+        components = learnPage->line(i++);
+        int c = 0;
+        for (ListElement* ptr = components->first; ptr != NULL;
+             ptr = ptr->next)
+        {
+            item = (Component *)(ptr->item);
+
+            // skip over white space (bounds test first, then the read)
+            while(c < buflength && whitespace(buffer[c]))c++;
+
+            if (c >= buflength)
+                break;
+
+            item->asciiId() = buffer[c++];
+
+            LearnedGroups[item->charGroup].Append((void*) item);
+            ptr->item = NULL;  // Set to Null in page so it wont get
+                               // clobbered on delete
+            h = item->lr().y() - item->ul().y();
+            if (h > height) height = h;
+            width = item->lr().x() - item->ul().x();
+            // NOTE(review): if lr.x equals ul.x, width is 0 and the two
+            // ratios below divide by zero - confirm this cannot happen.
+            if (height/width > MaxHWRatio)
+                MaxHWRatio = height/width;
+
+            if (h/width < MinHWRatio)
+                MinHWRatio = h/width;
+
+            if (width < MinWidth)
+                MinWidth = (int) width;
+
+/* printf("learned char %c, group %d\n", item->asciiId(),
+ item->charGroup);
+*/
+        }
+
+    }
+
+    if (fgets(buffer, maxCharsPerLine, transFile))
+        printf("Uh, oh. There are more characters to learn!\n");
+    /* printf("Maximum height/width ratio = %f\n", MaxHWRatio); */
+    /* printf("Minimum height/width ratio = %f\n", MinHWRatio); */
+    fclose(transFile);
+    delete learnPage;
+}
+
+
+int writeLearnedGroups(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Write Learned groups out to file for reading
+    in by readLearnedGroups
+Arguments: filename to write learned chars to
+Return Value: 1 if successful 0 if not
+Effects: Writes contents of LearnedGroups array out to filename
+LearnedGroups is an array of lists of components that is declared
+in system.cc and initialized by the learn() function.
+For each group writes the group number and the number of Components
+the group contains, followed by the group data.
+Other learned values such as MinWidth MinHWRatio etc are written to
+the file as well. The file is closed on every return path.
+Constraints: LearnedGroups must be initialized and filled with learned
+chars before this function is invoked.
+Rev: 11/27 KM
+---------------------------------------------------------------*/
+{
+    int status;
+    FILE * outfile;
+    assert(LearnedGroups != NULL);
+
+    // Binary mode: the file holds raw structs, not text.
+    outfile = fopen(filename, "wb");
+    if (outfile == NULL)
+    {
+        printf("error openning %s \n", filename);
+        return 0;
+    }
+
+    // Write global information about learned characters
+    fwrite(&NumCharGroups, sizeof(NumCharGroups),1, outfile);
+    fwrite(&MaxHWRatio, sizeof(MaxHWRatio),1, outfile);
+    fwrite(&MinWidth, sizeof(MinWidth),1,outfile);
+    fwrite(&MinHWRatio, sizeof(MinHWRatio),1,outfile);
+    for(unsigned int i = 0; i < NumCharGroups; i++)
+    {
+        unsigned int numChars = LearnedGroups[i].length;
+        // Write group number and number of characters
+        fwrite(&i, sizeof(i), 1, outfile);
+        status = fwrite(&numChars, sizeof(numChars),1,outfile);
+        if (status == 0) { fclose(outfile); return 0; }
+        for(ListElement * ptr = LearnedGroups[i].first;
+            ptr != NULL; ptr = ptr->next)
+        {
+            Component * comp = (Component *) ptr->item;
+            // Raw image of the Component object; its properties follow one by one.
+            status = fwrite(comp, sizeof(Component),1,outfile);
+// printf("\tChar:%c status:%d \n", comp->asciiId(), status);
+
+            for(int p = 0; p < numProperties; p++)
+            {
+                status = fwrite(&(comp->fproperty[p]),
+                                sizeof(Property),
+                                1, outfile);
+                if (status == 0)
+                {
+                    printf("Error writing properties of comp %c",
+                           comp->asciiId());
+                    fclose(outfile);   // do not leak the handle on failure
+                    return 0;
+                }
+            }
+        }
+    }
+    status = fclose(outfile);
+    if (status == -1) return 0;
+    else return 1;
+
+}
+
+int readLearnedGroups(char * filename)
+/*--------------------------------------------------------------
+Primary Purpose: Read Learned groups from file that has been
+    created by writeLearnedGroups
+Arguments: filename to read learned chars from
+Return Value: 1 if successful 0 if not
+Effects: Reads contents of filename into LearnedGroups array
+LearnedGroups is an array of lists of components that is declared
+in system.cc and initialized here or in the learn() function.
+Constraints: LearnedGroups must not yet be initialized
+Rev: 11/27 KM
+---------------------------------------------------------------*/
+{
+    int status;
+    FILE * infile;
+    unsigned int numGroups; // # of groups stored in file.
+
+    initCharBitsSet();
+    if(LearnedGroups == NULL)
+        LearnedGroups = new Components[NumCharGroups];
+    // Binary mode: the file holds raw structs, not text.
+    infile = fopen(filename, "rb");
+    if (infile == NULL)
+    {
+        printf("error openning %s \n", filename);
+        return 0;
+    }
+
+    // Read Globals
+    fread(&numGroups, sizeof(numGroups),1, infile);
+    assert(numGroups == NumCharGroups);
+    fread(&MaxHWRatio, sizeof(MaxHWRatio),1, infile);
+    fread(&MinWidth, sizeof(MinWidth),1,infile);
+    fread(&MinHWRatio, sizeof(MinHWRatio),1,infile);
+    for(unsigned int i = 0; i < NumCharGroups; i++)
+    {
+        unsigned int groupnum;
+        unsigned int numChars;
+        fread(&groupnum, sizeof(groupnum), 1, infile);
+        assert(groupnum == i);
+        fread(&numChars, sizeof(numChars),1,infile);
+        printf("\nReading group %d - %d characters\n",i,numChars);
+        for(unsigned int c = 0; c< numChars; c++)
+        {
+            Component * comp = new Component;
+            // fread overwrites the object's own fproperty pointer with the stale
+            // value stored in the file, so save the live pointer and restore it.
+            short int * savepropptr = comp->fproperty;
+            status = fread(comp, sizeof(Component),1,infile);
+            comp->fproperty = savepropptr;
+            for(int p = 0; p < numProperties; p++)
+            {
+                status = fread(&(comp->fproperty[p]), sizeof(Property),
+                               1, infile);
+                if (status == 0)
+                {
+                    printf("Error reading properties");
+                    delete comp;      // not yet appended to any list
+                    fclose(infile);   // do not leak the handle on failure
+                    return 0;
+                }
+            }
+// printf("\tChar:%c status:%d ", comp->asciiId(), status);
+// printVector(comp->properties(), numProperties);
+            LearnedGroups[i].Append(comp);
+        }
+    }
+    status = fclose(infile);
+    if (status == -1) return 0;
+    else return 1;
+}
+
+void testLearn()
+{
+  // Hard-wired training pair handed to learn(): first the image file,
+  // then the ascii file.
+  char * trainImage =
+    (char *) "/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/train.tif";
+  char * trainText =
+    (char *) "/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/train.txt";
+
+  learn(trainImage, trainText);
+}
+
+/*****************************************************************
+ FUNCTIONS BEYOND THIS POINT ARE FOR AVERAGING LEARNED CHARACTERS
+ AND ARE NOT CURRENTLY USED.
+*******************************************************************/
+
+void initLearnedChars()
+/*--------------------------------------------------------------
+Primary Purpose: Allocates the 256-entry learned character array and
+stamps every entry with its own array offset as asciiId.
+Rev: KM 11/6/95
+---------------------------------------------------------------*/
+{
+  LearnedChars = new Component[256];
+
+  int code = 0;
+  while (code < 256)
+    {
+      LearnedChars[code].asciiId() = (char)code;
+      ++code;
+    }
+}
+
+void oldlearn(char * tifFile, char * asciiFile)
+/*--------------------------------------------------------------
+Primary Purpose: builds averaged property vectors for the LearnedChars array
+Arguments: tifFile    name of a tiff file to learn from
+           asciiFile  name of an ascii translation file
+Effects: Assumes a one to one correspondence between each connected
+component on a line of the tif file and each non-whitespace character on
+the corresponding line of the ascii file.  Property values and numBits are
+accumulated per character and then divided by the number of examples; for
+learned characters confidence is set to the number of examples.
+(Re)allocates LearnedChars through initLearnedChars().  Prints a message
+and returns without learning when asciiFile cannot be opened.
+
+Rev: 11/6/95
+---------------------------------------------------------------*/
+{
+  FILE * transFile = fopen(asciiFile,"r");
+  if (transFile == NULL)
+    {
+      printf("error openning %s \n", asciiFile);
+      return;
+    }
+
+  Page * learnPage = new Page;
+  initCharBitsSet();
+  learnPage->readMap(tifFile);
+  learnPage->setLines();
+  learnPage->extractComponents();  /* why minlinesize? */
+
+  // MinLineSize is a tunable; guard against a zero divisor and against a
+  // buffer too small for fgets to return anything.
+  int divisor = (MinLineSize > 0) ? MinLineSize : 1;
+  int maxCharsPerLine = learnPage->bmap()->imageWidth() / divisor;
+  if (maxCharsPerLine < 2)
+    maxCharsPerLine = 2;
+  char buffer[maxCharsPerLine];
+
+  int count[256];  // a count of how many of each char have been encountered
+  int prop[256][numProperties];  // Character property sums.  Need ints so
+                                 // that property sum does not exceed char
+                                 // boundaries
+
+  initLearnedChars();
+  for (int ch = 0; ch < 256; ch++)
+    {
+      count[ch] = 0;
+      for (int p = 0; p < numProperties; p++)
+        prop[ch][p] = 0;
+    }
+
+  int lineNo = 0;
+  while (lineNo < learnPage->numLines() &&
+         fgets(buffer, maxCharsPerLine, transFile))
+    {
+      int buflength = strlen(buffer);
+      Components * components = learnPage->line(lineNo++);
+      int c = 0;
+      for (ListElement* ptr = components->first; ptr != NULL;
+           ptr = ptr->next)
+        {
+          Component * item = (Component *)(ptr->item);
+          // skip over white space (bounds check first)
+          while (c < buflength && whitespace(buffer[c])) c++;
+          if (c >= buflength) break;
+          // Index through unsigned char: a plain char may be negative for
+          // bytes above 127 and would index before the arrays.
+          int id = (unsigned char) buffer[c++];
+          count[id]++;    // increment character count
+          for (int offset = 0; offset < numProperties; offset++)
+            prop[id][offset] += (item->properties())[offset];
+          // Accumulate into the entry of the character just read (the
+          // averaging loop below divides LearnedChars[j] by count[j]).
+          LearnedChars[id].numBits() += item->numBits();
+        }
+    }
+
+  // now divide by count and put in Learned character
+  for (int j = 0; j < 256; j++)
+    {
+      if (count[j] > 0)
+        {
+          for (int offset = 0; offset < numProperties; offset++)
+            prop[j][offset] /= count[j];
+          LearnedChars[j].numBits() /= count[j];
+          LearnedChars[j].confid() = count[j];
+          for (int offset = 0; offset < numProperties; offset++)
+            (LearnedChars[j].properties())[offset] = prop[j][offset];
+//        printf("%d occurrences of %c\n", count[j], (char)j);
+          printVector(LearnedChars[j].properties(), numProperties);
+        }
+    }
+
+  // Only plain numbers were copied out of the page, so both resources can
+  // be released here.
+  fclose(transFile);
+  delete learnPage;
+}
+
+void oldtestLearn()
+{
+  learn("train.tif", "train.txt");
+
+  // The Tcl command below is only issued when the user interface is on.
+  if (!ENABLE_USER_INTERFACE)
+    return;
+
+  docommand(".main_window.display.work_space delete IMAGE_TAG");  //6/16/00
+  //docommand("button .b -text \"hello\" -command exit \n pack .b\n");
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/learn.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,13 @@
+// learn.h
+// Functions for use with the LearnedGroups array
+void learn(char * tifFile, char * asciiFile); // assumes "perfect" image
+void readLearnFiles(); // sources learnfile.tcl where learn files can be set
+
+// Both return 1 on success and 0 on failure.
+int writeLearnedGroups(char * filename); // Save learned characters
+int readLearnedGroups(char * filename); // read back saved characters
+void testLearn(); // calls learn() on a hard-wired train.tif / train.txt pair
+
+// oldlearn for use with LearnedChars array
+// averaging learning ***NOT CURRENTLY USED ***
+void oldlearn(char * tifFile, char * asciiFile); // averages values
+void initLearnedChars(); // allocates the 256-entry LearnedChars array and sets each asciiId to its index
Binary file reference/ocr-simple/learned has changed
Binary file reference/ocr-simple/learnedGroups.dat has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/learnfile.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,6 @@
+# one line each for files that need to be learned
+# form: LEARN <imagefile> <textfile>
+# or: LEARN_DATA <datafile>
+# LEARN TIF/tomquotes.tif tomquotes.txt
+# LEARN at_top/train.tif train.txt
+LEARN_DATA learned
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/line_info.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,41 @@
+#ifndef LINE_INFO
+#define LINE_INFO 1
+
+// Holds a (start_row, end_row) pair.  The constructor is defined outside
+// this header; presumably st/fi initialise start_row/end_row -- confirm
+// against its definition.
+class LineMarker {
+ private:
+  int start_row;
+  int end_row;
+ public:
+  LineMarker(int st, int fi);
+  // Read-only accessors; const-qualified so they can also be called
+  // through const objects and const references.
+  inline int get_start_row() const
+  {
+    return start_row;
+  }
+  inline int get_end_row() const
+  {
+    return end_row;
+  }
+};
+
+// Wraps a caller-supplied array of LineMarker together with a line count.
+// The constructor is defined outside this header, so how (and whether)
+// the array is copied or owned cannot be told from here.
+class LineInfo {
+ private:
+  int num_lines;
+  LineMarker* line_marks;
+ public:
+  LineInfo(LineMarker* line_m, int num_l);
+  // Read-only accessors; const-qualified so they can also be called
+  // through const objects and const references.
+  inline int get_num_lines() const
+  {
+    return num_lines;
+  }
+  // n indexes line_marks directly and is not range-checked.
+  inline int get_line_n_start(int n) const
+  {
+    return line_marks[n].get_start_row();
+  }
+  inline int get_line_n_end(int n) const
+  {
+    return line_marks[n].get_end_row();
+  }
+
+};
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/link.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,61 @@
+#include <tcl.h>
+#include "link.h"
+#include "tcl_interface.h"
+#include "system.h"
+
+/*
+
+To link tcl and C variables: (variable num)
+
+int num;
+LINK_VARIABLE(num, INT);
+
+make sure the variable is external and in system.h
+so that everyone can get at it
+
+*/
+
+
+// Linked to the Tcl variable of the same name by init_link_vars().
+int DISPLAY_SPELLING_MISTAKES = 0;
+// Interpreter every link is registered with; defined in another file.
+extern Tcl_Interp* TCL_ip;
+
+// LINK_VARIABLE(name, type) expands to a Tcl_LinkVar call on TCL_ip that
+// uses the stringified C identifier as the Tcl variable name, the address
+// of the C variable as storage and TCL_LINK_<type> as the link type.
+#define LINK_VARIABLE(name, type) \
+Tcl_LinkVar(TCL_ip, #name, (void*) &name, TCL_LINK_##type)
+
+// Links the C configuration globals to Tcl variables of the same name so
+// that scripts can read and set them.
+//
+// Returns TCL_OK when every Tcl_LinkVar call succeeded and TCL_ERROR when
+// at least one failed; the remaining variables are still linked in that
+// case.  (The function used to fall off its end without returning a
+// value, and linked SCALE_FACTOR twice.)
+int init_link_vars()
+{
+  // TCL_OK is 0, so OR-ing the results stays 0 only if every call succeeds.
+  int status = TCL_OK;
+
+  status |= LINK_VARIABLE(ENABLE_USER_INTERFACE, INT); // 0 turns the display off
+  status |= LINK_VARIABLE(VERY_LOW_CONFIDENCE, INT); // Words that display in red
+  status |= LINK_VARIABLE(LOW_CONFIDENCE, INT); // Words that will be displayed in blue
+  // presumably toggles drawing of text line boundaries -- confirm with the UI code
+  status |= LINK_VARIABLE(DISPLAY_LINE_BOUNDARIES, INT);
+  status |= LINK_VARIABLE(DISPLAY_BOUNDING_BOXES, INT); // Display component boundaries
+  status |= LINK_VARIABLE(SPELLCHECK, INT); // should the page be spellchecked
+
+  status |= LINK_VARIABLE(DISPLAY_IMAGE, INT); // display the scanned image?
+  status |= LINK_VARIABLE(DESKEW_METHOD, INT); // 1 for rle, 0 for bitmap
+  status |= LINK_VARIABLE(DISPLAY_SPELLING_MISTAKES, INT);
+  status |= LINK_VARIABLE(SCALE_FACTOR, DOUBLE);
+  status |= LINK_VARIABLE(NoiseTolerance, INT); // Minimum num of pixels in line of text
+
+  status |= LINK_VARIABLE(MinLineSize, INT); // Minimum number of rows in line of text
+  status |= LINK_VARIABLE(MinVertSeparation, INT); // Minimum number of rows between lines of text
+  status |= LINK_VARIABLE(MinHorizSeparation, INT); // Minimum number of blank pixels between chars
+  status |= LINK_VARIABLE(ConfidenceThreshold, INT); // Minimum confidence for some operations
+  status |= LINK_VARIABLE(JoinTolerance, INT); // Maximum number of pixels
+                                               // joining two fused characters
+
+  // The next 3 are used in character grouping
+  status |= LINK_VARIABLE(MaxVertSize, INT); // Max vert pixels in char
+  status |= LINK_VARIABLE(BaseLineTolerance, INT); // How far from baseline is
+                                                   // okay 1/INT * linesize
+  status |= LINK_VARIABLE(TopLineTolerance, INT); // How far from topline
+                                                  // is okay 1/INT * linesize
+
+  /* Constants for the number of horizontal and vertical divisions
+     for determining the gray scale property vector for each component */
+
+  status |= LINK_VARIABLE(NumHorizDiv, INT); // Number of horizontal divisions
+  status |= LINK_VARIABLE(NumVertDiv, INT); // Number of vertical divisions
+
+  return (status == TCL_OK) ? TCL_OK : TCL_ERROR;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/link.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,42 @@
+#ifndef link__h
+#define link__h
+
+// Registers the globals below (and others declared elsewhere) with the
+// Tcl interpreter; see link.cc.
+int init_link_vars();
+extern int ENABLE_USER_INTERFACE;
+extern int DISPLAY_SPELLING_MISTAKES;
+extern int NoiseTolerance; // Minimum number of pixels in row of text
+extern int MinLineSize; // Minimum number of rows in a line of text
+extern int MinVertSeparation; // Minimum number of rows between lines of text
+extern int MinHorizSeparation; // Minimum number of blank pixels between chars
+extern int ConfidenceThreshold;// Minimum confidence for some operations
+extern int JoinTolerance; // Maximum number of pixels in a column
+ // joining two fused characters
+
+extern double MaxHWRatio; // Max H/W ratio of learned set
+extern int MinWidth; // minimum component width in learned set
+
+// The next four are used in character grouping
+extern unsigned int NumCharGroups;
+extern int MaxVertSize; // Max vert pixels in char (used for baseline)
+extern int BaseLineTolerance; // How far from baseline is okay (link.cc: 1/value * linesize)
+extern int TopLineTolerance; // How far from topline is okay (link.cc: 1/value * linesize)
+
+/* Constants for the number of horizontal and vertical divisions
+ for determining the gray scale property vector for each component */
+
+extern int NumHorizDiv; //Number of horizontal divisions
+extern int NumVertDiv; //Number of vertical divisions
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/link_vars.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,23 @@
+# Values for the Tcl variables that link.cc (init_link_vars) links to C
+# globals of the same name.
+# Per link.cc: words display in red / blue for these two thresholds.
+set VERY_LOW_CONFIDENCE 120
+set LOW_CONFIDENCE 160
+set DISPLAY_BOUNDING_BOXES 0
+set DISPLAY_LINE_BOUNDARIES 0
+set SPELLCHECK 0
+# 0 turns the display off (per link.cc)
+set ENABLE_USER_INTERFACE 1
+set DISPLAY_IMAGE 1
+# deskew method 0 uses bitmap rotation
+# 1 an RLEMap rotation
+# -1 no rotation at all
+set DESKEW_METHOD 0
+set SCALE_FACTOR 1
+
+# Segmentation tunables; units are documented next to the extern
+# declarations in link.h.
+set NoiseTolerance 1
+set MinLineSize 5
+set MinVertSeparation 0
+set MinHorizSeparation 1
+set ConfidenceThreshold 150
+
+set JoinTolerance 6
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/link_vars_original.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,20 @@
+# Same settings as link_vars.tcl except that ENABLE_USER_INTERFACE is 0
+# here; presumably the original defaults kept for reference -- confirm.
+set VERY_LOW_CONFIDENCE 120
+set LOW_CONFIDENCE 160
+set DISPLAY_BOUNDING_BOXES 0
+set DISPLAY_LINE_BOUNDARIES 0
+set SPELLCHECK 0
+# 0 turns the display off (per link.cc)
+set ENABLE_USER_INTERFACE 0
+set DISPLAY_IMAGE 1
+# deskew method 0 uses bitmap rotation
+# 1 an RLEMap rotation
+# -1 no rotation at all
+set DESKEW_METHOD 0
+set SCALE_FACTOR 1
+
+set NoiseTolerance 1
+set MinLineSize 5
+set MinVertSeparation 0
+set MinHorizSeparation 1
+set ConfidenceThreshold 150
+
+set JoinTolerance 6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/list.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,314 @@
+// list.cc
+//
+// Routines to manage a doubly-linked list of "things" (each ListElement
+// carries both a "next" and a "previous" pointer).
+//
+// A "ListElement" is allocated for each item to be put on the
+// list; it is de-allocated when the item is removed. This means
+// we don't need to keep a "next" pointer in every object we
+// want to put on a list.
+//
+// NOTE: Mutual exclusion must be provided by the caller.
+// If you want a synchronized list, you must use the routines
+// in synchlist.cc.
+//
+// Copyright (c) 1992-1993 The Regents of the University of California.
+// All rights reserved. See copyright.h for copyright notice and limitation
+// of liability and disclaimer of warranty provisions.
+
+
+#include "list.h"
+
+//----------------------------------------------------------------------
+// ListElement::ListElement
+// Initialize a list element, so it can be added somewhere on a list.
+//
+// "itemPtr" is the item to be put on the list. It can be a pointer
+// to anything.
+// "sortKey" is the priority of the item, if any.
+//----------------------------------------------------------------------
+
+ListElement::ListElement(void *itemPtr, int sortKey)
+{
+    // A new element starts out detached: no neighbours on either side
+    // (assume we'll put it at the end of the list).
+    previous = NULL;
+    next = NULL;
+
+    // Payload and its priority.
+    key = sortKey;
+    item = itemPtr;
+}
+
+//----------------------------------------------------------------------
+// List::List
+// Initialize a list, empty to start with.
+// Elements can now be added to the list.
+//----------------------------------------------------------------------
+
+List::List()
+{
+    // An empty list has no head, no tail and zero elements.
+    length = 0;
+    first = NULL;
+    last = NULL;
+}
+
+//----------------------------------------------------------------------
+// List::~List
+// Prepare a list for deallocation. If the list still contains any
+// ListElements, de-allocate them. However, note that we do *not*
+// de-allocate the "items" on the list -- this module allocates
+// and de-allocates the ListElements to keep track of each item,
+// but a given item may be on multiple lists, so we can't
+// de-allocate them here.
+//----------------------------------------------------------------------
+
+List::~List()
+{
+    // Drain until the list is empty rather than until Remove() returns
+    // NULL: Remove() returns the stored item, and a stored NULL item would
+    // otherwise end the loop early and leak the remaining ListElements.
+    // The items themselves are still not de-allocated here.
+    while (first != NULL)
+        Remove();
+}
+
+//----------------------------------------------------------------------
+// List::Append
+// Append an "item" to the end of the list.
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, put it at the end.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+//----------------------------------------------------------------------
+
+void
+List::Append(void *item)
+{
+    ListElement *node = new ListElement(item, 0);
+
+    if (!IsEmpty()) {
+        // hook the new node in behind the current tail
+        node->previous = last;
+        last->next = node;
+        last = node;
+    } else {
+        // sole element: it is both head and tail
+        first = node;
+        last = node;
+    }
+    length++;
+}
+
+//----------------------------------------------------------------------
+// List::Prepend
+// Put an "item" on the front of the list.
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, put it at the beginning.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+//----------------------------------------------------------------------
+
+void
+List::Prepend(void *item)
+{
+    ListElement *node = new ListElement(item, 0);
+
+    if (!IsEmpty()) {
+        // hook the new node in ahead of the current head
+        node->next = first;
+        first->previous = node;
+        first = node;
+    } else {
+        // sole element: it is both head and tail
+        first = node;
+        last = node;
+    }
+    length++;
+}
+
+//----------------------------------------------------------------------
+// List::Remove
+// Remove the first "item" from the front of the list.
+//
+// Returns:
+// Pointer to removed item, NULL if nothing on the list.
+//----------------------------------------------------------------------
+
+void *
+List::Remove()
+{
+    // Same as SortedRemove, but ignore the key.  SortedRemove already
+    // decrements length, so it must not be decremented again here (the
+    // former "length--" after the return statement was unreachable).
+    return SortedRemove(NULL);
+}
+
+//----------------------------------------------------------------------
+// List::Mapcar
+// Apply a function to each item on the list, by walking through
+// the list, one element at a time.
+//
+// Unlike LISP, this mapcar does not return anything!
+//
+// "func" is the procedure to apply to each element of the list.
+//----------------------------------------------------------------------
+
+void
+List::Mapcar(VoidFunctionPtr func)
+{
+    for (ListElement *ptr = first; ptr != NULL; ptr = ptr->next) {
+        // VoidFunctionPtr takes an int, so the item pointer is narrowed.
+        // Go through size_t first: a direct void* -> int cast is rejected
+        // by compilers on platforms where pointers are wider than int.
+        // Only the low-order bits of the pointer reach the callback there.
+        (*func)((int)(size_t)ptr->item);
+    }
+}
+
+//----------------------------------------------------------------------
+// List::IsEmpty
+// Returns TRUE if the list is empty (has no items).
+//----------------------------------------------------------------------
+
+bool
+List::IsEmpty()
+{
+    // The list is empty exactly when it has no head element.
+    return (first != NULL) ? FALSE : TRUE;
+}
+
+//----------------------------------------------------------------------
+// List::SortedInsert
+// Insert an "item" into a list, so that the list elements are
+// sorted in increasing order by "sortKey".
+//
+// Allocate a ListElement to keep track of the item.
+// If the list is empty, then this will be the only element.
+// Otherwise, walk through the list, one element at a time,
+// to find where the new item should be placed.
+//
+// "item" is the thing to put on the list, it can be a pointer to
+// anything.
+// "sortKey" is the priority of the item.
+//----------------------------------------------------------------------
+
+void
+List::SortedInsert(void *item, int sortKey)
+{
+    ListElement *element = new ListElement(item, sortKey);
+
+    if (IsEmpty()) {            // if list is empty, put
+        first = element;
+        last = element;
+    } else if (sortKey < first->key) {
+        // item goes on front of list
+        element->next = first;
+        first->previous = element;
+        first = element;
+    } else {
+        // Walk to the element the new one has to follow: stop in front of
+        // the first element with a bigger key, or at the tail.
+        ListElement *ptr = first;
+        while (ptr->next != NULL && sortKey >= ptr->next->key)
+            ptr = ptr->next;
+
+        element->previous = ptr;
+        element->next = ptr->next;
+        if (ptr->next != NULL)
+            ptr->next->previous = element;
+        else
+            last = element;     // item goes at end of list
+        ptr->next = element;
+    }
+    // Counted on every path; the mid-list insert used to return early and
+    // leave length one short.
+    length++;
+}
+
+//----------------------------------------------------------------------
+// List::SortedRemove
+// Remove the first "item" from the front of a sorted list.
+//
+// Returns:
+// Pointer to removed item, NULL if nothing on the list.
+// Sets *keyPtr to the priority value of the removed item
+// (this is needed by interrupt.cc, for instance).
+//
+// "keyPtr" is a pointer to the location in which to store the
+// priority of the removed item.
+//----------------------------------------------------------------------
+
+void *
+List::SortedRemove(int *keyPtr)
+{
+    if (IsEmpty())
+        return NULL;
+
+    ListElement *head = first;
+    void *payload = head->item;
+
+    if (first != last) {
+        // more than one element: the second one becomes the head
+        first = head->next;
+        if (first != NULL)
+            first->previous = NULL;
+    } else {
+        // list had one item, now has none
+        first = NULL;
+        last = NULL;
+    }
+
+    // hand the priority back only when the caller asked for it
+    if (keyPtr != NULL)
+        *keyPtr = head->key;
+
+    delete head;
+    length--;
+    return payload;
+}
+
+
+
+void List::insertAfter(ListElement * listEl, void *item)
+     // Insert a new item directly after listEl, which must be an element
+     // of this list.
+{
+    ListElement *newElement = new ListElement(item, 0);
+    newElement->next = listEl->next;
+    newElement->previous = listEl;
+    // Keep the backward chain intact: the old successor (if any) must now
+    // point back at the new element instead of at listEl.
+    if (listEl->next != NULL)
+        listEl->next->previous = newElement;
+    listEl->next = newElement;
+    if (last == listEl)
+        last = newElement;
+    length++;
+}
+
+
+void List::insertBefore(ListElement * listEl, void *item)
+     // Insert a new item directly before listEl, which must be an element
+     // of this list.
+{
+    ListElement *newElement = new ListElement(item, 0);
+    newElement->next = listEl;
+    newElement->previous = listEl->previous;
+    // Keep the forward chain intact: the old predecessor (if any) must now
+    // point at the new element, otherwise a walk from "first" skips it.
+    if (listEl->previous != NULL)
+        listEl->previous->next = newElement;
+    listEl->previous = newElement;
+    if (first == listEl)
+        first = newElement;
+    length++;
+}
+
+
+
+void List::removeAt(ListElement * listEl)
+     // Unlinks listEl from the list.  The element is not deleted and its
+     // own next/previous pointers are left untouched.
+{
+    ListElement *before = listEl->previous;
+    ListElement *after = listEl->next;
+
+    // forward chain: bypass listEl, or move the head
+    if (first == listEl)
+        first = after;
+    else
+        before->next = after;
+
+    // backward chain: bypass listEl, or move the tail
+    if (last == listEl)
+        last = before;
+    else
+        after->previous = before;
+
+//  delete listEl;
+    length --;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/list.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,93 @@
+
+// list.h
+// Data structures to manage LISP-like lists.
+//
+// As in LISP, a list can contain any type of data structure
+// as an item on the list: thread control blocks,
+// pending interrupts, etc. That is why each item is a "void *",
+// or in other words, a "pointers to anything".
+//
+// Copyright (c) 1992-1993 The Regents of the University of California.
+// All rights reserved. See copyright.h for copyright notice and limitation
+// of liability and disclaimer of warranty provisions.
+
+#ifndef LIST_H
+#define LIST_H
+//#include <bool.h>
+#include "Boolean.h" //rjf 6.2000
+
+#include <cstdlib>
+#include <stddef.h> //rjf 6.2000
+// This declares the type "VoidFunctionPtr" to be a "pointer to a
+// function taking an integer argument and returning nothing". With
+// such a function pointer (say it is "func"), we can call it like this:
+//
+// (*func) (17);
+//
+// Used by List::Mapcar, which passes each list item cast to int.
+
+typedef void (*VoidFunctionPtr)(int arg);
+
+// The following class defines a "list element" -- which is
+// used to keep track of one item on a list. It is comparable to a
+// LISP cell, with a "car" ("item") pointing to the item on the list
+// and a "cdr" ("next") pointing to the next element on the list; in
+// addition it keeps a "previous" pointer to the preceding element.
+//
+// Internal data structures kept public so that List operations can
+// access them directly.
+
+class ListElement {
+ public:
+ ListElement(void *itemPtr, int sortKey); // initialize a list element
+
+ ListElement *next; // next element on list,
+ // NULL if this is the last
+ ListElement *previous; // previous element on the list
+ // NULL if this is the first element
+ // Return type int was added to the three declarations below (7/00 RJF).
+ // NOTE(review): list.cc contains no definitions for these three members;
+ // the List class provides insertAfter/insertBefore/removeAt instead.
+ int insertAfter(void *item); // insert a new item after this one
+ int insertBefore(void *item); // insert a new item before this one
+ int remove(void * item); // remove this item
+
+ int key; // priority, for a sorted list
+ void *item; // pointer to item on the list
+};
+
+// The following class defines a "list" -- a doubly linked list of
+// list elements, each of which points to a single item on the list.
+//
+// By using the "Sorted" functions, the list can be kept sorted
+// in increasing order by "key" in ListElement.
+
+class List {
+ public:
+ List(); // initialize the list
+ ~List(); // de-allocate the list elements (not the items they point to)
+
+ void Prepend(void *item); // Put item at the beginning of the list
+ void Append(void *item); // Put item at the end of the list
+ void *Remove(); // Take item off the front of the list
+
+ void Mapcar(VoidFunctionPtr func); // Apply "func" to every element
+ // on the list
+ bool IsEmpty(); // is the list empty?
+ // Positional operations; listEl must be an element of this list.
+ void insertAfter(ListElement * listEl, void *item); // new element after listEl
+ void insertBefore(ListElement * listEl, void *item); // new element before listEl
+ void removeAt(ListElement * listEl); // unlink listEl; the element itself is not deleted
+
+ // Routines to put/get items on/off list in order (sorted by key)
+ void SortedInsert(void *item, int sortKey); // Put item into list
+ void *SortedRemove(int *keyPtr); // Remove first item from list
+
+
+ // Data members are public; other code walks first/next directly.
+ int length; // Length of list
+ ListElement *first; // Head of the list, NULL if list is empty
+ ListElement *last; // Last element of list
+
+};
+
+#endif // LIST_H
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/main.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,92 @@
+#include <tcl.h>
+#include <tk.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <cstdlib>
+#include <math.h>
+#include "system.h"
+#include "Page.h"
+#include "tcl_interface.h"
+
+
+#define QUIT 0
+#define PAUSE 0
+
+/*
+
+Creates a Tcl interpreter, link variable capabilities,
+and "docommand" function--Thanks to: Keiji Kanazawa
+
+To execute a line in tcl: (set x 4)
+
+docommand("set x 4");
+
+also
+
+docommand("set x %d", 4);
+
+or
+
+docommand("set %s %d", "x", 4); this might not work ("x" on the stack)
+
+started 10/95 Archie Russell
+
+*/
+
+
+
+Tcl_Interp* TCL_ip;
+Tk_Window main_window;
+Page* global_page = new Page;
+
+
+// Program entry point.
+//
+// With the user interface enabled: loads the UI, reads the learn files and
+// then services Tcl events forever.
+// With it disabled (batch mode): argv[1] names the page image to
+// recognise.  A third argument "nolearn" loads learnedGroups.dat instead
+// of learning from train.tif / train.txt (and writing learnedGroups.dat).
+// The page is then deskewed, segmented and recognised, and the results are
+// written to wordpos.out and ascii.out.
+int main(int argc, char** argv)
+{
+  initialize_interpreter();
+  initialize_command_procs();
+  initialize_link_vars();
+  if(ENABLE_USER_INTERFACE)
+    {
+      load_user_interface();
+    }
+
+  Page * testPage = new Page;
+  if(ENABLE_USER_INTERFACE)
+    readLearnFiles();
+  else
+    {
+      if(argc == 3 && strcmp(argv[2], "nolearn") == 0)
+        readLearnedGroups("learnedGroups.dat");
+      else
+        {
+          learn("train.tif", "train.txt");
+          writeLearnedGroups("learnedGroups.dat");
+        }
+    }
+  if(!ENABLE_USER_INTERFACE) {
+    // Batch mode needs the image file name; without this check argv[1]
+    // would be a null pointer handed straight to readMap().
+    if (argc < 2)
+      {
+        fprintf(stderr, "usage: %s <image file> [nolearn]\n",
+                (argc > 0) ? argv[0] : "ocr");
+        delete testPage;
+        exit(1);
+      }
+    testPage->readMap(argv[1]);
+    testPage->deskew(0);
+    testPage->setLines();
+    testPage->extractComponents();
+    testPage->recognize();
+    testPage->extractWords();
+    testPage->printComponents();
+    testPage->writeWordPos("wordpos.out");
+    testPage->writeAscii("ascii.out");
+    delete testPage;
+  }
+  if(ENABLE_USER_INTERFACE)
+    while(1)
+      {
+        // NOTE(review): TCL_DONT_WAIT makes this loop poll without
+        // blocking; left as is because it was a dated, deliberate change.
+        Tcl_DoOneEvent(TCL_DONT_WAIT);  //6/21/00
+      }
+  exit(0);
+}
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/new_ui.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,807 @@
+#
+# user interface code (tcl visuals) for OCR
+# started 9/95, Archie Russell
+
+# Bitmap files are addressed relative to the working directory at the time
+# this script is evaluated.  Note that "append" extends any value these
+# variables already hold.
+append OCRCHIE_ROOT [pwd] "/"
+append face_image $OCRCHIE_ROOT face_happy.xbm
+append eye_image $OCRCHIE_ROOT eye.xbm
+append write_image $OCRCHIE_ROOT edit2.xbm
+
+# xvprocess receives the result of "exec xv ... &" in my_open
+set xvprocess "0"
+set main_window_width 800
+set main_window_height 800
+set dummy 0
+# I'd like to be able to use the above parameters in here,
+# but I think tcl might get a little angry if I try
+# the size of the window, and the position of its upper left
+# (format: WIDTHxHEIGHT+X+Y, as passed to "wm geometry")
+set main_window_geometry 800x800+200+100
+
+set menu_bar_width $main_window_width
+set menu_bar_height 50
+set button_bar_width $main_window_width
+set button_bar_height 50
+set display_height 400
+set edit_window_height 300
+set quit_dialog_geometry 300x135+500+500
+# save_ascii_geometry is shared by all four save/read dialogs below
+set save_ascii_geometry 275x140+500+500
+
+# colours and fonts used by every widget
+set BACKGROUND #CCCCCC
+set FOREGROUND #000000
+# set FONT -bitstream-*-medium-r-normal--26-171-110-110-p-150-iso8859-1
+set FONT -bitstream-*-medium-r-normal--19-140-85-85-p-110-hp-roman8
+set SMALLFONT -bitstream-*-medium-r-normal--19-140-85-85-p-110-hp-roman8
+
+# background of the edit window and of the three text tags configured in
+# init_edit_window
+set EDIT_BACKGROUND #000000
+set LOW_PRECISION_BACKGROUND blue
+set MISPELLED_BACKGROUND SeaGreen
+set UNKNOWN_CHAR_BACKGROUND red
+
+# passed as -xscrollincrement to the image canvas
+set scroll_inc 30
+
+
+# save a little room for scrollbars, etc.
+set canvas_width [expr $main_window_width - 30]
+
+
+proc init_user_interface {} {
+# Builds the main OCR window.  Creates the toplevel .main_window, then the
+# menu bar, button bar, image display and edit window (each one filled in by
+# its own init_* procedure) and packs them from top to bottom.
+# The display frame is the exception: it is created inside init_display
+# rather than here.
+
+# tcl requires declaration of global variables used in a fxn
+
+ global main_window_geometry main_window_width main_window_height menu_bar_width menu_bar_height button_bar_width button_bar_height display_height BACKGROUND FOREGROUND EDIT_BACKGROUND edit_window_height
+
+# toplevel windows are at the same level as 'xterms'
+ toplevel .main_window
+
+# $ sign means 'get the value' (otherwise uses the string)
+ wm geometry .main_window $main_window_geometry
+ wm title .main_window "OCR user interface"
+ .main_window configure -background $BACKGROUND
+# frames are subwindows that are there mostly to 'hold' other windows
+ frame .main_window.menu_bar -width $menu_bar_width -height $menu_bar_height -relief raised -bd 2 -background $BACKGROUND
+ init_menu_bar
+
+ frame .main_window.button_bar -width $button_bar_width -height $button_bar_height -background $BACKGROUND
+ init_button_bar
+
+
+ init_display
+
+ frame .main_window.edit_window -width $main_window_width -height $edit_window_height -relief ridge -bd 5 -bg $EDIT_BACKGROUND
+ init_edit_window
+
+# pack puts things together: this will put the menu_bar window just above the button_bar_window above the display
+# and the edit window below the display
+
+ pack .main_window.menu_bar -side top -fill x
+ pack .main_window.button_bar .main_window.display .main_window.edit_window -side top -anchor w
+ focus .main_window
+}
+
+proc init_menu_bar { } {
+# this command initializes the main menu bar (stuff like file, etc)
+# It creates the File, Tools and Options menubuttons inside
+# .main_window.menu_bar, lets init_file_menu / init_tools_menu /
+# init_options_menu build the menu attached to each one, and packs the three
+# buttons from the left.
+# shortcuts not working! why?
+ global BACKGROUND FOREGROUND FONT
+
+ menubutton .main_window.menu_bar.file -text "File " -menu .main_window.menu_bar.file.menu -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+ init_file_menu
+
+ menubutton .main_window.menu_bar.tools -text "Tools " -menu .main_window.menu_bar.tools.menu -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+ init_tools_menu
+
+ menubutton .main_window.menu_bar.options -text "Options " -menu .main_window.menu_bar.options.menu -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+ init_options_menu
+
+ pack .main_window.menu_bar.file .main_window.menu_bar.tools .main_window.menu_bar.options -side left -padx 1m -pady 1m
+
+# if we ever put in the help junk...
+# menubutton .main_window.menu_bar.help -text "Help" -menu .main_window.menu_bar.options.help -borderwidth 2 -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+# pack .main_window.menu_bar.help -side right
+}
+
+proc init_file_menu { } {
+# this creates the menu associated with the file menubutton
+# Entries, in order: Open..., Close, the four save/read dialogs and Quit;
+# each entry runs the procedure named after -command.
+ global FOREGROUND BACKGROUND FONT
+ menu .main_window.menu_bar.file.menu -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+# and these initialize the entries in the menu (open is linked to the command popup_open_menu)
+ .main_window.menu_bar.file.menu add command -label "Open..." -command popup_open_menu
+ .main_window.menu_bar.file.menu add command -label "Close" -command close_document
+ .main_window.menu_bar.file.menu add separator
+# .main_window.menu_bar.file.menu add command -label "Save TIFF" -command popup_save_tiff_menu
+ .main_window.menu_bar.file.menu add command -label "Save ASCII" -command popup_save_ascii_menu
+ .main_window.menu_bar.file.menu add command -label "Save WORD/POS" -command popup_save_word_pos_menu
+ .main_window.menu_bar.file.menu add command -label "Save Learned Characters" -command popup_save_learned_chars_menu
+ .main_window.menu_bar.file.menu add command -label "Read Learned Characters" -command popup_read_learned_chars_menu
+
+# .main_window.menu_bar.file.menu add command -label "Save Setup" -command popup_save_setup_menu
+# a separator is just a horizontal line for show
+ .main_window.menu_bar.file.menu add separator
+ .main_window.menu_bar.file.menu add command -label "Quit" -command popup_quit_dialog_box
+}
+
+proc close_document { } {
+# Closes the current document: empties the recognized-text widget and the
+# image canvas, forgets the highlighted words still waiting for the <Tab>
+# binding, and runs DEALLOCATE_PAGE (a command that is not defined in this
+# script; presumably registered by the C program).
+#
+# COLORED_WORDS has to be declared global here.  Without the declaration the
+# "set" below only created a variable local to this procedure, so the global
+# list kept the positions of words from the document that was just closed.
+ global COLORED_WORDS
+ .main_window.edit_window.text_part delete 1.0 end
+ .main_window.display.work_space delete all
+ set COLORED_WORDS {}
+ DEALLOCATE_PAGE
+}
+
+
+proc init_tools_menu { } {
+# this creates the menu associated with the tools menubutton
+# Active entries: Recognize, Zoom in, Zoom out, Refresh and Deskew.
+# ZOOM_IN, ZOOM_OUT and DISPLAY_INTERVALS are not defined in this script;
+# presumably they are commands registered by the C program.
+ global BACKGROUND FOREGROUND FONT
+ menu .main_window.menu_bar.tools.menu -background $BACKGROUND -foreground $FOREGROUND -font $FONT
+# and these initialize the entries in the menu
+
+# supposed to just find the angle and tell you what it is...
+# .main_window.menu_bar.tools.menu add command -label "Skew Angle" -command popup_skew_angle_menu
+
+# supposed to let the user arbitrarily rotate the image
+# .main_window.menu_bar.tools.menu add command -label "Rotate" -command popup_rotate_menu
+
+ .main_window.menu_bar.tools.menu add command -label "Recognize" -command popup_recognize_menu
+ .main_window.menu_bar.tools.menu add separator
+ .main_window.menu_bar.tools.menu add command -label "Zoom in" -command ZOOM_IN
+ .main_window.menu_bar.tools.menu add command -label "Zoom out" -command ZOOM_OUT
+ .main_window.menu_bar.tools.menu add separator
+ .main_window.menu_bar.tools.menu add command -label "Refresh" -command DISPLAY_INTERVALS
+ .main_window.menu_bar.tools.menu add command -label "Deskew" -command popup_deskew_menu
+
+# automated spell correction
+# .main_window.menu_bar.tools.menu add command -label "SpellCorrect" -command popup_correct_menu
+
+# interactive learning
+# .main_window.menu_bar.tools.menu add command -label "Learn Mode" -command popup_learn_mode
+}
+
+# initial selections of the two option menus built in popup_learn_mode
+set CURRENT_DEFAULT_FONT Helvetica
+set CURRENT_DEFAULT_SIZE 9
+proc popup_learn_mode { } {
+# Pops up the .learn window for interactive learning: a 100x100 canvas, an
+# entry for the ascii string, option menus for font and size, and Learn /
+# Skip buttons.
+# NOTE(review): the only menu entry that would call this procedure is
+# commented out in init_tools_menu, and the button commands learn_ok and
+# learn_skip are not defined in the visible part of this script.
+ global BACKGROUND FOREGROUND SMALLFONT FONT
+ toplevel .learn
+ frame .learn.c -width 100 -height 100
+ canvas .learn.c.c -background $BACKGROUND -width 100 -height 100
+ pack .learn.c.c
+ frame .learn.s
+ entry .learn.s.learned_string -width 5 -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+ label .learn.s.string_message -text "Ascii" -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+ pack .learn.s.learned_string .learn.s.string_message -side left -expand 1
+
+# tk_optionMenu stores the selection in the named global variable
+ tk_optionMenu .learn.font CURRENT_DEFAULT_FONT Helvetica Courier Times
+ .learn.font configure -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+ .learn.font.menu configure -bg $BACKGROUND -fg $FOREGROUND
+ tk_optionMenu .learn.size CURRENT_DEFAULT_SIZE 9 10 12 18
+ .learn.size configure -bg $BACKGROUND -fg $FOREGROUND -font $SMALLFONT
+ .learn.size.menu configure -bg $BACKGROUND -fg $FOREGROUND
+ frame .learn.buttons
+ button .learn.buttons.ok -text Learn -command learn_ok -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .learn.buttons.cancel -text Skip -command learn_skip -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .learn.buttons.ok .learn.buttons.cancel -side left -expand 1 -fill x
+ pack .learn.c .learn.s .learn.font .learn.size .learn.buttons -side top -fill x
+}
+
+
+
+proc popup_deskew_menu { } {
+# Handler of the Tools/Deskew menu entry: logs a line on stdout and runs
+# DESKEW, a command that is not defined in this script (presumably
+# registered by the C program).  Despite its name it opens no window.
+ puts stdout "Calling get_skew"
+ DESKEW
+}
+
+proc popup_recognize_menu { } {
+# Handler of the Tools/Recognize menu entry: logs a line on stdout and runs
+# FIND_LINES_AND_RECOGNIZE, a command that is not defined in this script
+# (presumably registered by the C program).  It opens no window.
+ puts stdout "Calling recognize from Tcl"
+ FIND_LINES_AND_RECOGNIZE
+}
+
+# value of DISPLAY_IMAGE that selects the external xv viewer (see my_open)
+set XV 2
+proc init_options_menu { } {
+# Builds the Options menu: the "Warning Levels..." dialog, three
+# checkbuttons, and two radio groups that select the display mode
+# (DISPLAY_IMAGE: 0, 1 or $XV) and the deskew method (DESKEW_METHOD: -1, 1
+# or 0).  The -variable names refer to global Tcl variables.
+# NOTE(review): those variables are not set anywhere in this script;
+# presumably they are linked to C variables -- confirm.
+# dummy, word_certainty_value and screen_view_style are declared global but
+# not used in this procedure.
+ global dummy word_certainty_value screen_view_style FOREGROUND BACKGROUND FONT XV
+ menu .main_window.menu_bar.options.menu -foreground $FOREGROUND -background $BACKGROUND -font $FONT
+ .main_window.menu_bar.options.menu add command -label "Warning Levels..." -command popup_confidence_menu
+
+# .main_window.menu_bar.options.menu add command -label "Zoom Ratio" -command popup_zoom_ratio_menu
+ .main_window.menu_bar.options.menu add checkbutton -label "Display Line Boundaries" -variable DISPLAY_LINE_BOUNDARIES
+ .main_window.menu_bar.options.menu add checkbutton -label "Display Bounding Boxes" -variable DISPLAY_BOUNDING_BOXES
+ .main_window.menu_bar.options.menu add checkbutton -label "Spellcheck" -variable SPELLCHECK
+ .main_window.menu_bar.options.menu add separator
+ .main_window.menu_bar.options.menu add radiobutton -label "No Display" -variable DISPLAY_IMAGE -value 0
+ .main_window.menu_bar.options.menu add radiobutton -label "OCRchie Display" -variable DISPLAY_IMAGE -value 1
+ .main_window.menu_bar.options.menu add radiobutton -label "xv" -variable DISPLAY_IMAGE -value $XV
+ .main_window.menu_bar.options.menu add separator
+ .main_window.menu_bar.options.menu add radiobutton -label "No deskew" -variable DESKEW_METHOD -value -1
+ .main_window.menu_bar.options.menu add radiobutton -label "RLE rotate" -variable DESKEW_METHOD -value 1
+ .main_window.menu_bar.options.menu add radiobutton -label "Bitmap rotate" -variable DESKEW_METHOD -value 0
+
+}
+
+set GLOBAL_MESSAGE "<none>"
+proc init_button_bar { } {
+# Fills the button bar with a single message widget, 400 wide.
+# NOTE(review): GLOBAL_MESSAGE is declared global but the widget is created
+# without -text or -textvariable, so nothing here connects the two;
+# whoever displays text must configure .main_window.button_bar.msg itself.
+ global FONT BACKGROUND FOREGROUND GLOBAL_MESSAGE
+ message .main_window.button_bar.msg -font $FONT -background $BACKGROUND -foreground $FOREGROUND -width 400
+ pack .main_window.button_bar.msg
+}
+
+proc init_display { } {
+# Creates the image display: the frame .main_window.display holding the
+# canvas work_space plus a horizontal and a vertical scrollbar wired to it.
+# The canvas path is stored in the global IMAGE_DISPLAY_WIN and the
+# scrollable area is fixed at 5000 x 5000 canvas units.
+# The frame is created here but packed by the caller (init_user_interface).
+ global display_height canvas_width FOREGROUND BACKGROUND IMAGE_DISPLAY_WIN main_window_width display_height scroll_inc
+ frame .main_window.display -width $main_window_width -height $display_height -relief ridge -bd 5 -bg $BACKGROUND
+ canvas .main_window.display.work_space -bg white -xscrollcommand ".main_window.display.xscroller set" -yscrollcommand ".main_window.display.yscroller set" -xscrollincrement $scroll_inc -cursor {crosshair black gray} -width $canvas_width -height $display_height
+# two scrollbars
+ scrollbar .main_window.display.xscroller -command ".main_window.display.work_space xview" -orient horizontal -background $BACKGROUND
+ scrollbar .main_window.display.yscroller -command ".main_window.display.work_space yview" -background $BACKGROUND
+
+ pack .main_window.display.xscroller -side bottom -fill x
+ pack .main_window.display.work_space .main_window.display.yscroller -side left -fill y
+ set IMAGE_DISPLAY_WIN .main_window.display.work_space
+ .main_window.display.work_space configure -scrollregion { 0 0 5000 5000 }
+# region grabbing is currently disabled
+# initialize_region_grab
+}
+
+proc init_edit_window { } {
+# Creates the text widget that shows the recognized text, configures the
+# three highlight tags (LOW_PRECISION, MISPELLED, UNKNOWN_CHAR) and binds
+# <Tab> to jump to the next highlighted word.
+#
+# COLORED_WORDS is a flat list of triples "text-index xpos ypos" appended by
+# addword; the <Tab> script pops one triple per key press.  Binding scripts
+# are evaluated at global level, so every variable set inside the script
+# (xpos, ypos, ulx, uly, lrx, lry, x, local_tags) is a global variable.
+# NOTE(review): SCALE_FACTOR is not set in this script; presumably it is
+# linked to a C variable -- confirm.
+ global edit_window_height canvas_width EDIT_BACKGROUND COLORED_WORDS LOW_PRECISION_BACKGROUND MISPELLED_BACKGROUND UNKNOWN_CHAR_BACKGROUND SMALLFONT SCALE_FACTOR scroll_inc
+ text .main_window.edit_window.text_part -bg $EDIT_BACKGROUND -height $edit_window_height -width $canvas_width -insertbackground yellow -insertwidth 8 -font $SMALLFONT -fg white -wrap word
+ pack .main_window.edit_window.text_part -side bottom
+ .main_window.edit_window.text_part tag configure LOW_PRECISION -background $LOW_PRECISION_BACKGROUND
+ .main_window.edit_window.text_part tag configure MISPELLED -background $MISPELLED_BACKGROUND
+ .main_window.edit_window.text_part tag configure UNKNOWN_CHAR -background $UNKNOWN_CHAR_BACKGROUND
+# Tab binding for the window is supposed to advance the cursor to the
+# next uncertain word and scroll the image display to show the image
+# of that word
+ bind .main_window.edit_window.text_part <Tab> {
+ if {[llength $COLORED_WORDS] == 0} {
+ puts stdout "No more words"
+ } else {
+ .main_window.edit_window.text_part mark set insert [pop_colored_words]
+ set xpos [pop_colored_words]
+ set ypos [pop_colored_words]
+# puts "xpos and ypos for this word"
+# a 600 x 200 window around the word, scaled to canvas coordinates
+ set ulx [expr $SCALE_FACTOR * ($xpos - 300)]
+ set uly [expr $SCALE_FACTOR * ($ypos - 100)]
+ set lrx [expr $SCALE_FACTOR * ($xpos + 300)]
+ set lry [expr $SCALE_FACTOR * ($ypos + 100)]
+# I could never get this scrolling to work quite right, maybe
+# someone will figure it out someday
+# (the second configure immediately replaces the region set by the first)
+ .main_window.display.work_space configure -scrollregion [list $ulx $uly $lrx $lry]
+ .main_window.display.work_space configure -scrollregion {0 0 5000 5000}
+# .main_window.display.work_space xview moveto [expr (($SCALE_FACTOR * $xpos) / $scroll_inc)]
+# .main_window.display.work_space yview moveto [expr (($SCALE_FACTOR * $ypos) / $scroll_inc)]
+
+ set x [.main_window.edit_window.text_part index insert]
+# puts "New index is $x"
+ .main_window.edit_window.text_part see insert
+ set local_tags [.main_window.edit_window.text_part tag names insert]
+# puts "Tags at this place: $local_tags"
+ }
+# break stops any further <Tab> bindings from running for this event
+ break
+ }
+}
+
+proc addword { w {xpos 0} {ypos 0} {status OK}} {
+# Appends the word w, followed by one space, to the edit window.
+#
+#   status OK             plain text; the insert mark is moved to the end.
+#   status LOW_PRECISION, MISPELLED or UNKNOWN_CHAR
+#                         the word is inserted with the tag of the same name
+#                         and the triple "index-of-word-start xpos ypos" is
+#                         appended to the global list COLORED_WORDS, which
+#                         the <Tab> binding of the edit window consumes.
+#   anything else         a diagnostic goes to stdout and the word is
+#                         inserted followed by a marker text.
+#
+# xpos and ypos could be stored as text tags as well, but that was found to
+# slow things down a lot, hence the separate list.
+ global COLORED_WORDS LOW_PRECISION_BACKGROUND MISPELLED_BACKGROUND UNKNOWN_CHAR_BACKGROUND
+ set editor .main_window.edit_window.text_part
+
+ switch -exact -- $status {
+  OK {
+   $editor insert end "$w "
+   $editor mark set insert end
+  }
+  LOW_PRECISION -
+  MISPELLED -
+  UNKNOWN_CHAR {
+   $editor insert end "$w" $status
+   $editor insert end " "
+   # step back onto the word just inserted and find where it starts
+   $editor mark set insert "end -3 char"
+   $editor mark set insert "insert wordstart"
+   lappend COLORED_WORDS [$editor index insert] $xpos $ypos
+   $editor mark set insert end
+  }
+  default {
+   puts stdout "Unknown word status for $w: $status"
+   $editor insert end "$w UNKNOWNSTATUS? "
+  }
+ }
+}
+
+proc pop_colored_words { } {
+# Removes the first element of the global list COLORED_WORDS and returns it.
+# When the list is empty it is left as it is and the empty string is
+# returned.
+ global COLORED_WORDS
+ set head [lindex $COLORED_WORDS 0]
+ if {[llength $COLORED_WORDS] > 0} {
+  set COLORED_WORDS [lrange $COLORED_WORDS 1 end]
+ }
+ return $head
+}
+
+
+# corners of the rectangle being dragged, in canvas coordinates
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+
+# started_region is 1 while a rectangle drag is in progress
+set started_region 0
+set region_count 0
+proc initialize_region_grab { } {
+#
+# NONE OF THE REGION STUFF IS BEING USED CURRENTLY
+# (the only call, in init_display, is commented out)
+#
+# facilitates the grabbing of a rectangle of the window
+# using mouse button 1
+# and apparently a lot of other junk!
+#
+# Installs five bindings:
+#   <ButtonPress-1>   on the canvas: start a rectangle and grab the display
+#   <B1-Motion>       on the canvas: stretch the rectangle to the pointer
+#   <ButtonRelease-1> on the canvas: record the normalised corners in
+#                     region_data, append the id to region_list, add the
+#                     region buttons and release the grab
+#   <B2-Motion>       on the canvas: stretch the arrow being drawn, if any
+#   <Leave>           on the display frame: abandon a drag in progress
+# Binding scripts run at global level, so all variables they use are global.
+
+ global x_init y_init x_final y_final started_region region_data region_list region_id arrow_in_progress current_arrow
+
+ bind .main_window.display.work_space <ButtonPress-1> {
+ if [expr ! $started_region] {
+ grab set .main_window.display
+ set x_init [.main_window.display.work_space canvasx %x]
+ set y_init [.main_window.display.work_space canvasy %y]
+
+ set region_id [.main_window.display.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3]
+ .main_window.display.work_space itemconfigure $region_id -tags region$region_id
+
+ set started_region 1
+
+ }
+ }
+ bind .main_window.display.work_space <ButtonRelease-1> {
+ set x_final [.main_window.display.work_space canvasx %x]
+ set y_final [.main_window.display.work_space canvasy %y]
+
+ .main_window.display.work_space coords region$region_id $x_init $y_init $x_final $y_final
+
+
+ # if finishing a rectangle, initialize its stuff in the array
+ # (stored so that x_init <= x_final and y_init <= y_final)
+ if {$x_init <= $x_final} {
+ set region_data($region_id,x_init) $x_init
+ set region_data($region_id,x_final) $x_final
+ } else {
+ set region_data($region_id,x_final) $x_init
+ set region_data($region_id,x_init) $x_final
+ }
+ if {$y_init <= $y_final} {
+ set region_data($region_id,y_init) $y_init
+ set region_data($region_id,y_final) $y_final
+ } else {
+ set region_data($region_id,y_init) $y_final
+ set region_data($region_id,y_final) $y_init
+ }
+
+ set region_data($region_id,next_region_id) 0
+ lappend region_list $region_id
+
+ make_region_buttons $region_id
+
+ set started_region 0
+ grab release .main_window.display
+ }
+
+ bind .main_window.display.work_space <B2-Motion> {
+ if $arrow_in_progress {
+ set curx [.main_window.display.work_space canvasx %x]
+ set cury [.main_window.display.work_space canvasy %y]
+ .main_window.display.work_space coords $current_arrow 0 0 $curx $cury
+ }
+ }
+ bind .main_window.display.work_space <B1-Motion> {
+ if $started_region {
+
+ set curx [.main_window.display.work_space canvasx %x]
+ set cury [.main_window.display.work_space canvasy %y]
+
+ .main_window.display.work_space coords region$region_id $x_init $y_init $curx $cury
+
+
+ }
+ }
+ bind .main_window.display <Leave> {
+ # on leaving the display, release control of the mouse etc.
+ # maybe make it scroll instead?
+ if $started_region {
+ grab release .main_window.display
+ set started_region 0
+ .main_window.display.work_space coords region$region_id 0 0 0 0
+ }
+ }
+}
+
+# 1 while an arrow between two regions is being drawn
+set arrow_in_progress 0
+proc make_region_buttons { reg_id } {
+# Adds two 20 x 20 squares at the upper left corner of region reg_id:
+#   blue ("next" button): double click with button 2 starts an arrow at the
+#                         pointer position, or finishes the arrow that is in
+#                         progress
+#   red  ("kill" button): double click with button 2 deletes the region
+# Both squares carry the tag region<reg_id>, so they are deleted together
+# with the region.  next_button_data and kill_button_data map the canvas
+# item id of a square back to its region id.
+# NOTE(review): finish_arrow is not defined in the visible part of this
+# script.
+ global region_data kill_button_data next_button_data arrow_in_progress current_arrow
+
+ set x_init $region_data($reg_id,x_init)
+ set y_init $region_data($reg_id,y_init)
+
+ set next_num [.main_window.display.work_space create rectangle $x_init $y_init [expr $x_init + 20] [expr $y_init + 20] -fill blue -tags "region$reg_id next_button$reg_id"]
+ set next_button_data($next_num,reg_id) $reg_id
+ .main_window.display.work_space bind next_button$reg_id <Double-2> {
+ set reg_id $next_button_data([.main_window.display.work_space find withtag current],reg_id)
+ if { $arrow_in_progress } {
+ finish_arrow $reg_id
+ } else {
+ set canvas_x [.main_window.display.work_space canvasx %x]
+ set canvas_y [.main_window.display.work_space canvasy %y]
+ start_arrow $reg_id $canvas_x $canvas_y
+ puts stdout "Starting an arrow at $canvas_x $canvas_y"
+ }
+ }
+ set kill_num [.main_window.display.work_space create rectangle [expr $x_init + 20] $y_init [expr $x_init + 40] [expr $y_init + 20] -fill red -tags "region$reg_id kill_button$reg_id"]
+
+ set kill_button_data($kill_num,reg_id) $reg_id
+
+ .main_window.display.work_space bind kill_button$reg_id <Double-2> {
+ set reg_id $kill_button_data([.main_window.display.work_space find withtag current],reg_id)
+ destroy_region $reg_id .main_window.display.work_space
+ }
+}
+
+proc start_arrow { reg_id x_start y_start } {
+# Starts drawing an arrow for region reg_id.  The arrow is created as a
+# zero-length blue line at (x_start, y_start) with the tag arrow<reg_id>;
+# its canvas id is stored in region_data(<reg_id>,arrow) and in
+# current_arrow, and arrow_in_progress is set to 1.
+ global arrow_in_progress next_button_data region_data current_arrow
+# path_name is set but not used below; the canvas path is spelled out
+ set path_name .main_window.display.work_space
+# the caller (the blue "next" button binding) passes the pointer position
+
+
+
+ set arrow [.main_window.display.work_space create line $x_start $y_start $x_start $y_start -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+
+ set region_data($reg_id,arrow) $arrow
+ set arrow_in_progress 1
+ set current_arrow $arrow
+}
+
+proc destroy_region { reg_id path_name } {
+# Deletes every item tagged region<reg_id> from the canvas path_name (the
+# rectangle and both of its buttons carry that tag) and logs the id.
+# The entries kept for the region in region_data and region_list are not
+# removed.
+ $path_name delete region$reg_id
+ puts stdout "Destroying $reg_id"
+}
+
+set open_menu_geometry 250x300+400+400
+set current_directory [pwd]
+# text of the "Dir:" entry of the open dialog
+set box_entry $current_directory
+# glob-style pattern a file name must match to be listed
+set open_menu_pattern *.tif
+
+proc popup_open_menu { } {
+#
+# this procedure pops up an interactive box which can be used to open files
+# what a horrible mess. Writing it took forever
+#
+# Layout, top to bottom: a pattern entry, a "Dir:" entry and a listbox with
+# the directory contents.
+#   <Return> in the pattern entry      refreshes the listing
+#   <Return> in the "Dir:" entry       changes directory, or opens the file
+#                                      that was typed in
+#   double click on a listbox line     changes directory, or opens the file
+# A file is opened with my_open, after which the dialog is destroyed.
+# Binding scripts run at global level, so file_to_open and current_directory
+# are global variables there.
+#
+ global open_menu_geometry open_menu_pattern current_directory FONT FOREGROUND BACKGROUND SMALLFONT box_entry
+
+ toplevel .open_menu
+ wm geometry .open_menu $open_menu_geometry
+ wm title .open_menu Open
+ .open_menu configure -background $BACKGROUND
+ # force the user to interact with this box
+ # grab set .open_menu
+
+ # directory listing and scrollbar
+ frame .open_menu.dirstuff
+
+ frame .open_menu.cur_dir
+ label .open_menu.cur_dir.l -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -text "Dir: "
+ entry .open_menu.cur_dir.e -relief sunken -bd 2 -textvariable box_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ .open_menu.cur_dir.e icursor end
+ bind .open_menu.cur_dir.e <Return> {
+ set file_to_open $box_entry
+ if [file isdirectory $file_to_open] {
+ cd $file_to_open
+ set current_directory [pwd]
+ clear_directory_box
+ puts stdout "Changing to $current_directory"
+ fill_in_directory_box $current_directory $open_menu_pattern
+ } elseif [file exists $file_to_open] {
+ puts stdout "Opening file $file_to_open"
+ my_open $file_to_open
+ destroy .open_menu
+ } else {
+ puts stdout "Cannot access that file"
+ }
+ }
+ pack .open_menu.cur_dir.l .open_menu.cur_dir.e -side left
+
+ scrollbar .open_menu.dirstuff.yscroll -command ".open_menu.dirstuff.directory yview" -background $BACKGROUND
+ listbox .open_menu.dirstuff.directory -yscrollcommand ".open_menu.dirstuff.yscroll set" -width 22 -height 11 -relief raised -font $SMALLFONT -background $BACKGROUND -foreground $FOREGROUND
+
+ fill_in_directory_box $current_directory $open_menu_pattern
+ bind .open_menu.dirstuff.directory <Double-Button-1> {
+ # the selected listbox line is a name relative to the current directory
+ set file_to_open [selection get]
+# puts stdout "Bound button"
+ if [file isdirectory $file_to_open] {
+ cd $file_to_open
+ set current_directory [pwd]
+ clear_directory_box
+# puts stdout "Changing to $current_directory"
+ fill_in_directory_box $current_directory $open_menu_pattern
+ } elseif [file exists $file_to_open] {
+# puts stdout "Opening file $file_to_open"
+ my_open $file_to_open
+ destroy .open_menu.dirstuff.directory
+ destroy .open_menu
+ } else {
+ puts stdout "Cannot access that file"
+ }
+ }
+ # pattern for listings to match
+
+
+ frame .open_menu.pattern_match -background $BACKGROUND
+ label .open_menu.pattern_match.label -text "Match files of type:" -font $SMALLFONT -background $BACKGROUND -fg $FOREGROUND
+ entry .open_menu.pattern_match.entry -width 5 -relief sunken -bd 2 -textvariable open_menu_pattern -font $SMALLFONT -background $BACKGROUND -fg $FOREGROUND
+ # refresh the directory listing after user presses return
+ bind .open_menu.pattern_match.entry <Return> {
+ set current_directory [pwd]
+ clear_directory_box
+ fill_in_directory_box $current_directory $open_menu_pattern
+ }
+
+ pack .open_menu.pattern_match.label .open_menu.pattern_match.entry -side left
+ pack .open_menu.dirstuff.directory .open_menu.dirstuff.yscroll -side left -fill y
+
+ pack .open_menu.pattern_match .open_menu.cur_dir .open_menu.dirstuff -side top -anchor w
+ focus .open_menu.pattern_match.entry
+}
+
+proc popup_confidence_menu { } {
+# a little box for the user to change the confidence
+# warning levels (words that get highlighted)
+# Two horizontal scales, each 0..255, are tied to the global variables
+# VERY_LOW_CONFIDENCE and LOW_CONFIDENCE.
+# NOTE(review): those variables are not set in this script; presumably they
+# are linked to C variables -- confirm.  The window has no close button of
+# its own.
+ global BACKGROUND FOREGROUND SMALLFONT FONT
+ toplevel .confidence -background $BACKGROUND
+ wm geometry .confidence 250x225+500+500
+ message .confidence.m -text "Warning thresholds for the output display\n (255 = warn unless perfect)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT -justify center -width 250
+ scale .confidence.very_low -from 0 -to 255 -variable VERY_LOW_CONFIDENCE -orient horizontal -label "Poor (displayed in red)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+ scale .confidence.low -from 0 -to 255 -variable LOW_CONFIDENCE -orient horizontal -label "Fair (displayed in blue)" -background $BACKGROUND -foreground $FOREGROUND -font $SMALLFONT
+ pack .confidence.m .confidence.very_low .confidence.low -side top -fill x
+}
+
+proc my_open { filename } {
+# Loads filename into the page structure with page_open and then shows it:
+# through the external viewer xv when DISPLAY_IMAGE equals XV, otherwise by
+# running DISPLAY_INTERVALS.
+# page_open, get_page_height, get_page_width and DISPLAY_INTERVALS are not
+# defined in this script; presumably they are registered by the C program.
+# NOTE(review): the result of page_open is ignored and the condition below
+# is the constant 1, so the else branch never runs (popup_image_failure_win
+# is not defined in the visible part of this script).  The old remark
+# "1 means success" presumably referred to a page_open result -- confirm.
+ global IMAGE_DISPLAY_WIN SCALE_FACTOR DISPLAY_IMAGE XV xvprocess
+# puts stdout "Opening $filename"
+ page_open $filename
+# puts stdout "Done putting into page structure"
+ if { 1 } {
+# display_height, display_width and geometry are local variables here and
+# are not used after being computed
+ set display_height [expr $SCALE_FACTOR * [get_page_height]]
+ set display_width [expr $SCALE_FACTOR * [get_page_width]]
+ append geometry [expr int($display_width)] x [expr int($display_height)]
+# puts stdout "Displaying Image"
+ if { $DISPLAY_IMAGE == $XV } {
+# started in the background; exec returns without waiting for xv
+ set xvprocess [exec xv $filename &]
+ puts stdout "xvprocess $xvprocess"
+ } else {
+# use the canvas...
+ DISPLAY_INTERVALS
+ }
+ } else {
+ popup_image_failure_win
+ }
+}
+
+# file name shown in the save/read dialogs; each dialog resets it
+set save_entry "recog.txt"
+proc popup_save_ascii_menu { } {
+#
+# Pops up a little window for saving the ascii recognized text
+# Should have a general function for all the saves, but now
+# they are just cuts and pastes
+#
+# The window grabs the input.  OK and <Return> in the entry run save_ascii,
+# Cancel runs save_ascii_cancel.  The file name is kept in the global
+# save_entry, preset to recog.txt.
+#
+ global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT write_image
+
+ set save_entry recog.txt
+ toplevel .save_ascii -background $BACKGROUND
+ wm geometry .save_ascii $save_ascii_geometry
+ wm title .save_ascii "Save ASCII Text"
+ grab set .save_ascii
+
+
+ label .save_ascii.image -bitmap @$write_image -foreground $FOREGROUND -background $BACKGROUND
+ frame .save_ascii.s -background $BACKGROUND
+ label .save_ascii.s.txt -text "Save ascii text as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ entry .save_ascii.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ pack .save_ascii.s.txt .save_ascii.s.ent -side top
+ frame .save_ascii.buttons
+ button .save_ascii.buttons.ok -text OK -command save_ascii -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .save_ascii.buttons.cancel -text Cancel -command save_ascii_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .save_ascii.buttons.ok .save_ascii.buttons.cancel -side left -expand 1 -fill x
+ pack .save_ascii.image .save_ascii.s .save_ascii.buttons -side top
+
+ bind .save_ascii.s.ent <Return> {
+ save_ascii
+ }
+}
+
+proc save_ascii_cancel { } {
+# Cancel button of the "Save ASCII Text" dialog: closes it without saving.
+ destroy .save_ascii
+}
+
+proc save_ascii { } {
+# Writes the whole content of the edit window to the file named by the
+# global save_entry and closes the "Save ASCII Text" dialog.
+#
+# When the file cannot be opened or written, the reason is reported on
+# stdout (like the other diagnostics of this script) and the dialog stays
+# open so that another name can be entered.  The channel is closed on every
+# path.
+ global save_entry
+ if {[catch {open $save_entry w} fileid]} {
+  # on failure fileid holds the error message, not a channel
+  puts stdout "Cannot write $save_entry: $fileid"
+  return
+ }
+ if {[catch {
+  puts $fileid [.main_window.edit_window.text_part get 1.0 end]
+  close $fileid
+ } write_error]} {
+  # harmless when the channel was already closed by the failed close
+  catch {close $fileid}
+  puts stdout "Cannot write $save_entry: $write_error"
+  return
+ }
+ destroy .save_ascii
+}
+
+proc popup_save_word_pos_menu { } {
+# Pops up the window for saving the result in word/pos format.  Same layout
+# as the "Save ASCII Text" dialog: the window grabs the input, OK and
+# <Return> in the entry run save_word_pos, Cancel runs save_word_pos_cancel.
+# The file name is kept in the global save_entry, preset to recog.wps.
+ global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry OCRCHIE_ROOT face_image
+
+ set save_entry recog.wps
+ toplevel .save_word_pos -background $BACKGROUND
+ wm geometry .save_word_pos $save_ascii_geometry
+ wm title .save_word_pos "Save in word/pos format"
+ grab set .save_word_pos
+
+ label .save_word_pos.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+ frame .save_word_pos.s -background $BACKGROUND
+ label .save_word_pos.s.txt -text "Save word_pos text as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ entry .save_word_pos.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ pack .save_word_pos.s.txt .save_word_pos.s.ent -side top
+ frame .save_word_pos.buttons
+ button .save_word_pos.buttons.ok -text OK -command save_word_pos -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .save_word_pos.buttons.cancel -text Cancel -command save_word_pos_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .save_word_pos.buttons.ok .save_word_pos.buttons.cancel -side left -expand 1 -fill x
+ pack .save_word_pos.image .save_word_pos.s .save_word_pos.buttons -side top
+
+ bind .save_word_pos.s.ent <Return> {
+ save_word_pos
+ }
+}
+
+proc save_word_pos_cancel { } {
+# Cancel button of the word/pos dialog: closes it without saving.
+ destroy .save_word_pos
+}
+
+proc save_word_pos { } {
+# Runs WRITE_WORD_POS with the file name from the global save_entry, then
+# closes the dialog.  WRITE_WORD_POS is not defined in this script;
+# presumably it is registered by the C program.
+ # need to put some error checking in here
+ global save_entry
+ WRITE_WORD_POS $save_entry
+ destroy .save_word_pos
+}
+
+proc popup_save_learned_chars_menu { } {
+# Pops up the window for writing the learned characters.  Same layout as the
+# "Save ASCII Text" dialog: the window grabs the input, OK and <Return> in
+# the entry run save_learned_chars, Cancel runs save_learned_chars_cancel.
+# The file name is kept in the global save_entry, preset to learn.dat.
+ global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry face_image
+ set save_entry learn.dat
+ toplevel .save_learned_chars -background $BACKGROUND
+ wm geometry .save_learned_chars $save_ascii_geometry
+ wm title .save_learned_chars "Write Learned Characters"
+ grab set .save_learned_chars
+
+ label .save_learned_chars.image -bitmap @$face_image -foreground $FOREGROUND -background $BACKGROUND
+ frame .save_learned_chars.s -background $BACKGROUND
+ label .save_learned_chars.s.txt -text "Save learned characters as:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ entry .save_learned_chars.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ pack .save_learned_chars.s.txt .save_learned_chars.s.ent -side top
+ frame .save_learned_chars.buttons
+ button .save_learned_chars.buttons.ok -text OK -command save_learned_chars -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .save_learned_chars.buttons.cancel -text Cancel -command save_learned_chars_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .save_learned_chars.buttons.ok .save_learned_chars.buttons.cancel -side left -expand 1 -fill x
+ pack .save_learned_chars.image .save_learned_chars.s .save_learned_chars.buttons -side top
+
+ bind .save_learned_chars.s.ent <Return> {
+ save_learned_chars
+ }
+}
+
+proc save_learned_chars_cancel { } {
+# Cancel button of the "Write Learned Characters" dialog: closes it without
+# saving.
+ destroy .save_learned_chars
+}
+
+proc save_learned_chars { } {
+# Runs WRITE_LEARNED_CHARS with the file name from the global save_entry,
+# then closes the dialog.  WRITE_LEARNED_CHARS is not defined in this
+# script; presumably it is registered by the C program.
+ # need to put some error checking in here?
+ global save_entry
+ WRITE_LEARNED_CHARS $save_entry
+ destroy .save_learned_chars
+}
+
+proc popup_read_learned_chars_menu { } {
+# Pops up the window for reading learned characters.  Same layout as the
+# "Save ASCII Text" dialog: the window grabs the input, OK and <Return> in
+# the entry run read_learned_chars, Cancel runs read_learned_chars_cancel.
+# The file name is kept in the global save_entry, preset to learn.dat.
+ global save_ascii_geometry BACKGROUND FOREGROUND FONT SMALLFONT save_entry eye_image
+ set save_entry learn.dat
+ toplevel .read_learned_chars -background $BACKGROUND
+ wm geometry .read_learned_chars $save_ascii_geometry
+ wm title .read_learned_chars "Read Learned Characters"
+ grab set .read_learned_chars
+
+ label .read_learned_chars.image -bitmap @$eye_image -foreground $FOREGROUND -background $BACKGROUND
+ frame .read_learned_chars.s -background $BACKGROUND
+ label .read_learned_chars.s.txt -text "Read learned characters from:" -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ entry .read_learned_chars.s.ent -relief sunken -bd 2 -textvariable save_entry -foreground $FOREGROUND -background $BACKGROUND -font $SMALLFONT
+ pack .read_learned_chars.s.txt .read_learned_chars.s.ent -side top
+ frame .read_learned_chars.buttons
+ button .read_learned_chars.buttons.ok -text OK -command read_learned_chars -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ button .read_learned_chars.buttons.cancel -text Cancel -command read_learned_chars_cancel -fg $FOREGROUND -background $BACKGROUND -font $SMALLFONT -width 5
+ pack .read_learned_chars.buttons.ok .read_learned_chars.buttons.cancel -side left -expand 1 -fill x
+ pack .read_learned_chars.image .read_learned_chars.s .read_learned_chars.buttons -side top
+
+ bind .read_learned_chars.s.ent <Return> {
+ read_learned_chars
+ }
+}
+
+proc read_learned_chars_cancel { } {
+# Cancel button of the "Read Learned Characters" dialog: closes it without
+# reading anything.
+ destroy .read_learned_chars
+}
+
+proc read_learned_chars { } {
+# Runs LEARN_DATA with the file name from the global save_entry, then closes
+# the dialog.  LEARN_DATA is not defined in this script; presumably it is
+# registered by the C program.
+ # need to put some error checking in here
+ global save_entry
+ LEARN_DATA $save_entry
+ destroy .read_learned_chars
+}
+
+
+proc PAGE_OPEN { filename } {
+# unused
+# Stub that ignores filename and always returns 1.  my_open calls the
+# lower-case command page_open, which is a different command.
+ return 1
+}
+
+proc clear_directory_box { } {
+# Removes every line from the listbox of the open dialog.  The dialog
+# (.open_menu) has to exist.
+ .open_menu.dirstuff.directory delete 0 end
+}
+
+proc fill_in_directory_box { dirname {pattern *} } {
+# fills in the directory box with directories or files matching the pattern
+#
+#   dirname  directory to list
+#   pattern  glob-style pattern (as understood by "string match") that the
+#            name of a non-directory entry has to match to be listed
+#
+# The names come from "ls -aF", so hidden entries are included and names
+# carry the type marker appended by -F (a trailing / on directories).
+#
+# The output of ls is split at newlines instead of being read as a Tcl
+# list: a name with a blank was otherwise cut into several entries, and a
+# name with an unbalanced brace raised a list-syntax error.  Directories
+# are tested relative to dirname rather than to the working directory, so
+# the listing is also right when the two differ.
+ set listing [split [exec ls -aF $dirname] "\n"]
+ foreach entry $listing {
+  if {[file isdirectory [file join $dirname $entry]] || [string match $pattern $entry]} {
+   .open_menu.dirstuff.directory insert end $entry
+  }
+ }
+}
+
+
+proc popup_quit_dialog_box { } {
+# Pops up the confirmation window shown before quitting.  The window grabs
+# the input; OK runs quit_ok, Cancel runs quit_cancel.
+# caution_image_name is a local variable, so the append below builds the
+# path afresh on every call.
+ global quit_dialog_geometry BACKGROUND FOREGROUND FONT OCRCHIE_ROOT
+
+ toplevel .quit_dialog
+ wm geometry .quit_dialog $quit_dialog_geometry
+ wm title .quit_dialog Quit
+ grab set .quit_dialog
+
+ append caution_image_name $OCRCHIE_ROOT caution.xbm
+ label .quit_dialog.image -bitmap @$caution_image_name -foreground $FOREGROUND -background $BACKGROUND
+ message .quit_dialog.msg -text "You are about to quit OCRchie. All changes you have made will be lost." -font $FONT -background $BACKGROUND -fg $FOREGROUND -width 275 -justify center
+ frame .quit_dialog.buttons
+ button .quit_dialog.buttons.ok -text OK -command quit_ok -fg $FOREGROUND -background $BACKGROUND -font $FONT -width 5
+ button .quit_dialog.buttons.cancel -text Cancel -command quit_cancel -fg $FOREGROUND -background $BACKGROUND -font $FONT -width 5
+ pack .quit_dialog.buttons.ok .quit_dialog.buttons.cancel -side left -expand 1 -fill x
+ pack .quit_dialog.image .quit_dialog.msg .quit_dialog.buttons -side top -fill x
+
+
+}
+
+proc quit_ok { } {
+# OK button of the quit dialog: destroys the main window and the dialog and
+# then runs QUIT, a command that is not defined in this script (presumably
+# registered by the C program).
+# destroy .t
+# destroy .histogram
+ destroy .main_window
+ destroy .quit_dialog
+ QUIT
+}
+
+proc quit_cancel { } {
+# Cancel button of the quit dialog: sets the global command_not_in_progress
+# to 1 and closes the dialog.
+# NOTE(review): command_not_in_progress is not read anywhere in the visible
+# part of this script.
+ global command_not_in_progress
+ set command_not_in_progress 1
+ destroy .quit_dialog
+}
+
+proc clear_canvas { } {
+# Throws away the image display (canvas, both scrollbars and the enclosing
+# frame) and creates it again with init_display.
+# init_display does not pack the new frame and nothing here does either.
+ destroy .main_window.display.work_space
+ destroy .main_window.display.xscroller
+ destroy .main_window.display.yscroller
+ destroy .main_window.display
+ init_display
+}
+
+proc spellcheck { word } {
+# spellchecks a word
+# could change to use spell or some faster program
+#
+# Returns SPELLED_CORRECTLY when the reply of "ispell -a" contains a * or a
+# + (the marks ispell uses for an accepted word), otherwise MISPELLED.
+# The raw reply is left in the global variable x, as before.
+#
+# The word is written straight to the standard input of ispell.  It used to
+# go through "echo", which swallows words that look like one of its options.
+# It is also prefixed with ^, which tells ispell in -a mode to spell-check
+# the rest of the line; without the prefix a word starting with a character
+# such as * @ + ! is executed as an ispell command (* for example adds the
+# word to the personal dictionary).  The words come from the recognizer and
+# may start with any character.
+ global x
+ set x [exec ispell -a << "^$word\n"]
+ if { ([string last * $x] == -1) && ([string last + $x] == -1) } {
+  return MISPELLED
+ } else {
+  return SPELLED_CORRECTLY
+ }
+}
+
+
+init_user_interface
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/newocr-ui.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,403 @@
+#!/usr/sww/bin/wish -f
+#
+# user interface code (tcl visuals) for OCR
+# started 9/95, Archie Russell
+
+
+set main_window_width 800
+set main_window_height 800
+set dummy 0
+# Geometry string is WIDTHxHEIGHT+X+Y: the window size, then the screen
+# position of its upper-left corner.  It is built from the two sizes above
+# so the three settings cannot drift apart.
+set main_window_geometry ${main_window_width}x${main_window_height}+200+100
+
+set menu_bar_width $main_window_width
+set menu_bar_height 50
+set button_bar_width $main_window_width
+set button_bar_height 100
+set display_height 700
+# save a little room for scrollbars, etc.
+
+set canvas_width [expr $main_window_width - 30]
+proc init_user_interface {} {
+# Entry point of the script: creates the .main_window toplevel and stacks
+# three frames in it (menu bar, button bar, display).  Each frame is handed
+# to its init_* proc right after it is created, since init_menu_bar and
+# init_display create their widgets as children of the frame.
+
+    global main_window_geometry main_window_width main_window_height menu_bar_width menu_bar_height button_bar_width button_bar_height display_height
+    set top .main_window
+    # a toplevel is a separate window handled by the window manager
+    toplevel $top
+    wm geometry $top $main_window_geometry
+    wm title $top "OCR user interface"
+
+    # frames are containers: they only hold other widgets
+    # -bd is the border width in pixels, -relief its 3-D look
+    frame $top.menu_bar -width $menu_bar_width -height $menu_bar_height -relief raised -bd 2
+    init_menu_bar
+
+    frame $top.button_bar -width $button_bar_width -height $button_bar_height -relief ridge -bd 5
+    init_button_bar
+
+    frame $top.display -width $main_window_width -height $display_height -relief ridge -bd 5
+    init_display
+
+    # stacked top to bottom in the order listed, each anchored to the left edge
+    pack $top.menu_bar $top.button_bar $top.display -side top -anchor w
+    focus $top
+}
+
+proc init_menu_bar { } {
+# Populates the menu-bar frame with the File, Edit and Options menubuttons.
+# Each button's pull-down menu is built by the matching init_<name>_menu
+# proc immediately after the button itself is created.
+# Open issue carried over from the original: keyboard shortcuts do not work.
+
+    set bar .main_window.menu_bar
+    foreach title {File Edit Options} {
+        set name [string tolower $title]
+        menubutton $bar.$name -text "$title " -underline 0 -menu $bar.$name.menu -borderwidth 2
+        init_${name}_menu
+    }
+
+    # left to right in creation order, 1 mm of padding around each button
+    pack $bar.file $bar.edit $bar.options -side left -padx 1m -pady 1m -fill x
+}
+
+proc init_file_menu { } {
+# Builds the pull-down menu of the File menubutton; every command entry runs the proc named by -command.
+    set m .main_window.menu_bar.file.menu
+    menu $m
+    $m add command -label "Open..." -command popup_open_menu
+    $m add command -label "Close" -command popup_close_menu
+    $m add separator
+    $m add command -label "Save" -command default_save
+# NOTE(review): "Save As..." is wired to the same proc as "Save"; popup_close_menu and default_save are not defined in this script -- confirm they exist elsewhere
+    $m add command -label "Save As..." -command default_save
+    $m add separator
+    $m add command -label "Quit" -command popup_quit_dialog_box
+}
+# Placeholder Edit menu: radiobuttons "Nothing", "Yet", "Here" (values 0, 1, 2) bound to the global variable dummy.
+proc init_edit_menu { } {
+    menu .main_window.menu_bar.edit.menu
+    set value -1
+    foreach label {Nothing Yet Here} {
+        .main_window.menu_bar.edit.menu add radiobutton -label $label -variable dummy -value [incr value]
+    }
+}
+
+set word_certainty_value normal
+set screen_view_style facing_page
+proc init_options_menu { } {
+# Builds the Options pull-down with two cascaded radio submenus:
+#   "Word Certainty" -> global word_certainty_value (stringent / normal / lenient)
+#   "Screen View"    -> global screen_view_style (facing_page / interleave_lines / translation_only)
+    set m .main_window.menu_bar.options.menu
+    menu $m
+    $m add cascade -label "Word Certainty" -menu $m.word_certainty
+    menu $m.word_certainty
+    $m.word_certainty add radiobutton -label "Stringent" -variable word_certainty_value -value stringent
+    $m.word_certainty add radiobutton -label "Normal" -variable word_certainty_value -value normal
+    $m.word_certainty add radiobutton -label "Lenient" -variable word_certainty_value -value lenient
+
+    $m add cascade -label "Screen View" -menu $m.screen_view
+    menu $m.screen_view
+    $m.screen_view add radiobutton -label "facing page" -variable screen_view_style -value facing_page
+    $m.screen_view add radiobutton -label "interleave lines" -variable screen_view_style -value interleave_lines
+    $m.screen_view add radiobutton -label "translation only" -variable screen_view_style -value translation_only
+}
+
+proc init_button_bar {} {} ;# placeholder: the button bar frame gets no widgets yet
+ proc init_display { } {
+# Fills the display frame with a white scrollable canvas (work_space) and its two
+# scrollbars, then installs the mouse bindings and draws the demo text.
+    global display_height canvas_width
+    set d .main_window.display
+    canvas $d.work_space -bg white -height $display_height -width $canvas_width -xscrollcommand "$d.xscroller set" -yscrollcommand "$d.yscroller set"
+# -scrollincrement 30 -cursor {crosshair black gray} ;; removed for tcl 8.0?
+    scrollbar $d.xscroller -command "$d.work_space xview" -orient horizontal
+    scrollbar $d.yscroller -command "$d.work_space yview"
+    pack $d.xscroller -side bottom -fill x
+    pack $d.work_space $d.yscroller -side left -fill y
+    $d.work_space configure -scrollregion { -5000 -5000 5000 5000 }
+    initialize_bindings
+    test_canvas
+}
+
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+set mouse_mode NONE
+set started_region 0
+set region_count 0
+proc initialize_bindings { } {
+    # Installs the mouse bindings for rubber-banding a rectangular region on
+    # the work_space canvas with button 1.  Binding scripts are evaluated at
+    # global level, so the variables they use are globals.
+
+    global region_data regions next_button_data next_buttons kill_button_data kill_buttons arrow_data arrows mouse_mode current_object
+    # The ButtonPress-1 handler below is disabled; it is the only code in this proc that enters making_region mode.
+# bind .main_window.display.work_space <ButtonPress-1> {
+# if [expr ! [string compare $mouse_mode NONE]] {
+# set current_object [find withtag current]
+# if [expr ($current_object == "") || ((expr ! [lsearch $next_buttons $current_object]) && (expr ! [lsearch $kill_buttons $current_object]))] {
+# set mouse_mode making_region
+# # start creating the region
+# grab set .main_window.display
+# set x_init [.main_window.display.work_space canvasx %x]
+# set y_init [.main_window.display.work_space canvasy %y]
+# set region_id [.main_window.display.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3]
+# set region_data($region_id,x_init) $x_init
+# set region_data($region_id,y_init) $y_init
+# .main_window.display.work_space itemconfigure $region_id -tags region$region_id
+# lappend regions $region_id
+
+# } elseif {[lsearch $next_buttons $current_object] != -1} {
+# set mouse_mode making_arrow
+# grab set .main_window.display
+# set arrow_id [.main_window.display.work_space create line 0 0 1 1]
+# set arrow_data($arrow_id,x_init) $next_button_data($current_object,x_center)
+# set arrow_data($arrow_id,y_init) $next_button_data($current_object,y_center)
+# set arrow_data($arrow_id,start_region) $next_button_data($current_object,region_id)
+# .main_window.display.work_space coords $arrow_id $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init) $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init)
+# .main_window.display.work_space itemconfigure $arrow_id -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$arrow_id
+
+# lappend arrows $arrow_id
+# } elseif {[lsearch $kill_buttons $current_object] != -1} {
+# set mouse_mode killing_region
+# } elseif {[search $prev_buttons $current_object] != -1} {
+# set moude_mode moving_arrow
+# } else {
+# puts stdout unknown-mode
+# }
+# } else {
+# puts stdout "strange: looks like you are in some unknown state. Sorry"
+# }
+# }
+    bind .main_window.display.work_space <ButtonRelease-1> {
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_final [.main_window.display.work_space canvasx %x]
+            set y_final [.main_window.display.work_space canvasy %y]
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            .main_window.display.work_space coords region$region_id $x_init $y_init $x_final $y_final
+            # store the corners normalised: *_init gets the smaller coordinate, *_final the larger
+            if {$x_init <= $x_final} {
+                set region_data($region_id,x_init) $x_init
+                set region_data($region_id,x_final) $x_final
+            } else {
+                set region_data($region_id,x_final) $x_init
+                set region_data($region_id,x_init) $x_final
+            }
+            if {$y_init <= $y_final} {
+                set region_data($region_id,y_init) $y_init
+                set region_data($region_id,y_final) $y_final
+            } else {
+                set region_data($region_id,y_init) $y_final
+                set region_data($region_id,y_final) $y_init
+            }
+
+            set region_data($region_id,next_region_id) NONE
+
+            make_region_buttons $region_id
+            grab release .main_window.display
+            set mouse_mode NONE
+            set current_object NONE
+        }
+    }
+
+    # While dragging, stretch the rectangle from its recorded start corner
+    # to the pointer.  The brace that opens the `if` body has to be on the
+    # `if` line itself: a newline ends a Tcl command, so a brace on the next
+    # line leaves `if` without a body and the binding fails when it fires.
+    bind .main_window.display.work_space <B1-Motion> {
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            # %x / %y are window coordinates; canvasx / canvasy translate them to canvas coordinates
+            set curx [.main_window.display.work_space canvasx %x]
+            set cury [.main_window.display.work_space canvasy %y]
+            .main_window.display.work_space coords region$region_id $x_init $y_init $curx $cury
+        }
+
+    }
+    bind .main_window.display <Leave> {
+        # on leaving the display, release control of the mouse etc.
+        # maybe make it scroll instead?
+        if $started_region {
+            grab release .main_window.display
+            set started_region 0
+            .main_window.display.work_space coords region$region_id 0 0 0 0
+        }
+    }
+}
+
+set arrow_in_progress 0
+proc make_region_buttons { reg_id } {
+# Adds two 20x20 squares at the recorded (x_init, y_init) corner of region reg_id, both tagged region$reg_id:
+# blue "next" (double-click button 2 starts/finishes an arrow) and, beside it, red "kill" (double-click button 2 deletes the region).
+    global region_data kill_button_data next_button_data arrow_in_progress current_arrow
+    set x0 $region_data($reg_id,x_init)
+    set y0 $region_data($reg_id,y_init)
+    set x1 [expr $x0 + 20]
+    set y1 [expr $y0 + 20]
+    set next_num [.main_window.display.work_space create rectangle $x0 $y0 $x1 $y1 -fill blue -tags "region$reg_id next_button$reg_id"]
+    set next_button_data($next_num,reg_id) $reg_id
+    .main_window.display.work_space bind next_button$reg_id <Double-2> {
+        set reg_id $next_button_data([.main_window.display.work_space find withtag current],reg_id)
+        if { $arrow_in_progress } {
+            finish_arrow $reg_id
+        } else {
+            set canvas_x [.main_window.display.work_space canvasx %x]
+            set canvas_y [.main_window.display.work_space canvasy %y]
+            start_arrow $reg_id $canvas_x $canvas_y
+            puts stdout "Starting an arrow at $canvas_x $canvas_y"
+        }
+    }
+    set kill_num [.main_window.display.work_space create rectangle $x1 $y0 [expr $x0 + 40] $y1 -fill red -tags "region$reg_id kill_button$reg_id"]
+    set kill_button_data($kill_num,reg_id) $reg_id
+    .main_window.display.work_space bind kill_button$reg_id <Double-2> {
+        set reg_id $kill_button_data([.main_window.display.work_space find withtag current],reg_id)
+        destroy_region $reg_id .main_window.display.work_space
+    }
+}
+
+proc start_arrow { reg_id x_start y_start } {
+# Begins an arrow for region reg_id: creates a blue line with an arrowhead,
+# tagged arrow$reg_id, whose two ends both sit at (x_start, y_start), and
+# records it as the arrow in progress (globals arrow_in_progress and
+# current_arrow) and in region_data($reg_id,arrow).
+    global arrow_in_progress next_button_data region_data current_arrow
+    set path_name .main_window.display.work_space
+
+    set arrow [$path_name create line $x_start $y_start $x_start $y_start -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+    set region_data($reg_id,arrow) $arrow
+    set arrow_in_progress 1
+    set current_arrow $arrow
+}
+# Deletes every item tagged region<reg_id> from the canvas path_name and logs the id on stdout.
+proc destroy_region { reg_id path_name } {
+    $path_name delete "region$reg_id"
+    puts "Destroying $reg_id"
+}
+# Demo content only: four coloured Times captions at x=400 and one small four-line text item at (200,200).
+proc test_canvas { } {
+    set c .main_window.display.work_space
+    $c create text 400 200 -text "Document and text will be displayed here" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill black
+    $c create text 400 250 -text "Can be displayed in multiple colors etc." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill red
+    $c create text 400 300 -text "Can grab rectangles of stuff here." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill green
+    $c create text 400 350 -text "other things semi-working: quit and open (under file)" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill blue
+    $c create text 200 200 -font *-times-*-*-*--10-*-*-*-*-*-*-* -fill black -text "If I hit return
+Will it make any difference
+return
+return"
+}
+
+set open_menu_geometry 600x300+300+400
+set current_directory [pwd]
+set open_menu_pattern *
+
+proc popup_open_menu { } {
+# Pops up the modal "Open" dialog: a scrollable listing of the current
+# directory plus an entry holding the pattern used to filter it.
+# Double-clicking a directory changes into it; double-clicking anything else only logs the name and closes the dialog.
+# bug (carried over): cannot exit menu without selecting a file
+    global open_menu_geometry open_menu_pattern current_directory
+    set dlg .open_menu
+    set dir $dlg.dirstuff
+    set pat $dlg.pattern_match
+    toplevel $dlg
+    wm geometry $dlg $open_menu_geometry
+    wm title $dlg Open
+    # force the user to interact with this box
+    grab set $dlg
+    # directory listing and its scrollbar
+    frame $dir
+    scrollbar $dir.yscroll -command "$dir.directory yview"
+    listbox $dir.directory -yscrollcommand "$dir.yscroll set"
+## -geometry 25x12 -relief raised # removed 6/2000
+    fill_in_directory_box $current_directory $open_menu_pattern
+    # binding scripts run at global level, hence the literal widget paths inside them
+    bind $dir.directory <Double-Button-1> {
+        set file_to_open [selection get]
+        if [file isdirectory $file_to_open] {
+            cd $file_to_open
+            set current_directory [pwd]
+            clear_directory_box
+            puts stdout "Changing to $current_directory"
+            fill_in_directory_box $current_directory $open_menu_pattern
+        } else {
+            puts stdout "Opening file $file_to_open"
+            destroy .open_menu
+        }
+    }
+    # pattern entry; pressing Return re-reads the directory with the new pattern
+    frame $pat
+    label $pat.label -text "Match files of type:"
+    entry $pat.entry -width 5 -relief sunken -bd 2 -textvariable open_menu_pattern
+    bind $pat.entry <Return> {
+        set current_directory [pwd]
+        clear_directory_box
+        fill_in_directory_box $current_directory $open_menu_pattern
+    }
+    pack $pat.label $pat.entry -side left
+    pack $dir.directory $dir.yscroll -side left -fill y
+    pack $pat $dir -side top
+    focus $pat.entry
+}
+
+
+
+
+
+
+# Empties the directory listbox of the Open dialog (deletes entries 0 through end).
+proc clear_directory_box { } {
+ .open_menu.dirstuff.directory delete 0 end
+}
+# Lists dirname (via ls -aF) in the Open dialog's listbox: directories always, other entries only if they match pattern.
+# The ls output is split on newlines, so names containing spaces, braces or backslashes stay intact,
+# and the directory test is made inside dirname rather than relative to the current directory.
+proc fill_in_directory_box { dirname {pattern *} } {
+    foreach i [split [exec ls -aF $dirname] \n] {
+        if {[file isdirectory $dirname/$i] || [string match $pattern $i]} {
+            .open_menu.dirstuff.directory insert end $i
+        }
+    }
+}
+
+set quit_dialog_geometry 300x150+500+500
+proc popup_quit_dialog_box { } {
+# Modal confirmation for the Quit menu entry: a warning message above an
+# OK / Cancel button row; OK runs quit_ok, Cancel runs quit_cancel.
+    global quit_dialog_geometry
+    set dlg .quit_dialog
+    toplevel $dlg
+    wm geometry $dlg $quit_dialog_geometry
+    wm title $dlg Quit
+    grab set $dlg
+
+    message $dlg.msg -text "You are about to quit OCR-orama. All changes you have made will be lost."
+    frame $dlg.buttons
+    button $dlg.buttons.ok -text OK -command quit_ok
+    button $dlg.buttons.cancel -text Cancel -command quit_cancel
+    pack $dlg.buttons.ok $dlg.buttons.cancel -side left -expand 1 -fill x
+    pack $dlg.msg $dlg.buttons -side top -fill x
+}
+# OK handler of the quit dialog: destroys the main window and then the dialog.
+# NOTE(review): nothing here ends the wish process itself -- confirm that is intended.
+proc quit_ok { } {
+    destroy .main_window .quit_dialog
+}
+
+proc quit_cancel { } {
+# Cancel handler of the quit dialog: sets the global command_not_in_progress to 1, then closes the dialog.
+    uplevel #0 {set command_not_in_progress 1}
+    destroy .quit_dialog
+}
+
+init_user_interface
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/ocr-ui.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,404 @@
+#!/usr/sww/bin/wish -f
+#
+# user interface code (tcl visuals) for OCR
+# started 9/95, Archie Russell
+
+
+set main_window_width 800
+set main_window_height 800
+set dummy 0
+# Geometry string is WIDTHxHEIGHT+X+Y: the window size, then the screen
+# position of its upper-left corner.  It is built from the two sizes above
+# so the three settings cannot drift apart.
+set main_window_geometry ${main_window_width}x${main_window_height}+200+100
+
+set menu_bar_width $main_window_width
+set menu_bar_height 50
+set button_bar_width $main_window_width
+set button_bar_height 100
+set display_height 700
+# save a little room for scrollbars, etc.
+
+set canvas_width [expr $main_window_width - 30]
+proc init_user_interface {} {
+# Entry point of the script: creates the .main_window toplevel and stacks
+# three frames in it (menu bar, button bar, display).  Each frame is handed
+# to its init_* proc right after it is created, since init_menu_bar and
+# init_display create their widgets as children of the frame.
+
+    global main_window_geometry main_window_width main_window_height menu_bar_width menu_bar_height button_bar_width button_bar_height display_height
+    set top .main_window
+    # a toplevel is a separate window handled by the window manager
+    toplevel $top
+    wm geometry $top $main_window_geometry
+    wm title $top "OCR user interface"
+
+    # frames are containers: they only hold other widgets
+    # -bd is the border width in pixels, -relief its 3-D look
+    frame $top.menu_bar -width $menu_bar_width -height $menu_bar_height -relief raised -bd 2
+    init_menu_bar
+
+    frame $top.button_bar -width $button_bar_width -height $button_bar_height -relief ridge -bd 5
+    init_button_bar
+
+    frame $top.display -width $main_window_width -height $display_height -relief ridge -bd 5
+    init_display
+
+    # stacked top to bottom in the order listed, each anchored to the left edge
+    pack $top.menu_bar $top.button_bar $top.display -side top -anchor w
+    focus $top
+}
+
+proc init_menu_bar { } {
+# Populates the menu-bar frame with the File, Edit and Options menubuttons.
+# Each button's pull-down menu is built by the matching init_<name>_menu
+# proc immediately after the button itself is created.
+# Open issue carried over from the original: keyboard shortcuts do not work.
+
+    set bar .main_window.menu_bar
+    foreach title {File Edit Options} {
+        set name [string tolower $title]
+        menubutton $bar.$name -text "$title " -underline 0 -menu $bar.$name.menu -borderwidth 2
+        init_${name}_menu
+    }
+
+    # left to right in creation order, 1 mm of padding around each button
+    pack $bar.file $bar.edit $bar.options -side left -padx 1m -pady 1m -fill x
+}
+
+proc init_file_menu { } {
+# Builds the pull-down menu of the File menubutton; every command entry runs the proc named by -command.
+    set m .main_window.menu_bar.file.menu
+    menu $m
+    $m add command -label "Open..." -command popup_open_menu
+    $m add command -label "Close" -command popup_close_menu
+    $m add separator
+    $m add command -label "Save" -command default_save
+# NOTE(review): "Save As..." is wired to the same proc as "Save"; popup_close_menu and default_save are not defined in this script -- confirm they exist elsewhere
+    $m add command -label "Save As..." -command default_save
+    $m add separator
+    $m add command -label "Quit" -command popup_quit_dialog_box
+}
+# Placeholder Edit menu: radiobuttons "Nothing", "Yet", "Here" (values 0, 1, 2) bound to the global variable dummy.
+proc init_edit_menu { } {
+    menu .main_window.menu_bar.edit.menu
+    set value -1
+    foreach label {Nothing Yet Here} {
+        .main_window.menu_bar.edit.menu add radiobutton -label $label -variable dummy -value [incr value]
+    }
+}
+
+set word_certainty_value normal
+set screen_view_style facing_page
+proc init_options_menu { } {
+# Builds the Options pull-down with two cascaded radio submenus:
+#   "Word Certainty" -> global word_certainty_value (stringent / normal / lenient)
+#   "Screen View"    -> global screen_view_style (facing_page / interleave_lines / translation_only)
+    set m .main_window.menu_bar.options.menu
+    menu $m
+    $m add cascade -label "Word Certainty" -menu $m.word_certainty
+    menu $m.word_certainty
+    $m.word_certainty add radiobutton -label "Stringent" -variable word_certainty_value -value stringent
+    $m.word_certainty add radiobutton -label "Normal" -variable word_certainty_value -value normal
+    $m.word_certainty add radiobutton -label "Lenient" -variable word_certainty_value -value lenient
+
+    $m add cascade -label "Screen View" -menu $m.screen_view
+    menu $m.screen_view
+    $m.screen_view add radiobutton -label "facing page" -variable screen_view_style -value facing_page
+    $m.screen_view add radiobutton -label "interleave lines" -variable screen_view_style -value interleave_lines
+    $m.screen_view add radiobutton -label "translation only" -variable screen_view_style -value translation_only
+}
+
+# Placeholder: the button bar frame gets no widgets yet.
+proc init_button_bar {} {}
+
+proc init_display { } {
+# Fills the display frame with a white scrollable canvas (work_space) and its two
+# scrollbars, then installs the mouse bindings and draws the demo text.
+    global display_height canvas_width
+    set d .main_window.display
+    canvas $d.work_space -bg white -height $display_height -width $canvas_width -xscrollcommand "$d.xscroller set" -yscrollcommand "$d.yscroller set" -cursor {crosshair black gray}
+### -scrollincrement 30
+    scrollbar $d.xscroller -command "$d.work_space xview" -orient horizontal
+    scrollbar $d.yscroller -command "$d.work_space yview"
+    pack $d.xscroller -side bottom -fill x
+    pack $d.work_space $d.yscroller -side left -fill y
+    $d.work_space configure -scrollregion { -5000 -5000 5000 5000 }
+    initialize_bindings
+    test_canvas
+}
+
+set x_init 0
+set y_init 0
+set x_final 0
+set y_final 0
+set mouse_mode NONE
+set started_region 0
+set region_count 0
+proc initialize_bindings { } {
+    # Installs the mouse bindings for rubber-banding a rectangular region on
+    # the work_space canvas with button 1.  Binding scripts are evaluated at
+    # global level, so the variables they use are globals.
+
+    global region_data regions next_button_data next_buttons kill_button_data kill_buttons arrow_data arrows mouse_mode current_object
+    # The ButtonPress-1 handler below is disabled; it is the only code in this proc that enters making_region mode.
+# bind .main_window.display.work_space <ButtonPress-1> {
+# if [expr ! [string compare $mouse_mode NONE]] {
+# set current_object [find withtag current]
+# if [expr ($current_object == "") || ((expr ! [lsearch $next_buttons $current_object]) && (expr ! [lsearch $kill_buttons $current_object]))] {
+# set mouse_mode making_region
+# # start creating the region
+# grab set .main_window.display
+# set x_init [.main_window.display.work_space canvasx %x]
+# set y_init [.main_window.display.work_space canvasy %y]
+# set region_id [.main_window.display.work_space create rectangle $x_init $y_init $x_init $y_init -outline black -width 3]
+# set region_data($region_id,x_init) $x_init
+# set region_data($region_id,y_init) $y_init
+# .main_window.display.work_space itemconfigure $region_id -tags region$region_id
+# lappend regions $region_id
+
+# } elseif {[lsearch $next_buttons $current_object] != -1} {
+# set mouse_mode making_arrow
+# grab set .main_window.display
+# set arrow_id [.main_window.display.work_space create line 0 0 1 1]
+# set arrow_data($arrow_id,x_init) $next_button_data($current_object,x_center)
+# set arrow_data($arrow_id,y_init) $next_button_data($current_object,y_center)
+# set arrow_data($arrow_id,start_region) $next_button_data($current_object,region_id)
+# .main_window.display.work_space coords $arrow_id $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init) $arrow_data($arrow_id,x_init) $arrow_data($arrow_id,y_init)
+# .main_window.display.work_space itemconfigure $arrow_id -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$arrow_id
+
+# lappend arrows $arrow_id
+# } elseif {[lsearch $kill_buttons $current_object] != -1} {
+# set mouse_mode killing_region
+# } elseif {[search $prev_buttons $current_object] != -1} {
+# set moude_mode moving_arrow
+# } else {
+# puts stdout unknown-mode
+# }
+# } else {
+# puts stdout "strange: looks like you are in some unknown state. Sorry"
+# }
+# }
+    bind .main_window.display.work_space <ButtonRelease-1> {
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_final [.main_window.display.work_space canvasx %x]
+            set y_final [.main_window.display.work_space canvasy %y]
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            .main_window.display.work_space coords region$region_id $x_init $y_init $x_final $y_final
+            # store the corners normalised: *_init gets the smaller coordinate, *_final the larger
+            if {$x_init <= $x_final} {
+                set region_data($region_id,x_init) $x_init
+                set region_data($region_id,x_final) $x_final
+            } else {
+                set region_data($region_id,x_final) $x_init
+                set region_data($region_id,x_init) $x_final
+            }
+            if {$y_init <= $y_final} {
+                set region_data($region_id,y_init) $y_init
+                set region_data($region_id,y_final) $y_final
+            } else {
+                set region_data($region_id,y_init) $y_final
+                set region_data($region_id,y_final) $y_init
+            }
+
+            set region_data($region_id,next_region_id) NONE
+
+            make_region_buttons $region_id
+            grab release .main_window.display
+            set mouse_mode NONE
+            set current_object NONE
+        }
+    }
+
+    # While dragging, stretch the rectangle from its recorded start corner
+    # to the pointer.  The brace that opens the `if` body has to be on the
+    # `if` line itself: a newline ends a Tcl command, so a brace on the next
+    # line leaves `if` without a body and the binding fails when it fires.
+    bind .main_window.display.work_space <B1-Motion> {
+        if [expr ! [string compare $mouse_mode making_region]] {
+            set region_id $current_object
+            set x_init $region_data($region_id,x_init)
+            set y_init $region_data($region_id,y_init)
+            # %x / %y are window coordinates; canvasx / canvasy translate them to canvas coordinates
+            set curx [.main_window.display.work_space canvasx %x]
+            set cury [.main_window.display.work_space canvasy %y]
+            .main_window.display.work_space coords region$region_id $x_init $y_init $curx $cury
+        }
+
+    }
+    bind .main_window.display <Leave> {
+        # on leaving the display, release control of the mouse etc.
+        # maybe make it scroll instead?
+        if $started_region {
+            grab release .main_window.display
+            set started_region 0
+            .main_window.display.work_space coords region$region_id 0 0 0 0
+        }
+    }
+}
+
+set arrow_in_progress 0
+proc make_region_buttons { reg_id } {
+# Adds two 20x20 squares at the recorded (x_init, y_init) corner of region reg_id, both tagged region$reg_id:
+# blue "next" (double-click button 2 starts/finishes an arrow) and, beside it, red "kill" (double-click button 2 deletes the region).
+    global region_data kill_button_data next_button_data arrow_in_progress current_arrow
+    set x0 $region_data($reg_id,x_init)
+    set y0 $region_data($reg_id,y_init)
+    set x1 [expr $x0 + 20]
+    set y1 [expr $y0 + 20]
+    set next_num [.main_window.display.work_space create rectangle $x0 $y0 $x1 $y1 -fill blue -tags "region$reg_id next_button$reg_id"]
+    set next_button_data($next_num,reg_id) $reg_id
+    .main_window.display.work_space bind next_button$reg_id <Double-2> {
+        set reg_id $next_button_data([.main_window.display.work_space find withtag current],reg_id)
+        if { $arrow_in_progress } {
+            finish_arrow $reg_id
+        } else {
+            set canvas_x [.main_window.display.work_space canvasx %x]
+            set canvas_y [.main_window.display.work_space canvasy %y]
+            start_arrow $reg_id $canvas_x $canvas_y
+            puts stdout "Starting an arrow at $canvas_x $canvas_y"
+        }
+    }
+    set kill_num [.main_window.display.work_space create rectangle $x1 $y0 [expr $x0 + 40] $y1 -fill red -tags "region$reg_id kill_button$reg_id"]
+    set kill_button_data($kill_num,reg_id) $reg_id
+    .main_window.display.work_space bind kill_button$reg_id <Double-2> {
+        set reg_id $kill_button_data([.main_window.display.work_space find withtag current],reg_id)
+        destroy_region $reg_id .main_window.display.work_space
+    }
+}
+
+proc start_arrow { reg_id x_start y_start } {
+# Begins an arrow for region reg_id: creates a blue line with an arrowhead,
+# tagged arrow$reg_id, whose two ends both sit at (x_start, y_start), and
+# records it as the arrow in progress (globals arrow_in_progress and
+# current_arrow) and in region_data($reg_id,arrow).
+    global arrow_in_progress next_button_data region_data current_arrow
+    set path_name .main_window.display.work_space
+
+    set arrow [$path_name create line $x_start $y_start $x_start $y_start -width 3 -arrow last -arrowshape {6.0m 8.0m 1.5m} -fill blue -tags arrow$reg_id]
+    set region_data($reg_id,arrow) $arrow
+    set arrow_in_progress 1
+    set current_arrow $arrow
+}
+# Deletes every item tagged region<reg_id> from the canvas path_name and logs the id on stdout.
+proc destroy_region { reg_id path_name } {
+    $path_name delete "region$reg_id"
+    puts "Destroying $reg_id"
+}
+# Demo content only: four coloured Times captions at x=400 and one small four-line text item at (200,200).
+proc test_canvas { } {
+    set c .main_window.display.work_space
+    $c create text 400 200 -text "Document and text will be displayed here" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill black
+    $c create text 400 250 -text "Can be displayed in multiple colors etc." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill red
+    $c create text 400 300 -text "Can grab rectangles of stuff here." -font *-times-*-r-normal--24-*-*-*-*-*-*-* -fill green
+    $c create text 400 350 -text "other things semi-working: quit and open (under file)" -font *-times-*-*-*--24-*-*-*-*-*-*-* -fill blue
+    $c create text 200 200 -font *-times-*-*-*--10-*-*-*-*-*-*-* -fill black -text "If I hit return
+Will it make any difference
+return
+return"
+}
+
+set open_menu_geometry 600x300+300+400
+set current_directory [pwd]
+set open_menu_pattern *
+
+proc popup_open_menu { } {
+# Pops up the modal "Open" dialog: a scrollable listing of the current
+# directory plus an entry holding the pattern used to filter it.
+# Double-clicking a directory changes into it; double-clicking anything else only logs the name and closes the dialog.
+# bug (carried over): cannot exit menu without selecting a file
+    global open_menu_geometry open_menu_pattern current_directory
+    set dlg .open_menu
+    set dir $dlg.dirstuff
+    set pat $dlg.pattern_match
+    toplevel $dlg
+    wm geometry $dlg $open_menu_geometry
+    wm title $dlg Open
+    # force the user to interact with this box
+    grab set $dlg
+    # directory listing and its scrollbar
+    frame $dir
+    scrollbar $dir.yscroll -command "$dir.directory yview"
+    listbox $dir.directory -yscrollcommand "$dir.yscroll set" -geometry 25x12 -relief raised
+    fill_in_directory_box $current_directory $open_menu_pattern
+    # binding scripts run at global level, hence the literal widget paths inside them
+    bind $dir.directory <Double-Button-1> {
+        set file_to_open [selection get]
+        if [file isdirectory $file_to_open] {
+            cd $file_to_open
+            set current_directory [pwd]
+            clear_directory_box
+            puts stdout "Changing to $current_directory"
+            fill_in_directory_box $current_directory $open_menu_pattern
+        } else {
+            puts stdout "Opening file $file_to_open"
+            destroy .open_menu
+        }
+    }
+    # pattern entry; pressing Return re-reads the directory with the new pattern
+    frame $pat
+    label $pat.label -text "Match files of type:"
+    entry $pat.entry -width 5 -relief sunken -bd 2 -textvariable open_menu_pattern
+    bind $pat.entry <Return> {
+        set current_directory [pwd]
+        clear_directory_box
+        fill_in_directory_box $current_directory $open_menu_pattern
+    }
+    pack $pat.label $pat.entry -side left
+    pack $dir.directory $dir.yscroll -side left -fill y
+    pack $pat $dir -side top
+    focus $pat.entry
+}
+
+
+
+
+
+
+# Empties the directory listbox of the Open dialog (deletes entries 0 through end).
+proc clear_directory_box { } {
+ .open_menu.dirstuff.directory delete 0 end
+}
+# Lists dirname (via ls -aF) in the Open dialog's listbox: directories always, other entries only if they match pattern.
+# The ls output is split on newlines, so names containing spaces, braces or backslashes stay intact,
+# and the directory test is made inside dirname rather than relative to the current directory.
+proc fill_in_directory_box { dirname {pattern *} } {
+    foreach i [split [exec ls -aF $dirname] \n] {
+        if {[file isdirectory $dirname/$i] || [string match $pattern $i]} {
+            .open_menu.dirstuff.directory insert end $i
+        }
+    }
+}
+
+set quit_dialog_geometry 300x150+500+500
+proc popup_quit_dialog_box { } {
+# Modal confirmation for the Quit menu entry: a warning message above an
+# OK / Cancel button row; OK runs quit_ok, Cancel runs quit_cancel.
+    global quit_dialog_geometry
+    set dlg .quit_dialog
+    toplevel $dlg
+    wm geometry $dlg $quit_dialog_geometry
+    wm title $dlg Quit
+    grab set $dlg
+
+    message $dlg.msg -text "You are about to quit OCR-orama. All changes you have made will be lost."
+    frame $dlg.buttons
+    button $dlg.buttons.ok -text OK -command quit_ok
+    button $dlg.buttons.cancel -text Cancel -command quit_cancel
+    pack $dlg.buttons.ok $dlg.buttons.cancel -side left -expand 1 -fill x
+    pack $dlg.msg $dlg.buttons -side top -fill x
+}
+# OK handler of the quit dialog: destroys the main window and then the dialog.
+# NOTE(review): nothing here ends the wish process itself -- confirm that is intended.
+proc quit_ok { } {
+    destroy .main_window .quit_dialog
+}
+
+proc quit_cancel { } {
+# Cancel handler of the quit dialog: sets the global command_not_in_progress to 1, then closes the dialog.
+    uplevel #0 {set command_not_in_progress 1}
+    destroy .quit_dialog
+}
+
+init_user_interface
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/project.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,123 @@
+#include <stdarg.h>
+#include "tcl_interface.h"
+#include "project.h"
+#include "histogram.h"
+#include "bitmap.h"
+#include <stdio.h>
+#include <math.h>
+#include <cstdlib>
+#include "status_message.h"
+
+int docommand(char* fmt, ...);
+
+
+
+void draw_bitmap(int x, int y, char* xbm_file) /* Ask Tcl to create a bitmap item from file xbm_file at (x, y) on .t.f.c, then run "update". */
+{
+ docommand(".t.f.c create bitmap %d %d -bitmap @%s", x, y, xbm_file);
+ docommand("update"); /* Tcl "update": process pending events so the new item is drawn */
+}
+
+#define LINE_SKIP 5
+#define DRAW_RAYS 0
+#define DEBUG_PROJECT_HISTOGRAM 1
+#define DRAW_PIXELS 0
+#define PRINT_STATUS 1
+
+Histogram* project_histogram(RLEMap* r, double cut_angle) /* Builds a Histogram of ray weights for r cut at cut_angle degrees, one ray per LINE_SKIP rows; the Histogram is allocated with new and not freed here. */
+{
+ if (ENABLE_USER_INTERFACE)
+ {
+ docommand("update");
+ if(DRAW_RAYS)
+ {
+ docommand(".main_window.display.work_space delete project_ray"); /* remove items tagged project_ray (the tag used when rays are drawn) */
+ }
+ if(DRAW_PIXELS)
+ {
+ docommand(".t.f.c delete project_pixels");
+ }
+ }
+ int num_values = r->imageLength() / LINE_SKIP; /* number of sampled rows */
+ int* ret_array = (int*) malloc (sizeof(int) * num_values); /* never freed in this function; NOTE(review): presumably Histogram keeps or copies it -- confirm */
+ for(int current_value = 0; current_value < num_values; current_value++)
+ {
+ ret_array[current_value] = calculate_one_rle_ray_weight(r, cut_angle, current_value*LINE_SKIP); /* the real work */
+ }
+ Histogram* h = new Histogram(num_values - 1, ret_array, cut_angle); /* NOTE(review): passes num_values - 1 although num_values entries were filled -- confirm this is intended */
+ if(PRINT_STATUS && ENABLE_USER_INTERFACE)
+ {
+ set_status("Deskewing: @%.3lf degrees, standard deviation = %.3lf", cut_angle, h->get_standard_dev());
+ }
+ if(0) /* debugging display, compiled out */
+ h->display();
+ return h;
+}
+
+inline double deg_to_rad(double deg) /* Converts an angle from degrees to radians. */
+{
+ return deg * (M_PI / 180.00);
+}
+
+int PRINT_RAYS = 0;
+
+int calculate_one_rle_ray_weight(RLEMap* rlemap, double cut_angle, int row_num)
+{
+ /* cuts through rlemap at cut_angle (cut angle in DEG) , adding up the weights */
+ /* of the pixels in the cut line */
+ /* RLEMap methodology: Translate the angular slope into rise/run slope,
+ find out how many bits (how much run) to read horizontally before
+ jumping one row. Makes use of RLEMap::pixels_between, called below as (start, finish, row) */
+ /* possible optimizations: calculate this initial stuff "run_bits" "slope"
+ once for each histogram, pass as args to this */
+ /* Returns the summed weight of the ray that starts at column 0 of row row_num. */
+
+ double slope;
+ double float_run_bits;
+ int y_update;
+ double new_x;
+ double rad_cut_angle = deg_to_rad(cut_angle);
+
+ slope = tan(rad_cut_angle);
+ float_run_bits = fabs((((double)1)/sin(rad_cut_angle)) * cos(rad_cut_angle));
+ /* float_run_bits is |cos/sin| of the angle: the columns read on one row before moving to the next. slope is computed but never read. NOTE(review): for cut_angle == 0 this divides by sin(0); with IEEE doubles that yields infinity, the loop below is skipped and the tail call counts the whole row -- confirm that is the intent. */
+ int ray_weight = 0;
+
+ double cur_x = 0;
+ int cur_y = row_num;
+ /* Negative angles step towards lower row numbers, all others towards higher ones. */
+ if(cut_angle < 0)
+ y_update = -1;
+ else
+ y_update = 1;
+
+
+ int image_height = rlemap->imageLength();
+ int image_width = rlemap->imageWidth();
+ /* One row per iteration, until the next run would pass the right edge or cur_y leaves the image. */
+ while(((new_x = cur_x + float_run_bits) < image_width) &&
+ (cur_y >= 0) && (cur_y < image_height))
+ {
+ /* watch out about going past rlemap bounds */
+ ray_weight += rlemap->pixels_between((int)cur_x, (int)new_x, cur_y);
+ cur_x = new_x + 1; /* the next run starts one column past the end of this one */
+ cur_y = cur_y + y_update;
+ }
+
+ /* and once more for posterity: the partial run from cur_x up to image_width on the last row, if still inside the image */
+ if((cur_y >= 0) && (cur_y < image_height) && (cur_x < image_width))
+ ray_weight += rlemap->pixels_between((int)cur_x, image_width, cur_y);
+
+ if(1)
+ if(DRAW_RAYS)
+ {
+ docommand(".main_window.display.work_space create line %d %d %d %d -fill blue -tags {project_ray IMAGE_TAG}", 0, row_num, (int)cur_x, (int)cur_y);
+ }
+ if(PRINT_RAYS)
+ printf("---Ray weight for cut angle %lf = %d\n", cut_angle, ray_weight);
+ return ray_weight;
+}
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/project.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,15 @@
+#ifndef PROJECT
+#define PROJECT 1
+
+#include "RLEPair.h"
+#include "RLEMap.h"
+#include "bitmap.h"
+#include "histogram.h"
+
+void draw_sample(int x, int y);
+Bitmap* sample_to_2d();
+Histogram* project_histogram(Bitmap* b, double cut_angle);
+Histogram* project_histogram(RLEMap* r, double cut_angle);
+int calculate_one_ray_weight(Bitmap* b, double cut_angle, int row_num);
+int calculate_one_rle_ray_weight(RLEMap* rlemap, double cut_angle, int row_num);
+#endif
Binary file reference/ocr-simple/smallp35.tif has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/status_message.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,115 @@
+#include "tcl_interface.h"
+#include "status_message.h"
+
+/* these are ugly, but who cares? */
+
+/* you must reset this to 0 if you want to use these fxns */
+double last_status = 0.0;
+
+void set_string_status(char* s, int partial, int full) /* Integer overload: converts both counts to double and forwards to the double overload. */
+{
+ set_string_status(s, (double)partial, (double)full);
+}
+
+void set_string_status(char* s, double partial, double full) /* Shows "<s>: N0%..." for the highest 10% step that partial/full has passed and last_status has not yet reached; at most one message per call. */
+{
+ double fraction = (double)partial / (double)full; /* NOTE(review): full == 0 is not guarded against */
+ if(fraction > 0.9 && last_status < 0.9)
+ {
+ set_status("%s: 90%%...", s);
+ last_status = 0.9; /* remember the step so it is not announced again until last_status is reset */
+ }
+ else if(fraction > 0.8 && last_status < 0.8)
+ {
+ set_status("%s: 80%%...", s);
+ last_status = 0.8;
+ }
+ else if(fraction > 0.7 && last_status < 0.7)
+ {
+ set_status("%s: 70%%...", s);
+ last_status = 0.7;
+ }
+ else if(fraction > 0.6 && last_status < 0.6)
+ {
+ set_status("%s: 60%%...", s);
+ last_status = 0.6;
+ }
+ else if(fraction > 0.5 && last_status < 0.5)
+ {
+ set_status("%s: 50%%...", s);
+ last_status = 0.5;
+ }
+ else if(fraction > 0.4 && last_status < 0.4)
+ {
+ set_status("%s: 40%%...", s);
+ last_status = 0.4;
+ }
+ else if(fraction > 0.3 && last_status < 0.3)
+ {
+ set_status("%s: 30%%...", s);
+ last_status = 0.3;
+ }
+ else if(fraction > 0.2 && last_status < 0.2)
+ {
+ set_status("%s: 20%%...", s);
+ last_status = 0.2;
+ }
+ else if(fraction > 0.1 && last_status < 0.1)
+ {
+ set_status("%s: 10%%...", s);
+ last_status = 0.1;
+ }
+}
+
+void set_display_status(double partial, double full) /* Progress message labelled "Displaying Image"; forwards to set_string_status. */
+{
+ set_string_status("Displaying Image", partial, full);
+}
+
+void set_display_status(int partial, int full) /* Integer-count variant of the "Displaying Image" progress message. */
+{
+ set_string_status("Displaying Image", partial, full);
+}
+
+void set_read_status(double partial, double full) /* Progress message labelled "Reading Image"; forwards to set_string_status. */
+{
+ set_string_status("Reading Image", partial, full);
+}
+
+void set_read_status(int partial, int full) /* Integer-count variant of the "Reading Image" progress message. */
+{
+ set_string_status("Reading Image", partial, full);
+}
+
+void set_rotation_status(double partial, double full) /* Progress message labelled "Rotating Image"; forwards to set_string_status. */
+{
+ set_string_status("Rotating Image", partial, full);
+}
+
+void set_rotation_status(int partial, int full) /* Integer-count variant of the "Rotating Image" progress message. */
+{
+ set_string_status("Rotating Image", partial, full);
+}
+
+void set_recognize_status(int p, int f) /* Progress message labelled "Recognizing Characters": p done out of f. */
+{
+ set_string_status("Recognizing Characters", p, f);
+}
+void set_extract_status(int p, int f) /* Progress message labelled "Extracting Words": p done out of f. */
+{
+ set_string_status("Extracting Words", p, f);
+}
+void set_component_status(int p, int f) /* Progress message labelled "Extracting Characters": p done out of f. */
+{
+ set_string_status("Extracting Characters", p, f);
+}
+
+void set_text_display_status(int p, int f) /* Progress message labelled "Displaying text": p done out of f. */
+{
+ set_string_status("Displaying text", p, f);
+}
+
+void set_spellcheck_status(int p, int f) /* Progress message labelled "Spellchecking": p done out of f. */
+{
+ set_string_status("Spellchecking", p, f);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/status_message.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,25 @@
+#ifndef STATUS_MESSAGE_H
+#define STATUS_MESSAGE_H 1
+#include "tcl_interface.h"
+
+
+extern double last_status;
+
+void set_string_status(char* s, int partial, int full);
+void set_string_status(char* s, double partial, double full);
+void set_display_status(int partial, int full);
+void set_rotation_status(int partial, int full);
+void set_read_status(int partial, int full);
+void set_display_status(double partial, double full);
+void set_rotation_status(double partial, double full);
+void set_read_status(double partial, double full);
+void set_recognize_status(int p, int f);
+void set_extract_status(int p, int f);
+void set_component_status(int p, int f);
+void set_spellcheck_status(int p, int f);
+void set_text_display_status(int p, int f);
+
+#endif
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/system.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,231 @@
+#include "system.h"
+#include "Point.h"
+
+
+// Global Variables
+Point NOPNT(-1,-1); // Used for default entries
+/* Global variables used to fine tune OCR. These can be adjusted
+ without recompiling by setting them in link_vars.tcl */
+int NoiseTolerance = 1; // Minimum number of pixels in a line of text
+int MinLineSize = 5; // Minimum number of rows in a line of text
+int MinVertSeparation = 0; // Minimum number of rows between lines of text
+int MinHorizSeparation = 1; // Minimum number of cols between characters
+int ConfidenceThreshold = 150; //Minimum confidence for some operations
+int JoinTolerance = 6; // Max number of pixels joining fused chars.
+
+
+
+/* Number of properties in property vector for Components **/
+int numProperties = 30;
+
+/* Grid size for gray scale analysis */
+int NumHorizDiv = 5;
+int NumVertDiv = 5;
+
+// The next four are used in character grouping set in Page::extractComponents
+/* Group 0 - amo
+ Group 1 - Descenders yjp
+ Group 2 - Ascenders JPK
+ Group 3 - Both descenders and Ascenders ()
+ Group 4 - floaters * - `
+*/
+unsigned int NumCharGroups=5;
+int MaxVertSize = 50; // Max vert pixels in char (used for baseline)
+int BaseLineTolerance = 10; // How far in 1/x of line size from base is okay
+int TopLineTolerance = 10; // How far in 1/x of line size from top is okay
+ // 20 = 5%, 10 = 10%
+int MinComponentSize = 16; // Minimum number of pixels in smallest character
+
+uchar CharBitsSet[256]; // Table of number of bits set in each num 0-256
+ // Used for determining gray scale and pixel counts
+
+/** Some globals set in learn() or readLearnedChars(). These are just starting
+ values **/
+
+double MaxHWRatio = 0.0;
+double MinHWRatio = 1000;
+int MinWidth = 1000; // Min component width in learned set
+
+
+
+
+Component * LearnedChars; // Learned character averages /** NOT USED **/
+Components * LearnedGroups=NULL; //Learned character list array by group type
+
+
+/*** Some values for TCL/TK interface. These variables can be
+ set in the file link_vars.tcl without recompiling ***/
+
+int ENABLE_USER_INTERFACE = 0;
+int VERY_LOW_CONFIDENCE = 150;
+int LOW_CONFIDENCE = 200;
+int DISPLAY_LINE_BOUNDARIES = 0;
+int DISPLAY_BOUNDING_BOXES = 0; // boxes around components
+int SPELLCHECK = 0;
+int DISPLAY_IMAGE = 1;
+int DESKEW_METHOD = BITMAP_DESKEW;
+double SCALE_FACTOR = 0.5;
+
+void initCharBitsSet()
+// Fills CharBitsSet so that CharBitsSet[v] holds the number of 1 bits in byte v
+{
+  for (int value = 0; value < 256; ++value)
+  {
+    int ones = 0;
+    // peel the lowest bit off until nothing is left
+    for (int rest = value; rest != 0; rest >>= 1)
+      ones += rest & 1;  // a set bit is a black pixel
+    CharBitsSet[value] = ones;
+  }
+}
+
+char* backslashify(char* w)
+/*
+ * Returns a newly malloc'd copy of w in which each of the characters
+ *   $ [ ] \ { } ( ) ;
+ * is preceded by a backslash.  The caller must free() the result.
+ * Returns NULL if the allocation fails.
+ *
+ * The buffer is length*2 + 1 bytes: the former length*2 left no room for
+ * the terminating '\0' when every character needed escaping (and was zero
+ * bytes for an empty string), so the final store wrote past the end.
+ * NOTE(review): the old header comment also listed '"', but the double
+ * quote has never been escaped here; that behaviour is kept unchanged.
+ */
+{
+  static const char special[] = "$[]\\{}();";
+  const size_t length = strlen(w);
+  /* worst case: every character doubles, plus the terminator */
+  char* new_word = (char*)malloc(length * 2 + 1);
+  if (new_word == NULL)
+    return NULL;
+  size_t new_word_pos = 0;
+  for (size_t i = 0; i < length; i++)
+  {
+    /* i < length, so w[i] is never '\0' and strchr cannot match the terminator */
+    if (strchr(special, w[i]) != NULL)
+      new_word[new_word_pos++] = '\\';
+    new_word[new_word_pos++] = w[i];
+  }
+  new_word[new_word_pos] = '\0';
+  return new_word;
+}
+
+void invertBitsInBuffer(uchar * buf, int size)
+{
+  // Flip every bit of the first `size` bytes of buf, in place.
+  for (int remaining = size; remaining > 0; --remaining)
+    buf[remaining - 1] ^= 0xFF;
+}
+
+
+short int countBitsSet(uchar c)
+{
+  /*
+   * Number of 1 bits in c, read from the CharBitsSet lookup table.
+   * The table is filled by initCharBitsSet(); until that has run the
+   * zero-initialised global makes every lookup return 0.
+   */
+  return CharBitsSet[c];
+}
+
+int pixelsBetween(uchar * ar, int start, int end)
+{
+ // Counts the number of black pixels between start and end
+ int startCharNum = start / 8;
+ int endCharNum = end / 8 ;
+ int pixCount=0, startOffset, endOffset;
+ uchar nextChar;
+ // start and end are bit positions, both inclusive; within a byte, bit 0 is the most significant bit
+ startOffset = start - startCharNum*8; // first bit of range in first char
+ endOffset = end- endCharNum*8 + 1 ; // first bit after end in last char
+
+ // count the whole characters
+ for (int i = startCharNum + 1; i < endCharNum; i++)
+ {
+ nextChar = ar[i];
+ pixCount += countBitsSet(nextChar);
+ }
+ // Now add in end pieces
+ // Get our part of the starting character
+ // Add in just the last part of the char (get rid of hi bits)
+ nextChar = ar[startCharNum] << startOffset; // storing into a uchar drops the bits shifted past bit 7
+ if (startCharNum != endCharNum )
+ {
+ pixCount += countBitsSet(nextChar);
+ // Get our part of the ending character,
+ // Add in just the first endOffset bits (get rid of lo bits)
+ nextChar = ar[endCharNum] >> (8 - endOffset);
+ pixCount += countBitsSet(nextChar);
+ }
+ else
+ {
+ // just shift the adjusted starting char
+ int shift = (8-endOffset)+startOffset; // leaves only the end - start + 1 wanted bits
+ pixCount += countBitsSet(nextChar >> shift);
+ }
+ return pixCount;
+
+}
+
+
+void setRange(uchar ar[], int start, int end)
+// Sets bits from position start to position end
+{
+ int startCharNum = start / 8;
+ int endCharNum = end / 8 ;
+ int startOffset, endOffset;
+ // both positions are inclusive; within a byte, bit 0 is the most significant bit
+ startOffset = start - startCharNum*8; // first bit of range in first char
+ endOffset = end- endCharNum*8 + 1 ; // first bit after end in last char
+
+ // set the whole characters
+ for (int i = startCharNum + 1; i < endCharNum; i++)
+ {
+ ar[i] = 255;
+ }
+ // Now set end pieces
+ if (startCharNum != endCharNum )
+ {
+ ar[startCharNum] |= (255 >> startOffset); // bits startOffset..7 of the first byte
+ ar[endCharNum] |= (255 << (8 - endOffset)); // first endOffset bits of the last byte; higher bits are lost when stored into the uchar
+ }
+ else // start and end char are the same
+ {
+ char mask = 255 >> startOffset;
+ mask &= 255 << (8-endOffset); // keep only the bits that lie inside both ends
+ ar[endCharNum] |= mask;
+ }
+
+};
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/system.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,177 @@
+#ifndef _SYSTEM_H
+#define _SYSTEM_H
+#include <stdio.h>
+#include <cstdlib>
+#include "/usr/sww/share/include/tiffio.h"
+#include <assert.h>
+#include "Boolean.h"
+#include <iostream.h>
+#include <stream.h>
+
+/* system.h - typedefs and globals for OCRchie system **/
+/** enums and typdefs for OCRSystem include .h files */
+
+
+enum MapStatus { EMPTY, VALID, OPENERROR, READERROR, WRITEERROR,
+ OTHERERROR };
+
+typedef double Angle;
+typedef unsigned char uchar;
+typedef unsigned int Confidence;
+
+typedef unsigned long int Distance;
+
+typedef short int Property;
+typedef char Character;
+
+
+/** The number of properties in the property vector for components */
+extern int numProperties;
+
+/** Some variables for fine tuning OCR */
+/** These variables can be changed without recompiling in link_vars.tcl */
+
+extern int NoiseTolerance; // Minimum number of pixels in row of text
+extern int MinLineSize; // Minimum number of rows in a line of text
+extern int MinVertSeparation; // Minimum number of rows between lines of tex
+extern int MinHorizSeparation; // Minimum number of blank pixels btween chars
+extern int ConfidenceThreshold;// Minimum confidence for some operations
+extern int JoinTolerance; // Maximum number of pixels in a column
+ // joining two fused characters
+extern int MinComponentSize; //Minimum size in pixels of smallest char
+
+// The next four are used in character grouping
+
+extern int MaxVertSize; // Max vert pixels in char (used for baseline)
+extern int BaseLineTolerance; // How far from baseline is okay 1/%linesize
+extern int TopLineTolerance; // How far from topline is okay 1/%linesize
+
+
+/** Variables for user interface can be set in the file link_vars.tcl **/
+
+extern int ENABLE_USER_INTERFACE;
+extern int VERY_LOW_CONFIDENCE;
+extern int LOW_CONFIDENCE;
+extern int DISPLAY_LINE_BOUNDARIES;
+extern int DISPLAY_BOUNDING_BOXES;
+extern int SPELLCHECK;
+extern int DISPLAY_IMAGE;
+#define RLE_DESKEW 1 /* value for DESKEW_METHOD */
+#define BITMAP_DESKEW 0 /* value for DESKEW_METHOD; the stray ';' that was here broke any use of the macro inside an expression */
+#define MINIMUM_SKEW_ANGLE 0.25
+extern int DESKEW_METHOD; /* set to one of the *_DESKEW values above; deskew_cmd skips deskewing when it is -1 */
+extern double SCALE_FACTOR;
+
+/***** end link_vars.tcl section ****/
+#include "Point.h"
+#include "list.h"
+#include "BitMap.h"
+#include "Component.h"
+#include "Word.h"
+#include "learn.h"
+#include "RLEPair.h"
+#include "RLEMap.h"
+
+
+
+// Define C++ mode for tiff library
+#ifndef __cplusplus
+#define __cplusplus
+#endif
+
+
+
+extern Point NOPNT; // Just a convenient empty point.
+
+
+/* Constants for the number of horizontal and vertical divisions
+ for determining the gray scale property vector for each component */
+
+extern int NumHorizDiv; //Number of horizontal divisions
+extern int NumVertDiv; //Number of vertical divisions
+
+extern Component * LearnedChars; /** Averaged learned chars NOT USED */
+
+extern unsigned int NumCharGroups;
+extern Components * LearnedGroups; // An array of 5 learned characters
+ //group lists
+
+/** The next 3 are set during learning ***/
+extern double MaxHWRatio; // Max H/W ratio of learned set
+extern double MinHWRatio;
+extern int MinWidth; // minimum component width in learned set
+
+extern uchar CharBitsSet[]; //a table of the number of bits set // in a character
+ // initialized in initCharBitsSet()
+
+
+
+
+
+
+// *** Global function declaration ***
+// functions in RLEMap.cc
+
+void testocr(int argc, char **argv);
+void testRLEMap(char * filename); // Right now in RLEMap.cc
+void testBitMap(char * filename); // Right now in BitMap.cc
+void testpixelsBetween(RLEMap * map); // in RLEMap.cc tests center row
+void printMap(RLEMap * map); // just an ascii "X" display
+
+
+// functions in BitMap.cc
+void testPixelsInRegion(BitMap * bmap, RLEMap * rmap);
+
+// functions in convertMap.cc
+void testConvertMap(char * filename);
+
+// functions in Component.cc
+void printVector(short int vector[], int size); // just prints contents
+
+// prints properties of component c using grayscales from map.
+void testProperties(Component* c, BitMap * map);
+
+
+ // functions in learn.cc
+Components * readLearnedChars(char * tiffFile, char * transFile);
+ /** Read Learned characters uses a tiffFile and a
+ ASCII translation file to read in a component list for
+ for comparison ***/
+
+// in system.cc
+void initCharBitsSet();
+void invertBitsInBuffer(uchar * buf, int size);
+short int countBitsSet(uchar c);
+int pixelsBetween(uchar * ar, int start, int end);
+void setRange(uchar ar[], int start, int end);
+char* backslashify(char*);
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/tcl_interface.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,301 @@
+#include <tcl.h>
+#include <tk.h>
+#include <string.h>
+#include "link.h"
+#include "tcl_interface.h"
+#include "stdio.h"
+#include "Page.h"
+
+extern Page* global_page;
+extern Tcl_Interp* TCL_ip;
+extern Tk_Window main_window;
+extern double SCALE_FACTOR;
+extern int DISPLAY_SPELLING_MISTAKES;
+
+static int page_currently_open = 0;
+
+void scale(int& coordinate) /* Multiplies coordinate in place by the global SCALE_FACTOR, truncating the result to int. */
+{
+ coordinate = (int)(coordinate * SCALE_FACTOR);
+}
+
+int error(char* s)
+{
+/* would like to make this take var num args */
+ printf("Error: %s", s);
+}
+
+int quit_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command QUIT: prints a goodbye line and terminates the process. */
+{
+ printf("Thank you for using OCRchie.\n");
+ exit(0); /* does not return, so no Tcl status is ever produced */
+}
+
+int get_skew_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command GET_SKEW: runs get_skew on the current page's RLE map; always returns TCL_OK. */
+{
+ get_skew(global_page->rmap()); /* any return value is discarded. NOTE(review): global_page is not checked for an open page */
+ return TCL_OK;
+}
+
+int deskew_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command DESKEW: deskews the current page with DESKEW_METHOD unless that is -1; always returns TCL_OK. */
+{
+ if(DESKEW_METHOD != -1)
+ {
+ if(global_page->deskew(DESKEW_METHOD)) /* redraw only when deskew() returns non-zero */
+ global_page->rmap()->display_intervals("black");
+ return TCL_OK;
+ }
+ else
+ return TCL_OK;
+}
+
+int display_intervals_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command DISPLAY_INTERVALS: calls display_intervals("black") on the current page's RLE map. */
+{
+ global_page->rmap()->display_intervals("black"); /* NOTE(review): global_page is not checked for an open page */
+ return TCL_OK;
+}
+
+int page_open_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command page_open <file>: allocates global_page and reads the file into it; the Tcl result is "1" when readMap returns VALID, otherwise "0". */
+{
+ if(ac != 2) /* exactly one argument, the file name, is required */
+ return TCL_ERROR;
+ printf("Opening %s\n", argv[1]);
+ if(page_currently_open == 1)
+ {
+ /* should print some message about closing the current one first */
+ return TCL_OK; /* nothing is opened and no result is set while another page is open */
+ }
+ global_page = new Page;
+ if(global_page->readMap(argv[1]) != VALID)
+ interp->result = "0";
+ else
+ interp->result = "1";
+ page_currently_open = 1; /* set even when the read failed; the Page stays allocated until deallocate_page_cmd runs */
+ return TCL_OK;
+}
+
+int get_page_height_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command get_page_height: writes global_page->get_height() as a decimal string into the interpreter result. */
+{
+ sprintf(interp->result, "%d", global_page->get_height()); /* NOTE(review): global_page is not checked for an open page */
+ /* printf("Interpreter height = %s\n", interp->result); */
+ return TCL_OK;
+}
+
+int get_page_width_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command get_page_width: writes global_page->get_width() as a decimal string into the interpreter result. */
+{
+ sprintf(interp->result, "%d", global_page->get_width()); /* NOTE(review): global_page is not checked for an open page */
+ /* printf("Interpreter width = %s\n", interp->result); */
+ return TCL_OK;
+}
+
+int zoom_in_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command ZOOM_IN: doubles the global SCALE_FACTOR and redisplays the page intervals. */
+{
+ SCALE_FACTOR = SCALE_FACTOR * 2;
+ global_page->rmap()->display_intervals("black");
+ return TCL_OK;
+}
+
+int zoom_out_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command ZOOM_OUT: halves the global SCALE_FACTOR and redisplays the page intervals. */
+{
+ SCALE_FACTOR = SCALE_FACTOR * 0.5;
+ global_page->rmap()->display_intervals("black");
+ return TCL_OK;
+}
+
+int deallocate_page_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command DEALLOCATE_PAGE: deletes global_page if a page is open and clears the open flag. */
+{
+ if(page_currently_open == 0)
+ return TCL_OK; /* don't do anything if there isn't anything open */
+ delete global_page; /* the pointer itself is left as it was; page_currently_open is what guards later use */
+ page_currently_open = 0;
+ return TCL_OK;
+}
+
+int learn_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command LEARN <arg1> <arg2>: passes both arguments to learn(). NOTE(review): ac is not checked before argv[1] and argv[2] are read. */
+{
+ printf("Learning from %s and %s\n", argv[1], argv[2]);
+ learn(argv[1], argv[2]);
+ return TCL_OK;
+}
+
+int learn_data_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command LEARN_DATA <arg>: passes the argument to readLearnedGroups(). NOTE(review): ac is not checked before argv[1] is read. */
+{
+ printf("Learning data from %s\n", argv[1]);
+ readLearnedGroups(argv[1]);
+ return TCL_OK;
+}
+
+int write_word_pos_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command WRITE_WORD_POS <arg>: calls global_page->writeWordPos(arg). NOTE(review): neither ac nor global_page is checked. */
+{
+ global_page->writeWordPos(argv[1]);
+ return TCL_OK;
+}
+
+int write_learned_chars_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv) /* Tcl command WRITE_LEARNED_CHARS <arg>: calls writeLearnedGroups(arg). NOTE(review): ac is not checked before argv[1] is read. */
+{
+ writeLearnedGroups(argv[1]);
+ return TCL_OK;
+}
+
+int recognize_cmd(ClientData clientData, Tcl_Interp *interp, int ac, char** argv)
+/* Tcl command FIND_LINES_AND_RECOGNIZE: clears the edit window, then runs deskew, setLines, extractComponents, recognize, extractWords, optional spellcheck and send_words_to_tcl on global_page, in that order. Always returns TCL_OK. */
+{
+ /* just in case someone has left something sitting around */
+ docommand(".main_window.edit_window.text_part delete 1.0 end");
+ docommand("set COLORED_WORDS {}");
+ /* printf("Calling deskew\n"); */
+ deskew_cmd(clientData, interp, ac, argv);
+ /* printf("Calling setlines\n"); */
+ global_page->setLines();
+ /* printf("Calling extractComponents\n"); */
+ global_page->extractComponents();
+ /* printf("Calling recognize\n"); */
+ global_page->recognize();
+ /* printf("Calling extractwords\n"); */
+ global_page->extractWords();
+ if(SPELLCHECK)
+ {
+ /* printf("Spellchecking\n"); */
+ global_page->spellcheck();
+ }
+ /* printf("Calling sendwordstotcl\n"); */
+ global_page->send_words_to_tcl();
+ return TCL_OK;
+}
+
+static int
+vdocommand1(char* s)
+{
+/* final function called to do a tcl docommand: evaluates s in TCL_ip, passes the interpreter result to error() on TCL_ERROR, and returns the Tcl completion code */
+ int code;
+
+ code = Tcl_Eval(TCL_ip, s);
+ if (code == TCL_ERROR)
+ error(TCL_ip->result);
+ return code;
+}
+
+void update() /* Calls Tcl_DoOneEvent once with TCL_DONT_WAIT, i.e. without blocking for an event. */
+{
+ Tcl_DoOneEvent(TCL_DONT_WAIT);
+}
+
+static int
+vdocommand(int record, char* fmt, va_list args)
+{
+/* helper for docommand: formats fmt/args into buf, evaluates it, returns the Tcl code; `record` is unused */
+  char buf[4097];
+  const int needed = vsnprintf(buf, sizeof(buf), fmt, args);
+
+  if (needed < 0 || needed >= (int)sizeof(buf))
+    return TCL_ERROR; /* formatting failed or the command did not fit: never evaluate a truncated script */
+  if (strchr(buf, '\?'))
+    error("Huh?");
+  return vdocommand1(buf);
+}
+
+
+int
+docommand(char* fmt, ...)
+{
+/* do a tcl command, var number of args (printf-style); returns the Tcl completion code */
+  va_list args;
+  va_start(args, fmt);
+  const int code = vdocommand(0, fmt, args);
+  va_end(args);
+  return code; /* used to fall off the end, although callers such as mispelled() read the result */
+}
+
+static int
+vset_status1(char* s)
+{
+/* final function called by set_status: evaluates s in TCL_ip, passes the interpreter result to error() on TCL_ERROR, and returns the Tcl completion code */
+ int code;
+
+ code = Tcl_Eval(TCL_ip, s);
+ if (code == TCL_ERROR)
+ error(TCL_ip->result);
+ return code;
+}
+
+static int
+vset_status(int record, char* fmt, va_list args)
+{
+/* helper for set_status: formats the message and makes it the -text of
+   .main_window.button_bar.msg; returns the Tcl code.  `record` is unused.
+   NOTE(review): the text is placed inside a quoted Tcl word unescaped. */
+  char buf[4097];
+  char newbuf[sizeof(buf) + 64]; /* room for the command prefix and closing quote around a full buf */
+
+  vsnprintf(buf, sizeof(buf), fmt, args); /* over-long text is truncated instead of overflowing buf */
+  snprintf(newbuf, sizeof(newbuf), ".main_window.button_bar.msg configure -text \"%s\"", buf);
+  if (strchr(buf, '\?'))
+    error("Huh?");
+  return vset_status1(newbuf);
+}
+
+int set_status(char* fmt, ...) /* Shows a printf-style message in the status label, runs "update", returns the Tcl code of setting the label. */
+{
+  va_list args;
+  va_start(args, fmt);
+  const int code = vset_status(0, fmt, args);
+  va_end(args);
+  docommand("update");
+  return code; /* used to fall off the end of an int function */
+}
+
+int mispelled(char* word) /* Runs the Tcl command "spellcheck <word>"; returns 1 when the interpreter result is exactly "MISPELLED", otherwise 0. */
+{
+ int result = docommand("spellcheck %s", word); /* result is never read. NOTE(review): word is inserted into the script unescaped */
+ /* printf("call to spellcheck %s returned %s\n", word, TCL_ip->result); */
+ if(!(strcmp("MISPELLED", TCL_ip->result)))
+ return 1;
+ else
+ return 0;
+}
+
+
+int initialize_interpreter()
+{
+  /* Creates the global interpreter TCL_ip and returns the status of Tcl_Init on it. */
+  TCL_ip = Tcl_CreateInterp();
+  return Tcl_Init(TCL_ip); /* used to fall off the end of an int function */
+}
+
+int load_user_interface()
+{
+  /* Initialises Tk, creates the "OCRchie" window and sources new_ui.tcl.  Always returns TCL_OK; the individual steps are not checked. */
+  // main_window = Tk_CreateMainWindow(TCL_ip, NULL, "OCRchie", "OCRchie");
+  Tk_Init(TCL_ip);
+  main_window = Tk_CreateWindow(TCL_ip, NULL, "OCRchie", "OCRchie");
+  // Tk_MapWindow(main_window); /6/21/00
+  docommand("source new_ui.tcl");
+  return TCL_OK; /* used to fall off the end of an int function */
+}
+int initialize_command_procs() /* Registers every C++ command procedure with TCL_ip under its Tcl name; always returns TCL_OK. */
+{
+  Tcl_CreateCommand(TCL_ip, "page_open", page_open_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "get_page_height", get_page_height_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "get_page_width", get_page_width_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN", learn_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "LEARN_DATA", learn_data_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "WRITE_WORD_POS", write_word_pos_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "WRITE_LEARNED_CHARS", write_learned_chars_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "FIND_LINES_AND_RECOGNIZE", recognize_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "GET_SKEW", get_skew_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DESKEW", deskew_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DISPLAY_INTERVALS", display_intervals_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "QUIT", quit_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ZOOM_IN", zoom_in_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "ZOOM_OUT", zoom_out_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  Tcl_CreateCommand(TCL_ip, "DEALLOCATE_PAGE", deallocate_page_cmd, (ClientData) NULL, (Tcl_CmdDeleteProc*) NULL);
+  printf("Done initializing new tcl commands\n");
+  return TCL_OK; /* used to fall off the end of an int function */
+}
+int initialize_link_vars() /* Runs init_link_vars(), then sources link_vars.tcl; always returns TCL_OK. */
+{
+  init_link_vars(); /* what a nice name */
+  docommand("source link_vars.tcl");
+  printf("Done initializing link variables\n");
+  return TCL_OK; /* used to fall off the end of an int function */
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/tcl_interface.h Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,35 @@
+#ifndef TCL_INTERFACE
+#define TCL_INTERFACE 1
+#include <stdarg.h>
+#include <stdio.h>
+extern double SCALE_FACTOR;
+int mispelled(char* s);
+int error(char* s);
+int docommand(char* fmt, ...);
+int set_status(char* fmt, ...);
+int initialize_interpreter();
+int load_user_interface();
+int initialize_command_procs();
+int initialize_link_vars();
+void update();
+void scale(int& coordinate);
+#endif
+/*
+These commands can be called from Tcl
+(things in quotes are the tcl names, others are c++ functions)
+
+ "page_open", page_open_cmd,
+ "get_page_height", get_page_height_cmd,
+ "get_page_width", get_page_width_cmd,
+
+ "FIND_LINES_AND_RECOGNIZE", recognize_cmd,
+ "GET_SKEW", get_skew_cmd,
+ "DESKEW", deskew_cmd,
+ "DISPLAY_INTERVALS", display_intervals_cmd,
+ "QUIT", quit_cmd,
+ "ZOOM_IN", zoom_in_cmd,
+ "ZOOM_OUT", zoom_out_cmd,
+ "DEALLOCATE_PAGE", deallocate_page_cmd,
+
+
+*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/testocr.cc Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,110 @@
+#include "system.h"
+#include "Page.h"
+#include <cstdlib>
+#include <iostream.h>
+#include "tcl_interface.h"
+
+void draw_bitmap(int x, int y, char * xbmfile);
+/* int docommand(char* fmt, ...); */
+
+void testocr(int argc, char ** argv)
+/*** Just some test stuff for right now: exercises region counting on hello.tif, then runs the whole pipeline on a hard-coded page image ***/
+{
+ Page * hello = new Page;
+ hello->readMap("hello.tif");
+
+ cout << "Test PixelsInRegion Functions"<< endl;
+ // This test has ranges appropriate for hello.tif
+ testPixelsInRegion(hello->bmap(), hello->rmap());
+ cout << endl;
+
+ cout << "Now test grayscale" << endl;
+ cout << "NumPixels in Region (3,5) .. (52,19) ";
+ cout << hello->bmap()->pixelsInRegion(Point(3,5), Point(52,19)) << endl;
+ cout << " Area: " << (52 -3 +1)*(19-5+1) << endl;
+ cout << " GrayScale: ";
+ cout << hello->bmap()->grayScale(Point(3, 5), Point(52,19)) << endl;
+
+ cout << endl << "Now lets look at the property vector for this region" << endl;
+ Component* c = new Component(Point(3, 5),Point(52,19)); // never deleted in this function
+ testProperties(c, hello->bmap());
+ hello->bmap()->writeTclMap("hello",Point(0,0),Point(0,0),0);
+
+ cout << "Now some testing with pagebw.tif " << endl;
+ testLearn();
+ Page * testPage;
+ testPage = new Page;
+ cout << "reading map" << endl;
+ testPage->readMap("/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/bigtiff.tif");
+ NoiseTolerance = 10; // overrides the global tuning value for the rest of the run
+ cout << "Finding the lines." << endl;
+ testPage->setLines();
+ // With "-ugly" as argv[2] the empty statement below runs and all drawing in the else branch is skipped.
+ if ((argc > 2) && !(strcmp(argv[2],"-ugly")));
+ else
+ {
+ docommand(".t.f.c create bitmap 637 825 -bitmap @/amd/nfs/cochise/home/ee/cs169/fa95/class/cs169-ab/tif/pagebw");
+ docommand("update");
+ // Draw one stippled band in each gap between line j and line j + 1.
+ int centerline, width;
+ for(int j=0; j < testPage->fnumLines; j++) // NOTE(review): reads flineinfo[j + 1], so the last pass touches index fnumLines -- confirm the array has that entry
+ {
+ centerline = (testPage->flineinfo[j].fendrow + testPage->flineinfo[j + 1].fstartrow) / 2;
+ width = testPage->flineinfo[j + 1].fstartrow - testPage->flineinfo[j].fendrow;
+ docommand(".t.f.c create line %d %d %d %d -width %d -fill blue -tags project_ray -stipple @/usr/sww/share/tclX-7.3a/tkX/3.6a/demos/bitmaps/grey.25", 0, centerline, testPage->bmap()->imageWidth(), centerline, width);
+ update();
+ }
+ for(int i= 0; i < 500; i++) // 500 further non-blocking event-loop passes
+ {
+ update();
+ }
+ }
+
+ testPage->extractComponents();
+ cout << "avgSpacing - " << testPage->avgSpacing() << endl;
+
+// testPage->printComponents();
+
+
+ testPage->recognize();
+ testPage->extractWords();
+ testPage->printWords();
+ delete testPage;
+ delete hello;
+
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+void testExtractAndMerge() // Reads train.tif, moves columns 504-520 of row 144 into row 143, prints rows 142-145 before and after, then extracts and prints components.
+{
+ Page * testPage = new Page; // never deleted in this function
+ RLEMap * newMap = new RLEMap; // allocated but not used or deleted below
+
+ testPage->readMap("train.tif");
+ testPage->rmap()->printPairs(142,145);
+ RLEPairs * p = testPage->rmap()->row(144)->extract(504, 520);
+ testPage->rmap()->row(143)->merge(p);
+ testPage->rmap()->printPairs(142,145);
+ testPage->setLines();
+ testPage->extractComponents();
+ testPage->printComponents();
+
+}
+
+
+
+
+
Binary file reference/ocr-simple/train.tif has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/train.txt Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,3 @@
+a b c d e f g h i j k l m n o p q r s t u v w x y z : ;
+A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+0 1 2 3 4 5 6 7 8 9 0 ~ ! @ # $ % % ^ & * ( ) + = - , . < > / ? '
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/tt.tcl Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,63 @@
+#!/usr/sww/bin/wish4.0-b4
+set COLORED_WORDS {}
+text .t -background white -foreground black
+pack .t
+set LOW_PRECISION_BACKGROUND green
+set MISPELLED_BACKGROUND blue
+set UNKNOWN_CHAR_BACKGROUND red
+.t tag configure LOW_PRECISION -background $LOW_PRECISION_BACKGROUND
+.t tag configure MISPELLED -background $MISPELLED_BACKGROUND
+.t tag configure UNKNOWN_CHAR -background $UNKNOWN_CHAR_BACKGROUND
+proc addword { w {xpos 0} {ypos 0} {status OK}} {
+    global COLORED_WORDS
+    puts $status
+    if { ![string compare $status OK] } {
+        .t insert end "$w "
+        # Append "$w " to the text, move the insert mark 2 characters before the end, and tag the word there with $status, x$xpos and y$ypos.
+        .t mark set insert "end -2 char"
+        .t tag add $status "insert wordstart" "insert wordend"
+        .t tag add x$xpos "insert wordstart" "insert wordend"
+        .t tag add y$ypos "insert wordstart" "insert wordend"
+        .t mark set insert end
+    } elseif { ![string compare $status LOW_PRECISION] || ![string compare $status MISPELLED] || ![string compare $status UNKNOWN_CHAR] } {
+        .t insert end "$w "
+        # Flagged words: same tagging with the mark 3 characters before the end; the mark position is also appended to COLORED_WORDS.
+        .t mark set insert "end -3 char"
+        .t tag add $status "insert wordstart" "insert wordend"
+        .t tag add x$xpos "insert wordstart" "insert wordend"
+        .t tag add y$ypos "insert wordstart" "insert wordend"
+        lappend COLORED_WORDS [.t index insert]
+        .t mark set insert end
+    } else {
+        puts stdout "Unknown word status for $w: $status"
+        .t insert end "$w UNKNOWNSTATUS? "
+    }
+}
+# Remove and return the first element of the global list COLORED_WORDS; with an empty list the result is empty and the list is left as it is.
+proc pop_colored_words { } {
+ global COLORED_WORDS
+ set x [lindex $COLORED_WORDS 0]
+ if {[llength $COLORED_WORDS] == 1} {
+ set COLORED_WORDS {}
+ } elseif {[llength $COLORED_WORDS] == 0} {
+ set COLORED_WORDS $COLORED_WORDS
+ } else {
+ set COLORED_WORDS [lrange $COLORED_WORDS 1 [llength $COLORED_WORDS]]
+ }
+ return $x
+}
+# <Tab> in .t: move the insert mark to the start of the word at the next position popped from COLORED_WORDS, scroll it into view, and print the new index and its tags; "break" stops any further bindings for the event.
+bind .t <Tab> {
+ if {[llength $COLORED_WORDS] == 0} {
+ puts stdout "No more words"
+ } else {
+ .t mark set insert [pop_colored_words]
+ .t mark set insert "insert wordstart"
+ set x [.t index insert]
+ puts "New index is $x"
+ .t see insert
+ set local_tags [.t tag names insert]
+ puts "Tags at this place: $local_tags"
+ }
+ break
+}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reference/ocr-simple/wordpos.out Thu May 18 23:12:51 2006 +0200
@@ -0,0 +1,9 @@
+ 33 7 255 27 abcdefghijklmnopqrstuvwxyz:
+ 1935 7 75 2 ;N
+ 29 108 255 26 ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ 32 208 255 1 0
+ 116 208 255 1 1
+ 182 207 255 10 234567890~
+ 947 207 255 1 !
+ 1001 200 255 8 @#$%%^&*
+ 1621 207 255 12 ()+=-,.<>/?'