Separated backslash C parser library.
author viric@llimona
Thu, 31 May 2007 00:17:52 +0200
changeset 7 fcde17ef6af6
parent 6 7dd3bdec0ad2
child 8 4ecd557ebebf
child 9 7f47f2295c44
Separated backslash C parser library.
Makefile
c_str.c
sreplace.c
sreplace.h
--- a/Makefile	Tue May 29 18:52:18 2007 +0200
+++ b/Makefile	Thu May 31 00:17:52 2007 +0200
@@ -1,6 +1,6 @@
 CFLAGS?=-g -O2
 all: sreplace
-sreplace: sreplace.o
+sreplace: sreplace.o c_str.o
 
 clean:
 	rm -f sreplace sreplace.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c_str.c	Thu May 31 00:17:52 2007 +0200
@@ -0,0 +1,150 @@
+#include <stdio.h>
+
+#include "sreplace.h"
+
+/* Complete set of backslash sequences:
+
+ From K&R, The C Programming Language, Chapter 2, Constants.
+
+ \b backspace
+ \?  question mark
+ \f formfeed
+  \' single quote
+ \n newline
+ \" double quote
+ \r carriage return
+ \ooo octal number
+ \t horizontal tab
+ \xhh hexadecimal number 
+ \v vertical tab
+
+ (\\ backslash)
+*/
+
+/* Decodes C-style backslash escapes in str in place; returns the number of decoded bytes (terminator excluded). */
+int parse_backslashes(unsigned char *str)
+{
+  int was_backslash = 0; /* set when the previous char was a '\\' that opens an escape */
+  int was_octal = 0; /* 0 = idle; 1 after the leading octal digit, +1 per further digit, emitted at 3 */
+  int was_hex = 0; /* 0 = idle; 1 after "\x", +1 per hex digit, emitted at 3 */
+  unsigned char *write_str = str; /* output cursor into the same buffer */
+  unsigned char *start = str; /* kept to compute the returned length */
+  unsigned char newchar; /* value of the numeric escape being accumulated */
+
+  while (*str != 0)
+  {
+    if (*str == '\\' && !was_backslash) /* NOTE(review): also taken while an octal/hex escape is pending; the next char then only terminates that escape */
+      was_backslash = 1;
+    else
+    {
+      if (was_octal)
+      {
+        if (was_octal < 3 && *str >= '0' && *str <= '7')
+        {
+          newchar = (newchar * 8) + (*str - '0');
+          was_octal += 1;
+          if (was_octal == 3) /* leading digit plus two more: escape complete */
+          {
+            *(write_str++) = newchar;
+            was_octal = 0;
+          }
+        } else /* Non-octal char ends the escape: emit the value, then the char itself. */
+        {
+          *(write_str++) = newchar;
+          *(write_str++) = *str;
+          was_octal = 0;
+        }
+      }
+      else if (was_hex)
+      {
+        unsigned char base = 0; /* stays 0 when *str is not a hex digit */
+
+        if (*str >= '0' && *str <= '9')
+          base = '0';
+        else if (*str >= 'a' && *str <= 'f')
+          base = 'a' - 10; /* so that *str - base yields 10..15 */
+        else if (*str >= 'A' && *str <= 'F')
+          base = 'A' - 10;
+
+        if (base != 0) /* *str is a hex digit */
+        {
+          newchar = (newchar * 16) + (*str - base);
+          was_hex += 1;
+
+          if (was_hex == 3) /* two hex digits read: escape complete */
+          {
+            *(write_str++) = newchar;
+            was_hex = 0;
+          }
+        }
+        else /* Non-hex char ends the escape: emit the value, then the char itself. */
+        {
+          *(write_str++) = newchar; /* NOTE(review): this is a zero byte when no digit followed "\x" */
+          *(write_str++) = *str;
+          was_hex = 0;
+        }
+      }
+      else if (was_backslash)
+      { switch(*str)
+        {
+          case '\\':
+            *(write_str++) = '\\';
+            break;
+          case 'n':
+            *(write_str++) = '\n';
+            break;
+          case 't':
+            *(write_str++) = '\t';
+            break;
+          case 'r':
+            *(write_str++) = '\r';
+            break;
+          case 'v':
+            *(write_str++) = '\v';
+            break;
+          case 'f':
+            *(write_str++) = '\f';
+            break;
+          case 'b':
+            *(write_str++) = '\b';
+            break;
+          case '0':
+          case '1':
+          case '2':
+          case '3':
+          case '4':
+          case '5':
+          case '6':
+          case '7':
+            was_octal = 1;
+            newchar = 0; /* NOTE(review): the leading digit *str is not added in, so "\101" yields 001, not 'A' - confirm intent */
+            break;
+          case 'x':
+            was_hex = 1;
+            newchar = 0;
+            break;
+          default: /* any other escaped char is copied literally, without the backslash */
+            *(write_str++) = *str;
+        }
+        was_backslash = 0;
+      }
+      else
+        *(write_str++) = *str;
+    }
+    ++str;
+  }
+  *(write_str) = '\0'; /* NOTE(review): an escape still pending here (input ending in "\12" or "\x4") is never emitted */
+
+  /* The length is the distance between the output cursor and the
+   * start of the string; the terminator just written is not counted. */
+  return write_str - start;
+}
+
+void print_hex(FILE *out, const struct String *str) /* Writes "Length: N" and a newline to out, then every byte of str as two hex digits. */
+{
+    int i;
+
+    fprintf(out, "Length: %i\n", str->length);
+    for (i = 0; i < str->length; ++i) /* driven by length, not by a terminator */
+        fprintf(out, "%02hhx", str->ptr[i]); /* hh converts the value to unsigned char; no newline follows the digits */
+}
--- a/sreplace.c	Tue May 29 18:52:18 2007 +0200
+++ b/sreplace.c	Thu May 31 00:17:52 2007 +0200
@@ -8,18 +8,14 @@
 #include <assert.h>
 #include <string.h>
 
+#include "sreplace.h"
+
 enum {
   BUFFER_SIZE = 2048
 };
 
 static FILE * input, * output;
 
-struct String
-{
-  char *ptr;
-  int length;
-};
-
 struct CmpState
 {
   struct String *str;
@@ -241,45 +237,6 @@
   printf("usage: %s OLD_STR NEW_STR\n", pname);
 }
 
-static void parse_backslashes(char *str)
-{
-  int was_backslash = 0;
-  char *write_str = str;
-
-  while (*str != 0)
-  {
-    if (*str == '\\')
-      was_backslash = 1;
-    else
-    {
-      if (was_backslash)
-      {
-        switch(*str)
-        {
-          case '\\':
-            *(write_str++) = '\\';
-            break;
-          case 'n':
-            *(write_str++) = '\n';
-            break;
-          case 't':
-            *(write_str++) = '\t';
-            break;
-          default:
-            *(write_str++) = *str;
-        }
-      }
-      else
-      {
-        *(write_str++) = *str;
-      }
-      was_backslash = 0;
-    }
-    ++str;
-  }
-  *(write_str++) = '\0';
-}
-
 static void process_parameters(int argc, char **argv)
 {
   if (argc != 3)
@@ -291,11 +248,15 @@
   old_str.ptr = argv[1];
   new_str.ptr = argv[2];
 
-  parse_backslashes(old_str.ptr);
-  parse_backslashes(new_str.ptr);
+  old_str.length = parse_backslashes(old_str.ptr);
+  fprintf(stderr, "OLD: ");
+  print_hex(stderr, &old_str);
+  fprintf(stderr, "\n");
 
-  old_str.length = strlen(old_str.ptr);
-  new_str.length = strlen(new_str.ptr);
+  new_str.length = parse_backslashes(new_str.ptr);
+  fprintf(stderr, "NEW: ");
+  print_hex(stderr, &new_str);
+  fprintf(stderr, "\n");
 }
 
 int main(int argc, char ** argv)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sreplace.h	Thu May 31 00:17:52 2007 +0200
@@ -0,0 +1,8 @@
+struct String /* A character buffer paired with an explicit length. */
+{
+  char *ptr; /* first byte of the data */
+  int length; /* number of bytes at ptr; print_hex() iterates exactly this many */
+};
+
+int parse_backslashes(unsigned char *str); /* NOTE(review): takes unsigned char * while String.ptr is char * */
+void print_hex(FILE *out, const struct String *str); /* NOTE(review): FILE needs <stdio.h> included before this header; there is no include guard */