rework HEXA_NUM scanning strategy to avoid excessive token lengths

git-svn-id: https://urjtag.svn.sourceforge.net/svnroot/urjtag/trunk@1674 b68d4a1b-bc3d-0410-92ed-d4ac073336b7
16 years ago · 6dbe8de1ff
parent bd676dd428
commit 6dbe8de1ff
4 changed files with 97 additions and 25 deletions
--- a/urjtag/ChangeLog
+++ b/urjtag/ChangeLog
@ -1,3 +1,8 @@
+2009-10-28  Arnim Laeuger  <arniml>
+
+  * src/svf/svf.h, src/svf/svf_bison.y, src/svf/svf_flex.l:
+    rework HEXA_NUM scanning strategy to avoid excessive token lengths
+
 2009-10-27  Arnim Laeuger  <arniml>

  * src/tap/cable/ft2232.c: default vid/pid for Signalyzer (Adam Megacz)
--- a/urjtag/src/svf/svf.h
+++ b/urjtag/src/svf/svf.h
@ -37,6 +37,12 @@ enum generic_irdr_coding
 };


+/* Accumulation buffer for a hexadecimal value that the parser assembles
+   from consecutive HEXA_NUM_FRAGMENT tokens (rule hexa_num_sequence). */
+struct hexa_frag
+{
+    char   *buf;     /* heap string holding the concatenated fragments */
+    size_t  buflen;  /* number of bytes allocated for buf */
+    size_t  strlen;  /* length of the string in buf, terminator excluded */
+};
 struct tdval
 {
    int token;
--- a/urjtag/src/svf/svf_bison.y
+++ b/urjtag/src/svf/svf_bison.y
@ -34,6 +34,8 @@
 #include <stdio.h>
 #include <stdlib.h>

+#include <string.h>
+
 #include <urjtag/log.h>

 #include "svf.h"
@ -58,12 +60,13 @@ static void urj_svf_free_ths_params(struct ths_params *);
  double dvalue;
  char  *cvalue;
  int    ivalue;
+  struct hexa_frag hexa_frag;
  struct tdval tdval;
  struct tcval *tcval;
 }


-%token IDENTIFIER NUMBER HEXA_NUM VECTOR_STRING
+%token IDENTIFIER NUMBER HEXA_NUM_FRAGMENT VECTOR_STRING

 %token EMPTY
 %token ENDDR ENDIR 
@ -79,9 +82,11 @@ static void urj_svf_free_ths_params(struct ths_params *);
 %token SVF_EOF 0    /* SVF_EOF must match bison's token YYEOF */

 %type <dvalue> NUMBER
+%type <cvalue> HEXA_NUM_FRAGMENT
 %type <tdval>  runtest_clk_count
 %type <token>  runtest_run_state_opt
 %type <token>  runtest_end_state_opt
+%type <hexa_frag> hexa_num_sequence

 %%

@ -267,27 +272,53 @@ ths_param_list
 ; 

 ths_opt_param
-            : TDI   HEXA_NUM
+            : TDI   '(' hexa_num_sequence ')'
              {
-                priv_data->parser_params.ths_params.tdi = $<cvalue>2;
+                priv_data->parser_params.ths_params.tdi = $3.buf;
              }

-            | TDO   HEXA_NUM
+            | TDO   '(' hexa_num_sequence ')'
              {
-                priv_data->parser_params.ths_params.tdo = $<cvalue>2;
+                priv_data->parser_params.ths_params.tdo = $3.buf;
              }

-            | MASK  HEXA_NUM
+            | MASK  '(' hexa_num_sequence ')'
              {
-                priv_data->parser_params.ths_params.mask = $<cvalue>2;
+                priv_data->parser_params.ths_params.mask = $3.buf;
              }

-            | SMASK HEXA_NUM
+            | SMASK '(' hexa_num_sequence ')'
              {
-                priv_data->parser_params.ths_params.smask = $<cvalue>2;
+                priv_data->parser_params.ths_params.smask = $3.buf;
              }
 ;

+hexa_num_sequence
+           : HEXA_NUM_FRAGMENT
+             {
+                 /* first fragment: adopt the string allocated by the lexer
+                    as the accumulation buffer */
+                 $$.buf    = $1;
+                 $$.strlen = strlen ($1);
+                 $$.buflen = $$.strlen + 1;
+             }
+           | hexa_num_sequence HEXA_NUM_FRAGMENT
+             {
+                 /* append the next fragment; on allocation failure the buf
+                    of the sequence becomes NULL and stays NULL */
+#define REALLOC_STEP (1 << 16)
+                 size_t frag_len = strlen ($2);
+                 size_t req_len = $1.strlen + frag_len;
+                 /* never grow a sequence that already failed: realloc (NULL, n)
+                    would hand back uninitialized memory to append to */
+                 if ($1.buf != NULL && $1.buflen <= req_len) {
+                     size_t newlen = req_len - $1.buflen < REALLOC_STEP ?
+                         $1.buflen + REALLOC_STEP : req_len + 1;
+                     char *newbuf = (char *)realloc ($1.buf, newlen);
+                     if (newbuf != NULL) {
+                         $1.buflen = newlen;
+                     } else {
+                         /* realloc left the old block allocated, release it */
+                         free ($1.buf);
+                     }
+                     $1.buf = newbuf;
+                 }
+                 if ($1.buf != NULL) {
+                     /* the strlen member is the end offset, no rescan needed;
+                        frag_len + 1 copies the terminator as well */
+                     memcpy ($1.buf + $1.strlen, $2, frag_len + 1);
+                     $1.strlen = req_len;
+                 }
+                 free ($2);
+                 $$ = $1;
+             }
+;
+
 stable_state
            : RESET 
            | IDLE
@ -413,8 +444,8 @@ direction
 yyerror (YYLTYPE *locp, urj_svf_parser_priv_t *priv_data, urj_chain_t *chain,
         const char *error_string)
 {
-    urj_log (URJ_LOG_LEVEL_ERROR, "Error occurred for SVF command %s.\n",
-             error_string);
+    urj_log (URJ_LOG_LEVEL_ERROR, "Error occurred for SVF command, line %d, column %d-%d:\n %s.\n",
+             locp->first_line, locp->first_column, locp->last_column, error_string);
 }


--- a/urjtag/src/svf/svf_flex.l
+++ b/urjtag/src/svf/svf_flex.l
@ -63,7 +63,7 @@ int yywrap(yyscan_t scanner)

 #define YY_USER_INIT \
    do { \
-        yylloc->first_line = yylloc->last_line = yylloc->first_column = yylloc->last_column = 0; \
+        yylloc->first_line = yylloc->last_line = yylloc->first_column = yylloc->last_column = 1; \
    } while (0)

 %}
@ -77,11 +77,13 @@ WSPACE          [ \t\r]
 COMMENT         (!.*)|("//".*)[^\n]

 %s expect_vector
+%s expect_hexa_num_paren
+%s expect_hexa_num

 %%


-{LETTER}+[0-9A-Za-z_]* {
+<INITIAL>{LETTER}+[0-9A-Za-z_]* {
  /* token is a keyword or identifier */
  int keyw;

@ -89,15 +91,23 @@ COMMENT         (!.*)|("//".*)[^\n]
  keyw = map_keyw_ident(yylval, yytext);

  /* enable detection of VECTOR_STRING when this is a PIO command */
-  if (keyw == PIO) {
-    BEGIN(expect_vector);
+  switch (keyw) {
+  case PIO:
+      BEGIN(expect_vector);
+      break;
+  case TDI:
+  case TDO:
+  case MASK:
+  case SMASK:
+      BEGIN(expect_hexa_num_paren);
+      break;
  }

  return(keyw);
 } /* end of keyword or identifier */


-{DIGIT}+(\.{DIGIT}+)?([eE][-+]?{DIGIT}+)? {
+<INITIAL>{DIGIT}+(\.{DIGIT}+)?([eE][-+]?{DIGIT}+)? {
  /* token is a real number */

  char *real_string = strdup(yytext);
@ -125,7 +135,7 @@ COMMENT         (!.*)|("//".*)[^\n]
 } /* end of real number */


-<expect_vector>"("{WSPACE}*[\n\rHhLlZzUuDdXx \t\r]+{WSPACE}*")" {
+<expect_vector>"("{WSPACE}*[\n\rHhLlZzUuDdXx \t]+{WSPACE}*")" {
  /* There is an overlap of VECTOR_STRING and HEXA_NUM when the string
     contains only 'd' or 'D'. To prevent complicated parsing rules,
     the lexer is instructed to detect VECTOR_STRING only when a PIO
@ -137,24 +147,30 @@ COMMENT         (!.*)|("//".*)[^\n]
  fix_yylloc_nl(yylloc, yytext, yyget_extra(yyscanner));
  align_string(yytext);

-  cstring = calloc(strlen(yytext) + 1, sizeof(char));
+  cstring = malloc(strlen(yytext) + 1);
  strcpy(cstring, yytext);
  yylval->cvalue = cstring;
  return(VECTOR_STRING);
 } /* end of vector string */


-"("{WSPACE}*[\n\rA-Fa-f0-9 \t\r]+{WSPACE}*")" {
-  /* token is a hexadecimal value (2) */
+<expect_hexa_num>[A-Fa-f0-9 \n]{1,1024} {
+  /* HEXA_NUM_FRAGMENT is restricted in size to avoid excessive token length.
+     Utilizing the {n,m} regex operator is probably not the right way
+     to do this.
+     1024 is chosen arbitrarily, increasing to e.g. 4096 enhances scanner
+     performance, trading off against huge table sizes.
+     This whole strategy needs to be revisited with support of flex experts.
+  */
  char *cstring;

  fix_yylloc_nl(yylloc, yytext, yyget_extra(yyscanner));
  align_string(yytext);

-  cstring = calloc(strlen(yytext) + 1, sizeof(char));
+  cstring = (char *)malloc(strlen(yytext) + 1);
  strcpy(cstring, yytext);
  yylval->cvalue = cstring;
-  return(HEXA_NUM);
+  return(HEXA_NUM_FRAGMENT);
 } /* end of hexadecimal value */


@ -170,12 +186,26 @@ COMMENT         (!.*)|("//".*)[^\n]
 } /* end of comment */


-[()] {
-  /* left or right parenthes */
+<INITIAL>"(" {
  fix_yylloc(yylloc, yytext);
  return(yytext[0]);
 } /* end of left or right parenthesis */

+<expect_hexa_num_paren>"(" {
+  /* Left parenthesis following a TDI, TDO, MASK or SMASK keyword.
+     Scanning until a left parenthesis under this start condition implicitly
+     skips whitespace that would have been attributed to HEXA_NUM_FRAGMENT
+     otherwise */
+  fix_yylloc(yylloc, yytext);
+  /* now hand over to HEXA_NUM_FRAGMENT */
+  BEGIN(expect_hexa_num);
+  return(yytext[0]);
+} /* end of left parenthesis opening a hexadecimal value */
+")" {
+  /* right parenthesis; this rule carries no start condition prefix and
+     always switches the scanner back to INITIAL */
+  fix_yylloc(yylloc, yytext);
+  BEGIN(INITIAL);
+  return(yytext[0]);
+} /* end of right parenthesis */
+

 \n {
  /* token is a new line character */
@ -190,7 +220,7 @@ COMMENT         (!.*)|("//".*)[^\n]
 ; {
  /* token is end of statement character */

-  /* release expect_vector */
+  /* release start condition */
  BEGIN(INITIAL);

  fix_yylloc(yylloc, yytext);