From 6dbe8de1fff8e6a5bdf3891352556ba8ceeb7602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnim=20L=C3=A4uger?= Date: Tue, 27 Oct 2009 23:22:22 +0000 Subject: [PATCH] rework HEXA_NUM scanning strategy to avoid excessive token lengths git-svn-id: https://urjtag.svn.sourceforge.net/svnroot/urjtag/trunk@1674 b68d4a1b-bc3d-0410-92ed-d4ac073336b7 --- urjtag/ChangeLog | 5 ++++ urjtag/src/svf/svf.h | 6 ++++ urjtag/src/svf/svf_bison.y | 53 ++++++++++++++++++++++++++-------- urjtag/src/svf/svf_flex.l | 58 +++++++++++++++++++++++++++++--------- 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/urjtag/ChangeLog b/urjtag/ChangeLog index 7604c06b..fba193d4 100644 --- a/urjtag/ChangeLog +++ b/urjtag/ChangeLog @@ -1,3 +1,8 @@ +2009-10-28 Arnim Laeuger + + * src/svf/svf.h, src/svf/svf_bison.y, src/svf/svf_flex.l: + rework HEXA_NUM scanning strategy to avoid excessive token lengths + 2009-10-27 Arnim Laeuger * src/tap/cable/ft2232.c: default vid/pid for Signalyzer (Adam Megacz) diff --git a/urjtag/src/svf/svf.h b/urjtag/src/svf/svf.h index e7a921b2..64807c67 100644 --- a/urjtag/src/svf/svf.h +++ b/urjtag/src/svf/svf.h @@ -37,6 +37,12 @@ enum generic_irdr_coding }; +struct hexa_frag +{ + char *buf; + size_t buflen; + size_t strlen; +}; struct tdval { int token; diff --git a/urjtag/src/svf/svf_bison.y b/urjtag/src/svf/svf_bison.y index 94ff199e..e2e57cba 100644 --- a/urjtag/src/svf/svf_bison.y +++ b/urjtag/src/svf/svf_bison.y @@ -34,6 +34,8 @@ #include #include +#include + #include #include "svf.h" @@ -58,12 +60,13 @@ static void urj_svf_free_ths_params(struct ths_params *); double dvalue; char *cvalue; int ivalue; + struct hexa_frag hexa_frag; struct tdval tdval; struct tcval *tcval; } -%token IDENTIFIER NUMBER HEXA_NUM VECTOR_STRING +%token IDENTIFIER NUMBER HEXA_NUM_FRAGMENT VECTOR_STRING %token EMPTY %token ENDDR ENDIR @@ -79,9 +82,11 @@ static void urj_svf_free_ths_params(struct ths_params *); %token SVF_EOF 0 /* SVF_EOF must match bison's token YYEOF */ %type NUMBER 
+%type HEXA_NUM_FRAGMENT %type runtest_clk_count %type runtest_run_state_opt %type runtest_end_state_opt +%type hexa_num_sequence %% @@ -267,27 +272,53 @@ ths_param_list ; ths_opt_param - : TDI HEXA_NUM + : TDI '(' hexa_num_sequence ')' { - priv_data->parser_params.ths_params.tdi = $2; + priv_data->parser_params.ths_params.tdi = $3.buf; } - | TDO HEXA_NUM + | TDO '(' hexa_num_sequence ')' { - priv_data->parser_params.ths_params.tdo = $2; + priv_data->parser_params.ths_params.tdo = $3.buf; } - | MASK HEXA_NUM + | MASK '(' hexa_num_sequence ')' { - priv_data->parser_params.ths_params.mask = $2; + priv_data->parser_params.ths_params.mask = $3.buf; } - | SMASK HEXA_NUM + | SMASK '(' hexa_num_sequence ')' { - priv_data->parser_params.ths_params.smask = $2; + priv_data->parser_params.ths_params.smask = $3.buf; } ; +hexa_num_sequence + : HEXA_NUM_FRAGMENT + { + $$.buf = $1; + $$.strlen = strlen ($1); + $$.buflen = $$.strlen + 1; + } + | hexa_num_sequence HEXA_NUM_FRAGMENT + { +#define REALLOC_STEP (1 << 16) + size_t req_len = $1.strlen + strlen ($2); + if ($1.buflen <= req_len) { + size_t newlen = req_len - $1.buflen < REALLOC_STEP ? 
+ $1.buflen + REALLOC_STEP : req_len + 1; + $1.buf = (char *)realloc ($1.buf, newlen); + $1.buflen = newlen; + } + if ($1.buf != NULL) { + strcat ($1.buf, $2); + $1.strlen += strlen ($2); + } + free ($2); + $$ = $1; + } +; + stable_state : RESET | IDLE @@ -413,8 +444,8 @@ direction yyerror (YYLTYPE *locp, urj_svf_parser_priv_t *priv_data, urj_chain_t *chain, const char *error_string) { - urj_log (URJ_LOG_LEVEL_ERROR, "Error occurred for SVF command %s.\n", - error_string); + urj_log (URJ_LOG_LEVEL_ERROR, "Error occurred for SVF command, line %d, column %d-%d:\n %s.\n", + locp->first_line, locp->first_column, locp->last_column, error_string); } diff --git a/urjtag/src/svf/svf_flex.l b/urjtag/src/svf/svf_flex.l index 7afda935..37e996ae 100644 --- a/urjtag/src/svf/svf_flex.l +++ b/urjtag/src/svf/svf_flex.l @@ -63,7 +63,7 @@ int yywrap(yyscan_t scanner) #define YY_USER_INIT \ do { \ - yylloc->first_line = yylloc->last_line = yylloc->first_column = yylloc->last_column = 0; \ + yylloc->first_line = yylloc->last_line = yylloc->first_column = yylloc->last_column = 1; \ } while (0) %} @@ -77,11 +77,13 @@ WSPACE [ \t\r] COMMENT (!.*)|("//".*)[^\n] %s expect_vector +%s expect_hexa_num_paren +%s expect_hexa_num %% -{LETTER}+[0-9A-Za-z_]* { +{LETTER}+[0-9A-Za-z_]* { /* token is a keyword or identifier */ int keyw; @@ -89,15 +91,23 @@ COMMENT (!.*)|("//".*)[^\n] keyw = map_keyw_ident(yylval, yytext); /* enable detection of VECTOR_STRING when this is a PIO command */ - if (keyw == PIO) { - BEGIN(expect_vector); + switch (keyw) { + case PIO: + BEGIN(expect_vector); + break; + case TDI: + case TDO: + case MASK: + case SMASK: + BEGIN(expect_hexa_num_paren); + break; } return(keyw); } /* end of keyword or identifier */ -{DIGIT}+(\.{DIGIT}+)?([eE][-+]?{DIGIT}+)? { +{DIGIT}+(\.{DIGIT}+)?([eE][-+]?{DIGIT}+)? 
{ /* token is a real number */ char *real_string = strdup(yytext); @@ -125,7 +135,7 @@ COMMENT (!.*)|("//".*)[^\n] } /* end of real number */ -"("{WSPACE}*[\n\rHhLlZzUuDdXx \t\r]+{WSPACE}*")" { +"("{WSPACE}*[\n\rHhLlZzUuDdXx \t]+{WSPACE}*")" { /* There is an overlap of VECTOR_STRING and HEXA_NUM when the string contains only 'd' or 'D'. To prevent complicated parsing rules, the lexer is instructed to detect VECTOR_STRING only when a PIO @@ -137,24 +147,30 @@ COMMENT (!.*)|("//".*)[^\n] fix_yylloc_nl(yylloc, yytext, yyget_extra(yyscanner)); align_string(yytext); - cstring = calloc(strlen(yytext) + 1, sizeof(char)); + cstring = malloc(strlen(yytext) + 1); strcpy(cstring, yytext); yylval->cvalue = cstring; return(VECTOR_STRING); } /* end of vector string */ -"("{WSPACE}*[\n\rA-Fa-f0-9 \t\r]+{WSPACE}*")" { - /* token is a hexadecimal value (2) */ +[A-Fa-f0-9 \n]{1,1024} { + /* HEXA_NUM_FRAGMENT is restricted in size to avoid excessive token length. + Utilizing the {n,m} regex operator is probably not the right way + to do this. + 1024 is chosen arbitrarily, increasing to e.g. 4096 enhances scanner + performance, trading off against huge table sizes. + This whole strategy needs to be revisited with support of flex experts. 
+ */ char *cstring; fix_yylloc_nl(yylloc, yytext, yyget_extra(yyscanner)); align_string(yytext); - cstring = calloc(strlen(yytext) + 1, sizeof(char)); + cstring = (char *)malloc(strlen(yytext) + 1); strcpy(cstring, yytext); yylval->cvalue = cstring; - return(HEXA_NUM); + return(HEXA_NUM_FRAGMENT); } /* end of hexadecimal value */ @@ -170,12 +186,26 @@ COMMENT (!.*)|("//".*)[^\n] } /* end of comment */ -[()] { - /* left or right parenthes */ +"(" { fix_yylloc(yylloc, yytext); return(yytext[0]); } /* end of left or right parenthesis */ +"(" { + /* scanning until a left parenthesis under this start condition implicitly + skips whitespace that would have been attributed to HEXA_NUM_FRAGMENT + otherwise */ + fix_yylloc(yylloc, yytext); + /* now hand over to HEXA_NUM_FRAGMENT */ + BEGIN(expect_hexa_num); + return(yytext[0]); +} /* end of left parenthesis preceding HEXA_NUM_FRAGMENT */ +")" { + fix_yylloc(yylloc, yytext); + BEGIN(INITIAL); + return(yytext[0]); +} /* end of right parenthesis */ + \n { /* token is a new line character */ @@ -190,7 +220,7 @@ COMMENT (!.*)|("//".*)[^\n] ; { /* token is end of statement character */ - /* release expect_vector */ + /* release start condition */ BEGIN(INITIAL); fix_yylloc(yylloc, yytext);