/*
Copyright (c) 2013. The YARA Authors. All Rights Reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Lexical analyzer for hex strings */

%{

/* Disable warnings for unused functions in this file.

As we redefine YY_FATAL_ERROR macro to use our own function hex_yyfatal, the
yy_fatal_error function generated by Flex is not actually used, causing a
compiler warning. Flex doesn't offer any options to remove the yy_fatal_error
function. When they include something like %option noyy_fatal_error as they do
with noyywrap then we can remove this pragma.
*/

#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-function"
#endif

/*
NOTE(review): the nine angle-bracket include targets below were rebuilt from
the identifiers this file uses (jmp_buf, yr_recovery_state_key,
yr_thread_storage_*, RE_AST, FAIL_ON_ERROR, xtoi, strlcpy,
HEX_LEX_ENVIRONMENT, ...). Confirm the exact list against the source tree.
*/

#include <setjmp.h>

#include <yara/globals.h>
#include <yara/limits.h>
#include <yara/error.h>
#include <yara/mem.h>
#include <yara/re.h>
#include <yara/threading.h>
#include <yara/strutils.h>
#include <yara/hex_lexer.h>

#include "hex_grammar.h"

#ifdef _WIN32
#define snprintf _snprintf
#endif

/* Stores `error` in the RE_AST attached to the scanner and aborts parsing
   when `x` is true. */

#define ERROR_IF(x, error) \
    if (x) \
    { \
      RE_AST* re_ast = yyget_extra(yyscanner); \
      re_ast->error_code = error; \
      YYABORT; \
    }

%}

%option reentrant bison-bridge
%option noyywrap
%option nounistd
%option noinput
%option nounput
%option never-interactive
%option yylineno
%option prefix="hex_yy"
%option outfile="lex.yy.c"

%option verbose
%option warn

digit         [0-9]
letter        [a-zA-Z]
hexdigit      [a-fA-F0-9]

%x comment
%x range

%%


{hexdigit}{2}  {

  /* Fully specified byte, e.g. "4F". */
  yylval->integer = xtoi(yytext);
  return _BYTE_;
}

{hexdigit}\?  {

  /* Low nibble is a wildcard, e.g. "4?". The known nibble stays in the low
     byte; 0xF000 is OR-ed in above it (presumably the nibble mask consumed
     by the grammar). */
  yytext[1] = '0'; /* replace ? by 0 */
  yylval->integer = xtoi(yytext) | 0xF000 ;
  return _MASKED_BYTE_;
}

\?{hexdigit}  {

  /* High nibble is a wildcard, e.g. "?F". */
  yytext[0] = '0'; /* replace ? by 0 */
  yylval->integer = xtoi(yytext) | 0x0F00 ;
  return _MASKED_BYTE_;
}

\?\?  {

  /* Both nibbles are wildcards. */
  yylval->integer = 0x0000;
  return _MASKED_BYTE_;
}

\[  {

  /* Start of a jump such as "[4-8]": switch to the exclusive range state. */
  BEGIN(range);
  return yytext[0];
}

"/*"  {

  BEGIN(comment);
}

<comment>"*/"  {

  BEGIN(INITIAL);
}

<comment>(.|\n)  { /* skip comments */ }

"//".*  { /* skip single-line comments */ }

<range>\-  {

  return yytext[0];
}

<range>{digit}+  {

  /* NOTE(review): atoi() cannot report overflow for very long digit runs. */
  yylval->integer = atoi(yytext);
  return _NUMBER_;
}

<range>\]  {

  BEGIN(INITIAL);
  return yytext[0];
}

<range>[ \t\r\n]  { /* skip whitespaces */ }

<range>.  {

  yyerror(yyscanner, lex_env, "invalid character in hex string jump");
  yyterminate();
}

[ \t\r\n]  { /* skip whitespaces */ }

[{}()|]  {

  /* pass valid characters to the parser */
  return yytext[0];
}

.  {

  /* reject all other characters */
  yyerror(yyscanner, lex_env, "invalid character in hex string");
  yyterminate();
}

%%

//
// yyfatal
//
// Fatal-error hook for the generated scanner. It fetches the jmp_buf that
// yr_parse_hex_string() registered in thread storage and longjmps to it with
// value 1. The error message is not used.
//

void yyfatal(
    yyscan_t yyscanner,
    const char *error_message)
{
  jmp_buf* recovery_state = (jmp_buf*) yr_thread_storage_get_value(
      &yr_recovery_state_key);

  longjmp(*recovery_state, 1);
}


//
// yyerror
//
// Records a lexing/parsing error in lex_env. Only the first error is kept:
// once last_error is set, later calls leave both the code and the message
// untouched.
//

void yyerror(
    yyscan_t yyscanner,
    HEX_LEX_ENVIRONMENT* lex_env,
    const char *error_message)
{
  // if lex_env->last_error was set to some error code before
  // don't overwrite it, we are interested in the first error, not in
  // subsequent errors like "syntax error, unexpected $end" caused by
  // early parser termination.

  if (lex_env->last_error == ERROR_SUCCESS)
  {
    lex_env->last_error = ERROR_INVALID_HEX_STRING;

    strlcpy(
        lex_env->last_error_message,
        error_message,
        sizeof(lex_env->last_error_message));
  }
}


//
// yr_parse_hex_string
//
// Parses a hex string into a newly created RE_AST.
//
// Args:
//    hex_string: Text of the hex string to parse.
//    re_ast: Receives the AST created by yr_re_ast_create(). It is left
//            allocated on every return path that follows its creation,
//            including the failing ones.
//    error: Receives the first error message when parsing fails.
//
// Returns:
//    ERROR_SUCCESS on success, ERROR_INTERNAL_FATAL_ERROR if the scanner
//    could not be initialized or raised a fatal error, the error returned by
//    yr_re_ast_create(), or the error recorded by yyerror().
//

int yr_parse_hex_string(
    const char* hex_string,
    RE_AST** re_ast,
    RE_ERROR* error)
{
  yyscan_t yyscanner;
  jmp_buf recovery_state;
  HEX_LEX_ENVIRONMENT lex_env;

  lex_env.last_error = ERROR_SUCCESS;
  lex_env.inside_or = 0;

  // Register the recovery point that yyfatal() jumps back to.

  yr_thread_storage_set_value(&yr_recovery_state_key, &recovery_state);

  if (setjmp(recovery_state) != 0)
    return ERROR_INTERNAL_FATAL_ERROR;

  FAIL_ON_ERROR(yr_re_ast_create(re_ast));

  // The RE_FLAGS_FAST_REGEXP flag indicates a regular expression can be
  // matched by faster algorithm. These regular expressions come from hex
  // strings that do not contain alternatives, like in:
  //
  // { ( 01 02 | 03 04) 05 06 }.
  //
  // This flag is unset later during parsing if alternatives are used.

  (*re_ast)->flags |= RE_FLAGS_FAST_REGEXP;

  // Set RE_FLAGS_DOT_ALL because in hex strings the "dot" (?? in this case)
  // must match all characters including new-line.

  (*re_ast)->flags |= RE_FLAGS_DOT_ALL;

  // yylex_init() returns non-zero when it cannot set up the scanner. Using
  // yyscanner after that would be invalid, so bail out here. *re_ast stays
  // allocated, exactly as it does on the parse-error path below.

  if (yylex_init(&yyscanner) != 0)
    return ERROR_INTERNAL_FATAL_ERROR;

  yyset_extra(*re_ast, yyscanner);
  yy_scan_string(hex_string, yyscanner);
  yyparse(yyscanner, &lex_env);
  yylex_destroy(yyscanner);

  if (lex_env.last_error != ERROR_SUCCESS)
  {
    strlcpy(error->message, lex_env.last_error_message, sizeof(error->message));
    return lex_env.last_error;
  }

  return ERROR_SUCCESS;
}