/*
Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Lexical analyzer for YARA */

%{

/* Disable warnings for unused functions in this file.

As we redefine YY_FATAL_ERROR macro to use our own function yara_yyfatal, the
yy_fatal_error function generated by Flex is not actually used, causing a
compiler warning. Flex doesn't offer any options to remove the yy_fatal_error
function. When they include something like %option noyy_fatal_error as they do
with noyywrap then we can remove this pragma.
*/

#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-function"
#endif

/*
NOTE(review): the header names of the #include directives in this section were
not recoverable from the reviewed text. The lists below are derived from the
identifiers this file uses (errno, LLONG_MAX, setjmp/longjmp, va_list, bool,
uint32_t/UINT32_MAX, snprintf/sscanf/FILE, strtoll/atof, strlen/strrchr/...,
and the yr_* / YR_* / ERROR_* project symbols). Confirm them against the build
before merging.
*/
#include <errno.h>
#include <limits.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/stat.h>
#include <unistd.h>
#endif

#if defined(_WIN32)
#define strtoll _strtoi64
#endif

#include <yara/integers.h>
#include <yara/lexer.h>
#include <yara/sizedstr.h>
#include <yara/error.h>
#include <yara/mem.h>
#include <yara/utils.h>
#include <yara/strutils.h>

#include "grammar.h"

/* Records error_code in the compiler, reports it through yyerror() and stops
   the scanner. Only usable inside a lexer action (relies on yyterminate). */
#define error(error_code) \
    { \
      compiler->last_error = error_code; \
      yyerror(yyscanner, compiler, NULL); \
      yyterminate(); \
    }

/* Reports a syntax error carrying error_msg as extra info and stops the
   scanner. */
#define syntax_error(error_msg) \
    { \
      yr_compiler_set_error_extra_info(compiler, error_msg); \
      error(ERROR_SYNTAX_ERROR); \
    }

/* Stops the scanner with an error when appending the NUL-terminated string
   `data` to a buffer already holding `current_size` bytes would not leave room
   for the terminator within `max_length`. Wrapped in do/while(0) so it behaves
   as a single statement next to if/else. */
#define lex_check_space_ok(data, current_size, max_length) \
    do \
    { \
      if (strlen(data) + current_size >= max_length - 1) \
      { \
        yyerror(yyscanner, compiler, "out of space in lex_buf"); \
        yyterminate(); \
      } \
    } while (0)

/* Appends the current token text (yytext) to yyextra->lex_buf, after checking
   that it fits. */
#define yytext_to_buffer \
    { \
      char *yptr = yytext; \
      lex_check_space_ok(yptr, yyextra->lex_buf_len, YR_LEX_BUF_SIZE); \
      while(*yptr) \
      { \
        *yyextra->lex_buf_ptr++ = *yptr++; \
        yyextra->lex_buf_len++; \
      } \
    }

/* Declares `str` and allocates a SIZED_STRING with str_len extra bytes for it.
   On allocation failure an error is reported and the scanner stops; otherwise
   length is set to str_len and flags to 0. Because it declares a variable it
   must be used at block scope. */
#define alloc_sized_string(str, str_len) \
  SIZED_STRING* str = (SIZED_STRING*) yr_malloc( \
      str_len + sizeof(SIZED_STRING)); \
  if (str == NULL) \
  { \
    yyerror(yyscanner, compiler, "not enough memory"); \
    yyterminate(); \
  } \
  else \
  { \
    str->length = (uint32_t) (str_len); \
    str->flags = 0; \
  }

#ifdef _WIN32
#define snprintf _snprintf
#endif

/* Returns true when `path` is absolute: "X:/" or "X:\" style on Windows and
   Cygwin builds, a leading '/' elsewhere. A NULL path is not absolute. */
static bool is_absolute_path(
    char* path)
{
  if (path == NULL)
    return false;

#if defined(_WIN32) || defined(__CYGWIN__)
  return strlen(path) > 2 &&
      path[1] == ':' && (path[2] == '/' || path[2] == '\\');
#else
  return strlen(path) > 0 && path[0] == '/';
#endif
}

%}

%option reentrant bison-bridge
%option noyywrap
%option nounistd
%option noinput
%option nounput
%option never-interactive
%option yylineno
%option prefix="yara_yy"
%option outfile="lex.yy.c"

%option verbose
%option warn

%x str
%x regexp
%x include
%x comment

digit         [0-9]
letter        [a-zA-Z]
hexdigit      [a-fA-F0-9]
octdigit      [0-7]

%%

".."                    { return _DOT_DOT_;             }
"<"                     { return _LT_;                  }
">"                     { return _GT_;                  }
"<="                    { return _LE_;                  }
">="                    { return _GE_;                  }
"=="                    { return _EQ_;                  }
"!="                    { return _NEQ_;                 }
"<<"                    { return _SHIFT_LEFT_;          }
">>"                    { return _SHIFT_RIGHT_;         }
"private"               { return _PRIVATE_;             }
"global"                { return _GLOBAL_;              }
"rule"                  { return _RULE_;                }
"meta"                  { return _META_;                }
"strings"               { return _STRINGS_;             }
"ascii"                 { return _ASCII_;               }
"wide"                  { return _WIDE_;                }
"xor"                   { return _XOR_;                 }
"base64"                { return _BASE64_;              }
"base64wide"            { return _BASE64_WIDE_;         }
"fullword"              { return _FULLWORD_;            }
"nocase"                { return _NOCASE_;              }
"condition"             { return _CONDITION_;           }
"true"                  { return _TRUE_;                }
"false"                 { return _FALSE_;               }
"not"                   { return _NOT_;                 }
"and"                   { return _AND_;                 }
"or"                    { return _OR_;                  }
"at"                    { return _AT_;                  }
"in"                    { return _IN_;                  }
"of"                    { return _OF_;                  }
"them"                  { return _THEM_;                }
"for"                   { return _FOR_;                 }
"all"                   { return _ALL_;                 }
"any"                   { return _ANY_;                 }
"none"                  { return _NONE_;                }
"entrypoint"            { return _ENTRYPOINT_;          }
"filesize"              { return _FILESIZE_;            }
"matches"               { return _MATCHES_;             }
"contains"              { return _CONTAINS_;            }
"startswith"            { return _STARTSWITH_;          }
"endswith"              { return _ENDSWITH_;            }
"icontains"             { return _ICONTAINS_;           }
"istartswith"           { return _ISTARTSWITH_;         }
"iendswith"             { return _IENDSWITH_;           }
"iequals"               { return _IEQUALS_;             }
"import"                { return _IMPORT_;              }
"defined"               { return _DEFINED_;             }


"/*"                    { BEGIN(comment);       }
<comment>"*/"           { BEGIN(INITIAL);       }
<comment>(.|\n)         { /* skip comments */   }


"//"[^\n]*              { /* skip single-line comments */ }


include[ \t]+\"         {
                          /* start collecting the include path in lex_buf */
                          yyextra->lex_buf_ptr = yyextra->lex_buf;
                          yyextra->lex_buf_len = 0;
                          BEGIN(include);
                        }


<include>[^\"]+         { yytext_to_buffer; }


<include>\"             {

  /* Closing quote of an include directive: resolve the path, ask the include
     callback for the rules text and switch the scanner to it. */

  if (compiler->include_callback != NULL)
  {
    #ifdef _MSC_VER
    char* b = NULL;
    #endif
    char* s = NULL;
    char* f;

    char buffer[1024];
    const char* included_rules;
    char* current_file_name;
    char* include_path;

    *yyextra->lex_buf_ptr = '\0'; // null-terminate included file path

    current_file_name = yr_compiler_get_current_file_name(compiler);

    // The path is used verbatim unless the default callback is in use, a
    // current file name is known and the path is relative; in that case it is
    // resolved against the directory of the current file.
    if (current_file_name == NULL ||
        compiler->include_callback != _yr_compiler_default_include_callback ||
        is_absolute_path(yyextra->lex_buf))
    {
      include_path = yyextra->lex_buf;
    }
    else
    {
      strlcpy(buffer, current_file_name, sizeof(buffer));
      s = strrchr(buffer, '/');

      #ifdef _MSC_VER
      b = strrchr(buffer, '\\'); // in Windows both path delimiters are accepted
      #endif

      #ifdef _MSC_VER
      if (s != NULL || b != NULL)
      #else
      if (s != NULL)
      #endif
      {
        #ifdef _MSC_VER
        f = (b > s) ? (b + 1) : (s + 1);
        #else
        f = s + 1;
        #endif

        strlcpy(f, yyextra->lex_buf, sizeof(buffer) - (f - buffer));
        include_path = buffer;
      }
      else
      {
        include_path = yyextra->lex_buf;
      }
    }

    YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
        compiler->arena,
        YR_NAMESPACES_TABLE,
        compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

    included_rules = compiler->include_callback(
        include_path,
        current_file_name,
        ns->name,
        compiler->incl_clbk_user_data);

    if (included_rules != NULL)
    {
      int error_code = _yr_compiler_push_file_name(compiler, include_path);

      if (error_code != ERROR_SUCCESS)
      {
        if (error_code == ERROR_INCLUDES_CIRCULAR_REFERENCE)
        {
          yyerror(yyscanner, compiler, "includes circular reference");
        }
        else if (error_code == ERROR_INCLUDE_DEPTH_EXCEEDED)
        {
          yyerror(yyscanner, compiler, "includes depth exceeded");
        }

        if (compiler->include_free != NULL)
        {
          compiler->include_free(included_rules, compiler->incl_clbk_user_data);
        }

        yyterminate();
      }

      // Workaround for flex issue: https://github.com/westes/flex/issues/58
      yypush_buffer_state(YY_CURRENT_BUFFER, yyscanner);
      yy_scan_string(included_rules, yyscanner);
      yyset_lineno(1, yyscanner);

      if (compiler->include_free != NULL)
      {
        compiler->include_free(included_rules, compiler->incl_clbk_user_data);
      }
    }
    else
    {
      const char* err_msg_fmt;
      char err_msg[512];

      if (compiler->include_callback == _yr_compiler_default_include_callback)
      {
        err_msg_fmt = "can't open include file: %s";
      }
      else
      {
        err_msg_fmt = "callback failed to provide include resource: %s";
      }

      snprintf(
          err_msg, sizeof(err_msg), err_msg_fmt, yyextra->lex_buf);

      // On _WIN32 snprintf is mapped to _snprintf, which leaves the buffer
      // unterminated when the output is truncated.
      err_msg[sizeof(err_msg) - 1] = '\0';

      yyerror(yyscanner, compiler, err_msg);
    }

  }
  else // not allowing includes
  {
    yyerror(yyscanner, compiler, "includes are disabled");
  }

  BEGIN(INITIAL);
}


<<EOF>> {

  /* End of the current buffer: go back to the including buffer, or stop when
     there is none left. */

  yypop_buffer_state(yyscanner);

  if (!YY_CURRENT_BUFFER)
    yyterminate();

  return _END_OF_INCLUDED_FILE_;
}


$({letter}|{digit}|_)*"*"  {

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
    error(ERROR_INSUFFICIENT_MEMORY);

  return _STRING_IDENTIFIER_WITH_WILDCARD_;
}


$({letter}|{digit}|_)*  {

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
    error(ERROR_INSUFFICIENT_MEMORY);

  return _STRING_IDENTIFIER_;
}


#({letter}|{digit}|_)*  {

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
  {
    error(ERROR_INSUFFICIENT_MEMORY);
  }
  else
  {
    yylval->c_string[0] = '$'; /* replace # by $*/
  }

  return _STRING_COUNT_;
}


@({letter}|{digit}|_)*  {

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
  {
    error(ERROR_INSUFFICIENT_MEMORY);
  }
  else
  {
    yylval->c_string[0] = '$'; /* replace @ by $*/
  }

  return _STRING_OFFSET_;
}


!({letter}|{digit}|_)*  {

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
  {
    error(ERROR_INSUFFICIENT_MEMORY);
  }
  else
  {
    yylval->c_string[0] = '$'; /* replace ! by $*/
  }

  return _STRING_LENGTH_;
}


u?int(8|16|32)(be)? {

  /* The function is encoded as an integer:
     +3 for the "u" prefix, +0/+1/+2 for 8/16/32 bits, +6 for the "be" suffix. */

  char* text = yytext;

  if (*text == 'u')
  {
    yylval->integer = 3;
    text++;
  }
  else
  {
    yylval->integer = 0;
  }

  if (strstr(text, "int8") == text)
  {
    yylval->integer += 0;
    text += 4;
  }
  else if (strstr(text, "int16") == text)
  {
    yylval->integer += 1;
    text += 5;
  }
  else if (strstr(text, "int32") == text)
  {
    yylval->integer += 2;
    text += 5;
  }

  if (strcmp(text, "be") == 0)
  {
    yylval->integer += 6;
  }

  return _INTEGER_FUNCTION_;
}


({letter}|_)({letter}|{digit}|_)*  {

  if (strlen(yytext) > 128)
    syntax_error("identifier too long");

  yylval->c_string = yr_strdup(yytext);

  if (yylval->c_string == NULL)
    error(ERROR_INSUFFICIENT_MEMORY);

  return _IDENTIFIER_;
}


{digit}+(MB|KB){0,1}  {

  /* Decimal integer with an optional KB/MB multiplier; overflow is reported
     both for the conversion and for the multiplication. */

  char *endptr;

  errno = 0;
  yylval->integer = strtoll(yytext, &endptr, 10);

  if (yylval->integer == LLONG_MAX && errno == ERANGE)
  {
    yr_compiler_set_error_extra_info(compiler, yytext);
    error(ERROR_INTEGER_OVERFLOW);
  }
  else if (strstr(yytext, "KB") != NULL)
  {
    if (yylval->integer > LLONG_MAX / 1024)
    {
      yr_compiler_set_error_extra_info(compiler, yytext);
      error(ERROR_INTEGER_OVERFLOW);
    }
    else
    {
      yylval->integer *= 1024;
    }
  }
  else if (strstr(yytext, "MB") != NULL)
  {
    if (yylval->integer > LLONG_MAX / 1048576)
    {
      yr_compiler_set_error_extra_info(compiler, yytext);
      error(ERROR_INTEGER_OVERFLOW);
    }
    else
    {
      yylval->integer *= 1048576;
    }
  }

  return _NUMBER_;
}


{digit}+"."{digit}+  {
  yylval->double_ = atof(yytext);
  return _DOUBLE_;
}


0x{hexdigit}+  {

  char *endptr;

  errno = 0;
  yylval->integer = strtoll(yytext, &endptr, 16);

  if (yylval->integer == LLONG_MAX && errno == ERANGE)
  {
    yr_compiler_set_error_extra_info(compiler, yytext);
    error(ERROR_INTEGER_OVERFLOW);
  }

  return _NUMBER_;
}


0o{octdigit}+  {

  char *endptr;

  errno = 0;
  yylval->integer = strtoll(yytext + 2, &endptr, 8);  // skip the "0o" prefix

  if (yylval->integer == LLONG_MAX && errno == ERANGE)
  {
    yr_compiler_set_error_extra_info(compiler, yytext);
    error(ERROR_INTEGER_OVERFLOW);
  }

  return _NUMBER_;
}


<str>\"   {     /* saw closing quote - all done */

  alloc_sized_string(s, yyextra->lex_buf_len);

  *yyextra->lex_buf_ptr = '\0';
  memcpy(s->c_string, yyextra->lex_buf, yyextra->lex_buf_len + 1);
  yylval->sized_string = s;

  BEGIN(INITIAL);

  return _TEXT_STRING_;
}


<str>\\t   {

  lex_check_space_ok("\t", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '\t';
  yyextra->lex_buf_len++;
}


<str>\\r   {

  lex_check_space_ok("\r", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '\r';
  yyextra->lex_buf_len++;
}


<str>\\n   {

  lex_check_space_ok("\n", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '\n';
  yyextra->lex_buf_len++;
}


<str>\\\"   {

  lex_check_space_ok("\"", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '\"';
  yyextra->lex_buf_len++;
}


<str>\\\\   {

  lex_check_space_ok("\\", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '\\';
  yyextra->lex_buf_len++;
}


<str>\\x{hexdigit}{2} {

  // %x stores through an unsigned int pointer; the pattern guarantees two
  // hex digits, so the value fits in one byte.
  unsigned int result = 0;

  sscanf(yytext + 2, "%x", &result);
  lex_check_space_ok("X", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = (char) result;
  yyextra->lex_buf_len++;
}


<str>[^\\\n\"]+   { yytext_to_buffer; }


<str>\n  {
  syntax_error("unterminated string");
}


<str>\\(.|\n) {
  syntax_error("illegal escape sequence");
}


<regexp>\/i?s?  {

  /* Closing slash of a regular expression, optionally followed by the
     "i" (no case) and "s" (dot all) modifiers. */

  if (yyextra->lex_buf_len > 0)
  {
    alloc_sized_string(s, yyextra->lex_buf_len);

    if (yytext[1] == 'i')
      s->flags |= SIZED_STRING_FLAGS_NO_CASE;

    // yytext[2] is part of the token only when the match is "/is"; for a
    // bare "/" it is a lookahead character that must not be inspected.
    if (yytext[1] == 's' || (yyleng > 2 && yytext[2] == 's'))
      s->flags |= SIZED_STRING_FLAGS_DOT_ALL;

    *yyextra->lex_buf_ptr = '\0';
    strlcpy(s->c_string, yyextra->lex_buf, s->length + 1);
    yylval->sized_string = s;
  }
  else
  {
    syntax_error("empty regular expression");
  }

  BEGIN(INITIAL);
  return _REGEXP_;
}


<regexp>\\\/  {

  lex_check_space_ok("/", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);
  *yyextra->lex_buf_ptr++ = '/';
  yyextra->lex_buf_len++ ;
}


<regexp>\\. {

  /* Any other escape sequence is copied verbatim (backslash included). */

  lex_check_space_ok("\\.", yyextra->lex_buf_len, YR_LEX_BUF_SIZE);

  if (yytext[1] == 0)
    syntax_error("malformed regular expression");

  *yyextra->lex_buf_ptr++ = yytext[0];
  *yyextra->lex_buf_ptr++ = yytext[1];
  yyextra->lex_buf_len += 2;
}


<regexp>[^/\n\\]+ { yytext_to_buffer; }


<regexp>\n  {
  syntax_error("unterminated regular expression");
}


\"  {

  yylval->sized_string = NULL;
  yyextra->lex_buf_ptr = yyextra->lex_buf;
  yyextra->lex_buf_len = 0;
  BEGIN(str);
}


"/"  {

  yylval->sized_string = NULL;
  yyextra->lex_buf_ptr = yyextra->lex_buf;
  yyextra->lex_buf_len = 0;
  BEGIN(regexp);
}


\{(({hexdigit}|[ \-|\~\?\[\]\(\)\n\r\t]|\/\*(\/|\**[^*/])*\*+\/)+|\/\/.*\n)+\}  {
  // Match hex-digits with whitespace or comments. The latter are stripped
  // out by hex_lexer.l

  // TODO(vmalvarez): Integrate the hex string lexer and parser into this one,
  // by having a single lexer/parser instead of two different ones we can avoid
  // complex regular expressions like the one above, which is actually trying to
  // do some parsing in the lexer.

  alloc_sized_string(s, strlen(yytext));

  strlcpy(s->c_string, yytext, s->length + 1);
  yylval->sized_string = s;

  return _HEX_STRING_;
}


[ \t\r\n]   /* skip whitespace */

.   {

  /* Printable ASCII characters are returned as single-character tokens. */

  if (yytext[0] >= 32 && yytext[0] < 127)
  {
    return yytext[0];
  }
  else
  {
    syntax_error("non-ascii character");
  }
}

%%


/*
Formats a printf-style warning (truncated to 512 bytes) and delivers it to
compiler->callback with YARA_ERROR_LEVEL_WARNING, together with the file name
on top of the file name stack (or NULL), the current line and the current rule
(or NULL). Does nothing when no callback is installed.
*/
void yywarning(
    yyscan_t yyscanner,
    const char *message_fmt,
    ...)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  char* file_name;
  char message[512];
  va_list message_args;

  if (compiler->callback == NULL)
    return;

  va_start(message_args, message_fmt);

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  vsnprintf(message, sizeof(message), message_fmt, message_args);

  YR_RULE* current_rule = NULL;

  if (compiler->current_rule_idx != UINT32_MAX)
    current_rule = yr_arena_get_ptr(
        compiler->arena,
        YR_RULES_TABLE,
        compiler->current_rule_idx * sizeof(YR_RULE));

  compiler->callback(
      YARA_ERROR_LEVEL_WARNING,
      file_name,
      // prefer the line recorded by the compiler, fall back to the scanner's
      compiler->current_line ? compiler->current_line : yyget_lineno(yyscanner),
      current_rule,
      message,
      compiler->user_data);

  va_end(message_args);
}


/*
Reports error_message through yyerror() and then jumps to the recovery point
stored in compiler->error_recovery. Never returns to the caller.
*/
void yyfatal(
    yyscan_t yyscanner,
    const char *error_message)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  yyerror(yyscanner, compiler, error_message);
  longjmp(compiler->error_recovery, 1);
}


/*
Counts an error, records the line where it happened in
compiler->last_error_line, resets compiler->current_line to 0, and notifies
compiler->callback (when set) with YARA_ERROR_LEVEL_ERROR.

When error_message is not NULL it is stored as the error's extra info and
compiler->last_error becomes ERROR_SYNTAX_ERROR. When it is NULL the message
is built from the error already stored in the compiler.
*/
void yyerror(
    yyscan_t yyscanner,
    YR_COMPILER* compiler,
    const char *error_message)
{
  char message[512] = {'\0'};
  char* file_name = NULL;

  compiler->errors++;

  if (compiler->current_line != 0)
    compiler->last_error_line = compiler->current_line;
  else
    compiler->last_error_line = yyget_lineno(yyscanner);

  compiler->current_line = 0;

  if (compiler->file_name_stack_ptr > 0)
  {
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  }
  else
  {
    file_name = NULL;
  }

  YR_RULE* current_rule = NULL;

  if (compiler->current_rule_idx != UINT32_MAX)
    current_rule = yr_arena_get_ptr(
        compiler->arena,
        YR_RULES_TABLE,
        compiler->current_rule_idx * sizeof(YR_RULE));

  // if error_message != NULL the error comes from yyparse internal code
  // else the error comes from my code and the error code is set in
  // compiler->last_error

  if (error_message != NULL)
  {
    yr_compiler_set_error_extra_info(compiler, error_message);
    compiler->last_error = ERROR_SYNTAX_ERROR;

    if (compiler->callback != NULL)
    {
      compiler->callback(
          YARA_ERROR_LEVEL_ERROR,
          file_name,
          compiler->last_error_line,
          current_rule,
          error_message,
          compiler->user_data);
    }
  }
  else if (compiler->callback != NULL)
  {
    yr_compiler_get_error_message(compiler, message, sizeof(message));

    compiler->callback(
      YARA_ERROR_LEVEL_ERROR,
      file_name,
      compiler->last_error_line,
      current_rule,
      message,
      compiler->user_data);
  }
}


/*
Parses rules_size bytes of rule text starting at rules_data.

Returns the number of errors found (compiler->errors); 0 means success. When
the scanner cannot be created, 1 is returned and compiler->last_error is set
to ERROR_INSUFFICIENT_MEMORY.
*/
int yr_lex_parse_rules_bytes(
    const void* rules_data,
    size_t rules_size,
    YR_COMPILER* compiler)
{
  yyscan_t yyscanner;

  compiler->errors = 0;

  if (yylex_init(&yyscanner) != 0)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_INSUFFICIENT_MEMORY;
    return compiler->errors;
  }

  if (setjmp(compiler->error_recovery) != 0)
  {
    // yyfatal() jumped back here; yyscanner was set before setjmp and not
    // modified since, so it can be released safely.
    yylex_destroy(yyscanner);
    return compiler->errors;
  }

  #if YYDEBUG
  yydebug = 1;
  #endif

  yyset_extra(compiler, yyscanner);
  yy_scan_bytes(rules_data, rules_size, yyscanner);
  yyset_lineno(1, yyscanner);
  yyparse(yyscanner, compiler);
  yylex_destroy(yyscanner);

  return compiler->errors;
}


/*
Parses the NUL-terminated rule text in rules_string.

Returns the number of errors found (compiler->errors); 0 means success. When
the scanner cannot be created, 1 is returned and compiler->last_error is set
to ERROR_INSUFFICIENT_MEMORY.
*/
int yr_lex_parse_rules_string(
    const char* rules_string,
    YR_COMPILER* compiler)
{
  yyscan_t yyscanner;

  compiler->errors = 0;

  if (yylex_init(&yyscanner) != 0)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_INSUFFICIENT_MEMORY;
    return compiler->errors;
  }

  if (setjmp(compiler->error_recovery) != 0)
  {
    // fatal scanner error: release the scanner before returning
    yylex_destroy(yyscanner);
    return compiler->errors;
  }

  #if YYDEBUG
  yydebug = 1;
  #endif

  yyset_extra(compiler, yyscanner);
  yy_scan_string(rules_string, yyscanner);
  yyset_lineno(1, yyscanner);
  yyparse(yyscanner, compiler);
  yylex_destroy(yyscanner);

  return compiler->errors;
}


/*
Parses rules read from the stdio stream rules_file. The stream is not closed
here.

Returns the number of errors found (compiler->errors); 0 means success. When
the scanner cannot be created, 1 is returned and compiler->last_error is set
to ERROR_INSUFFICIENT_MEMORY.
*/
int yr_lex_parse_rules_file(
    FILE* rules_file,
    YR_COMPILER* compiler)
{
  yyscan_t yyscanner;

  compiler->errors = 0;

  if (yylex_init(&yyscanner) != 0)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_INSUFFICIENT_MEMORY;
    return compiler->errors;
  }

  if (setjmp(compiler->error_recovery) != 0)
  {
    // fatal scanner error: release the scanner before returning
    yylex_destroy(yyscanner);
    return compiler->errors;
  }

  #if YYDEBUG
  yydebug = 1;
  #endif

  yyset_in(rules_file, yyscanner);
  yyset_extra(compiler, yyscanner);
  yyparse(yyscanner, compiler);
  yylex_destroy(yyscanner);

  return compiler->errors;
}


/*
Reads the whole content of rules_fd into memory and parses it. The descriptor
is not closed here.

Returns the number of errors found (compiler->errors); 0 means success. On a
setup failure 1 is returned and compiler->last_error is set to
ERROR_COULD_NOT_READ_FILE (size query failed, read failed or was short) or
ERROR_INSUFFICIENT_MEMORY (buffer or scanner allocation failed).
*/
int yr_lex_parse_rules_fd(
    YR_FILE_DESCRIPTOR rules_fd,
    YR_COMPILER* compiler)
{
  yyscan_t yyscanner;
  size_t file_size;
  void* buffer;
  bool read_ok;

  compiler->errors = 0;

  #if defined(_WIN32) || defined(__CYGWIN__)
  DWORD bytes_read = 0;
  DWORD size_low = GetFileSize(rules_fd, NULL);

  if (size_low == INVALID_FILE_SIZE)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_COULD_NOT_READ_FILE;
    return compiler->errors;
  }

  file_size = (size_t) size_low;
  #else
  struct stat fs;

  if (fstat(rules_fd, &fs) != 0)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_COULD_NOT_READ_FILE;
    return compiler->errors;
  }

  file_size = (size_t) fs.st_size;
  #endif

  // Never request zero bytes: the result of a zero-sized allocation is
  // implementation-defined and an empty file is not an out-of-memory error.
  buffer = yr_malloc(file_size > 0 ? file_size : 1);

  if (buffer == NULL)
  {
    compiler->errors = 1;
    compiler->last_error = ERROR_INSUFFICIENT_MEMORY;
    return compiler->errors;
  }

  // A short read is treated as a failure on every platform.
  #if defined(_WIN32) || defined(__CYGWIN__)
  read_ok = ReadFile(rules_fd, buffer, (DWORD) file_size, &bytes_read, NULL) &&
            (size_t) bytes_read == file_size;
  #else
  {
    ssize_t bytes_read = read(rules_fd, buffer, file_size);
    read_ok = bytes_read >= 0 && (size_t) bytes_read == file_size;
  }
  #endif

  if (!read_ok)
  {
    yr_free(buffer);
    compiler->errors = 1;
    compiler->last_error = ERROR_COULD_NOT_READ_FILE;
    return compiler->errors;
  }

  if (yylex_init(&yyscanner) != 0)
  {
    yr_free(buffer);
    compiler->errors = 1;
    compiler->last_error = ERROR_INSUFFICIENT_MEMORY;
    return compiler->errors;
  }

  // The recovery point is armed only once both resources exist; buffer and
  // yyscanner are not modified afterwards, so their values are still valid
  // when yyfatal() jumps back here.
  if (setjmp(compiler->error_recovery) != 0)
  {
    yylex_destroy(yyscanner);
    yr_free(buffer);
    return compiler->errors;
  }

  #if YYDEBUG
  yydebug = 1;
  #endif

  yyset_extra(compiler, yyscanner);
  yy_scan_bytes((const char*) buffer, (int) file_size, yyscanner);
  yyset_lineno(1, yyscanner);
  yyparse(yyscanner, compiler);
  yylex_destroy(yyscanner);

  yr_free(buffer);

  return compiler->errors;
}