#include namespace { enum TokenType { RAW_STRING_LITERAL, }; struct Scanner { bool scan(TSLexer* lexer, const bool* valid_symbols) { // scan a raw string literal; see R source code for implementation: // https://github.com/wch/r-source/blob/52b730f217c12ba3d95dee0cd1f330d1977b5ea3/src/main/gram.y#L3102 // raw string literals can start with either 'r' or 'R' char prefix = lexer->lookahead; if (prefix != 'r' && prefix != 'R') { return false; } lexer->advance(lexer, false); // check for quote character char quote = lexer->lookahead; if (quote != '"' && quote != '\'') { return false; } lexer->advance(lexer, false); // start counting '-' characters int hyphen_count = 0; while (lexer->lookahead == '-') { lexer->advance(lexer, false); hyphen_count += 1; } // check for an opening bracket, and figure out // the corresponding closing bracket char opening_bracket = lexer->lookahead; char closing_bracket = 0; if (opening_bracket == '(') { closing_bracket = ')'; lexer->advance(lexer, false); } else if (opening_bracket == '[') { closing_bracket = ']'; lexer->advance(lexer, false); } else if (opening_bracket == '{') { closing_bracket = '}'; lexer->advance(lexer, false); } else { return false; } // we're in the body of the raw string; start looping until // we find the matching closing bracket for (; lexer->lookahead != 0; lexer->advance(lexer, false)) { // consume a closing bracket if (lexer->lookahead != closing_bracket) { continue; } lexer->advance(lexer, false); // consume hyphens bool hyphens_ok = true; for (int i = 0; i < hyphen_count; i++) { if (lexer->lookahead != '-') { hyphens_ok = false; break; } lexer->advance(lexer, false); } if (!hyphens_ok) { continue; } // consume a closing quote character if (lexer->lookahead != quote) { continue; } lexer->advance(lexer, false); // success! lexer->result_symbol = RAW_STRING_LITERAL; return true; } // if we get here, this implies we hit eof (and so we have // an unclosed raw string) return false; } }; } // end anonymous namespace extern "C" { void *tree_sitter_r_external_scanner_create() { return new Scanner(); } bool tree_sitter_r_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { Scanner* scanner = static_cast(payload); return scanner->scan(lexer, valid_symbols); } unsigned tree_sitter_r_external_scanner_serialize(void *payload, char *buffer) { return 0; } void tree_sitter_r_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { } void tree_sitter_r_external_scanner_destroy(void *payload) { Scanner* scanner = static_cast(payload); delete scanner; } } // extern "C"