/* Copyright (c) 2022, 2024, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the Free Software Foundation. This program is designed to work with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the separately licensed software that they have either included with the program or referenced in the documentation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** @file log_source_backtrace.cc This file contains contains code to search for a recent stackdump in a file formatted in the "traditional error log" format. If one is found, it is prepended to the buffered log-events. */ #include // MYSQL_TIME_STATUS #include // MYSQL_TIME #include // std::cerr #include "log_sink_buffer.h" // struct log_line_buffer #include "log_sink_trad.h" // parse_trad_field() #include "my_dir.h" // MY_STAT #include "mysql/components/services/log_builtins.h" // LogEvent() #include "mysql/components/services/log_service.h" #include "mysql/my_loglevel.h" #include "mysqld_error.h" // ER_STACKTRACE #include "sql/log.h" // iso8601_timestamp_to_microseconds() #include "sql/sql_list.h" // List /// How far back in the trad error-log should we start looking? static const size_t max_backtrace = 32 * 1024; // be generous, it's temporary /// Latest ISO8601 timestamp found outside a stackdump. static ulonglong iso8601_outside_stack = 0; /// ISO8601 timestamp in current stackdump. static ulonglong iso8601_during_stack = 0; /// Pointer to first character of a textual stacktrace, or nullptr. static char *backtrace_beg = nullptr; /// Pointer to last character of a textual stacktrace, or nullptr. static char *backtrace_end = nullptr; /** If a stack backtrace was found in the traditional error log, prepend its lines to buffered logging (so the backtrace may be flushed to performance_schema.error_log and all configured log-sinks later). `backtrace_beg` must be set to the beginning of a char-buffer containing a textual stacktrace. `backtrace_end` must point to the last valid character in the textual stacktrace. The "textual representation" must consist of one or several lines terminated by '\n'. These '\n' will be replaced with '\0'. Each line will become one log-event. The list of log-events, if any, will be prepended to the list of log-events buffered during start-up. After this function returns, the caller is free to free() the buffer containing the log file fragment. */ static log_service_error log_source_backtrace_add_events() { // Head of the list of buffered log messages we're creating log_line_buffer *log_line_backtrace_head = nullptr; // Where to write the pointer to the newly-created tail-element of the list log_line_buffer **log_line_backtrace_tail = &log_line_backtrace_head; // List element we creating log_line_buffer *lle; char *my_line = backtrace_beg; log_service_error ret = LOG_SERVICE_SUCCESS; // return value assert((backtrace_beg != nullptr) && (backtrace_end != nullptr)); // Add backtrace to buffered log, each line becoming one log event. while (my_line < backtrace_end) { char *my_eol = (char *)memchr(my_line, '\n', (size_t)(backtrace_end - my_line + 1)); // Fail-safe. Failing to parse is bad, but breaking start-up is worse. if (my_eol == nullptr) break; // Only add lines that aren't empty. if (my_eol > my_line) { *my_eol = '\0'; // Allocate an element for our singly-linked list. if ((lle = (log_line_buffer *)my_malloc( key_memory_log_error_stack, sizeof(log_line_buffer), MYF(0))) == nullptr) { ret = LOG_SERVICE_OUT_OF_MEMORY; /* purecov: inspected */ break; /* purecov: inspected */ } /* Create a log-event from the backtrace line. Note the use of message() instead of verbatim() in order to copy the message into the LogEvent's allocation. To use verbatim(), we'd have to manage the message's life-cycle ourselves instead. */ log_line *ll; LogEvent() .type(LOG_TYPE_ERROR) .errcode(ER_STACK_BACKTRACE) .subsys(LOG_SUBSYSTEM_TAG) .prio(SYSTEM_LEVEL) // make it unfilterable .message("%s", my_line) // line from the backtrace .steal(&ll); // obtain pointer to the event-data // Add a timestamp to the event. // The first event gets the timestamp we parsed from the textual // stacktrace (contained in `iso8601_during_stack`); subsequent // lines strictly increase this timestamp to guarantee row order. log_line_item_set(ll, LOG_ITEM_LOG_BUFFERED)->data_integer = iso8601_during_stack++; // Put event in our single-linked list list of backtrace-events. lle->next = nullptr; lle->ll = *ll; // Shallow-copy the log-event we created above. // Release the event we created, but keep its allocations // (as they're still used by the copy). log_line_exit(ll); // Append event to list of backtrace log-events. *log_line_backtrace_tail = lle; // current_tail->next = new_element log_line_backtrace_tail = &(lle->next); } my_line = ++my_eol; // Next line in buffer starts after '\n'. } // Prepend our singly-linked list of stacktrace events // to the singly-linked list of start-up events. // Flushing this later will free it. log_sink_buffer_prepend_list(log_line_backtrace_head, log_line_backtrace_tail); return ret; } /** Parse an ISO-8601 compliant timestamp. @param line_start Pointer to the first character of the line. @param line_end Pointer to the '\n' ending the line. @retval Number of micro-seconds since the epoch represented by the timestamp */ ulonglong log_iso8601_from_line(const char *line_start, const char *line_end) { ulonglong iso8601_time = 0; const char *end; char timestamp[iso8601_size]; // space for timestamp + '\0' ssize_t len; if ((len = parse_trad_field(line_start, &end, line_end)) > 0) { if (len < iso8601_size) { memcpy(timestamp, line_start, len); timestamp[len] = '\0'; // terminate target buffer // The parse-function corrects for timezone. iso8601_time = iso8601_timestamp_to_microseconds(timestamp, len); } } return iso8601_time; } /** Parse a single line in the traditional error-log. This function may be called to examine a buffer containing part of an error log line by line. If the header of a stack backtrace is recognized, certain variables are set up; if the end of a backtrace is recognized, those variables are cleared again. In other words, if the all the loaded log-lines have been examined and the variables are non-zero, the last chunk in the error log was a backtrace, and we have saved - `iso8601_during_stack` is non-zero and contains the time/date (micro-seconds since the epoch, UTC) of the backtrace. - `backtrace_beg` is non-null and points to the first character of the backtrace. - `backtrace_end` is non-null and points to the last character in the backtrace. `backtrace_beg` and `backtrace_end` if set point to addresses in the temporary buffer that contains the error log fragment. This function will attempt to determine whether the last item in the traditional error log is a backtrace and if so, identify the beginning and end of that backtrace in the buffer. It does _not_ copy the information, add it to performance_schema.error_log, and so forth; this is something the caller can see to if desired. @param line_start Pointer to the first character of the line. @param line_length Length of the line in bytes. @retval LOG_SERVICE_MISC_ERROR log was written to after server start */ static log_service_error log_source_backtrace_parse_line(char *line_start, size_t line_length) { const char *line_end = line_start + line_length; bool is_iso8601 = false; if (*line_end != '\n') return LOG_SERVICE_PARSE_ERROR; // lines are '\n' separated /* ISO8601 sanity check ("2022-04-26T23:45:06[Z.]") If it's a stacktrace, there will be no micro-seconds ('Z'); if it's a normal trad-log line, micro-seconds will follow ('.'). Note that before WL#14955, stacktrace timestamps had no date part. These old stacktraces will be disregarded here. This is intentional. */ if ((line_length >= 20) && // must start with timestamp (line_start[0] == '2') && (line_start[4] == '-') && (line_start[7] == '-') && (line_start[10] == 'T') && (line_start[13] == ':') && (line_start[16] == ':') && ((line_start[19] == '.') || (line_start[19] == 'Z'))) is_iso8601 = true; // First, let's see whether it's the beginning of a stacktrace. // e.g. "08:00:18 UTC - mysqld got signal 7 ;" // Stacktraces are always in UTC (i.e. 'Z': no timezone offset). if (is_iso8601 && (line_length >= 27) && (0 == strncmp(line_start + 19, "Z UTC - ", 8))) { // It's always UTC. // Get timestamp. There will be no micro-second part. iso8601_during_stack = log_iso8601_from_line(line_start, line_end); // If it's before the last full timestamp we've seen, adjust micro-seconds. if (iso8601_during_stack <= iso8601_outside_stack) { /* This can happen if both timestamps are in the same second since the trad-log timestamp has a micro-second part, and the backtrace-timestamp doesn't. In this case, we just advance the backtrace timestamp to the next "free" microsecond, i.e. the micro-second after the last full timestamp we've seen. */ iso8601_during_stack = iso8601_outside_stack + 1; } // We save the beginning of the line as the beginning of the stacktrace. backtrace_beg = line_start; backtrace_end = line_start + line_length; } // See whether it's a trad log line. (If so, remember the timestamp.) // The trad log's timestamp will have microseconds ('.'). // Its timezone could be anything. else if (is_iso8601 && (line_start[19] == '.')) { // ISO8601: 2022-02-21T03:30:34.561771 (timezone could be anything) /* If we find a trad-line after a backtrace, throw the backtrace away. The assumption is that either a) the server crashed, wrote the backtrace, and ended -- in which case, the backtrace is the very last thing in the trad log (and we'll process it): ... 2022-03-16T16:17:28.299339Z 0 [System] [MY-010116] [Server] trad stuff 12:34:56 UTC - mysqld got signal 1 ; // backtrace header // log ends or b) there is a backtrace, but it's not the last section in the trad log -- in which case, the server has run between the failure we found and the current start-up. In this case, we assume that the stackdump has already been processed in that previous run. In this case, there should be a processed (correctly formatted) copy of the stackdump immediately following the raw version. Thus, we'll skip the raw version, since we'll be reading the processed one later, anyway. ... 2022-03-16T06:17:28.299339Z 0 [System] [MY-010116] [Server] trad stuff 12:34:56 UTC - mysqld got signal 1 ; // backtrace header // log ends // server restarted, processed above backtrace, and logged it: 2022-03-16T12:34:56.000001Z 0 [System] [MY-010116] [trace] 12:34:56 UTC 2022-03-16T12:35:00.299339Z 0 [System] [MY-010116] [Server] trad stuff // Later, we shutdown cleanly, and the log ends, now containing both // the raw stackdump followed by the properly formatted one. // Server is restarted a second time. The raw backtrace may be found, // but is ignored as it is not the last section in the log. The // formatted version of the stackdump behind it however is parsed // and read into performance_schema.error_log -- if the traditional // log is configured to be the main error log. If a different log // (e.g. JSON) is configured to be the main error log, the formatted // stackdump will be read thence instead (i.e. different log, same // principle). */ // Get timestamp from line. iso8601_outside_stack = log_iso8601_from_line(line_start, line_end); if (iso8601_outside_stack > 0) { // If we got a timestamp from the line ... // If read timestamp is later than server_start, stop reading. // This should not happen, but we need a failsafe. if (iso8601_outside_stack > log_builtins_started()) { return LOG_SERVICE_MISC_ERROR; } // If we already processed a backtrace, ignore it, it's not current. backtrace_beg = backtrace_end = nullptr; iso8601_during_stack = 0; return LOG_SERVICE_NOTHING_DONE; // success, of a sort } } // if trad-line ... // It's not a trad-line, and we've already detected a backtrace header. // Expand the backtrace-buffer to include this line. if (iso8601_during_stack > 0) { backtrace_end = line_start + line_length; return LOG_SERVICE_SUCCESS; } /* If we get here, we haven't seen a backtrace-header yet (after which we relax the rules and accept all kinds of lines), but we didn't get a correct ISO8601 timestamp, either. This is slightly odd, but can happen in situations such as: a) A 3rd party library writes debug info to stdout/stderr. b) The last item in the log was a stacktrace written by an older, pre-WL#14955 server that does not include the date in stacktrace-timestamps. We intentionally disregard such stacktraces. c) The server stopped as the result of a failed assert(), the output of which we just encountered. d) mysql-test-run.pl includes "\nCURRENT_TEST: " line. */ // We ignore empty lines (for mysql-test-run.pl etc.). if ((line_end - line_start) == 0) return LOG_SERVICE_NOTHING_DONE; // Unrecognized line in traditional error log file. return LOG_SERVICE_PARSE_ERROR; } /** Read tail end of the traditional error-log as a single chunk. Look for a recent stacktrace in that data. @param log_file The file's name @param size length of the input file (in bytes) @retval LOG_SERVICE_SUCCESS success @retval LOG_SERVICE_OPEN_FAILED failed to open file @retval LOG_SERVICE_SEEK_FAILED seek failed @retval LOG_SERVICE_UNABLE_TO_READ read failed or file empty @retval LOG_SERVICE_OUT_OF_MEMORY out of memory @retval LOG_SERVICE_PARSE_ERROR could not find delimiter ('\n') @retval LOG_SERVICE_MISC_ERROR parsing: sanity test failed */ static log_service_error log_error_read_backtrace_loop(const char *log_file, size_t size) { FILE *fh; // file-handle off_t pos; // where in the file to start reading char *chunk; // beginning of allocation char *line_start, *line_end; // beginning/end of line log_service_error ret = LOG_SERVICE_SUCCESS; // return value // Reset backtrace window. backtrace_beg = backtrace_end = nullptr; // Is there any data to read? if (size <= 0) return LOG_SERVICE_UNABLE_TO_READ; if ((fh = my_fopen(log_file, O_RDONLY, MYF(0))) == nullptr) return LOG_SERVICE_OPEN_FAILED; // Allocate memory to read the tail of the log into. if ((chunk = (char *)my_malloc(key_memory_log_sink_pfs, max_backtrace, MYF(0))) == nullptr) { ret = LOG_SERVICE_OUT_OF_MEMORY; /* purecov: inspected */ goto fail_with_close; } /* If the file would fit into the buffer entirely, we'll read it from the beginning. Otherwise, we start reading from a point in the file where about the size of the buffer remains as input. */ if (size <= max_backtrace) pos = 0; else { pos = size - max_backtrace; size = max_backtrace; // Correct size to the part we'll actually read. // Seek to the approximate position of the row to start reading at. if (my_fseek(fh, (long)pos, SEEK_SET)) { /* purecov: begin inspected */ ret = LOG_SERVICE_SEEK_FAILED; goto fail_with_free; /* purecov: end */ } } if (my_fread(fh, (uchar *)chunk, size, MYF(0)) < size) { ret = LOG_SERVICE_UNABLE_TO_READ; /* purecov: begin inspected */ goto fail_with_free; /* purecov: end */ } if (pos > 0) { // We're likely in the middle of a row, skip forward to the next. if ((line_start = (char *)memchr(chunk, '\n', size)) == nullptr) { ret = LOG_SERVICE_PARSE_ERROR; goto fail_with_free; } } else line_start = chunk; /* Process the data line-by-line until - we reach the end of the data - the line-delimiter '\n' is not found - the parse function suggests we stop (LOG_SERVICE_MISC_ERROR) */ do { const size_t processed = (size_t)(line_start - chunk); const size_t rest = size - processed; // Find EOL ('\n'). If last line is partial, skip it. if ((line_end = (char *)memchr(line_start, '\n', rest)) == nullptr) break; // Parse the current line. We are trying to determine whether the // last part of the error log is a backtrace. If so, we'll prepend // its lines to buffered logging further below. This data will then // be flushed to all configured log-sinks in their respective formats // (e.g. in JSON for the JSON-log) as well as to // performance_schema.error_log. ret = log_source_backtrace_parse_line(line_start, line_end - line_start); // Proceed to the next line (i.e. past '\n'). line_start = line_end + 1; } while ((ret != LOG_SERVICE_MISC_ERROR) && (line_start < (chunk + size))); // If we found a backtrace, prepend it to buffered logging. if (backtrace_beg != nullptr) ret = log_source_backtrace_add_events(); fail_with_free: backtrace_beg = backtrace_end = nullptr; // Any pointers stale at this point. my_free(chunk); fail_with_close: my_fclose(fh, MYF(0)); return ret; } /** Read stacktrace from previous failure. The signal-handler attempts to write a stacktrace to stderr. As stderr (and stdout) are redirected to the "traditional" error-log, that's where we'll have to look for stacktraces, even if we use a different log-sink otherwise (e.g. JSON, syslog, etc.). Once we have determined whether such a log exists and is readable, we call @see log_error_read_backtrace_loop() to do the actual reading and parsing. It should be noted that at the point this function is normally called, buffered error logging will not have been flushed yet. @param log_name The log file to read (log_error_dest). @retval LOG_SERVICE_SUCCESS Success (log read and parsed) @retval LOG_SERVICE_UNABLE_TO_READ Could not read/access() file @retval LOG_SERVICE_INVALID_ARGUMENT Invalid log-file name @retval LOG_SERVICE_ARGUMENT_TOO_LONG File-name too long @retval otherwise Return value from reader */ log_service_error log_error_read_backtrace(const char *log_name) { MY_STAT stat_log; log_service_error ret; if ((log_name == nullptr) || (log_name[0] == '\0')) return LOG_SERVICE_INVALID_ARGUMENT; if (strlen(log_name) >= FN_REFLEN) return LOG_SERVICE_ARGUMENT_TOO_LONG; /* purecov: inspected */ assert(0 != strcmp(log_name, "stderr")); /* Lock the error-logger while we're restoring the error-log so nobody writes to the log-file while we're reading it. That way, we won't have to deal with half-written lines or the file-size changing. */ if (my_access(log_name, R_OK) || (my_stat(log_name, &stat_log, MYF(0)) == nullptr)) ret = LOG_SERVICE_UNABLE_TO_READ; /* purecov: inspected */ else ret = log_error_read_backtrace_loop(log_name, (size_t)stat_log.st_size); return ret; }