/* Copyright (c) 2011, 2024, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the Free Software Foundation. This program is designed to work with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the separately licensed software that they have either included with the program or referenced in the documentation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** @file Implementation of the Optimizer trace API (WL#5257) Helpers connecting the optimizer trace to THD or Information Schema. They are dedicated "to the server" (hence the file's name). In order to create a unit test of the optimizer trace without defining Item_field (and all its parent classes), Query_block..., these helpers are defined in opt_trace2server.cc. */ #include "my_config.h" #include #include #include "lex_string.h" #include "my_compiler.h" #include "my_dbug.h" #include "my_inttypes.h" #include "my_sqlcommand.h" #include "mysql/strings/m_ctype.h" #include "sql/auth/auth_acls.h" #include "sql/auth/auth_common.h" // check_table_access #include "sql/auth/sql_security_ctx.h" #include "sql/enum_query_type.h" #include "sql/field.h" #include "sql/opt_trace.h" #include "sql/opt_trace_context.h" #include "sql/set_var.h" #include "sql/sp_head.h" // sp_head #include "sql/sp_instr.h" // sp_printable #include "sql/sql_class.h" #include "sql/sql_lex.h" #include "sql/sql_list.h" #include "sql/sql_parse.h" // sql_command_flags #include "sql/sql_show.h" // schema_table_stored_record #include "sql/system_variables.h" #include "sql/table.h" #include "sql_string.h" class Item; namespace { const char I_S_table_name[] = "OPTIMIZER_TRACE"; /* Standalone functions */ /** Whether a list of tables contains information_schema.OPTIMIZER_TRACE. @param tbl list of tables @note this does not catch that a stored routine or view accesses the OPTIMIZER_TRACE table. So using a stored routine or view to read OPTIMIZER_TRACE will overwrite OPTIMIZER_TRACE as it runs and provide uninteresting info. */ bool list_has_optimizer_trace_table(const Table_ref *tbl) { for (; tbl; tbl = tbl->next_global) { if (tbl->schema_table && 0 == strcmp(tbl->schema_table->table_name, I_S_table_name)) return true; } return false; } /** Whether a SQL command qualifies for optimizer tracing. @param sql_command the command */ inline bool sql_command_can_be_traced(enum enum_sql_command sql_command) { /* Tracing is limited to a few SQL commands only. Reasons to not trace other commands: - it reduces the range of potential unknown bugs and misuse - they probably don't have anything interesting optimizer-related - query_block for them might be uninitialized and unprintable. - SHOW WARNINGS would create an uninteresting trace and thus overwrite the previous interesting one. About prepared statements: note that we don't turn on tracing for SQLCOM_PREPARE (respectively SQLCOM_EXECUTE), because we don't know yet what command is being prepared (resp. executed). We turn tracing on later, if the prepared (resp. executed) command is in the allowed set above, in check_prepared_statement() (resp. mysql_execute_command() called by Prepared_statement::execute()). PREPARE SELECT is worth tracing as it does permanent query transformations. Note that SQLCOM_SELECT includes EXPLAIN. */ return (sql_command_flags[sql_command] & CF_OPTIMIZER_TRACE); } /// @returns whether this command is "SET ... @@@@OPTIMIZER_TRACE=..." bool sets_var_optimizer_trace(enum enum_sql_command sql_command, List *set_vars) { if (sql_command == SQLCOM_SET_OPTION) { List_iterator_fast it(*set_vars); const set_var_base *var; while ((var = it++)) if (var->is_var_optimizer_trace()) return true; } return false; } void opt_trace_disable_if_no_tables_access(THD *thd, Table_ref *tbl); } // namespace Opt_trace_start::Opt_trace_start(THD *thd, Table_ref *tbl, enum enum_sql_command sql_command, List *set_vars, const char *query, size_t query_length, sp_printable *instr, const CHARSET_INFO *query_charset) : ctx(&thd->opt_trace) { DBUG_TRACE; /* By default, we need an optimizer trace: - if the user asked for it or - if we are using --debug (because the trace serves as a relay for it, for optimizer debug printouts). */ const ulonglong var = thd->variables.optimizer_trace; bool support_I_S = false, support_dbug_or_missing_priv = false; /* This will be triggered if --debug or --debug=d:opt_trace is used */ DBUG_EXECUTE("opt", support_dbug_or_missing_priv = true;); // First step, decide on what type of I_S support we want if (unlikely(var & Opt_trace_context::FLAG_ENABLED)) { if (sql_command_can_be_traced(sql_command) && // (1) !sets_var_optimizer_trace(sql_command, set_vars) && // (2) !list_has_optimizer_trace_table(tbl) && // (3) !thd->system_thread) // (4) { /* (1) This command is interesting Optimizer-wise. (2) This command is not "SET ... @@optimizer_trace=...". Otherwise, this simple usage: a) enable opt trace with SET b) run SELECT query of interest c) disable opt trace with SET d) read OPTIMIZER_TRACE table would not work: (c) would be traced which would delete the trace of (b). (3) If a SELECT of I_S.OPTIMIZER_TRACE were traced, it would overwrite the interesting trace of the previous statement. Note that list_has_optimizer_trace_table() is an expensive function (scanning the list of all used tables, doing checks on their names) but we call it only if @@optimizer_trace has enabled=on. (4) Usage of the trace in a system thread would be impractical. Additionally: - threads of the Events Scheduler have an unusual security context (thd->m_main_security_ctx.priv_user==NULL, see comment in Security_context::change_security_context()), so we can do no security checks on them, so cannot safely enable tracing. - statement-based replication of "INSERT INTO real_table SELECT * FROM I_S.OPTIMIZER_TRACE" is anyway impossible as @@optimizer_trace* are not replicated, and trace would be different between master and slave unless data and engines and version of the optimizer are strictly identical. - row-based replication of the INSERT SELECT above is still allowed, it does not require enabling optimizer trace on the slave. */ support_I_S = true; } else { /* - statement will not be traced in I_S, - if it uses a subquery, this subquery will not be traced, - if it uses a stored routine, this routine's substatements may be traced. */ } /* We will do security checks. This is true even in the exceptions (1)...(3) above. Otherwise, in: SET OPTIMIZER_TRACE="ENABLED=ON"; SELECT stored_func() FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; (exception 2), we would not check for privilege to do SHOW CREATE on stored_func, then we would enter a substatement, which would be traced, and would expose the function's body. So we will do security checks. So need to inform the trace system that it should be ready for a possible call to missing_privilege() later: */ support_dbug_or_missing_priv = true; } error = ctx->start(support_I_S, support_dbug_or_missing_priv, thd->variables.end_markers_in_json, (var & Opt_trace_context::FLAG_ONE_LINE), thd->variables.optimizer_trace_offset, thd->variables.optimizer_trace_limit, thd->variables.optimizer_trace_max_mem_size, thd->variables.optimizer_trace_features); if (likely(!error)) { if (unlikely(support_I_S) && ctx->is_started()) { if (instr != nullptr) { String buffer; buffer.set_charset(system_charset_info); instr->print(thd, &buffer); ctx->set_query(buffer.ptr(), buffer.length(), query_charset); } else ctx->set_query(query, query_length, query_charset); } } opt_trace_disable_if_no_tables_access(thd, tbl); } Opt_trace_start::~Opt_trace_start() { DBUG_TRACE; if (likely(!error)) ctx->end(); } void opt_trace_print_expanded_query(const THD *thd, Query_block *query_block, Opt_trace_object *trace_object) { const Opt_trace_context *const trace = &thd->opt_trace; /** It's hard to prove that Query_block::print() doesn't modify any of its Item-s in a dangerous way. Item_int::print(), for example, modifies its internal str_value. To make the danger rare, we print the expanded query as rarely as possible: only if I_S output is needed. If only --debug is on, we don't print it. See also the corresponding call to "set_items_ref_array" at end of JOIN::exec(). */ if (likely(!trace->support_I_S())) return; char buff[1024]; String str(buff, sizeof(buff), system_charset_info); str.length(0); /* If this statement is not SELECT, what is shown here can be inexact. INSERT SELECT is shown as SELECT. DELETE WHERE is shown as SELECT WHERE. This is acceptable given the audience (developers) and the goal (the inexact parts are irrelevant for the optimizer). */ query_block->print(thd, &str, enum_query_type(QT_TO_SYSTEM_CHARSET | QT_SHOW_SELECT_NUMBER | QT_NO_DEFAULT_DB)); trace_object->add_utf8("expanded_query", str.ptr(), str.length()); } void opt_trace_disable_if_no_security_context_access(THD *thd) { DBUG_TRACE; if (likely(!(thd->variables.optimizer_trace & Opt_trace_context::FLAG_ENABLED)) || // (1) thd->system_thread) // (2) { /* (1) We know that the routine's execution starts with "enabled=off". If it stays so until the routine ends, we needn't do security checks on the routine. If it does not stay so, it means the definer sets it to "on" somewhere in the routine's body. Then it is his conscious decision to generate traces, thus it is still correct to skip the security check. (2) Threads of the Events Scheduler have an unusual security context (thd->m_main_security_ctx.priv_user==NULL, see comment in Security_context::change_security_context()). */ return; } Opt_trace_context *const trace = &thd->opt_trace; if (!trace->is_started()) { /* @@optimizer_trace has "enabled=on" but trace is not started. Either Opt_trace_start ctor was not called for our statement (3), or it was called but at that time, the variable had "enabled=off" (4). There are no known cases of (3). (4) suggests that the user managed to change the variable during execution of the statement, and this statement is using view/routine (note that we have not been able to provoke this, maybe this is impossible). If it happens it is suspicious. We disable I_S output. And we cannot do otherwise: we have no place to store a possible "missing privilege" information (no Opt_trace_stmt, as is_started() is false), so cannot do security checks, so cannot safely do tracing, so have to disable I_S output. And even then, we don't know when to re-enable I_S output, as we have no place to store the information "re-enable tracing at the end of this statement", and we don't even have a notion of statement here (statements in the optimizer trace world mean an Opt_trace_stmt object, and there is none here). So we must disable for the session's life. COM_FIELD_LIST opens views, thus used to be a case of (3). To avoid disabling I_S output for the session's life when this command is issued (like in: "SET OPTIMIZER_TRACE='ENABLED=ON';USE somedb;" in the 'mysql' command-line client), we have decided to create a Opt_trace_start for this command. The command itself is not traced though (SQLCOM_SHOW_FIELDS does not have CF_OPTIMIZER_TRACE). */ assert(false); trace->disable_I_S_for_this_and_children(); return; } /* Note that thd->m_main_security_ctx.master_access is probably invariant across the life of THD: GRANT/REVOKE don't affect global privileges of an existing connection, per the manual. */ if (!(thd->m_main_security_ctx.check_access(GLOBAL_ACLS & ~GRANT_ACL)) && (0 != strcmp(thd->m_main_security_ctx.priv_user().str, thd->security_context()->priv_user().str) || 0 != my_strcasecmp(system_charset_info, thd->m_main_security_ctx.priv_host().str, thd->security_context()->priv_host().str))) trace->missing_privilege(); } void opt_trace_disable_if_no_stored_proc_func_access(THD *thd, sp_head *sp) { DBUG_TRACE; if (likely(!(thd->variables.optimizer_trace & Opt_trace_context::FLAG_ENABLED)) || thd->system_thread) return; Opt_trace_context *const trace = &thd->opt_trace; if (!trace->is_started()) { assert(false); trace->disable_I_S_for_this_and_children(); return; } bool full_access; Security_context *const backup_thd_sctx = thd->security_context(); DBUG_PRINT("opt", ("routine: '%s'", sp->m_name.str)); thd->set_security_context(&thd->m_main_security_ctx); const bool rc = sp->check_show_access(thd, &full_access) || !full_access; thd->set_security_context(backup_thd_sctx); if (rc) trace->missing_privilege(); } void opt_trace_disable_if_no_view_access(THD *thd, Table_ref *view, Table_ref *underlying_tables) { DBUG_TRACE; if (likely(!(thd->variables.optimizer_trace & Opt_trace_context::FLAG_ENABLED)) || thd->system_thread) return; Opt_trace_context *const trace = &thd->opt_trace; if (!trace->is_started()) { assert(false); trace->disable_I_S_for_this_and_children(); return; } DBUG_PRINT("opt", ("view: '%s'", view->table_name)); Security_context *const backup_table_sctx = view->security_ctx; Security_context *const backup_thd_sctx = thd->security_context(); const GRANT_INFO backup_grant_info = view->grant; view->security_ctx = nullptr; // no SUID context for view // no SUID context for THD thd->set_security_context(&thd->m_main_security_ctx); const int rc = check_table_access(thd, SHOW_VIEW_ACL, view, false, 1, true); view->security_ctx = backup_table_sctx; thd->set_security_context(backup_thd_sctx); view->grant = backup_grant_info; if (rc) { trace->missing_privilege(); return; } /* We needn't check SELECT privilege on this view. Some opt_trace_disable_if_no_tables_access() call has or will check it. Now we check underlying tables/views of our view: */ opt_trace_disable_if_no_tables_access(thd, underlying_tables); } namespace { /** If tracing is on, checks additional privileges on a list of tables/views, to make sure that the user has the right to do SHOW CREATE TABLE/VIEW and "SELECT *". For that: - this functions checks table-level SELECT - which is sufficient for SHOW CREATE TABLE and "SELECT *", if a base table - if a view, if the view has not been identified as such then opt_trace_disable_if_no_view_access() will be later called and check SHOW VIEW; other we check SHOW VIEW here; SHOW VIEW + SELECT is sufficient for SHOW CREATE VIEW. If a privilege is missing, notifies the trace system. @param thd thread context @param tbl list of tables to check */ void opt_trace_disable_if_no_tables_access(THD *thd, Table_ref *tbl) { DBUG_TRACE; if (likely(!(thd->variables.optimizer_trace & Opt_trace_context::FLAG_ENABLED)) || thd->system_thread) return; Opt_trace_context *const trace = &thd->opt_trace; if (!trace->is_started()) { assert(false); trace->disable_I_S_for_this_and_children(); return; } Security_context *const backup_thd_sctx = thd->security_context(); thd->set_security_context(&thd->m_main_security_ctx); const Table_ref *const first_not_own_table = thd->lex->first_not_own_table(); for (Table_ref *t = tbl; t != nullptr && t != first_not_own_table; t = t->next_global) { DBUG_PRINT("opt", ("table: '%s'", t->table_name)); /* Anonymous derived tables (as in "SELECT ... FROM (SELECT ...)") don't have their grant.privilege set. */ if (!(t->is_derived() || t->is_table_function())) { const GRANT_INFO backup_grant_info = t->grant; Security_context *const backup_table_sctx = t->security_ctx; t->security_ctx = nullptr; /* (1) check_table_access() fills t->grant.privilege. (2) Because SELECT privileges can be column-based, check_table_access() will return 'false' as long as there is SELECT privilege on one column. But we want a table-level privilege. */ bool rc = check_table_access(thd, SELECT_ACL, t, false, 1, true) || // (1) ((t->grant.privilege & SELECT_ACL) == 0); // (2) if (t->is_view()) { /* It's a view which has already been opened: we are executing a prepared statement. The view has been unfolded in the global list of tables. So underlying tables will be automatically checked in the present function, but we need an explicit check of SHOW VIEW: */ rc |= check_table_access(thd, SHOW_VIEW_ACL, t, false, 1, true); } t->security_ctx = backup_table_sctx; t->grant = backup_grant_info; if (rc) { trace->missing_privilege(); break; } } } thd->set_security_context(backup_thd_sctx); } } // namespace int fill_optimizer_trace_info(THD *thd, Table_ref *tables, Item *) { TABLE *table = tables->table; Opt_trace_info info; /* When executing a routine which is SQL SECURITY DEFINER, opt-trace specific checks are done with the connected user's privileges; this isn't respecting the meaning of SQL SECURITY DEFINER. If a highly privileged user doesn't know that, he may confidently execute a routine, while this routine nastily uses the connected user's privileges to be allowed to do tracing and gain knowledge about secret objects. This possibility is prevented, by making I_S.OPTIMIZER_TRACE look empty when read from a security context which isn't the connected user's context; with an exception if the SUID security context has all global privileges (in which case the nasty definer has anyway all rights to trace everything). Objects which are SQL SECURITY INVOKER are not considered here: with or without optimizer trace, a highly privileged user must always inspect the body of such object before invoking it. */ if (!(thd->security_context()->check_access((GLOBAL_ACLS & ~GRANT_ACL), tables->get_db_name())) && (0 != strcmp(thd->m_main_security_ctx.priv_user().str, thd->security_context()->priv_user().str) || 0 != my_strcasecmp(system_charset_info, thd->m_main_security_ctx.priv_host().str, thd->security_context()->priv_host().str))) return 0; /* The list must not change during the iterator's life time. This is ok as the life time is only the present block which cannot change the list. */ for (Opt_trace_iterator it(&thd->opt_trace); !it.at_end(); it.next()) { it.get_value(&info); restore_record(table, s->default_values); /* We will put the query, which is in character_set_client, into a column using character_set_client; this is better than UTF8 (see BUG#57306). When literals with introducers are used, see "LiteralsWithIntroducers" in this file. */ table->field[0]->store(info.query_ptr, static_cast(info.query_length), info.query_charset); table->field[1]->store(info.trace_ptr, static_cast(info.trace_length), system_charset_info); table->field[2]->store(info.missing_bytes, true); table->field[3]->store(info.missing_priv, true); if (schema_table_store_record(thd, table)) return 1; } return 0; } ST_FIELD_INFO optimizer_trace_info[] = { /* name, length, type, value, maybe_null, old_name, open_method */ {"QUERY", 65535, MYSQL_TYPE_STRING, 0, false, nullptr, 0}, {"TRACE", 65535, MYSQL_TYPE_STRING, 0, false, nullptr, 0}, {"MISSING_BYTES_BEYOND_MAX_MEM_SIZE", 20, MYSQL_TYPE_LONG, 0, false, nullptr, 0}, {"INSUFFICIENT_PRIVILEGES", 1, MYSQL_TYPE_TINY, 0, false, nullptr, 0}, {nullptr, 0, MYSQL_TYPE_STRING, 0, true, nullptr, 0}}; /* LiteralsWithIntroducers : They may be significantly altered; but this isn't specific to the optimizer trace, it also happens with SHOW PROCESSLIST, and is deemed a not too important problem. Consider mysql> set names latin1; mysql> SELECT 'í', _cp850'í'; | í | Ý | This sends the binary string: SELECT <0xED>, _cp850<0xED> to the server (í is 0xED in latin1). Now we put this into OPTIMIZER_TRACE.QUERY, using latin1 (character_set_client), and the client has switched to utf8: we convert the query from latin1 to utf8 when sending to client, which receives: SELECT <0xC3><0xAD>, _cp850<0xC3><0xAD> (í is <0xC3><0xAD> in utf8). But <0xC3><0xAD> in _cp850 means a completely different character: mysql> set names utf8; mysql> SELECT 'í', _cp850'í'; | í | ├¡ | If the client had originally issued SELECT 'í', _cp850 0xED; there would be no problem ('0', 'x', 'E', and 'D' are identical in latin1 and utf8: they would be preserved during conversion). */