/* Copyright (c) 2020, 2024, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the Free Software Foundation. This program is designed to work with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the separately licensed software that they have either included with the program or referenced in the documentation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef SQL_JOIN_OPTIMIZER_RELATIONAL_EXPRESSION_H #define SQL_JOIN_OPTIMIZER_RELATIONAL_EXPRESSION_H #include #include "sql/item.h" #include "sql/join_optimizer/bit_utils.h" #include "sql/join_optimizer/estimate_selectivity.h" #include "sql/join_optimizer/node_map.h" #include "sql/join_optimizer/overflow_bitset.h" #include "sql/join_type.h" #include "sql/mem_root_array.h" #include "sql/sql_class.h" struct AccessPath; class Item_eq_base; class Item_func_eq; // Some information about each predicate that the join optimizer would like to // have available in order to avoid computing it anew for each use of that // predicate. struct CachedPropertiesForPredicate { Mem_root_array contained_subqueries; double selectivity; // For equijoins only: A bitmap of which sargable predicates // are part of the same multi-equality as this one (except the // condition itself, which is excluded), and thus are redundant // against it. This is used in AlreadyAppliedThroughSargable() // to quickly find out if we already have applied any of them // as a join condition. OverflowBitset redundant_against_sargable_predicates; }; // Describes a rule disallowing specific joins; if any tables from // needed_to_activate_rule is part of the join, then _all_ tables from // required_nodes must also be present. // // See FindHyperedgeAndJoinConflicts() for details. struct ConflictRule { hypergraph::NodeMap needed_to_activate_rule; hypergraph::NodeMap required_nodes; }; /** RelationalExpression objects in the same companion set are those that are inner-joined against each other; we use this to see in what parts of the graph we allow cycles. (Within companion sets, we are also allowed to add Cartesian products if we deem that an advantage, but we don't do it currently.) Tables may be alone in their companion sets. Companion sets are also used when calculating selectivity for equijoin predicates using multi-field indexes, @see EstimateEqualPredicateSelectivity()). */ class CompanionSet final { public: CompanionSet() = default; explicit CompanionSet(THD *thd) : m_equal_terms(thd->mem_root) {} /// No copying. CompanionSet(const CompanionSet &) = delete; CompanionSet &operator=(const CompanionSet &) = delete; /// Add the set of equal fields specified by 'func_eq'. void AddEquijoinCondition(THD *thd, const Item_func_eq &eq); /** If 'field' is part of an equijoin predicate in this CompanionSet, return a table_map of the tables involved in that predicate. Otherwise, return 0. */ table_map GetEqualityMap(const Field &field) const; /// For tracing and debugging. /// @returns A string representation like "{{t1.f1, t2.f2}, {t2.f3, t3.f4}}". std::string ToString() const; private: using FieldArray = Mem_root_array; /** This represents equality between a set of fields, i.e. "t1.f1=t2.f2...=tN.fN". */ struct EqualTerm { /// The fields that are equal to each other. FieldArray *fields; /// A map of all tables in 'fields'. table_map tables; }; /** The set of sets of fields in equijoin predicates in this companion set. (@see EstimateEqualPredicateSelectivity() to see how this is utilized.) For example, if we have: SELECT ... FROM t1, t2, t3 WHERE t1.x=t2.x AND t2.x=t3.x AND t2.y=t3.y m_equal_terms will contain: {{t1.x, t2.x, t3.x}, {t2.y, t3.y}} */ Mem_root_array m_equal_terms; }; /** Represents an expression tree in the relational algebra of joins. Expressions are either tables, or joins of two expressions. (Joins can have join conditions, but more general filters are not represented in this structure.) These are used as an abstract precursor to the join hypergraph; they represent the joins in the query block more or less directly, without any reordering. (The parser should largely have output a structure like this instead of Table_ref, but we are not there yet.) The only real manipulation we do on them is pushing down conditions, identifying equijoin conditions from other join conditions, and identifying join conditions that touch given tables (also a form of pushdown). */ struct RelationalExpression { enum Type { INNER_JOIN = static_cast(JoinType::INNER), LEFT_JOIN = static_cast(JoinType::OUTER), /// Left semijoin. SEMIJOIN = static_cast(JoinType::SEMI), /// Left antijoin. ANTIJOIN = static_cast(JoinType::ANTI), // STRAIGHT_JOIN is an inner join that the user has specified // is noncommutative (as a hint, but one we are not allowed to // disregard). STRAIGHT_INNER_JOIN = 101, // Generally supported by the conflict detector only, not the parser // or any iterators. We include this because we will be needing it // when we actually implement full outer join, and because it helps // verifying semijoin correctness in the unit tests (see the CountPlans* // tests). FULL_OUTER_JOIN = static_cast(JoinType::FULL_OUTER), // An inner join between two _or more_ tables, with no join conditions. // This is a special form used only during pushdown, for increased // flexibility in reordering. MULTI_INNER_JOIN nodes do not use // left and right, but rather store all its children in multi_children // (which is empty for all other types). MULTI_INNER_JOIN = 102, TABLE = 100 } type; explicit RelationalExpression(THD *thd) : multi_children(thd->mem_root), join_conditions(thd->mem_root), equijoin_conditions(thd->mem_root), properties_for_join_conditions(thd->mem_root), properties_for_equijoin_conditions(thd->mem_root), m_pushable_conditions(thd->mem_root) {} table_map tables_in_subtree; // Exactly the same as tables_in_subtree, just with node indexes instead of // table indexes. This is stored alongside tables_in_subtree to save the cost // and convenience of doing repeated translation between the two. hypergraph::NodeMap nodes_in_subtree; // If type == TABLE. const Table_ref *table; // The CompanionSet that this object is part of. CompanionSet *companion_set{nullptr}; // If type != TABLE. Note that equijoin_conditions will be split off // from join_conditions fairly late (at CreateHashJoinConditions()), // so often, you will see equijoin conditions in join_condition.. RelationalExpression *left, *right; Mem_root_array multi_children; // See MULTI_INNER_JOIN. Mem_root_array join_conditions; Mem_root_array equijoin_conditions; // For each element in join_conditions and equijoin_conditions (respectively), // contains some cached properties that the join optimizer would like to have // available for frequent reuse. // // It is a bit awkward to have these separate instead of in the same arrays, // but the latter would complicate MakeJoinHypergraph() a fair amount, // as this information is private to the join optimizer (ie., it is not // generated along with the hypergraph; it is added after MakeJoinHypergraph() // is completed). Mem_root_array properties_for_join_conditions; Mem_root_array properties_for_equijoin_conditions; // If true, at least one condition under “join_conditions” is a false (0) // constant. (Such conditions can never be under “equijoin_conditions”.) bool join_conditions_reject_all_rows{false}; table_map conditions_used_tables{0}; // If the join conditions were also added as predicates due to cycles // in the graph (see comment in AddCycleEdges()), contains a range of // which indexes they got in the predicate list. This is so that we know that // they are redundant and don't have to apply them if we actually apply this // join (as opposed to getting the edge implicitly by means of joining the // tables along some other way in the cycle). int join_predicate_first{0}, join_predicate_last{0}; // Conflict rules that must be checked before making a subgraph // out of this join; this is in addition to the regular connectivity // check. See FindHyperedgeAndJoinConflicts() for more details. Mem_root_array conflict_rules; const Mem_root_array &pushable_conditions() const { return m_pushable_conditions; } /// Add a condition that can be pushed down to the acces path for 'table'. void AddPushable(Item *cond) { assert(type == TABLE); assert(table->map() && cond->used_tables() != 0); // Don't add duplicates. if (std::none_of( m_pushable_conditions.cbegin(), m_pushable_conditions.cend(), [&](const Item *other) { return ItemsAreEqual(cond, other); })) { m_pushable_conditions.push_back(cond); } } private: /// Conditions that can be pushed down to the acces path for 'table' Mem_root_array m_pushable_conditions; }; // Check conflict rules; usually, they will be empty, but the hyperedges are // not able to encode every single combination of disallowed joins. inline bool PassesConflictRules(hypergraph::NodeMap joined_tables, const RelationalExpression *expr) { for (const ConflictRule &rule : expr->conflict_rules) { if (Overlaps(joined_tables, rule.needed_to_activate_rule) && !IsSubset(rule.required_nodes, joined_tables)) { return false; } } return true; } // Whether (a b) === (b a). See also OperatorsAreAssociative() and // OperatorsAre{Left,Right}Asscom() in make_join_hypergraph.cc. inline bool OperatorIsCommutative(const RelationalExpression &expr) { return expr.type == RelationalExpression::INNER_JOIN || expr.type == RelationalExpression::FULL_OUTER_JOIN; } // Call the given functor on each non-table operator in the tree below expr, // including expr itself, in post-traversal order. template void ForEachJoinOperator(RelationalExpression *expr, Func &&func) { if (expr->type == RelationalExpression::TABLE) { return; } ForEachJoinOperator(expr->left, std::forward(func)); ForEachJoinOperator(expr->right, std::forward(func)); func(expr); } template void ForEachOperator(RelationalExpression *expr, Func &&func) { if (expr->type != RelationalExpression::TABLE) { ForEachOperator(expr->left, std::forward(func)); ForEachOperator(expr->right, std::forward(func)); } func(expr); } /// The collection of CompanionSet objects for a given JoinHypergraph. class CompanionSetCollection final { public: CompanionSetCollection(THD *thd, struct RelationalExpression *root) { Compute(thd, root, nullptr); } /// No copying. CompanionSetCollection(const CompanionSetCollection &) = delete; CompanionSetCollection &operator=(const CompanionSetCollection &) = delete; CompanionSet *Find(table_map tables) { return FindInternal(tables); } const CompanionSet *Find(table_map tables) const { return FindInternal(tables); } /// For trace and debugging. std::string ToString() const; private: /// A mapping from table number to CompanionSet. std::array m_table_num_to_companion_set{nullptr}; /** Compute the CompanionSet of 'expr' and all of its descendants. @param thd The current thread. @param expr Compute CompanionSet of this and all of its descendants. @param current_set The CompanionSet to which 'expr' will belong, or nullptr if 'expr' is the root of a new set. */ void Compute(THD *thd, RelationalExpression *expr, CompanionSet *current_set); /** For a given set of tables, find the CompanionSet they are part of Returns nullptr if the tables are in different (i.e., incompatible) CompanionSet instances. If so, a condition using this set of tables can _not_ induce a new (cycle) edge in the hypergraph, as there are non-inner joins in the way. */ CompanionSet *FindInternal(table_map tables) const; }; #endif // SQL_JOIN_OPTIMIZER_RELATIONAL_EXPRESSION_H