select * from t1 where a = (select 1);
=>
select * from t1 where a = 1;
oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
=>
- oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
- oe $cmp$ \<max\>(SELECT ...) // handled by Item_maxmin_subselect
fails=>Item_in_optimizer
- 对于已经是materialized方案,不转换
- 通过equi-join转换IN到EXISTS
SELECT * FROM t1 WHERE a < ANY (SELECT a FROM t1);
=>
SELECT * FROM t1 WHERE a < (SELECT MAX(a) FROM t1)
SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2)
=>
SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2
[trigcond] HAVING t1.b=ref-to-<expr of SUM(t1.a)>)
不需要区分NULL和FALSE的子查询:
SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
需要区分的子查询:
SELECT 1 FROM ...
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
HAVING trigcond(@<is_not_null_test@>(ie))
for (each left operand)
create the equi-join condition
if (is_having_used || !abort_on_null)
create the "is null" and is_not_null_test items
if (is_having_used)
add the equi-join and the null tests to HAVING
else
add the equi-join and the "is null" to WHERE
add the is_not_null_test to HAVING
(l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where) =>
EXISTS (SELECT ... WHERE where and
(l1 = v1 or is null v1) and
(l2 = v2 or is null v2) and
(l3 = v3 or is null v3)
[ HAVING is_not_null_test(v1) and
is_not_null_test(v2) and
is_not_null_test(v3)) ] <-- 保证不为NULL可以去掉HAVING
(l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having) =>
EXISTS (SELECT ... HAVING having and
(l1 = v1 or is null v1) and
(l2 = v2 or is null v2) and
(l3 = v3 or is null v3) and
is_not_null_test(v1) and
is_not_null_test(v2) and
is_not_null_test(v3))
root:test> set optimizer_switch = 'subquery_to_derived=off';
Query OK, 0 rows affected (0.00 sec)
root:test> EXPLAIN SELECT b, MAX(a) AS ma FROM t4 GROUP BY b HAVING ma < (SELECT MAX(t2.a) FROM t2 WHERE t2.b=t4.b);
+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+
| 1 | PRIMARY | t4 | NULL | ALL | NULL | NULL | NULL | NULL | 10 | 100.00 | Using temporary |
| 2 | DEPENDENT SUBQUERY | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 33.33 | Using where |
+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+
2 rows in set, 3 warnings (0.00 sec)
root:test> set optimizer_switch = 'subquery_to_derived=on';
Query OK, 0 rows affected (0.00 sec)
root:test> EXPLAIN SELECT b, MAX(a) AS ma FROM t4 GROUP BY b HAVING ma < (SELECT MAX(t2.a) FROM t2 WHERE t2.b=t4.b);
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| 1 | PRIMARY | t4 | NULL | ALL | NULL | NULL | NULL | NULL | 10 | 100.00 | Using temporary |
| 1 | PRIMARY | <derived2> | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 100.00 | Using where; Using join buffer (hash join) |
| 2 | DERIVED | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 100.00 | Using temporary |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
3 rows in set, 3 warnings (0.01 sec)
SELECT SUM(c1), (SELECT SUM(c1) FROM t3) scalar FROM t1;
转换为=>
SELECT derived0.summ, derived1.scalar
FROM (SELECT SUM(a) AS summ FROM t1) AS derived0
LEFT JOIN
(SELECT SUM(b) AS scalar FROM t3) AS derived1
ON TRUE
执行计划如下:
explain SELECT SUM(a), (SELECT SUM(c1) FROM t3) scalar FROM t1;
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| 1 | PRIMARY | <derived3> | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | NULL |
| 1 | PRIMARY | <derived4> | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where; Using join buffer (hash join) |
| 4 | DERIVED | t3 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | NULL |
| 3 | DERIVED | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 2 | 100.00 | NULL |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
SELECT SUM(a), (SELECT SUM(b) FROM t3) scalar
FROM t1
HAVING SUM(a) > scalar;
> =
SELECT derived0.summ, derived1.scalar
FROM (SELECT SUM(a) AS summ FROM t1) AS derived0
LEFT JOIN
SUM(b) AS scalar FROM t3) AS derived1
ON TRUE
WHERE derived0.sum > derived1.scalar;
subq_item->sj_convert_priority =
(((dependent * MAX_TABLES_FOR_SIZE) + // dependent subqueries first
child_query_block->leaf_table_count) *
65536) + // then with many tables
(65536 - subq_no); // then based on position
for SELECT#1 WHERE X IN (SELECT #2 WHERE Y IN (SELECT#3)) :
Query_block::prepare() (select#1)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#2)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#3)
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #3 in #2
<- flatten_subqueries
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #2 in #1
FROM [tables] WHERE ... AND/OR oe IN (SELECT ie FROM it) ...
=>
FROM (tables) LEFT JOIN (SELECT DISTINCT ie FROM it) AS derived
ON oe = derived.ie WHERE ... AND/OR derived.ie IS NOT NULL ...
SELECT ...
FROM ot1 ... otN
WHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieM
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
=>
SELECT ...
FROM (ot1 ... otN) SJ (it1 ... itK)
ON (oe1, ... oeM) = (ie1, ..., ieM)
[AND inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...
FROM ot1 ... otN
WHERE EXISTS (SELECT expressions
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
=>
SELECT ...
FROM (ot1 ... otN) SJ (it1 ... itK)
[ON inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...
FROM ot1 ... otN
WHERE NOT EXISTS (SELECT expressions
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
=>
SELECT ...
FROM (ot1 ... otN) AJ (it1 ... itK)
[ON inner-cond]
[WHERE outer-cond AND is-null-cond(it1)]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...
FROM ot1 ... otN
WHERE (oe1, ... oeM) NOT IN (SELECT ie1, ..., ieM
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
=>
SELECT ...
FROM (ot1 ... otN) AJ (it1 ... itK)
ON (oe1, ... oeM) = (ie1, ..., ieM)
[AND inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT @@optimizer_switch\G
*************************** 1. row ***************************
@@optimizer_switch: ......
materialization=on,semijoin=on,loosescan=on,
firstmatch=on,
subquery_materialization_cost_based=on,
......
SELECT * FROM t1 WHERE t1.a in (SELECT t2.c1 FROM t2 where t2.c1 > 0);
=>
/* select#1 */
SELECT `t1`.`a` AS `a`
FROM `t1`
SEMI JOIN (`t2`)
WHERE ((`t1`.`a` = `t2`.`c1`) and (`t2`.`c1` > 0))
执行计划如下:
explain SELECT * FROM t1 WHERE t1.a in (SELECT t2.c1 FROM t2 where t2.c1 > 0);
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+
| 1 | SIMPLE | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where; Start temporary |
| 1 | SIMPLE | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 2 | 50.00 | Using where; End temporary; Using join buffer (hash join) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+
struct TABLE {
......
partition_info *part_info{nullptr}; /* Partition related information */
/* If true, all partitions have been pruned away */
bool all_partitions_pruned_away{false};
......
}
SQL tranformation phase
SELECT_LEX::apply_local_transforms
--> prune_partitions
for example, select * from employee where company_id = 1000 ;
SQL optimizer phase
JOIN::prune_table_partitions
--> prune_partitions
------> based on tbl->join_cond_optim() or JOIN::where_cond
for example, explain select * from employee where company_id = (select c1 from t1);
root:ref> CREATE TABLE R2 (
-> a INT,
-> d INT
-> ) PARTITION BY RANGE(a) (
-> PARTITION p20 VALUES LESS THAN (20),
-> PARTITION p40 VALUES LESS THAN (40),
-> PARTITION p60 VALUES LESS THAN (60),
-> PARTITION p80 VALUES LESS THAN (80),
-> PARTITION p100 VALUES LESS THAN MAXVALUE
-> );
Query OK, 0 rows affected (0.09 sec)
root:ref> Select * From R2 where a > 40 and a < 80;
(start)
| $
| Partitioning keyparts $ subpartitioning keyparts
| $
| ... ... $
| | | $
| +---------+ +---------+ $ +-----------+ +-----------+
\-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
+---------+ +---------+ $ +-----------+ +-----------+
| $ | |
| $ | +-----------+
| $ | | subpar2=c6|
| $ | +-----------+
| $ |
| $ +-----------+ +-----------+
| $ | subpar1=c4|--| subpar2=c8|
| $ +-----------+ +-----------+
| $
| $
+---------+ $ +------------+ +------------+
| par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
+---------+ $ +------------+ +------------+
| $
... $
例如第一行(par1=c1 and par2=c2 and subpar1=c3 and subpar2=c5)的遍历的stack将是:
in find_used_partitions(key_tree = "subpar2=c5") (***)
in find_used_partitions(key_tree = "subpar1=c3")
in find_used_partitions(key_tree = "par2=c2") (**)
in find_used_partitions(key_tree = "par1=c1")
in prune_partitions(...)
然后是继续下面的条件,以此类推
or(par1=c1 and par2=c2 and subpar1=c3 and subpar2=c6)
or(par1=c1 and par2=c2 and subpar1=c4 and subpar2=c8)
or(par1=c2 and subpar1=c10 and subpar2=c12)
root:test> set optimizer_switch = 'derived_merge=off'; // 关闭dervied_merge 测试下推能力
Query OK, 0 rows affected (0.00 sec)
root:test> EXPLAIN FORMAT=tree SELECT * FROM (SELECT c1,c2 FROM t1) as dt WHERE c1 > 10;
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| EXPLAIN |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| -> Table scan on dt (cost=2.51..2.51 rows=1)
-> Materialize (cost=2.96..2.96 rows=1)
-> Filter: (t1.c1 > 10) (cost=0.35 rows=1)
-> Table scan on t1 (cost=0.35 rows=1)
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
《WL#4389: Subquery optimizations: Make IN optimizations also handle EXISTS》
《WL#4245: Subquery optimization: Transform NOT EXISTS and NOT IN to anti-join》