CREATETABLE t1(id INTPRIMARY KEY, a INT, b INT, INDEX(a)); DELIMITER ;; CREATEPROCEDURE idata() BEGIN DECLARE i INT;
SET i=1; WHILE (i<=1000) DO INSERTINTO t1 VALUES (i,i,i); SET i=i+1; END WHILE; END;; DELIMITER ; CALL idata();
执行语句
1 2 3 4 5 6 7 8 9 10
(SELECT1000AS f) UNION (SELECT id FROM t1 ORDERBY id DESC LIMIT 2);
mysql> EXPLAIN (SELECT1000AS f) UNION (SELECT id FROM t1 ORDERBY id DESC LIMIT 2); +----+--------------+------------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+ | id | select_type |table| partitions | type | possible_keys | key | key_len |ref|rows| filtered | Extra | +----+--------------+------------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+ |1|PRIMARY|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|No tables used | |2|UNION| t1 |NULL| index |NULL|PRIMARY|4|NULL|2|100.00| Backward index scan; Using index | |NULL|UNIONRESULT|<union1,2>|NULL|ALL|NULL|NULL|NULL|NULL|NULL|NULL|Using temporary | +----+--------------+------------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+
第二行的Key=PRIMARY,说明第二个子查询用到了索引id
第三行的Extra字段为Using temporary
表示在对子查询的结果做UNION RESULT的时候,使用了临时表
UNION RESULT
创建一个内存临时表,这个内存临时表只有一个整型字段f,并且f为主键
执行第一个子查询,得到1000,并存入内存临时表中
执行第二个子查询
拿到第一行id=1000,试图插入到内存临时表,但由于1000这个值已经存在于内存临时表
违反唯一性约束,插入失败,继续执行
拿到第二行id=999,插入内存临时表成功
从内存临时表中按行取出数据,返回结果,并删除内存临时表,结果中包含id=1000和id=999两行
内存临时表起到了暂存数据的作用,还用到了内存临时表主键id的唯一性约束,实现UNION的语义
UNION ALL
UNION ALL没有去重的语义,一次执行子查询,得到的结果直接发给客户端,不需要内存临时表
1 2 3 4 5 6 7
mysql> EXPLAIN (SELECT1000AS f) UNIONALL (SELECT id FROM t1 ORDERBY id DESC LIMIT 2); +----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+ | id | select_type |table| partitions | type | possible_keys | key | key_len |ref|rows| filtered | Extra | +----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+ |1|PRIMARY|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|No tables used | |2|UNION| t1 |NULL| index |NULL|PRIMARY|4|NULL|2|100.00| Backward index scan; Using index | +----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+----------------------------------+
-- MySQL 5.6上执行 mysql> EXPLAIN SELECT id%10AS m, COUNT(*) AS c FROM t1 GROUPBY m; +----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------+ | id | select_type |table| type | possible_keys | key | key_len |ref|rows| Extra | +----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------+ |1| SIMPLE | t1 | index |PRIMARY,a | a |5|NULL|1000|Using index; Using temporary; Using filesort | +----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------+
mysql>SELECT id%10AS m, COUNT(*) AS c FROM t1 GROUPBY m; +------+-----+ | m | c | +------+-----+ |0|100| |1|100| |2|100| |3|100| |4|100| |5|100| |6|100| |7|100| |8|100| |9|100| +------+-----+
-- 跳过最后的排序阶段,直接从临时表中取回数据 mysql> EXPLAIN SELECT id%10AS m, COUNT(*) AS c FROM t1 GROUPBY m ORDERBYNULL; +----+-------------+-------+-------+---------------+------+---------+------+------+------------------------------+ | id | select_type |table| type | possible_keys | key | key_len |ref|rows| Extra | +----+-------------+-------+-------+---------------+------+---------+------+------+------------------------------+ |1| SIMPLE | t1 | index |PRIMARY,a | a |5|NULL|1000|Using index; Using temporary | +----+-------------+-------+-------+---------------+------+---------+------+------+------------------------------+
-- t1中的数据是从1开始的 mysql>SELECT id%10AS m, COUNT(*) AS c FROM t1 GROUPBY m ORDERBYNULL; +------+-----+ | m | c | +------+-----+ |1|100| |2|100| |3|100| |4|100| |5|100| |6|100| |7|100| |8|100| |9|100| |0|100| +------+-----+
内存不足
1
SET tmp_table_size=1024;
执行语句
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
-- 内存临时表的上限为1024 Bytes,但内存临时表不能完全放下100行数据,内存临时表会转成磁盘临时表,默认采用InnoDB引擎 -- 如果t1很大,这个查询需要的磁盘临时表就会占用大量的磁盘空间 mysql>SELECT id%100AS m, count(*) AS c FROM t1 GROUPBY m ORDERBYNULL LIMIT 10; +------+----+ | m | c | +------+----+ |1|10| |2|10| |3|10| |4|10| |5|10| |6|10| |7|10| |8|10| |9|10| |10|10| +------+----+
优化方案
优化索引
不论使用内存临时表还是磁盘临时表,GROUP BY都需要构造一个带唯一索引的表,_执行代价较高_
需要临时表的原因:每一行的id%100是无序的,因此需要临时表,来记录并统计结果
如果可以确保输入的数据是有序的,那么计算GROUP BY时,只需要从左到右顺序扫描,依次累加即可
当碰到第一个1的时候,已经累积了X个0,结果集里的第一行为(0,X)
当碰到第一个2的时候,已经累积了Y个1,结果集里的第一行为(1,Y)
整个过程不需要临时表,也不需要排序
1 2 3 4 5 6 7 8 9 10
-- MySQL 5.7上执行 ALTERTABLE t1 ADDCOLUMN z INT GENERATED ALWAYS AS(id %100), ADD INDEX(z);
-- 使用了覆盖索引,不需要临时表,也不需要排序 mysql> EXPLAIN SELECT z, COUNT(*) AS c FROM t1 GROUPBY z; +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+ | id | select_type |table| partitions | type | possible_keys | key | key_len |ref|rows| filtered | Extra | +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+ |1| SIMPLE | t1 |NULL| index | z | z |5|NULL|1000|100.00|Using index | +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
-- 没有再使用临时表,而是直接使用了排序算法 mysql> EXPLAIN SELECT SQL_BIG_RESULT id%100AS m, COUNT(*) AS c FROM t1 GROUPBY m; +----+-------------+-------+-------+---------------+------+---------+------+------+-----------------------------+ | id | select_type |table| type | possible_keys | key | key_len |ref|rows| Extra | +----+-------------+-------+-------+---------------+------+---------+------+------+-----------------------------+ |1| SIMPLE | t1 | index |PRIMARY,a | a |5|NULL|1000|Using index; Using filesort | +----+-------------+-------+-------+---------------+------+---------+------+------+-----------------------------+