不等於 != , <>
-- 1 有 pk 的情況
create table it201010a (
id int not null primary key
, val int not null
);
insert into it201010a
select generate_series(1,1e5)
, random() * 10000;
-- 建立 index
create index on it201010a(val);
commit;
analyze it201010a;
explain (analyze,verbose,timing,costs,buffers)
select *
from it201010a
where val <> 100;
+-------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-------------------------------------------------------------------------------------------------------------------+
| Seq Scan on aki.it201010a (cost=0.00..1693.00 rows=99990 width=8) (actual time=0.040..18.171 rows=99995 loops=1) |
| Output: id, val |
| Filter: (it201010a.val <> 100) |
| Rows Removed by Filter: 5 |
| Buffers: shared hit=443 |
| Planning Time: 0.123 ms |
| Execution Time: 31.306 ms |
+-------------------------------------------------------------------------------------------------------------------+
-- 可以觀察到,當過濾條件是不等於(<>)時, 不使用 index
explain (analyze,timing,costs)
select t1.*
from it201010a t1
left join it201010a t2
on (t1.id = t2.id
and t2.val = 100)
where t2.* is null;
+---------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+---------------------------------------------------------------------------------------------------------------------------------------+
| Hash Left Join (cost=40.24..1745.75 rows=500 width=8) (actual time=0.087..38.241 rows=99995 loops=1) |
| Hash Cond: (t1.id = t2.id) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 5 |
| -> Seq Scan on it201010a t1 (cost=0.00..1443.00 rows=100000 width=8) (actual time=0.012..12.280 rows=100000 loops=1) |
| -> Hash (cost=40.11..40.11 rows=10 width=36) (actual time=0.060..0.061 rows=5 loops=1) |
| Buckets: 1024 Batches: 1 Memory Usage: 9kB |
| -> Bitmap Heap Scan on it201010a t2 (cost=4.50..40.11 rows=10 width=36) (actual time=0.044..0.054 rows=5 loops=1) |
| Recheck Cond: (val = 100) |
| Heap Blocks: exact=5 |
| -> Bitmap Index Scan on it201010a_val_idx (cost=0.00..4.49 rows=10 width=0) (actual time=0.031..0.032 rows=5 loops=1) |
| Index Cond: (val = 100) |
| Planning Time: 0.371 ms |
| Execution Time: 46.063 ms |
+---------------------------------------------------------------------------------------------------------------------------------------+
-- 使用了 index.
set enable_seqscan=off;
explain (analyze,timing,costs)
select t1.*
from it201010a t1
left join it201010a t2
on (t1.id = t2.id
and t2.val = 100)
where t2.* is null;
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| Merge Left Join (cost=40.57..3340.72 rows=500 width=8) (actual time=3.756..55.155 rows=99995 loops=1) |
| Merge Cond: (t1.id = t2.id) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 5 |
| -> Index Scan using it201010a_pkey on it201010a t1 (cost=0.29..3050.29 rows=100000 width=8) (actual time=0.054..27.778 rows=100000 loops=1) |
| -> Sort (cost=40.28..40.30 rows=10 width=36) (actual time=3.692..3.694 rows=5 loops=1) |
| Sort Key: t2.id |
| Sort Method: quicksort Memory: 25kB |
| -> Bitmap Heap Scan on it201010a t2 (cost=4.50..40.11 rows=10 width=36) (actual time=0.252..0.360 rows=5 loops=1) |
| Recheck Cond: (val = 100) |
| Heap Blocks: exact=5 |
| -> Bitmap Index Scan on it201010a_val_idx (cost=0.00..4.49 rows=10 width=0) (actual time=0.128..0.128 rows=5 loops=1) |
| Index Cond: (val = 100) |
| Planning Time: 1.516 ms |
| Execution Time: 65.098 ms |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
set enable_sort=off;
explain (analyze,timing,costs)
select t1.*
from it201010a t1
left join it201010a t2
on (t1.id = t2.id
and t2.val = 100)
where t2.* is null;
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| Hash Left Join (cost=40.53..3353.04 rows=500 width=8) (actual time=0.153..47.913 rows=99995 loops=1) |
| Hash Cond: (t1.id = t2.id) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 5 |
| -> Index Scan using it201010a_pkey on it201010a t1 (cost=0.29..3050.29 rows=100000 width=8) (actual time=0.019..21.066 rows=100000 loops=1) |
| -> Hash (cost=40.11..40.11 rows=10 width=36) (actual time=0.088..0.089 rows=5 loops=1) |
| Buckets: 1024 Batches: 1 Memory Usage: 9kB |
| -> Bitmap Heap Scan on it201010a t2 (cost=4.50..40.11 rows=10 width=36) (actual time=0.032..0.043 rows=5 loops=1) |
| Recheck Cond: (val = 100) |
| Heap Blocks: exact=5 |
| -> Bitmap Index Scan on it201010a_val_idx (cost=0.00..4.49 rows=10 width=0) (actual time=0.019..0.019 rows=5 loops=1) |
| Index Cond: (val = 100) |
| Planning Time: 0.331 ms |
| Execution Time: 55.753 ms |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
(14 rows)
set enable_hashjoin=off;
explain (analyze,timing,costs)
select t1.*
from it201010a t1
left join it201010a t2
on (t1.id = t2.id
and t2.val = 100)
where t2.* is null;
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
| Merge Left Join (cost=0.58..6600.71 rows=500 width=8) (actual time=4.255..61.982 rows=99995 loops=1) |
| Merge Cond: (t1.id = t2.id) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 5 |
| -> Index Scan using it201010a_pkey on it201010a t1 (cost=0.29..3050.29 rows=100000 width=8) (actual time=0.011..19.161 rows=100000 loops=1) |
| -> Index Scan using it201010a_pkey on it201010a t2 (cost=0.29..3300.29 rows=10 width=36) (actual time=4.237..18.359 rows=5 loops=1) |
| Filter: (val = 100) |
| Rows Removed by Filter: 99995 |
| Planning Time: 0.348 ms |
| Execution Time: 69.319 ms |
+-------------------------------------------------------------------------------------------------------------------------------------------------+
(10 rows)
---------
set enable_hashjoin=on;
set enable_sort=on;
set enable_seqscan=on;
--------
-- 2 沒有 PK 的情況
create table it201010b (
val int not null
);
insert into it201010b
select random() * 10000
from generate_series(1,1e5);
create index on it201010b(val);
commit;
analyze it201010b;
-- 沒有PK,我們可以用系統的ctid
explain (analyze,timing,costs)
select t1.*
from it201010b t1
left join it201010b t2
on (t1.ctid = t2.ctid
and t2.val = 100)
where t2.* is null;
+---------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+---------------------------------------------------------------------------------------------------------------------------------------+
| Hash Left Join (cost=40.24..1858.34 rows=500 width=4) (actual time=0.196..39.142 rows=99991 loops=1) |
| Hash Cond: (t1.ctid = t2.ctid) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 9 |
| -> Seq Scan on it201010b t1 (cost=0.00..1443.00 rows=100000 width=10) (actual time=0.018..16.793 rows=100000 loops=1) |
| -> Hash (cost=40.11..40.11 rows=10 width=34) (actual time=0.117..0.118 rows=9 loops=1) |
| Buckets: 1024 Batches: 1 Memory Usage: 9kB |
| -> Bitmap Heap Scan on it201010b t2 (cost=4.50..40.11 rows=10 width=34) (actual time=0.091..0.108 rows=9 loops=1) |
| Recheck Cond: (val = 100) |
| Heap Blocks: exact=9 |
| -> Bitmap Index Scan on it201010b_val_idx (cost=0.00..4.49 rows=10 width=0) (actual time=0.051..0.051 rows=9 loops=1) |
| Index Cond: (val = 100) |
| Planning Time: 0.223 ms |
| Execution Time: 47.081 ms |
+---------------------------------------------------------------------------------------------------------------------------------------+
----------
-- 3 Array
create table it201010c (
id int generated always as identity
, arr int[] not null
);
do $$
declare
i int;
begin
for i in 1..10000 loop
insert into it201010c(arr)
select array_agg(round(random() * 1e3))
from generate_series(1,100);
end loop;
end;
$$ language plpgsql;
commit;
create index on it201010c using gin(arr);
analyze it201010c;
--
explain (analyze,timing,costs)
select *
from it201010c
where not (arr @> array[1,2]);
+--------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+--------------------------------------------------------------------------------------------------------------+
| Seq Scan on it201010c (cost=0.00..714.00 rows=9913 width=428) (actual time=0.027..12.739 rows=9921 loops=1) |
| Filter: (NOT (arr @> '{1,2}'::integer[])) |
| Rows Removed by Filter: 79 |
| Planning Time: 3.535 ms |
| Execution Time: 13.665 ms |
+--------------------------------------------------------------------------------------------------------------+
-- 一樣用 left join , null
explain (analyze,timing,costs)
select t1.*
from it201010c t1
left join it201010c t2
on (t1.id = t2.id
and t2.arr @> array[1,2])
where t2.* is null;
+-----------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-----------------------------------------------------------------------------------------------------------------------------------------+
| Hash Left Join (cost=259.06..986.43 rows=50 width=428) (actual time=4.541..10.398 rows=9921 loops=1) |
| Hash Cond: (t1.id = t2.id) |
| Filter: (t2.* IS NULL) |
| Rows Removed by Filter: 79 |
| -> Seq Scan on it201010c t1 (cost=0.00..689.00 rows=10000 width=428) (actual time=0.008..2.415 rows=10000 loops=1) |
| -> Hash (cost=257.97..257.97 rows=87 width=456) (actual time=4.523..4.524 rows=79 loops=1) |
| Buckets: 1024 Batches: 1 Memory Usage: 46kB |
| -> Bitmap Heap Scan on it201010c t2 (cost=20.67..257.97 rows=87 width=456) (actual time=4.366..4.480 rows=79 loops=1) |
| Recheck Cond: (arr @> '{1,2}'::integer[]) |
| Heap Blocks: exact=76 |
| -> Bitmap Index Scan on it201010c_arr_idx (cost=0.00..20.65 rows=87 width=0) (actual time=4.325..4.326 rows=79 loops=1) |
| Index Cond: (arr @> '{1,2}'::integer[]) |
| Planning Time: 3.118 ms |
| Execution Time: 11.486 ms |
+-----------------------------------------------------------------------------------------------------------------------------------------+
-- 利用 not exists
explain (analyze,timing,costs)
select t1.*
from it201010c t1
where not exists (
select 1
from it201010c t2
where t1.id = t2.id
and t2.arr @> array[1,2]);
+-----------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-----------------------------------------------------------------------------------------------------------------------------------------+
| Hash Anti Join (cost=259.06..1073.54 rows=9913 width=428) (actual time=0.404..7.478 rows=9921 loops=1) |
| Hash Cond: (t1.id = t2.id) |
| -> Seq Scan on it201010c t1 (cost=0.00..689.00 rows=10000 width=428) (actual time=0.008..2.913 rows=10000 loops=1) |
| -> Hash (cost=257.97..257.97 rows=87 width=4) (actual time=0.374..0.375 rows=79 loops=1) |
| Buckets: 1024 Batches: 1 Memory Usage: 11kB |
| -> Bitmap Heap Scan on it201010c t2 (cost=20.67..257.97 rows=87 width=4) (actual time=0.233..0.342 rows=79 loops=1) |
| Recheck Cond: (arr @> '{1,2}'::integer[]) |
| Heap Blocks: exact=76 |
| -> Bitmap Index Scan on it201010c_arr_idx (cost=0.00..20.65 rows=87 width=0) (actual time=0.215..0.215 rows=79 loops=1) |
| Index Cond: (arr @> '{1,2}'::integer[]) |
| Planning Time: 0.259 ms |
| Execution Time: 8.959 ms |
+-----------------------------------------------------------------------------------------------------------------------------------------+
沒有PK時,一樣可以利用 ctid, 在此就不贅述.
今天我們探討了 不等於的情況下, 怎樣變化讓優化器可以使用index.
實際情況還要隨著資料分佈,index是否具有足夠的鑑別性,適當使用 index.
另外,也設定了幾個參數,讓大家可以觀察執行計畫的情況
set enable_hashjoin=on;
set enable_sort=on;
set enable_seqscan=on;
也使用了 array 來探討,並且使用了 not exists.
希望能帶給大家在使用時,有一些幫助.