PostgreSQL 有多種形態的index, GIN 是其中一種.
我們來看看有哪些特色及應用
https://www.postgresql.org/docs/current/gin.html
create table it200918a (
n1 integer not null
, n2 integer not null
, n3 integer not null
);
insert into it200918a
select i
, i % 2
, i % 3
from generate_series(1, 1000000) i;
-- 補一下相容於 btree 支援的運算子
create extension btree_gin;
create index n2_btree on it200918a using btree(n2);
create index n2_gin on it200918a using gin(n2);
create index n3_btree on it200918a using btree(n3);
create index n3_gin on it200918a using gin(n3);
commit;
-- 看一下 index 的 size
SELECT n.nspname as "Schema"
, c.relname as "Name"
, pg_catalog.pg_get_userbyid(c.relowner) as "Owner"
, c2.relname as "Table"
, pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size"
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
LEFT JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
WHERE c.relkind IN ('i','I','')
AND n.nspname <> 'pg_catalog'
AND n.nspname <> 'information_schema'
AND n.nspname !~ '^pg_toast'
AND pg_catalog.pg_table_is_visible(c.oid)
AND c2.relname = 'it200918a'
ORDER BY 1,2;
+--------+----------+-------+-----------+---------+
| Schema | Name | Owner | Table | Size |
+--------+----------+-------+-----------+---------+
| aki | n2_btree | aki | it200918a | 21 MB |
| aki | n2_gin | aki | it200918a | 1088 kB |
| aki | n3_btree | aki | it200918a | 21 MB |
| aki | n3_gin | aki | it200918a | 1096 kB |
+--------+----------+-------+-----------+---------+
(4 rows)
-------
接著再來看 GIN 應用的例子
搭配 array 做多值查詢
關於array 可以參考本人之前的介紹
https://ithelp.ithome.com.tw/articles/10222763
create table itgin0918a (
id integer generated always as identity
, arr int[] not null
);
寫一個 PL/PgSQL block 來輸入十萬筆測試資料
do language plpgsql $_$
declare
begin
for i in 1..100000 loop
insert into itgin0918a(arr) select array(select random()*1000 from generate_series(1,20));
end loop;
end;
$_$;
commit;
create index itgin0918a_idx on itgin0918a using gin(arr);
Time: 914.799 ms
使用上面的 SQL command 查詢 gin index size
+--------+----------------+-------+------------+---------+
| Schema | Name | Owner | Table | Size |
+--------+----------------+-------+------------+---------+
| aki | itgin0918a_idx | aki | itgin0918a | 8048 kB |
+--------+----------------+-------+------------+---------+
連1MB都不到!
使用 array 的 overlap 運算子 && 查詢
select count(*)
from itgin0918a
where arr && array[6,18];
+-------+
| count |
+-------+
| 4019 |
+-------+
(1 row)
Time: 11.881 ms
select count(*)
from itgin0918a
where arr && array[6,18,20];
+-------+
| count |
+-------+
| 5960 |
+-------+
(1 row)
Time: 10.415 ms
上面的查詢就是有涵蓋 {6, 18}, 第二道是涵蓋 {6,18,20}.
也可以說 arr 裡面包含任意一個元素,就滿足條件.
array 使用上有強大的彈性,搭 gin 速度又快.
接著來看查詢計畫.
explain (analyze,timing,costs,buffers)
select *
from itgin0918a
where arr && array[6,18,20];
+------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on itgin0918a (cost=69.50..1867.89 rows=5871 width=105) (actual time=2.630..6.531 rows=5960 loops=1) |
| Recheck Cond: (arr && '{6,18,20}'::integer[]) |
| Heap Blocks: exact=1681 |
| Buffers: shared hit=1691 |
| -> Bitmap Index Scan on itgin0918a_idx (cost=0.00..68.03 rows=5871 width=0) (actual time=2.306..2.306 rows=5960 loops=1) |
| Index Cond: (arr && '{6,18,20}'::integer[]) |
| Buffers: shared hit=10 |
| Planning Time: 0.187 ms |
| Execution Time: 7.407 ms |
+------------------------------------------------------------------------------------------------------------------------------+
接著來看單欄位搜尋,注意:上面已經有安裝了 btree_gin extension
create table itgin0918b (
id int not null
, n1 int not null
);
insert into itgin0918b
select generate_series(1, 100000)
, random() * 10;
建立 gin 形態 index
create index itgin0918b_idx on itgin0918b using gin(n1);
查看 index size
+--------+----------------+-------+------------+--------+
| Schema | Name | Owner | Table | Size |
+--------+----------------+-------+------------+--------+
| aki | itgin0918b_idx | aki | itgin0918b | 248 kB |
+--------+----------------+-------+------------+--------+
select count(*)
from itgin0918b
where n1 = 6;
+-------+
| count |
+-------+
| 9982 |
+-------+
(1 row)
Time: 6.531 ms
接著來看查詢計畫.
explain (analyze,timing,costs,buffers)
select *
from itgin0918b
where n1 = 6;
+-------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+-------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on itgin0918b (cost=97.73..666.11 rows=10030 width=8) (actual time=3.795..9.015 rows=9982 loops=1) |
| Recheck Cond: (n1 = 6) |
| Heap Blocks: exact=443 |
| Buffers: shared hit=448 |
| -> Bitmap Index Scan on itgin0918b_idx (cost=0.00..95.22 rows=10030 width=0) (actual time=3.688..3.688 rows=9982 loops=1) |
| Index Cond: (n1 = 6) |
| Buffers: shared hit=5 |
| Planning Time: 1.138 ms |
| Execution Time: 10.371 ms |
+-------------------------------------------------------------------------------------------------------------------------------+
今天先介紹到此!