iT邦幫忙

0

以Postgresql為主,再聊聊資料庫 PostgreSQL GIN index 介紹一


PostgreSQL 有多種形態的index, GIN 是其中一種.
我們來看看有哪些特色及應用

https://www.postgresql.org/docs/current/gin.html

create table it200918a (
  n1 integer not null
, n2 integer not null
, n3 integer not null
);

insert into it200918a
select i
     , i % 2
     , i % 3
  from generate_series(1, 1000000) i;

-- 補一下相容於 btree 支援的運算子
create extension btree_gin;

create index n2_btree on it200918a using btree(n2);

create index n2_gin on it200918a using gin(n2);

create index n3_btree on it200918a using btree(n3);

create index n3_gin on it200918a using gin(n3);

commit;

-- 看一下 index 的 size

SELECT n.nspname as "Schema"
     , c.relname as "Name"
     , pg_catalog.pg_get_userbyid(c.relowner) as "Owner"
     , c2.relname as "Table"
     , pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size"
  FROM pg_catalog.pg_class c
  LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
  LEFT JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
  LEFT JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
 WHERE c.relkind IN ('i','I','')
   AND n.nspname <> 'pg_catalog'
   AND n.nspname <> 'information_schema'
   AND n.nspname !~ '^pg_toast'
   AND pg_catalog.pg_table_is_visible(c.oid)
   AND c2.relname = 'it200918a'
 ORDER BY 1,2;

+--------+----------+-------+-----------+---------+
| Schema |   Name   | Owner |   Table   |  Size   |
+--------+----------+-------+-----------+---------+
| aki    | n2_btree | aki   | it200918a | 21 MB   |
| aki    | n2_gin   | aki   | it200918a | 1088 kB |
| aki    | n3_btree | aki   | it200918a | 21 MB   |
| aki    | n3_gin   | aki   | it200918a | 1096 kB |
+--------+----------+-------+-----------+---------+
(4 rows)

-------
接著再來看 GIN 應用的例子
搭配 array 做多值查詢
關於array 可以參考本人之前的介紹

https://ithelp.ithome.com.tw/articles/10222763

create table itgin0918a (
  id integer generated always as identity
, arr int[] not null
);

寫一個 PL/PgSQL block 來輸入十萬筆測試資料

do language plpgsql $_$
declare
begin
  for i in 1..100000 loop
    insert into itgin0918a(arr) select array(select random()*1000 from generate_series(1,20)); 
  end loop;
end;
$_$;

commit;

create index itgin0918a_idx on itgin0918a using gin(arr);
Time: 914.799 ms

使用上面的 SQL command 查詢 gin index size
+--------+----------------+-------+------------+---------+
| Schema |      Name      | Owner |   Table    |  Size   |
+--------+----------------+-------+------------+---------+
| aki    | itgin0918a_idx | aki   | itgin0918a | 8048 kB |
+--------+----------------+-------+------------+---------+

連1MB都不到!

使用 array 的 overlap 運算子 && 查詢

select count(*)
  from itgin0918a
 where arr && array[6,18];
+-------+
| count |
+-------+
|  4019 |
+-------+
(1 row)

Time: 11.881 ms

select count(*)
  from itgin0918a
 where arr && array[6,18,20];
+-------+
| count |
+-------+
|  5960 |
+-------+
(1 row)

Time: 10.415 ms

上面的查詢就是有涵蓋 {6, 18}, 第二道是涵蓋 {6,18,20}.
也可以說 arr 裡面包含任意一個元素,就滿足條件.
array 使用上有強大的彈性,搭 gin 速度又快.
接著來看查詢計畫.

explain (analyze,timing,costs,buffers)
select *
  from itgin0918a
 where arr && array[6,18,20];
+------------------------------------------------------------------------------------------------------------------------------+
|                                                          QUERY PLAN                                                          |
+------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on itgin0918a  (cost=69.50..1867.89 rows=5871 width=105) (actual time=2.630..6.531 rows=5960 loops=1)       |
|   Recheck Cond: (arr && '{6,18,20}'::integer[])                                                                              |
|   Heap Blocks: exact=1681                                                                                                    |
|   Buffers: shared hit=1691                                                                                                   |
|   ->  Bitmap Index Scan on itgin0918a_idx  (cost=0.00..68.03 rows=5871 width=0) (actual time=2.306..2.306 rows=5960 loops=1) |
|         Index Cond: (arr && '{6,18,20}'::integer[])                                                                          |
|         Buffers: shared hit=10                                                                                               |
| Planning Time: 0.187 ms                                                                                                      |
| Execution Time: 7.407 ms                                                                                                     |
+------------------------------------------------------------------------------------------------------------------------------+

接著來看單欄位搜尋,注意:上面已經有安裝了 btree_gin extension

create table itgin0918b (
  id int not null
, n1 int not null
);

insert into itgin0918b
select generate_series(1, 100000)
     , random() * 10;

建立 gin 形態 index

create index itgin0918b_idx on itgin0918b using gin(n1);

查看 index size
+--------+----------------+-------+------------+--------+
| Schema |      Name      | Owner |   Table    |  Size  |
+--------+----------------+-------+------------+--------+
| aki    | itgin0918b_idx | aki   | itgin0918b | 248 kB |
+--------+----------------+-------+------------+--------+

select count(*) 
  from itgin0918b
 where n1 = 6;
+-------+
| count |
+-------+
|  9982 |
+-------+
(1 row)

Time: 6.531 ms

接著來看查詢計畫.
explain (analyze,timing,costs,buffers)
select *
  from itgin0918b
 where n1 = 6;
+-------------------------------------------------------------------------------------------------------------------------------+
|                                                          QUERY PLAN                                                           |
+-------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on itgin0918b  (cost=97.73..666.11 rows=10030 width=8) (actual time=3.795..9.015 rows=9982 loops=1)          |
|   Recheck Cond: (n1 = 6)                                                                                                      |
|   Heap Blocks: exact=443                                                                                                      |
|   Buffers: shared hit=448                                                                                                     |
|   ->  Bitmap Index Scan on itgin0918b_idx  (cost=0.00..95.22 rows=10030 width=0) (actual time=3.688..3.688 rows=9982 loops=1) |
|         Index Cond: (n1 = 6)                                                                                                  |
|         Buffers: shared hit=5                                                                                                 |
| Planning Time: 1.138 ms                                                                                                       |
| Execution Time: 10.371 ms                                                                                                     |
+-------------------------------------------------------------------------------------------------------------------------------+

今天先介紹到此!


尚未有邦友留言

立即登入留言