以Postgresql為主,再聊聊資料庫 PostgreSQL BRIN index 介紹二

postgresql rdbms sql
一級屠豬士 2020-09-24 15:54:11 ‧ 1598 瀏覽
分享至
接著繼續探討 BRIN.

我們建立兩個 table ,都使用 BRIN,來做比較.

create table brin1 (
  id int not null
, val int not null
);

create table brin2 (
  id int not null
, val int not null
);

insert into brin1
select generate_series(1,1e6)
     , round(random() * 10);

insert into brin2
select id
     , round(random() * 10)
  from generate_series(1,1e6) as t(id)
 order by random();

analyze brin1;
analyze brin2;

接下來要來介紹 table 統計資訊中,欄位的物理儲存順序與邏輯順序相關性.

select correlation
  from pg_stats
 where schemaname = 'aki'
   and tablename = 'brin1'
   and attname = 'id';
+-------------+
| correlation |
+-------------+
|           1 |
+-------------+

select correlation
  from pg_stats
 where schemaname = 'aki'
   and tablename = 'brin2'
   and attname = 'id';
+--------------+
| correlation  |
+--------------+
| -0.009371842 |
+--------------+

看一下兩個table 的內部實際分佈情況,使用了ctid 系統欄位.

select one.id as "logical"
     , one.ctid as "physical in one"
     , two.ctid as "physical in two"
  from brin1 as one
  join brin2 as two
 using (id)
 where one.id between 1 and 10
 order by 1;
 
+---------+-----------------+-----------------+
| logical | physical in one | physical in two |
+---------+-----------------+-----------------+
|       1 | (0,1)           | (2836,26)       |
|       2 | (0,2)           | (3310,6)        |
|       3 | (0,3)           | (1423,168)      |
|       4 | (0,4)           | (4042,58)       |
|       5 | (0,5)           | (4117,27)       |
|       6 | (0,6)           | (2960,165)      |
|       7 | (0,7)           | (4052,80)       |
|       8 | (0,8)           | (2377,40)       |
|       9 | (0,9)           | (1249,151)      |
|      10 | (0,10)          | (2153,220)      |
+---------+-----------------+-----------------+
(10 rows)

由上面的觀察,可以得知 brin1 與 brin2 的資料相關情況.

接下來建立 BRIN index

create index brin1_idx on brin1 using brin (id);

create index brin2_idx on brin2 using brin (id);

with t as (
select pg_relation_size('brin1') as tableSize
     , pg_relation_size('brin1_idx') as indexSize
)
select pg_size_pretty(t.tableSize) as tableSize
     , pg_size_pretty(t.indexSize) as indexSize
     , (t.indexSize / t.tableSize::real) as "Index / Table"
  from t;

+-----------+-----------+-----------------------+
| tablesize | indexsize |     Index / Table     |
+-----------+-----------+-----------------------+
| 35 MB     | 24 kB     | 0.0006779661016949153 |
+-----------+-----------+-----------------------+

analyze brin1;
analyze brin2;

explain analyze 
 select sum(val)
   from brin1 
  where id >= 10000 and id < 20000; 
+--------------------------------------------------------------------------------------------------------------------------------+
|                                                           QUERY PLAN                                                           |
+--------------------------------------------------------------------------------------------------------------------------------+
| Aggregate  (cost=4892.16..4892.17 rows=1 width=8) (actual time=8.992..8.993 rows=1 loops=1)                                    |
|   ->  Bitmap Heap Scan on brin1  (cost=14.45..4868.01 rows=9660 width=4) (actual time=2.180..6.886 rows=10000 loops=1)         |
|         Recheck Cond: ((id >= 10000) AND (id < 20000))                                                                         |
|         Rows Removed by Index Recheck: 18928                                                                                   |
|         Heap Blocks: lossy=128                                                                                                 |
|         ->  Bitmap Index Scan on brin1_idx  (cost=0.00..12.03 rows=28571 width=0) (actual time=0.067..0.067 rows=1280 loops=1) |
|               Index Cond: ((id >= 10000) AND (id < 20000))                                                                     |
| Planning Time: 0.152 ms                                                                                                        |
| Execution Time: 9.071 ms                                                                                                       |
+--------------------------------------------------------------------------------------------------------------------------------+


相對 brin2 的情況

explain analyze 
 select sum(val)
   from brin2
  where id >= 10000 and id < 20000; 

+-------------------------------------------------------------------------------------------------------------------------------------+
|                                                             QUERY PLAN                                                              |
+-------------------------------------------------------------------------------------------------------------------------------------+
| Finalize Aggregate  (cost=11686.19..11686.20 rows=1 width=8) (actual time=81.731..82.950 rows=1 loops=1)                            |
|   ->  Gather  (cost=11685.97..11686.18 rows=2 width=8) (actual time=79.185..82.941 rows=3 loops=1)                                  |
|         Workers Planned: 2                                                                                                          |
|         Workers Launched: 2                                                                                                         |
|         ->  Partial Aggregate  (cost=10685.97..10685.98 rows=1 width=8) (actual time=53.211..53.212 rows=1 loops=3)                 |
|               ->  Parallel Seq Scan on brin2  (cost=0.00..10675.00 rows=4389 width=4) (actual time=0.261..52.585 rows=3333 loops=3) |
|                     Filter: ((id >= 10000) AND (id < 20000))                                                                        |
|                     Rows Removed by Filter: 330000                                                                                  |
| Planning Time: 0.370 ms                                                                                                             |
| Execution Time: 85.732 ms                                                                                                           |
+-------------------------------------------------------------------------------------------------------------------------------------+

觀察以上兩個查詢, brin2 是選擇做 Seq Scan, 不使用 brin index!

PostgreSQL 有提供一個 cluster 指令,可以依照指定的 index 來做資料排序.
但目前 brin index 還未提供 access method. 
我們先把 brin 形態 index 刪除,並且建立 btree desc index.

drop index brin2_idx;

create index brin2_btree_idx on brin2 using btree (id desc);

cluster brin2 using brin2_btree_idx;

CLUSTER
Time: 4536.090 ms (00:04.536)

commit;

analyze brin2;

花了4點多秒 重整百萬筆的table.接著來觀察相關性.


select correlation
  from pg_stats
 where schemaname = 'aki'
   and tablename = 'brin2'
   and attname = 'id';
+-------------+
| correlation |
+-------------+
|          -1 |
+-------------+

相關性的值域由 -1 ~ +1, 負就是反向, 越接近 0 相關性越差, 絕對值越接近 1 相關性越好.

接著觀察物理與邏輯順序

select one.id as "logical"
     , one.ctid as "physical in one"
     , two.ctid as "physical in two"
  from brin1 as one
  join brin2 as two
 using (id)
 where one.id between 1 and 5
 order by 1;
+---------+-----------------+-----------------+
| logical | physical in one | physical in two |
+---------+-----------------+-----------------+
|       1 | (0,1)           | (4424,176)      |
|       2 | (0,2)           | (4424,175)      |
|       3 | (0,3)           | (4424,174)      |
|       4 | (0,4)           | (4424,173)      |
|       5 | (0,5)           | (4424,172)      |
+---------+-----------------+-----------------+

select one.id as "logical"
     , one.ctid as "physical in one"
     , two.ctid as "physical in two"
  from brin1 as one
  join brin2 as two
 using (id)
 where one.id between 999996 and 1000000
 order by 1 desc;

+---------+-----------------+-----------------+
| logical | physical in one | physical in two |
+---------+-----------------+-----------------+
| 1000000 | (4424,176)      | (0,1)           |
|  999999 | (4424,175)      | (0,2)           |
|  999998 | (4424,174)      | (0,3)           |
|  999997 | (4424,173)      | (0,4)           |
|  999996 | (4424,172)      | (0,5)           |
+---------+-----------------+-----------------+

接著建立 brin2 的 brin index 

create index brin2_brin_idx on brin2 using brin (id);

commit;

analyze brin2;

explain analyze 
 select sum(val)
   from brin2
  where id >= 10000 and id < 20000; 

 這時候會選 btree 形態的 index!

+----------------------------------------------------------------------------------------------------------------------------------------+
|                                                               QUERY PLAN                                                               |
+----------------------------------------------------------------------------------------------------------------------------------------+
| Aggregate  (cost=395.44..395.45 rows=1 width=8) (actual time=17.802..17.803 rows=1 loops=1)                                            |
|   ->  Index Scan using brin2_btree_idx on brin2  (cost=0.42..369.89 rows=10223 width=4) (actual time=2.160..15.673 rows=10000 loops=1) |
|         Index Cond: ((id >= 10000) AND (id < 20000))                                                                                   |
| Planning Time: 0.518 ms                                                                                                                |
| Execution Time: 17.882 ms                                                                                                              |
+----------------------------------------------------------------------------------------------------------------------------------------+
第二次 Planning Time: 0.214 ms   Execution Time: 7.785 ms     

將 btree index 刪除.

drop index brin2_btree_idx;
commit;
analyze brin2;

explain analyze 
 select sum(val)
   from brin2
  where id >= 10000 and id < 20000; 

| Planning Time: 0.221 ms                                                                                                             |
| Execution Time: 11.685 ms 

| Planning Time: 0.116 ms                                                                                                             |
| Execution Time: 11.652 ms        

我們可以觀察到 brin index 對 相關性為負值的欄位,效果也不佳!
了解其限制及其適合的情境,我們就可以適當的使用.
今天先介紹到此!