iT邦幫忙

0

以Postgresql為主,再聊聊資料庫 PostgreSQL Bloom index 介紹

PostgreSQL從 9.6 開始支援.

首先安裝 bloom extension.

create extension bloom;

以下會以 
https://www.cybertec-postgresql.com/en/trying-out-postgres-bloom-indexes/

這篇為主來介紹,中間一些作者沒有明確寫出的操作,我已經補充了.

create table vehicle (
  id int generated always as identity
, brand_code int not null
, model_code int not null
, color_code int not null
, year int not null
, body_type_code int not null
, doors int not null
, seats int not null
, gearbox_code int not null
, fuel_type_code int not null
, aircon_type_code int not null
);

-- 輸入500萬筆測試資料
insert into vehicle (brand_code, model_code, color_code, year, body_type_code, doors, seats, gearbox_code, fuel_type_code, aircon_type_code)
select random()*200 --brand
     , random()*25  --model
     , random()*25  --color
     , random()*26 + 1990 --year
     , random()*10  --body_type
     , random()*4 + 1 --doors
     , random()*8 + 1 --seats
     , random()*3   --gearbox
     , random()*10  --fuel
     , random()*4   --aircon
  from generate_series(1, 5*1e6);  

commit;

analyze vehicle;

-- create btree indexs and bloom index

CREATE INDEX ON vehicle(brand_code, model_code);
 
CREATE INDEX ON vehicle(color_code);
 
CREATE INDEX ON vehicle(year);
 
CREATE INDEX ON vehicle(body_type_code);
 
CREATE INDEX ON vehicle(doors);
 
CREATE INDEX ON vehicle(seats);
 
CREATE INDEX ON vehicle(gearbox_code);
 
CREATE INDEX ON vehicle(fuel_type_code);
 
CREATE INDEX ON vehicle(aircon_type_code);
 
CREATE INDEX bloom_80_bits ON vehicle USING bloom (brand_code,model_code,color_code,year,body_type_code,doors,seats,gearbox_code,fuel_type_code,aircon_type_code);

查看 index size
\di+ vehicle*
                                       List of relations
+--------+-----------------------------------+-------+-------+---------+--------+-------------+
| Schema |               Name                | Type  | Owner |  Table  |  Size  | Description |
+--------+-----------------------------------+-------+-------+---------+--------+-------------+
| aki    | vehicle_aircon_type_code_idx      | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_body_type_code_idx        | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_brand_code_model_code_idx | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_color_code_idx            | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_doors_idx                 | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_fuel_type_code_idx        | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_gearbox_code_idx          | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_seats_idx                 | index | aki   | vehicle | 107 MB |             |
| aki    | vehicle_year_idx                  | index | aki   | vehicle | 107 MB |             |
+--------+-----------------------------------+-------+-------+---------+--------+-------------+
(9 rows)

\di+ bloom_80_bits
                            List of relations
+--------+---------------+-------+-------+---------+-------+-------------+
| Schema |     Name      | Type  | Owner |  Table  | Size  | Description |
+--------+---------------+-------+-------+---------+-------+-------------+
| aki    | bloom_80_bits | index | aki   | vehicle | 77 MB |             |
+--------+---------------+-------+-------+---------+-------+-------------+
(1 row)


explain analyze 
select * 
  from vehicle
 where color_code = 12
   and year > 2010
   and body_type_code = 5
   and doors = 4
   and gearbox_code = 2
   and fuel_type_code = 5
   and aircon_type_code = 2;

+----------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                        QUERY PLAN                                                                        |
+----------------------------------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on vehicle  (cost=21980.81..28509.72 rows=9 width=44) (actual time=503.417..817.789 rows=7 loops=1)                                     |
|   Recheck Cond: ((color_code = 12) AND (body_type_code = 5) AND (fuel_type_code = 5))                                                                    |
|   Filter: ((year > 2010) AND (doors = 4) AND (gearbox_code = 2) AND (aircon_type_code = 2))                                                              |
|   Rows Removed by Filter: 1941                                                                                                                           |
|   Heap Blocks: exact=1911                                                                                                                                |
|   ->  BitmapAnd  (cost=21980.81..21980.81 rows=1947 width=0) (actual time=469.211..469.212 rows=0 loops=1)                                               |
|         ->  Bitmap Index Scan on vehicle_color_code_idx  (cost=0.00..3674.43 rows=198667 width=0) (actual time=81.800..81.801 rows=199258 loops=1)       |
|               Index Cond: (color_code = 12)                                                                                                              |
|         ->  Bitmap Index Scan on vehicle_body_type_code_idx  (cost=0.00..9126.94 rows=493667 width=0) (actual time=178.718..178.718 rows=500185 loops=1) |
|               Index Cond: (body_type_code = 5)                                                                                                           |
|         ->  Bitmap Index Scan on vehicle_fuel_type_code_idx  (cost=0.00..9178.93 rows=496333 width=0) (actual time=199.431..199.431 rows=499639 loops=1) |
|               Index Cond: (fuel_type_code = 5)                                                                                                           |
| Planning Time: 64.370 ms                                                                                                                                 |
| Execution Time: 818.359 ms                                                                                                                               |
+----------------------------------------------------------------------------------------------------------------------------------------------------------+
(14 rows)

PostgreSQL 的優化器選擇了三個 index 做 Bitmap Index Scan 然後用 BitmapAnd 運算.
沒有選擇 bloom index.

接著我們將上面9個 btree index 暫時disable

注意! 這是危險動作,若不熟悉,或是 index 的名稱不是這樣的.
可以改用 drop index

update pg_index
   set indisvalid = false
 where indexrelid::regclass::text like 'vehicle_%';

UPDATE 9
Time: 24.649 ms

commit;

設定不做 Sequence Scna
SET enable_seqscan = OFF;

explain analyze 
select * 
  from vehicle
 where color_code = 12
   and year > 2010
   and body_type_code = 5
   and doors = 4
   and gearbox_code = 2
   and fuel_type_code = 5
   and aircon_type_code = 2;

+-------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                         QUERY PLAN                                                                          |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Bitmap Heap Scan on vehicle  (cost=139220.00..139381.49 rows=9 width=44) (actual time=185.860..191.202 rows=7 loops=1)                                      |
|   Recheck Cond: ((color_code = 12) AND (body_type_code = 5) AND (doors = 4) AND (gearbox_code = 2) AND (fuel_type_code = 5) AND (aircon_type_code = 2))     |
|   Rows Removed by Index Recheck: 24                                                                                                                         |
|   Filter: (year > 2010)                                                                                                                                     |
|   Rows Removed by Filter: 33                                                                                                                                |
|   Heap Blocks: exact=64                                                                                                                                     |
|   ->  Bitmap Index Scan on bloom_80_bits  (cost=0.00..139220.00 rows=41 width=0) (actual time=182.069..182.069 rows=64 loops=1)                             |
|         Index Cond: ((color_code = 12) AND (body_type_code = 5) AND (doors = 4) AND (gearbox_code = 2) AND (fuel_type_code = 5) AND (aircon_type_code = 2)) |
| Planning Time: 0.787 ms                                                                                                                                     |
| Execution Time: 191.528 ms                                                                                                                                  |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------+

可以觀察到這次優化器選擇了 Bitmap Index Scan on bloom_80_bits

Execution Time: 191.528 ms 
比上面使用了三個btree index 的 Execution Time: 818.359 ms 
快了不少.

尚未有邦友留言

立即登入留言