iT邦幫忙

3

趣味SQL 260224 時間區間重疊偵測與合併

  • 分享至 

  • xImage
時間區間重疊偵測與合併
舉例來說,可以用在如以下幾種情境:
訂會議室(飯店房間)時避免衝突
HR 計算員工的不重複請假天數
保險保單的有效期間合併
計算伺服器實際運行時間(排除重疊的維護視窗)

當然不限於這些.
以下以會議室來當範例.

create table bookings (
  id int generated always as identity primary key
, room text not null
, booked_by text not null
, start_time timestamp not null
, end_time timestamp not null
, check (end_time > start_time)
);

insert into bookings (room, booked_by, start_time, end_time) values
-- Room A: 多段重疊,應合併
('A', 'Alice', '2025-01-10 09:00', '2025-01-10 11:00'),
('A', 'Bob',   '2025-01-10 10:30', '2025-01-10 12:00'),  -- 與 Alice 重疊
('A', 'Carol', '2025-01-10 11:45', '2025-01-10 13:30'),  -- 與 Bob 重疊
('A', 'Dave',  '2025-01-10 15:00', '2025-01-10 16:00'),  -- 獨立區間
('A', 'Eve',   '2025-01-10 15:30', '2025-01-10 17:00'),  -- 與 Dave 重疊
-- Room B: 完美銜接(相鄰但不重疊)
('B', 'Frank', '2025-01-10 09:00', '2025-01-10 10:00'),
('B', 'Grace', '2025-01-10 10:00', '2025-01-10 11:00'),  -- 緊接 Frank
('B', 'Heidi', '2025-01-10 10:00', '2025-01-10 11:30'),  -- 與 Grace 重疊
-- Room C: 完全不重疊
('C', 'Ivan',  '2025-01-10 09:00', '2025-01-10 10:00'),
('C', 'Judy',  '2025-01-10 14:00', '2025-01-10 15:00');

目標:
1.衝突偵測:找出所有有時間衝突的訂位組合。
2.區間合併:將每個房間的重疊/相鄰區間合併,算出實際被佔用的時間段。

-- 找出所有時間重疊的配對, 可以利用 greatest(), least() 函數 取代 case

select a.room
     , a.id as id1
     , a.booked_by as prsn1
     , b.id as id2
     , b.booked_by as prsn2
     , greatest(a.start_time, b.start_time) as overlap_start
     , least(a.end_time, b.end_time) as overlap_end
     , array[a.start_time, b.start_time] as starts
     , array[a.end_time, b.end_time] as ends
  from bookings a
  join bookings b
    on a.room = b.room
   and a.id < b.id   -- 避免重複配對與自我配對
   and a.start_time < b.end_time -- 重疊條件
   and b.start_time < a.end_time
 order by a.room, overlap_start;

 room | id1 | prsn1 | id2 | prsn2 |    overlap_start    |     overlap_end     |                    starts                     |                     ends
------+-----+-------+-----+-------+---------------------+---------------------+-----------------------------------------------+-----------------------------------------------
 A    |   1 | Alice |   2 | Bob   | 2025-01-10 10:30:00 | 2025-01-10 11:00:00 | {"2025-01-10 09:00:00","2025-01-10 10:30:00"} | {"2025-01-10 11:00:00","2025-01-10 12:00:00"}
 A    |   2 | Bob   |   3 | Carol | 2025-01-10 11:45:00 | 2025-01-10 12:00:00 | {"2025-01-10 10:30:00","2025-01-10 11:45:00"} | {"2025-01-10 12:00:00","2025-01-10 13:30:00"}
 A    |   4 | Dave  |   5 | Eve   | 2025-01-10 15:30:00 | 2025-01-10 16:00:00 | {"2025-01-10 15:00:00","2025-01-10 15:30:00"} | {"2025-01-10 16:00:00","2025-01-10 17:00:00"}
 B    |   7 | Grace |   8 | Heidi | 2025-01-10 10:00:00 | 2025-01-10 11:00:00 | {"2025-01-10 10:00:00","2025-01-10 10:00:00"} | {"2025-01-10 11:00:00","2025-01-10 11:30:00"}
(4 筆資料)

-- 區間合併

-- 利用window function 指定範圍功能, 找出每筆資料之前的最大值,也是最新的.
-- 時間型態的比較,大小其實是比較新舊.

select room
     , start_time
     , end_time
     , max(end_time) over(partition by room
       order by start_time
       rows between unbounded preceding and 1 preceding)
       as prev_max_end
  from bookings
 order by room, start_time;
 
 room |     start_time      |      end_time       |         max
------+---------------------+---------------------+---------------------
 A    | 2025-01-10 09:00:00 | 2025-01-10 11:00:00 | [null]
 A    | 2025-01-10 10:30:00 | 2025-01-10 12:00:00 | 2025-01-10 11:00:00
 A    | 2025-01-10 11:45:00 | 2025-01-10 13:30:00 | 2025-01-10 12:00:00
 A    | 2025-01-10 15:00:00 | 2025-01-10 16:00:00 | 2025-01-10 13:30:00
 A    | 2025-01-10 15:30:00 | 2025-01-10 17:00:00 | 2025-01-10 16:00:00
 B    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | [null]
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:00:00 | 2025-01-10 10:00:00
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:30:00 | 2025-01-10 11:00:00
 C    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | [null]
 C    | 2025-01-10 14:00:00 | 2025-01-10 15:00:00 | 2025-01-10 10:00:00
(10 筆資料)

-- 接著再與 start_time 比較, 或是 null, 找出分組開頭

with ordered as (
select room
     , start_time
     , end_time
     , max(end_time) over(partition by room
       order by start_time
       rows between unbounded preceding and 1 preceding)
       as prev_max_end
  from bookings
)
select room
     , start_time
     , end_time
     , prev_max_end
     , case
         when prev_max_end is null or start_time >= prev_max_end
         then 1
         else 0
       end as grp_start
  from ordered
 order by room, start_time;
 
 room |     start_time      |      end_time       |    prev_max_end     | grp_start
------+---------------------+---------------------+---------------------+-----------
 A    | 2025-01-10 09:00:00 | 2025-01-10 11:00:00 | [null]              |         1
 A    | 2025-01-10 10:30:00 | 2025-01-10 12:00:00 | 2025-01-10 11:00:00 |         0
 A    | 2025-01-10 11:45:00 | 2025-01-10 13:30:00 | 2025-01-10 12:00:00 |         0
 A    | 2025-01-10 15:00:00 | 2025-01-10 16:00:00 | 2025-01-10 13:30:00 |         1
 A    | 2025-01-10 15:30:00 | 2025-01-10 17:00:00 | 2025-01-10 16:00:00 |         0
 B    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | [null]              |         1
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:00:00 | 2025-01-10 10:00:00 |         1
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:30:00 | 2025-01-10 11:00:00 |         0
 C    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | [null]              |         1
 C    | 2025-01-10 14:00:00 | 2025-01-10 15:00:00 | 2025-01-10 10:00:00 |         1
(10 筆資料)

with ordered as (
select room
     , start_time
     , end_time
     , max(end_time) over(partition by room
       order by start_time
       rows between unbounded preceding and 1 preceding)
       as prev_max_end
  from bookings
), grp_start as (
select room
     , start_time
     , end_time
     --, prev_max_end
     , case
         when prev_max_end is null or start_time >= prev_max_end
         then 1
         else 0
       end as grp_start
  from ordered
)
select room
     , start_time
     , end_time
     , sum(grp_start) over(partition by room
        order by start_time) as grp_id
  from grp_start
 order by room, start_time;
 
 room |     start_time      |      end_time       | grp_id
------+---------------------+---------------------+--------
 A    | 2025-01-10 09:00:00 | 2025-01-10 11:00:00 |      1
 A    | 2025-01-10 10:30:00 | 2025-01-10 12:00:00 |      1
 A    | 2025-01-10 11:45:00 | 2025-01-10 13:30:00 |      1
 A    | 2025-01-10 15:00:00 | 2025-01-10 16:00:00 |      2
 A    | 2025-01-10 15:30:00 | 2025-01-10 17:00:00 |      2
 B    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 |      1
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:00:00 |      2
 B    | 2025-01-10 10:00:00 | 2025-01-10 11:30:00 |      2
 C    | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 |      1
 C    | 2025-01-10 14:00:00 | 2025-01-10 15:00:00 |      2
(10 筆資料)

-- 這是先以room切割後分組編號(組別)
-- 分組之後 就可以利用 room, grp_id 來聚合,使用 min() max() 合併區間了

with ordered as (
select room
     , start_time
     , end_time
     , max(end_time) over(partition by room
       order by start_time
       rows between unbounded preceding and 1 preceding)
       as prev_max_end
  from bookings
), grp_start as (
select room
     , start_time
     , end_time
     --, prev_max_end
     , case
         when prev_max_end is null or start_time >= prev_max_end
         then 1
         else 0
       end as grp_start
  from ordered
), grps as (
select room
     , start_time
     , end_time
     , sum(grp_start) over(partition by room
        order by start_time) as grp_id
  from grp_start
)
select room
     , grp_id
     , min(start_time) as grp_start
     , max(end_time) as grp_end
     , max(end_time) - min(start_time) as duration
  from grps
 group by room, grp_id
 order by room, grp_id;
 
 room | grp_id |      grp_start      |       grp_end       | duration
------+--------+---------------------+---------------------+----------
 A    |      1 | 2025-01-10 09:00:00 | 2025-01-10 13:30:00 | 04:30:00
 A    |      2 | 2025-01-10 15:00:00 | 2025-01-10 17:00:00 | 02:00:00
 B    |      1 | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | 01:00:00
 B    |      2 | 2025-01-10 10:00:00 | 2025-01-10 11:30:00 | 01:30:00
 C    |      1 | 2025-01-10 09:00:00 | 2025-01-10 10:00:00 | 01:00:00
 C    |      2 | 2025-01-10 14:00:00 | 2025-01-10 15:00:00 | 01:00:00
(6 筆資料)

-- 連續幾天都是使用類似手法,這類問題,在實際應用上很多案例,但是不一定能夠很好的解出來.所以提供一些案例給大家參考.
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中
不明
【**此則訊息已被站方移除**】

尚未有邦友回答

立即登入回答