使用者每天可能登入多次,同一天算一次。找出每個使用者的最長連續登入天數。
create schema pgchallenge;
create table pgchallenge.d260217 (
id int generated always as identity primary key
, user_id int not null
, login_ts timestamptz not null
, unique (user_id, login_ts)
);
insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
from generate_series(1, 14) as n
)
select 1, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
from t1
, t2;
-- 同一天重複登入
insert into pgchallenge.d260217 (user_id, login_ts) values
(1, '2026-01-06 01:23:45+08'), (1, '2026-01-06 05:34:47+08');
insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
from generate_series(0, 3) as n
)
select 2, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
from t1
, t2;
insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
from generate_series(5, 7) as n
)
select 2, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
from t1
, t2;
insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
from generate_series(0, 8, 2) as n
)
select 3, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
from t1
, t2;
delete
from pgchallenge.d260217
where id in (6, 8);
select *
from pgchallenge.d260217
order by user_id, login_ts;
id | user_id | login_ts
----+---------+------------------------
1 | 1 | 2026-01-02 18:04:47+08
2 | 1 | 2026-01-03 20:58:14+08
3 | 1 | 2026-01-04 00:39:07+08
4 | 1 | 2026-01-05 21:55:23+08
15 | 1 | 2026-01-06 01:23:45+08
16 | 1 | 2026-01-06 05:34:47+08
5 | 1 | 2026-01-06 19:34:56+08
7 | 1 | 2026-01-08 01:00:52+08
9 | 1 | 2026-01-10 04:56:13+08
10 | 1 | 2026-01-11 09:07:13+08
11 | 1 | 2026-01-12 20:19:16+08
12 | 1 | 2026-01-13 14:59:29+08
13 | 1 | 2026-01-14 11:34:38+08
14 | 1 | 2026-01-15 05:42:25+08
17 | 2 | 2026-01-01 14:31:44+08
18 | 2 | 2026-01-02 15:09:59+08
19 | 2 | 2026-01-03 14:37:25+08
20 | 2 | 2026-01-04 08:23:27+08
21 | 2 | 2026-01-06 00:25:18+08
22 | 2 | 2026-01-07 14:50:43+08
23 | 2 | 2026-01-08 07:46:04+08
24 | 3 | 2026-01-01 03:16:58+08
25 | 3 | 2026-01-03 05:24:34+08
26 | 3 | 2026-01-05 05:57:07+08
27 | 3 | 2026-01-07 18:34:04+08
28 | 3 | 2026-01-09 10:23:08+08
(26 筆資料)
-----
-- 先將 timestamptz 轉為 date, 並用 distinct 取得單一天
with distinct_date as (
select distinct user_id, date(login_ts) as dt
from pgchallenge.d260217
)
select user_id, dt
from distinct_date
order by 1, 2;
user_id | dt
---------+------------
1 | 2026-01-02
1 | 2026-01-03
1 | 2026-01-04
1 | 2026-01-05
1 | 2026-01-06
1 | 2026-01-08
1 | 2026-01-10
1 | 2026-01-11
1 | 2026-01-12
1 | 2026-01-13
1 | 2026-01-14
1 | 2026-01-15
2 | 2026-01-01
2 | 2026-01-02
2 | 2026-01-03
2 | 2026-01-04
2 | 2026-01-06
2 | 2026-01-07
2 | 2026-01-08
3 | 2026-01-01
3 | 2026-01-03
3 | 2026-01-05
3 | 2026-01-07
3 | 2026-01-09
(24 筆資料)
-- 依每個 user_id 分割, 照日期排序產生序號
with distinct_date as (
select distinct user_id, date(login_ts) as dt
from pgchallenge.d260217
)
select user_id, dt
, row_number() over(partition by user_id order by dt) as sn
from distinct_date;
user_id | dt | sn
---------+------------+----
1 | 2026-01-02 | 1
1 | 2026-01-03 | 2
1 | 2026-01-04 | 3
1 | 2026-01-05 | 4
1 | 2026-01-06 | 5
1 | 2026-01-08 | 6
1 | 2026-01-10 | 7
1 | 2026-01-11 | 8
1 | 2026-01-12 | 9
1 | 2026-01-13 | 10
1 | 2026-01-14 | 11
1 | 2026-01-15 | 12
2 | 2026-01-01 | 1
2 | 2026-01-02 | 2
2 | 2026-01-03 | 3
2 | 2026-01-04 | 4
2 | 2026-01-06 | 5
2 | 2026-01-07 | 6
2 | 2026-01-08 | 7
3 | 2026-01-01 | 1
3 | 2026-01-03 | 2
3 | 2026-01-05 | 3
3 | 2026-01-07 | 4
3 | 2026-01-09 | 5
(24 筆資料)
-- 連續日期減去連續序號會得到相同的值
with distinct_date as (
select distinct user_id, date(login_ts) as dt
from pgchallenge.d260217
), series as (
select user_id, dt
, (row_number() over(partition by user_id order by dt))::int as sn
from distinct_date
)
select *
, dt - sn as dtgrp
from series;
user_id | dt | sn | dtgrp
---------+------------+----+------------
1 | 2026-01-02 | 1 | 2026-01-01
1 | 2026-01-03 | 2 | 2026-01-01
1 | 2026-01-04 | 3 | 2026-01-01
1 | 2026-01-05 | 4 | 2026-01-01
1 | 2026-01-06 | 5 | 2026-01-01
1 | 2026-01-08 | 6 | 2026-01-02
1 | 2026-01-10 | 7 | 2026-01-03
1 | 2026-01-11 | 8 | 2026-01-03
1 | 2026-01-12 | 9 | 2026-01-03
1 | 2026-01-13 | 10 | 2026-01-03
1 | 2026-01-14 | 11 | 2026-01-03
1 | 2026-01-15 | 12 | 2026-01-03
2 | 2026-01-01 | 1 | 2025-12-31
2 | 2026-01-02 | 2 | 2025-12-31
2 | 2026-01-03 | 3 | 2025-12-31
2 | 2026-01-04 | 4 | 2025-12-31
2 | 2026-01-06 | 5 | 2026-01-01
2 | 2026-01-07 | 6 | 2026-01-01
2 | 2026-01-08 | 7 | 2026-01-01
3 | 2026-01-01 | 1 | 2025-12-31
3 | 2026-01-03 | 2 | 2026-01-01
3 | 2026-01-05 | 3 | 2026-01-02
3 | 2026-01-07 | 4 | 2026-01-03
3 | 2026-01-09 | 5 | 2026-01-04
(24 筆資料)
with distinct_date as (
select distinct user_id, date(login_ts) as dt
from pgchallenge.d260217
), series as (
select user_id, dt
, (row_number() over(partition by user_id order by dt))::int as sn
from distinct_date
), date_grp as (
select *
, dt - sn as dtgrp
from series
)
select user_id, dtgrp, count(*) as cnt
from date_grp
group by user_id, dtgrp
order by user_id, dtgrp;
user_id | dtgrp | cnt
---------+------------+-----
1 | 2026-01-01 | 5
1 | 2026-01-02 | 1
1 | 2026-01-03 | 6
2 | 2025-12-31 | 4
2 | 2026-01-01 | 3
3 | 2025-12-31 | 1
3 | 2026-01-01 | 1
3 | 2026-01-02 | 1
3 | 2026-01-03 | 1
3 | 2026-01-04 | 1
(10 筆資料)
with distinct_date as (
select distinct user_id, date(login_ts) as dt
from pgchallenge.d260217
), series as (
select user_id, dt
, (row_number() over(partition by user_id order by dt))::int as sn
from distinct_date
), date_grp as (
select *
, dt - sn as dtgrp
from series
), grp_cnt as (
select user_id, dtgrp, count(*) as cnt
from date_grp
group by user_id, dtgrp
)
select user_id, max(cnt)
from grp_cnt
group by user_id
order by user_id;
user_id | max
---------+-----
1 | 6
2 | 4
3 | 1
(3 筆資料)
我這邊有另一個思路,不過這個方式效能比較不好,而且盡量不要用來計算太長的時間,
不過這個有個好處是在前端做顯示時可以做標示圖
-- 1. 建立資料表
CREATE TABLE d260217 (
id int GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
user_id int NOT NULL,
login_ts timestamptz NOT NULL,
UNIQUE (user_id, login_ts)
);
-- 2. 寫入測試資料
INSERT INTO d260217 (user_id, login_ts)
SELECT 1, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
FROM generate_series(1, 14) n;
INSERT INTO d260217 (user_id, login_ts) VALUES
(1, '2026-01-06 01:23:45+08'), (1, '2026-01-06 05:34:47+08');
INSERT INTO d260217 (user_id, login_ts)
SELECT 2, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
FROM (SELECT n FROM generate_series(0, 3) n UNION SELECT n FROM generate_series(5, 7) n) t;
INSERT INTO d260217 (user_id, login_ts)
SELECT 3, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
FROM generate_series(0, 8, 2) n;
DELETE FROM d260217 WHERE id IN (6, 8);
WITH date_range AS (
SELECT
min(login_ts::date) as start_dt,
max(login_ts::date) as end_dt
FROM d260217
),
full_series AS (
SELECT generate_series(start_dt, end_dt, '1 day')::date AS dt
FROM date_range
),
user_daily_status AS (
SELECT
u.user_id,
fs.dt,
CASE WHEN d.user_id IS NOT NULL THEN '1' ELSE '0' END AS status
FROM (SELECT DISTINCT user_id FROM d260217) u
CROSS JOIN full_series fs
LEFT JOIN (SELECT DISTINCT user_id, login_ts::date AS dt FROM d260217) d
ON u.user_id = d.user_id AND fs.dt = d.dt
),
user_login_string AS (
SELECT
user_id,
string_agg(status, '' ORDER BY dt) AS login_pattern
FROM user_daily_status
GROUP BY user_id
)
SELECT
user_id,
login_pattern AS "登入模式樣態",
max(length(consecutive_ones)) AS "最長連續登入天數"
FROM (
SELECT
user_id,
login_pattern,
unnest(string_to_array(login_pattern, '0')) AS consecutive_ones
FROM user_login_string
) t
GROUP BY user_id, login_pattern
ORDER BY user_id;
| user_id | 登入模式樣態 | 最長連續登入天數 |
|---|---|---|
| 1 | 011111010111111 | 6 |
| 2 | 111101110000000 | 4 |
| 3 | 101010101000000 | 1 |