iT邦幫忙

2

趣味SQL 260217 最長連續登入天數

  • 分享至 

  • xImage
使用者每天可能登入多次,同一天算一次。找出每個使用者的最長連續登入天數。

create schema pgchallenge;

create table pgchallenge.d260217 (
  id int generated always as identity primary key
, user_id int not null
, login_ts timestamptz not null
, unique (user_id, login_ts)
);

insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
  from generate_series(1, 14) as n
)
select 1, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
  from t1
     , t2;

-- 同一天重複登入
insert into pgchallenge.d260217 (user_id, login_ts) values
(1, '2026-01-06 01:23:45+08'), (1, '2026-01-06 05:34:47+08');


insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
  from generate_series(0, 3) as n
)
select 2, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
  from t1
     , t2;
     

insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
  from generate_series(5, 7) as n
)
select 2, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
  from t1
     , t2;
     
insert into pgchallenge.d260217 (user_id, login_ts)
with t1 as (
select date_trunc('year', now()) as dt
), t2 as (
select n
  from generate_series(0, 8, 2) as n
)
select 3, dt + interval '1 day' * n + interval '1 second' * random(1, 86400)
  from t1
     , t2;
     
delete
  from pgchallenge.d260217
 where id in (6, 8);
 
select *
  from pgchallenge.d260217
 order by user_id, login_ts;

 id | user_id |        login_ts
----+---------+------------------------
  1 |       1 | 2026-01-02 18:04:47+08
  2 |       1 | 2026-01-03 20:58:14+08
  3 |       1 | 2026-01-04 00:39:07+08
  4 |       1 | 2026-01-05 21:55:23+08
 15 |       1 | 2026-01-06 01:23:45+08
 16 |       1 | 2026-01-06 05:34:47+08
  5 |       1 | 2026-01-06 19:34:56+08
  7 |       1 | 2026-01-08 01:00:52+08
  9 |       1 | 2026-01-10 04:56:13+08
 10 |       1 | 2026-01-11 09:07:13+08
 11 |       1 | 2026-01-12 20:19:16+08
 12 |       1 | 2026-01-13 14:59:29+08
 13 |       1 | 2026-01-14 11:34:38+08
 14 |       1 | 2026-01-15 05:42:25+08
 17 |       2 | 2026-01-01 14:31:44+08
 18 |       2 | 2026-01-02 15:09:59+08
 19 |       2 | 2026-01-03 14:37:25+08
 20 |       2 | 2026-01-04 08:23:27+08
 21 |       2 | 2026-01-06 00:25:18+08
 22 |       2 | 2026-01-07 14:50:43+08
 23 |       2 | 2026-01-08 07:46:04+08
 24 |       3 | 2026-01-01 03:16:58+08
 25 |       3 | 2026-01-03 05:24:34+08
 26 |       3 | 2026-01-05 05:57:07+08
 27 |       3 | 2026-01-07 18:34:04+08
 28 |       3 | 2026-01-09 10:23:08+08
(26 筆資料)

-----
-- 先將 timestamptz 轉為 date, 並用 distinct 取得單一天

with distinct_date as (
select distinct user_id, date(login_ts) as dt
  from pgchallenge.d260217
)
select user_id, dt
  from distinct_date
 order by 1, 2;
 
 user_id |     dt
---------+------------
       1 | 2026-01-02
       1 | 2026-01-03
       1 | 2026-01-04
       1 | 2026-01-05
       1 | 2026-01-06
       1 | 2026-01-08
       1 | 2026-01-10
       1 | 2026-01-11
       1 | 2026-01-12
       1 | 2026-01-13
       1 | 2026-01-14
       1 | 2026-01-15
       2 | 2026-01-01
       2 | 2026-01-02
       2 | 2026-01-03
       2 | 2026-01-04
       2 | 2026-01-06
       2 | 2026-01-07
       2 | 2026-01-08
       3 | 2026-01-01
       3 | 2026-01-03
       3 | 2026-01-05
       3 | 2026-01-07
       3 | 2026-01-09
(24 筆資料)

-- 依每個 user_id 分割, 照日期排序產生序號

with distinct_date as (
select distinct user_id, date(login_ts) as dt
  from pgchallenge.d260217
)
select user_id, dt
     , row_number() over(partition by user_id order by dt) as sn
  from distinct_date;

 user_id |     dt     | sn
---------+------------+----
       1 | 2026-01-02 |  1
       1 | 2026-01-03 |  2
       1 | 2026-01-04 |  3
       1 | 2026-01-05 |  4
       1 | 2026-01-06 |  5
       1 | 2026-01-08 |  6
       1 | 2026-01-10 |  7
       1 | 2026-01-11 |  8
       1 | 2026-01-12 |  9
       1 | 2026-01-13 | 10
       1 | 2026-01-14 | 11
       1 | 2026-01-15 | 12
       2 | 2026-01-01 |  1
       2 | 2026-01-02 |  2
       2 | 2026-01-03 |  3
       2 | 2026-01-04 |  4
       2 | 2026-01-06 |  5
       2 | 2026-01-07 |  6
       2 | 2026-01-08 |  7
       3 | 2026-01-01 |  1
       3 | 2026-01-03 |  2
       3 | 2026-01-05 |  3
       3 | 2026-01-07 |  4
       3 | 2026-01-09 |  5
(24 筆資料)

-- 連續日期減去連續序號會得到相同的值

with distinct_date as (
select distinct user_id, date(login_ts) as dt
  from pgchallenge.d260217
), series as (
select user_id, dt
     , (row_number() over(partition by user_id order by dt))::int as sn
  from distinct_date
)
select *
     , dt - sn as dtgrp
  from series;
  
 user_id |     dt     | sn |   dtgrp
---------+------------+----+------------
       1 | 2026-01-02 |  1 | 2026-01-01
       1 | 2026-01-03 |  2 | 2026-01-01
       1 | 2026-01-04 |  3 | 2026-01-01
       1 | 2026-01-05 |  4 | 2026-01-01
       1 | 2026-01-06 |  5 | 2026-01-01
       1 | 2026-01-08 |  6 | 2026-01-02
       1 | 2026-01-10 |  7 | 2026-01-03
       1 | 2026-01-11 |  8 | 2026-01-03
       1 | 2026-01-12 |  9 | 2026-01-03
       1 | 2026-01-13 | 10 | 2026-01-03
       1 | 2026-01-14 | 11 | 2026-01-03
       1 | 2026-01-15 | 12 | 2026-01-03
       2 | 2026-01-01 |  1 | 2025-12-31
       2 | 2026-01-02 |  2 | 2025-12-31
       2 | 2026-01-03 |  3 | 2025-12-31
       2 | 2026-01-04 |  4 | 2025-12-31
       2 | 2026-01-06 |  5 | 2026-01-01
       2 | 2026-01-07 |  6 | 2026-01-01
       2 | 2026-01-08 |  7 | 2026-01-01
       3 | 2026-01-01 |  1 | 2025-12-31
       3 | 2026-01-03 |  2 | 2026-01-01
       3 | 2026-01-05 |  3 | 2026-01-02
       3 | 2026-01-07 |  4 | 2026-01-03
       3 | 2026-01-09 |  5 | 2026-01-04
(24 筆資料)

with distinct_date as (
select distinct user_id, date(login_ts) as dt
  from pgchallenge.d260217
), series as (
select user_id, dt
     , (row_number() over(partition by user_id order by dt))::int as sn
  from distinct_date
), date_grp as (
select *
     , dt - sn as dtgrp
  from series
)
select user_id, dtgrp, count(*) as cnt
  from date_grp
 group by user_id, dtgrp
 order by user_id, dtgrp;
 
 user_id |   dtgrp    | cnt
---------+------------+-----
       1 | 2026-01-01 |   5
       1 | 2026-01-02 |   1
       1 | 2026-01-03 |   6
       2 | 2025-12-31 |   4
       2 | 2026-01-01 |   3
       3 | 2025-12-31 |   1
       3 | 2026-01-01 |   1
       3 | 2026-01-02 |   1
       3 | 2026-01-03 |   1
       3 | 2026-01-04 |   1
(10 筆資料)

with distinct_date as (
select distinct user_id, date(login_ts) as dt
  from pgchallenge.d260217
), series as (
select user_id, dt
     , (row_number() over(partition by user_id order by dt))::int as sn
  from distinct_date
), date_grp as (
select *
     , dt - sn as dtgrp
  from series
), grp_cnt as (
select user_id, dtgrp, count(*) as cnt
  from date_grp
 group by user_id, dtgrp
)
select user_id, max(cnt)
  from grp_cnt
 group by user_id
 order by user_id;

 user_id | max
---------+-----
       1 |   6
       2 |   4
       3 |   1
(3 筆資料)
rogeryao iT邦超人 7 級 ‧ 2026-02-18 17:01:09 檢舉
『連續日期減去連續序號會得到相同的值』
妙不可言,甚好,甚好
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

1 個回答

0
老鷹(eagle)
iT邦高手 1 級 ‧ 2026-02-24 14:40:32

登入日期標記轉文字處理

我這邊有另一個思路,不過這個方式效能比較不好,而且盡量不要用來計算太長的時間,
不過這個有個好處是在前端做顯示時可以做標示圖

    -- 1. 建立資料表 
    CREATE TABLE d260217 (
        id int GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
        user_id int NOT NULL,
        login_ts timestamptz NOT NULL,
        UNIQUE (user_id, login_ts)
    );
    
    -- 2. 寫入測試資料
    INSERT INTO d260217 (user_id, login_ts)
    SELECT 1, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
    FROM generate_series(1, 14) n;
    

    INSERT INTO d260217 (user_id, login_ts) VALUES
    (1, '2026-01-06 01:23:45+08'), (1, '2026-01-06 05:34:47+08');
    
    INSERT INTO d260217 (user_id, login_ts)
    SELECT 2, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
    FROM (SELECT n FROM generate_series(0, 3) n UNION SELECT n FROM generate_series(5, 7) n) t;
    
    INSERT INTO d260217 (user_id, login_ts)
    SELECT 3, DATE_TRUNC('year', now()) + (n || ' day')::interval + (random() * 86400 || ' second')::interval
    FROM generate_series(0, 8, 2) n;
    

    DELETE FROM d260217 WHERE id IN (6, 8);

    WITH date_range AS (
        SELECT 
            min(login_ts::date) as start_dt,
            max(login_ts::date) as end_dt
        FROM d260217
    ),
    full_series AS (
        SELECT generate_series(start_dt, end_dt, '1 day')::date AS dt
        FROM date_range
    ),
    user_daily_status AS (
        SELECT 
            u.user_id,
            fs.dt,
            CASE WHEN d.user_id IS NOT NULL THEN '1' ELSE '0' END AS status
        FROM (SELECT DISTINCT user_id FROM d260217) u
        CROSS JOIN full_series fs
        LEFT JOIN (SELECT DISTINCT user_id, login_ts::date AS dt FROM d260217) d
          ON u.user_id = d.user_id AND fs.dt = d.dt
    ),
    user_login_string AS (
        SELECT 
            user_id, 
            string_agg(status, '' ORDER BY dt) AS login_pattern
        FROM user_daily_status
        GROUP BY user_id
    )
    SELECT 
        user_id,
        login_pattern AS "登入模式樣態",
        max(length(consecutive_ones)) AS "最長連續登入天數"
    FROM (
        SELECT 
            user_id, 
            login_pattern,
            unnest(string_to_array(login_pattern, '0')) AS consecutive_ones
        FROM user_login_string
    ) t
    GROUP BY user_id, login_pattern
    ORDER BY user_id;
user_id 登入模式樣態 最長連續登入天數
1 011111010111111 6
2 111101110000000 4
3 101010101000000 1

使用array, 這樣也是很有意思的方式.

不明
【**此則訊息已被站方移除**】

我要發表回答

立即登入回答