create table it201120 (
id int generated always as identity
, gts timestamp not null
, val int not null
);
-- 輸入10萬筆
insert into it201120(gts, val)
select 'yesterday'::timestamp + interval '1 second' * ceil(random() * 86400)
, floor(random() * 1e6)
from generate_series(1, 1e5);
select min(gts)
, max(gts)
from it201120;
min | max
---------------------+---------------------
2020-11-19 00:00:01 | 2020-11-20 00:00:00
-- 若我們想將資料依據時間戳,分成100個區間,列出區間 起始時間戳, 結束時間戳
-- 以及區間內 max(val), min(val).
-- 像是這樣的
startts | endts | minval | maxval
---------------------+---------------------+--------+--------
2020-11-19 00:00:01 | 2020-11-19 00:13:35 | 493 | 999965
2020-11-19 00:13:36 | 2020-11-19 00:28:07 | 21 | 999332
2020-11-19 00:28:09 | 2020-11-19 00:43:15 | 1207 | 998228
...
2020-11-19 23:16:58 | 2020-11-19 23:31:22 | 382 | 999968
2020-11-19 23:31:23 | 2020-11-19 23:45:26 | 214 | 999058
2020-11-19 23:45:26 | 2020-11-20 00:00:00 | 994 | 998460
(100 rows)
ntile 是你的朋友. (Oracle 與 SQL 都有, 以下是用 Oracle 的 LiveSQL 做範例)
我向來土法練鋼,遵古法製,沒有奇門遁甲~
select
min(gts) AS startts,
max(gts) AS endts,
min(val) AS minval,
max(val) AS maxval
from
it201120,
(SELECT @stime:= (select min(gts) from it201120)) AS a,
(select @nSecond := (select (UNIX_TIMESTAMP((select max(gts) from it201120)) - UNIX_TIMESTAMP(@stime))/99)) AS b
group by round((UNIX_TIMESTAMP(gts) - UNIX_TIMESTAMP(@stime))/@nSecond,0);
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=20be485c98da8227418363292f59873a
CREATE TABLE IT201120 (
id int PRIMARY KEY AUTO_INCREMENT,
gts timestamp,
val int NOT NULL
)
SET @@cte_max_recursion_depth = 100000;
SET @Today = CURDATE() + interval 0 second;
-- 隨機產生10萬筆時間與數值資料
INSERT INTO IT201120 (gts, val)
WITH RECURSIVE GenerateRandomTime AS (
SELECT @Today AS DateTimeField, 1 AS RowNum
UNION ALL
SELECT @Today + interval ceil(RAND() * 86400) second AS DateTimeField, RowNum + 1
FROM GenerateRandomTime
WHERE RowNum < 1e5
)
SELECT DateTimeField, (RAND() * 1e6)
FROM GenerateRandomTime;
-- 最大時間與最小時間
SELECT MIN(gts),
MAX(gts)
FROM IT201120
-- 依照資料筆數分成100組
-- 取當組別的開始與結束時間(gts),以及該組別的最大與最小值(val)
SET @rowPerGroup = (SELECT COUNT(*) / 100 FROM IT201120);
SELECT groupIndex, MIN(gts), MAX(gts), MIN(val), MAX(val)
FROM (
SELECT ceil(ROW_NUMBER() OVER (ORDER BY gts) / @rowPerGroup ) AS groupIndex, gts, val
FROM IT201120
) AS GroupResult
GROUP BY groupIndex
我承認臨時抱佛腳,OPEN BOOK學POSTGRESQL
請砍小力一點
既然是86400秒內隨意分組,那我就切成100組,以864秒為一組來切
Schema (PostgreSQL v13 (Beta))
create table it201120 (
id int generated always as identity
, gts timestamp not null
, val int not null
);
-- 輸入10萬筆
insert into it201120(gts, val)
select 'yesterday'::timestamp + interval '1 second' * ceil(random() * 86400)
, floor(random() * 1e6)
from generate_series(1, 1e5);
select min(gts)
, max(gts)
from it201120;
Query #1
with five_min_intervals as (
select
(select min(gts)::date from it201120) + ( n || ' seconds')::interval start_time,
(select min(gts)::date from it201120) + ((n+864) || ' seconds')::interval end_time
from generate_series(0, (24*60*60), 864) n
)
select min(m.gts), max(m.gts),min(m.val),max(m.val)
from it201120 m
right join five_min_intervals f
on m.gts >= f.start_time and m.gts < f.end_time
group by f.start_time, f.end_time
order by f.start_time;
min | max | min | max |
---|---|---|---|
2020-11-19T00:00:01.000Z | 2020-11-19T00:14:22.000Z | 4227 | 999447 |
2020-11-19T00:14:24.000Z | 2020-11-19T00:28:46.000Z | 367 | 999599 |
2020-11-19T00:28:48.000Z | 2020-11-19T00:43:08.000Z | 286 | 999960 |
2020-11-19T00:43:13.000Z | 2020-11-19T00:57:35.000Z | 1371 | 999962 |
2020-11-19T00:57:36.000Z | 2020-11-19T01:11:59.000Z | 920 | 996910 |
2020-11-19T01:12:01.000Z | 2020-11-19T01:26:23.000Z | 3770 | 999223 |
2020-11-19T01:26:25.000Z | 2020-11-19T01:40:46.000Z | 1140 | 999688 |
2020-11-19T01:40:50.000Z | 2020-11-19T01:55:08.000Z | 29 | 999103 |
2020-11-19T01:55:14.000Z | 2020-11-19T02:09:34.000Z | 871 | 999093 |
2020-11-19T02:09:36.000Z | 2020-11-19T02:23:55.000Z | 406 | 997703 |
2020-11-19T02:24:00.000Z | 2020-11-19T02:38:22.000Z | 1520 | 998819 |
2020-11-19T02:38:24.000Z | 2020-11-19T02:52:46.000Z | 816 | 999275 |
2020-11-19T02:52:48.000Z | 2020-11-19T03:07:11.000Z | 522 | 999010 |
2020-11-19T03:07:13.000Z | 2020-11-19T03:21:35.000Z | 552 | 996901 |
2020-11-19T03:21:36.000Z | 2020-11-19T03:35:59.000Z | 275 | 999397 |
2020-11-19T03:36:00.000Z | 2020-11-19T03:50:23.000Z | 1093 | 999944 |
2020-11-19T03:50:24.000Z | 2020-11-19T04:04:46.000Z | 2085 | 999091 |
2020-11-19T04:04:48.000Z | 2020-11-19T04:19:10.000Z | 9 | 999879 |
2020-11-19T04:19:12.000Z | 2020-11-19T04:33:35.000Z | 337 | 999961 |
2020-11-19T04:33:36.000Z | 2020-11-19T04:47:59.000Z | 1820 | 998675 |
2020-11-19T04:48:00.000Z | 2020-11-19T05:02:22.000Z | 443 | 999918 |
2020-11-19T05:02:24.000Z | 2020-11-19T05:16:47.000Z | 760 | 999808 |
2020-11-19T05:16:48.000Z | 2020-11-19T05:31:11.000Z | 159 | 999411 |
2020-11-19T05:31:12.000Z | 2020-11-19T05:45:35.000Z | 3029 | 999529 |
2020-11-19T05:45:36.000Z | 2020-11-19T05:59:59.000Z | 7576 | 998873 |
2020-11-19T06:00:02.000Z | 2020-11-19T06:14:22.000Z | 1307 | 998807 |
2020-11-19T06:14:24.000Z | 2020-11-19T06:28:46.000Z | 212 | 998015 |
2020-11-19T06:28:52.000Z | 2020-11-19T06:43:09.000Z | 871 | 999927 |
2020-11-19T06:43:12.000Z | 2020-11-19T06:57:33.000Z | 3836 | 999919 |
2020-11-19T06:57:36.000Z | 2020-11-19T07:11:59.000Z | 1495 | 999750 |
2020-11-19T07:12:00.000Z | 2020-11-19T07:26:22.000Z | 866 | 998540 |
2020-11-19T07:26:24.000Z | 2020-11-19T07:40:47.000Z | 2397 | 999209 |
2020-11-19T07:40:49.000Z | 2020-11-19T07:55:10.000Z | 2688 | 999556 |
2020-11-19T07:55:12.000Z | 2020-11-19T08:09:34.000Z | 501 | 998850 |
2020-11-19T08:09:36.000Z | 2020-11-19T08:23:59.000Z | 1176 | 999860 |
2020-11-19T08:24:01.000Z | 2020-11-19T08:38:23.000Z | 1861 | 999839 |
2020-11-19T08:38:24.000Z | 2020-11-19T08:52:47.000Z | 90 | 999800 |
2020-11-19T08:52:49.000Z | 2020-11-19T09:07:10.000Z | 11 | 999204 |
2020-11-19T09:07:14.000Z | 2020-11-19T09:21:34.000Z | 899 | 997214 |
2020-11-19T09:21:37.000Z | 2020-11-19T09:35:57.000Z | 2328 | 999016 |
2020-11-19T09:36:00.000Z | 2020-11-19T09:50:23.000Z | 546 | 999728 |
2020-11-19T09:50:24.000Z | 2020-11-19T10:04:47.000Z | 294 | 999319 |
2020-11-19T10:04:50.000Z | 2020-11-19T10:19:11.000Z | 1482 | 998081 |
2020-11-19T10:19:12.000Z | 2020-11-19T10:33:35.000Z | 1482 | 998338 |
2020-11-19T10:33:38.000Z | 2020-11-19T10:47:59.000Z | 679 | 999835 |
2020-11-19T10:48:01.000Z | 2020-11-19T11:02:23.000Z | 10 | 999399 |
2020-11-19T11:02:26.000Z | 2020-11-19T11:16:47.000Z | 363 | 998072 |
2020-11-19T11:16:50.000Z | 2020-11-19T11:31:10.000Z | 1204 | 998189 |
2020-11-19T11:31:13.000Z | 2020-11-19T11:45:35.000Z | 2 | 999570 |
2020-11-19T11:45:37.000Z | 2020-11-19T11:59:59.000Z | 41 | 998196 |
2020-11-19T12:00:00.000Z | 2020-11-19T12:14:22.000Z | 436 | 999402 |
2020-11-19T12:14:24.000Z | 2020-11-19T12:28:47.000Z | 266 | 998941 |
2020-11-19T12:28:48.000Z | 2020-11-19T12:43:11.000Z | 724 | 997037 |
2020-11-19T12:43:12.000Z | 2020-11-19T12:57:35.000Z | 553 | 999945 |
2020-11-19T12:57:36.000Z | 2020-11-19T13:11:57.000Z | 336 | 999934 |
2020-11-19T13:12:01.000Z | 2020-11-19T13:26:23.000Z | 1271 | 997728 |
2020-11-19T13:26:24.000Z | 2020-11-19T13:40:46.000Z | 8 | 999216 |
2020-11-19T13:40:48.000Z | 2020-11-19T13:55:11.000Z | 685 | 998181 |
2020-11-19T13:55:13.000Z | 2020-11-19T14:09:32.000Z | 1578 | 999730 |
2020-11-19T14:09:37.000Z | 2020-11-19T14:23:59.000Z | 1025 | 999015 |
2020-11-19T14:24:01.000Z | 2020-11-19T14:38:23.000Z | 946 | 999745 |
2020-11-19T14:38:24.000Z | 2020-11-19T14:52:47.000Z | 1568 | 997023 |
2020-11-19T14:52:48.000Z | 2020-11-19T15:07:11.000Z | 403 | 999763 |
2020-11-19T15:07:12.000Z | 2020-11-19T15:21:34.000Z | 935 | 999344 |
2020-11-19T15:21:36.000Z | 2020-11-19T15:35:59.000Z | 396 | 998724 |
2020-11-19T15:36:01.000Z | 2020-11-19T15:50:23.000Z | 1207 | 999194 |
2020-11-19T15:50:24.000Z | 2020-11-19T16:04:46.000Z | 4244 | 999131 |
2020-11-19T16:04:48.000Z | 2020-11-19T16:19:11.000Z | 1794 | 999524 |
2020-11-19T16:19:12.000Z | 2020-11-19T16:33:35.000Z | 455 | 999777 |
2020-11-19T16:33:37.000Z | 2020-11-19T16:47:58.000Z | 3205 | 999940 |
2020-11-19T16:48:00.000Z | 2020-11-19T17:02:23.000Z | 916 | 998656 |
2020-11-19T17:02:24.000Z | 2020-11-19T17:16:46.000Z | 1675 | 998034 |
2020-11-19T17:16:48.000Z | 2020-11-19T17:31:09.000Z | 344 | 999525 |
2020-11-19T17:31:12.000Z | 2020-11-19T17:45:35.000Z | 624 | 999038 |
2020-11-19T17:45:37.000Z | 2020-11-19T17:59:58.000Z | 340 | 999890 |
2020-11-19T18:00:00.000Z | 2020-11-19T18:14:23.000Z | 1795 | 999805 |
2020-11-19T18:14:24.000Z | 2020-11-19T18:28:46.000Z | 2024 | 998740 |
2020-11-19T18:28:48.000Z | 2020-11-19T18:43:09.000Z | 3658 | 995671 |
2020-11-19T18:43:12.000Z | 2020-11-19T18:57:35.000Z | 2448 | 998598 |
2020-11-19T18:57:36.000Z | 2020-11-19T19:11:59.000Z | 1609 | 998399 |
2020-11-19T19:12:00.000Z | 2020-11-19T19:26:23.000Z | 317 | 999677 |
2020-11-19T19:26:24.000Z | 2020-11-19T19:40:47.000Z | 2841 | 999100 |
2020-11-19T19:40:49.000Z | 2020-11-19T19:55:10.000Z | 930 | 998129 |
2020-11-19T19:55:12.000Z | 2020-11-19T20:09:35.000Z | 2950 | 999982 |
2020-11-19T20:09:36.000Z | 2020-11-19T20:23:59.000Z | 997 | 998526 |
2020-11-19T20:24:00.000Z | 2020-11-19T20:38:23.000Z | 1567 | 999616 |
2020-11-19T20:38:24.000Z | 2020-11-19T20:52:47.000Z | 2205 | 999964 |
2020-11-19T20:52:48.000Z | 2020-11-19T21:07:11.000Z | 1083 | 998963 |
2020-11-19T21:07:12.000Z | 2020-11-19T21:21:35.000Z | 283 | 998418 |
2020-11-19T21:21:36.000Z | 2020-11-19T21:35:59.000Z | 1083 | 998810 |
2020-11-19T21:36:01.000Z | 2020-11-19T21:50:23.000Z | 613 | 999572 |
2020-11-19T21:50:24.000Z | 2020-11-19T22:04:46.000Z | 866 | 999859 |
2020-11-19T22:04:48.000Z | 2020-11-19T22:19:11.000Z | 1381 | 999984 |
2020-11-19T22:19:12.000Z | 2020-11-19T22:33:34.000Z | 613 | 998816 |
2020-11-19T22:33:36.000Z | 2020-11-19T22:47:59.000Z | 690 | 996188 |
2020-11-19T22:48:01.000Z | 2020-11-19T23:02:23.000Z | 396 | 997978 |
2020-11-19T23:02:24.000Z | 2020-11-19T23:16:47.000Z | 2227 | 998265 |
2020-11-19T23:16:48.000Z | 2020-11-19T23:31:09.000Z | 2305 | 999701 |
2020-11-19T23:31:12.000Z | 2020-11-19T23:45:35.000Z | 723 | 999379 |
2020-11-19T23:45:36.000Z | 2020-11-19T23:59:59.000Z | 1396 | 999731 |
眼睛為之一亮!我要改學postgresql!!
突然想到,要是在某個864秒區間內沒有任何一筆資料...........
我卡住了
突然想到,要是在某個864秒區間內沒有任何一筆資料...........
若為了避免【某區間沒有任何一筆資料】之問題,那就不能用秒數來做區間,要換成以筆數來分段。
with temp AS (
SELECT
(@id :=@id+1) AS id,
it201120.gts,
it201120.val
FROM
it201120,
(select @id :=0) AS a
)
SELECT
min(gts) AS startts,
max(gts) AS endts,
min(val) AS minval,
max(val) AS maxval
from
temp,(select @n:=(select max(id) from temp)/99) AS a
group by round(id/@n)
以列表數來分段切段也有點怪
三年級的學生700人,請依分數高低,分成10組並列出每組最高最低分,最高最低學號
好像也沒什麼參考價值,真的是純資料庫了
您的問題也有問題哦。
假設萬一,這700人,全部都考100分或全部都考0分,那也分不出10組啊。
因此,硬性規定要『分成幾組?』,就可能會有盲點產生。
所以,殺豬大才說是【趣味SQL】,純資料庫而己。
有兩種分法,一種是以現有資料來分,但是元素數量過少時....
另一種是 分組的間隔是定好的,就是直方圖的分法,那就每組不一定會有值.
對應的函數分別是 ntile(), width_bucket().
各位大大可以先找資料做些測試評估,也可以另外發技術分享文,跟大家分享心得.㊗️假日愉快.
mysql 沒有 width_bucket() ?
MySQL 沒有, PostgreSQL 有. 所以我們可以利用 PostgreSQL 的 MySQL_FDW 來抓取 MySQL 的資料做分析.
這樣就能兩種DB協作了.
請教殺豬大,那我上面的程式碼,是否也具有width_bucket()的作用?因為那也是【間隔是定好】的。
答案會不一樣嗎?
width_bucket() 只是方便,減少我們額外建立table.
我對幾位大大的測試實驗精神,感到欽佩.
以列表數來分段切段也有點怪
其實,以筆數分段切段也有其適用場合,
比如,【湧渡日月潭】為了避免所有人一齊下水造成危險,
因此,按報名次序,每100人成一組依序下水,也很合理。
所以資料分析還是要以實際需要來區分,才是重點,可能在我的領域裡,很少遇到這種命題,才會突然覺得怪吧,還真的要好好向各位討教了
japhenchen大大要不要也發個趣味SQL.
我發覺每次趣味SQL 都會讓我看到一些不同的想法,或是作法.
甚至可以延伸出其他有意思的應用.
寫了篇 width_bucket() 的介紹
https://ithelp.ithome.com.tw/articles/10254412