iT邦幫忙

2

[SQL]連續範圍排除另一個範圍問題

在 S.O上有一個SQL問題Splits records based on range

在大家分享我的解法

有兩張表

Item Table

| Id | ItemId | FROM |  To |
|----|--------|------|-----|
|  1 |      1 |    1 | 100 |
|  2 |      1 |  101 | 500 |
|  3 |      1 |  600 | 700 |

ItemRange Table

| Id | ItemId | FROM | To |
|----|--------|------|----|
|  1 |      1 |   50 | 60 |
|  2 |      1 |   70 | 80 |

期望輸出

| ItemId | FROM |  TO |
|--------|------|-----|
|      1 |    1 |  49 |
|      1 |   61 |  69 |
|      1 |   81 | 500 |
|      1 |  600 | 700 |

SQLfiddle


問題描述:

提問者想要產生一個結果集,關於Item表依照 ItemId 創立 [FROM][TO] 的值,但要排除在
ItemRange表連續範圍內的值。

例如:
Item表有一列 1 ~ 100

| ItemId | FROM  |  To |
| 1      |    1  | 100 |

但由於ItemRange表有兩列 50 ~ 6070 ~ 80

| ItemId | FROM  |  To |
| 1      |   50  | 60  |
| 1      |   70  | 80  |

所以這部份期望輸出,Item表要排除ItemRange表連續範圍數字

如下

| ItemId | FROM |  TO |
|--------|------|-----|
|      1 |    1 |  49 |
|      1 |   61 |  69 |
|      1 |   81 | xxx |

我看到連續範圍,就會聯想這個是一個(Islands and gaps problem)

我的想法是要先兩個full連續範圍資料表,我先使用CTE遞迴來實作

  1. Item
  2. ItemRange

之後再使用 except 對於 Item 排除 ItemRange 要排除的數字範圍

;WITH CTE AS (
  SELECT ItemId,[FROM],[TO]
  FROM Item
  UNION ALL
  SELECT ItemId,[FROM]+ 1,[TO]
  FROM CTE
  WHERE [FROM]+ 1 <= [TO]
), CTE2 AS(
  SELECT ItemId,[FROM],[TO]
  FROM ItemRange
  UNION ALL
  SELECT  ItemId,[FROM]+ 1,[TO]
  FROM CTE2
  WHERE [FROM]+ 1 <= [TO]
),CTE3 AS(
  SELECT ItemId,[FROM]
  FROM CTE
  except
  SELECT ItemId,[FROM]
  FROM CTE2
)

最後就是重頭戲,使用ROW_NUMBER + Window Function 做出標示連續範圍的群組

SELECT ItemId,
       MIN([FROM]) 'FROM',
       MAX([FROM]) 'TO'
FROM (
  SELECT ItemId,[FROM],[FROM] - ROW_NUMBER() OVER(ORDER BY [FROM]) grp
  FROM CTE3
) t1
GROUP BY grp,ItemId
option (maxrecursion 0)

解答SQLfiddle

目前提問者還有一個問題是他的範圍太大,導致執行效能不是很好 這個問題我目前還沒有特別的解法 歡迎有想法的大大可以提供建議

P.S. 我覺得此問題很清楚,但我不知道為什麼會被投Close


1 則留言

1
fysh711426
iT邦研究生 4 級 ‧ 2019-04-05 04:57:27

這題好難,想試試看能不能不用迴圈寫出來。 /images/emoticon/emoticon16.gif

作法分為兩階段:

  1. 先合併 Item 連續或重疊的區間
SELECT A.ItemId, A.[FROM],
	   CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
	   --重新編一個Id
	   A.Id+'-1' AS Id,
	   A.Id AS PId1,
	   CAST(B.Id AS NVARCHAR(MAX)) AS PId2
FROM CTE_MERGE AS A

找到可以合併的區間後,會加入一筆新的區間,並紀錄 PId1、PId2 代表被合併的兩個區間,之後 CLEAR CTE 中就可以利用 PId1、PId2 排除不要的資料。

  1. 將 ItemRange 從 Item 內排除
CROSS APPLY (VALUES(1),(2)) AS C(G)

這邊需要把區間拆開,一直想不到用什麼方法可以將一筆資料拆成兩筆,大部分時間是卡在這裡,最後發現可以用 CROSS APPLY + VALUES,在配合 CASE WHEN 就可以漂亮的把區間拆開。

CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],

最後這邊是關鍵,本來我以為需要寫很多判斷,因為 Item、ItemRange 交集會產生很多種可能,但很神奇的是只用兩行就完成了,出乎意料。

情況如下:

第一種:

A:10-20   [   ]      
B:18-25      [   ]   

Range1: 10-17
Range2: 26-20

第二種:

A:10-20      [   ]      
B:05-12   [   ]   

Range1: 10-4
Range2: 13-20

第三種:

A:10-20   [   ]      
B:12-18    [ ]   

Range1: 10-11
Range2: 19-20

第四種:

A:10-20    [ ]      
B:05-25   [   ]   

Range1: 10-04
Range2: 16-20

第五種:

A:10-20   [   ]      
B:10-20   [   ]   

Range1: 10-09
Range2: 21-20

最後只要把負區間去掉,就是期望的結果。

結果

 ItemId	 FROM	 TO
--------|-----|-------
   1      1	     49
   1      61	 69
   1      81	 500
   1      600	 700

總共用了四個 CTE 完成:

DECLARE @Item TABLE
(
	[Id] INT,
	[ItemId] INT, 
	[FROM] INT,
	[TO] INT
)

DECLARE @ItemRange TABLE
(
	[Id] INT,
	[ItemId] INT, 
	[FROM] INT,
	[TO] INT
)

INSERT INTO @Item VALUES
(1,1,1,100),
(2,1,101,500),
(3,1,600,700)

INSERT INTO @ItemRange VALUES
(1,1,50,60),
(2,1,70,80)

--合併Item重疊部分
--排除被完全包含的區間 A[ B[] ]
--並合併下面這種類型的區間
-- A[   ]
-- B    [   ]
;WITH CTE_MERGE AS
(
	SELECT A.ItemId, A.[FROM], A.[TO],
	       CAST(A.Id AS NVARCHAR(MAX)) AS Id,
	       CAST(NULL AS NVARCHAR(MAX)) AS PId1, 
		   CAST(NULL AS NVARCHAR(MAX)) AS PId2
	FROM @Item AS A
	LEFT JOIN @Item AS B
		ON B.ItemId=A.ItemId AND
		   --排除A被包在B的資料,這種資料會造成下方無窮迴圈
		   B.Id<>A.Id AND
		   B.[FROM] <= A.[FROM] AND B.[TO] >= A.[TO]
	WHERE B.Id IS NULL
	UNION ALL
	SELECT A.ItemId, A.[FROM],
		   CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
		   --重新編一個Id
		   A.Id+'-1' AS Id,
		   A.Id AS PId1,
		   CAST(B.Id AS NVARCHAR(MAX)) AS PId2
	FROM CTE_MERGE AS A
	CROSS APPLY (
		SELECT *
		FROM (
			SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @Item AS B
			WHERE B.ItemId=A.ItemId AND
			      --找到可以合併的區間
			      CAST(B.Id AS NVARCHAR(MAX))<>A.Id AND
				  A.[TO]+1 >= B.[FROM] AND A.[TO]+1 <= B.[TO]
		) AS X
		WHERE X.R=1
	) AS B
)

--SELECT * FROM CTE_MERGE

--排除遞迴中殘留的部分
,CTE_MERGE_CLEAR AS 
(
	SELECT A.Id, A.ItemId, A.[FROM], A.[TO]
	FROM CTE_MERGE AS A
	LEFT JOIN CTE_MERGE AS B ON A.Id=B.PId1 OR A.Id=B.PId2
	--沒有父Id的資料才是最後結果
	WHERE B.Id IS NULL
)

--SELECT * FROM CTE_MERGE_CLEAR

--排除ItemRange部分
--判斷重疊有這五種可能
-- A [   ]  [ ]    [   ] [   ]   [   ]
-- B  [ ]  [   ] [   ]     [   ] [   ]
,CTE AS
(
	SELECT ItemId, [FROM], [TO], Id,
		   CAST(NULL AS NVARCHAR(MAX)) AS PId
	FROM CTE_MERGE_CLEAR
	UNION ALL
	SELECT *
	FROM (
		SELECT A.ItemId,
		       CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
		       CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],
			   --重新編一個Id
		       CASE WHEN G=1 THEN A.Id+'-1' ELSE A.Id+'-2' END AS Id,
			   A.Id AS PId
		FROM CTE AS A
		CROSS APPLY (
			SELECT *
			FROM (
				SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @ItemRange AS B
				WHERE B.ItemId=A.ItemId AND
				      --找到Item和ItemRange重疊的部分
					  ((A.[FROM] >= B.[FROM] AND A.[FROM] <= B.[TO]) OR
					   (A.[TO] >= B.[FROM] AND A.[TO] <= B.[TO]) OR
					   (A.[FROM] <= B.[FROM] AND A.[TO] >= B.[TO]))
			) AS X
			--取一筆就好,一次迴圈拆一次區間就好
			WHERE X.R=1
		) AS B
		--將一筆資料拆成兩筆
		CROSS APPLY (VALUES(1),(2)) AS C(G)
	) AS X
	--負區間就不要了
	WHERE X.[TO]-X.[FROM] > 0
)

--SELECT * FROM CTE

--排除遞迴中殘留的部分
,CTE_CLEAR AS 
(
	SELECT A.ItemId, A.[FROM], A.[TO]
	FROM CTE AS A
	LEFT JOIN CTE AS B ON A.Id=B.PId
	--沒有父Id的資料才是最後結果
	WHERE B.Id IS NULL
)

SELECT * 
FROM CTE_CLEAR
ORDER BY ItemId, [FROM]

SQL


CTE 效能不是很好,用 TEMP TABLE 優化:

DECLARE @Item TABLE
(
	[Id] INT,
	[ItemId] INT, 
	[FROM] INT,
	[TO] INT
)

DECLARE @ItemRange TABLE
(
	[Id] INT,
	[ItemId] INT, 
	[FROM] INT,
	[TO] INT
)

INSERT INTO @Item VALUES
(1,1,1,100),
(2,1,101,500),
(3,1,600,700)

INSERT INTO @ItemRange VALUES
(1,1,50,60),
(2,1,70,80)

--暫存表優化CTE
DECLARE @TEMP_MERGE TABLE
(
	ItemId INT, 
	[FROM] INT,
	[TO] INT,
	Id NVARCHAR(MAX),
	PId1 NVARCHAR(MAX),
	PId2 NVARCHAR(MAX)
)
DECLARE @TEMP_MERGE_CLEAR TABLE
(
	Id NVARCHAR(MAX),
	ItemId INT, 
	[FROM] INT,
	[TO] INT
)
DECLARE @TEMP TABLE
(
	ItemId INT, 
	[FROM] INT,
	[TO] INT,
	Id NVARCHAR(MAX),
	PId NVARCHAR(MAX)
)

--合併Item重疊部分
;WITH CTE_MERGE AS
(
	SELECT A.ItemId, A.[FROM], A.[TO],
	       CAST(A.Id AS NVARCHAR(MAX)) AS Id,
	       CAST(NULL AS NVARCHAR(MAX)) AS PId1, 
		   CAST(NULL AS NVARCHAR(MAX)) AS PId2
	FROM @Item AS A
	LEFT JOIN @Item AS B
		ON B.ItemId=A.ItemId AND
		   B.Id<>A.Id AND
		   B.[FROM] <= A.[FROM] AND B.[TO] >= A.[TO]
	WHERE B.Id IS NULL
	UNION ALL
	SELECT A.ItemId, A.[FROM],
		   CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
		   A.Id+'-1' AS Id, 
		   A.Id AS PId1,
		   CAST(B.Id AS NVARCHAR(MAX)) AS PId2
	FROM CTE_MERGE AS A
	CROSS APPLY (
		SELECT *
		FROM (
			SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @Item AS B
			WHERE B.ItemId=A.ItemId AND
			      CAST(B.Id AS NVARCHAR(MAX))<>A.Id AND
				  A.[TO]+1 >= B.[FROM] AND A.[TO]+1 <= B.[TO]
		) AS X
		WHERE X.R=1
	) AS B
)

INSERT INTO @TEMP_MERGE 
SELECT * FROM CTE_MERGE

--SELECT * FROM @TEMP_MERGE

--排除遞迴中殘留的部分
;WITH CTE_MERGE_CLEAR AS 
(
	SELECT A.Id, A.ItemId, A.[FROM], A.[TO]
	FROM @TEMP_MERGE AS A
	LEFT JOIN @TEMP_MERGE AS B ON A.Id=B.PId1 OR A.Id=B.PId2
	WHERE B.Id IS NULL
)

INSERT INTO @TEMP_MERGE_CLEAR 
SELECT * FROM CTE_MERGE_CLEAR

--SELECT * FROM @TEMP_MERGE_CLEAR

--排除ItemRange部分
;WITH CTE AS
(
	SELECT ItemId, [FROM], [TO], Id,
		   CAST(NULL AS NVARCHAR(MAX)) AS PId
	FROM @TEMP_MERGE_CLEAR
	UNION ALL
	SELECT *
	FROM (
		SELECT A.ItemId,
		       CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
		       CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],
		       CASE WHEN G=1 THEN A.Id+'-1' ELSE A.Id+'-2' END AS Id,
			   A.Id AS PId
		FROM CTE AS A
		CROSS APPLY (
			SELECT *
			FROM (
				SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @ItemRange AS B
				WHERE B.ItemId=A.ItemId AND
					  ((A.[FROM] >= B.[FROM] AND A.[FROM] <= B.[TO]) OR
					   (A.[TO] >= B.[FROM] AND A.[TO] <= B.[TO]) OR
					   (A.[FROM] <= B.[FROM] AND A.[TO] >= B.[TO]))
			) AS X
			WHERE X.R=1
		) AS B
		CROSS APPLY (VALUES(1),(2)) AS C(G)
	) AS X
	WHERE X.[TO]-X.[FROM] > 0
)

INSERT INTO @TEMP
SELECT * FROM CTE

--SELECT * FROM @TEMP

--排除遞迴中殘留的部分
;WITH CTE_CLEAR AS 
(
	SELECT A.ItemId, A.[FROM], A.[TO]
	FROM @TEMP AS A
	LEFT JOIN @TEMP AS B ON A.Id=B.PId
	WHERE B.Id IS NULL
)

SELECT * 
FROM CTE_CLEAR
ORDER BY ItemId, [FROM]

SQL

我要留言

立即登入留言