在SQL Server中使用CTE查询进行递归匹配

10

我有两个数据表(以下是定义它们的SQL语句)

IF EXISTS (SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'ETab')
DROP TABLE ETab;
GO

CREATE TABLE ETab 
    ([MRN] varchar(20), [LSPEC] varchar(2), [ADT] DATETIME, [SDT] DATETIME, [Source] varchar(20), [Enum] varchar(20));
GO

INSERT INTO ETab ([MRN], [LSPEC], [ADT], [SDT], [Source], [Enum]) 
VALUES 
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-04-01 00:00:00.000', 20),   CONVERT(datetime, '2017-04-30 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD04'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-05-01 00:00:00.000', 20),   CONVERT(datetime, '2017-05-31 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD05'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-06-01 00:00:00.000', 20),   CONVERT(datetime, '2017-06-30 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD06'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-07-01 00:00:00.000', 20),   CONVERT(datetime, '2017-07-31 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD07'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-08-01 00:00:00.000', 20),   CONVERT(datetime, '2017-08-31 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD08'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-09-01 00:00:00.000', 20),   CONVERT(datetime, '2017-09-30 00:00:00.000', 20),   'PRODPAT',  'HOMEBLD09'),
    ('HOMECARE',    'HQ',       CONVERT(datetime, '2017-04-01 00:00:00.000', 20),   CONVERT(datetime, '2017-04-30 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG04HM'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-05-01 00:00:00.000', 20),   CONVERT(datetime, '2017-05-31 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG05HM'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-06-01 00:00:00.000', 20),   CONVERT(datetime, '2017-06-30 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG06HM'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-07-01 00:00:00.000', 20),   CONVERT(datetime, '2017-07-31 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG07HM'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-08-01 00:00:00.000', 20),   CONVERT(datetime, '2017-08-31 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG08HM'),
    ('HOMECARE',    'HM',       CONVERT(datetime, '2017-09-01 00:00:00.000', 20),   CONVERT(datetime, '2017-09-30 00:00:00.000', 20),   'PRODPAT',  'HOMEDRG09HM'), 
    ('111824',      'UR',       CONVERT(datetime, '2017-09-22 00:00:00.000', 20),   CONVERT(datetime, '2017-09-22 00:00:00.000', 20),   'OP',   'OP1118240003'),
    ('111824',      'NL',       CONVERT(datetime, '2017-04-19 00:00:00.000', 20),   CONVERT(datetime, '2017-04-19 00:00:00.000', 20),   'OP',   'OP1118240001'),
    ('111824',      'MS',       CONVERT(datetime, '2017-06-30 00:00:00.000', 20),   CONVERT(datetime, '2017-06-30 00:00:00.000', 20),   'OP',   'OP1118240002'),
    ('111824',      'MS',       CONVERT(datetime, '2017-04-24 00:00:00.000', 20),   CONVERT(datetime, '2017-04-24 00:00:00.000', 20),   'IP',   'IP1118240001'),
    ('111824',      'MS',       CONVERT(datetime, '2017-04-28 00:00:00.000', 20),   CONVERT(datetime, '2017-04-28 00:00:00.000', 20),   'IP',   'IP1118240005'),
    ('111824',      'MS',       CONVERT(datetime, '2017-04-27 00:00:00.000', 20),   CONVERT(datetime, '2017-04-27 00:00:00.000', 20),   'IP',   'IP1118240004'),
    ('111824',      'MS',       CONVERT(datetime, '2017-04-26 00:00:00.000', 20),   CONVERT(datetime, '2017-04-26 00:00:00.000', 20),   'IP',   'IP1118240003'),
    ('111824',      'MS',       CONVERT(datetime, '2017-04-25 00:00:00.000', 20),   CONVERT(datetime, '2017-04-25 00:00:00.000', 20),   'IP',   'IP1118240002');
GO

IF EXISTS (SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'UTab')
DROP TABLE UTab;
GO

CREATE TABLE UTab 
    (MRN varchar(20), SIDate DATETIME, LSPEC varchar(2), Source varchar(20), Enum varchar(20), Iteration varchar(20));
GO

INSERT INTO UTab 
    (MRN, SIDate, LSPEC, Source, Enum, Iteration) 
VALUES 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-17 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HQ', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-04-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-01 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-06-26 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-30 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL), 
('HOMECARE', CONVERT(datetime, '2017-05-04 00:00:00.000', 20), 'HM', 'N', NULL, NULL),
('111824', CONVERT(datetime, '2017-04-21 00:00:00.000', 20), 'MS', 'IP', NULL, NULL), 
('111824', CONVERT(datetime, '2017-04-24 00:00:00.000', 20), 'NL', 'OP', NULL, NULL), 
('111824', CONVERT(datetime, '2017-04-27 00:00:00.000', 20), 'NL', 'OP', NULL, NULL), 
('111824', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'NL', 'OP', NULL, NULL), 
('111824', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'NL', 'OP', NULL, NULL), 
('111824', CONVERT(datetime, '2017-04-20 00:00:00.000', 20), 'NL', 'OP', NULL, NULL);
GO

SELECT * FROM ETab
WHERE Source = 'PRODPAT' AND LSPEC = 'HM' 
GO

SELECT * FROM UTab  
WHERE LSPEC = 'HM';
GO

IF EXISTS (SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__Tmp')
DROP TABLE __Tmp;
GO

我有一个C#应用程序,在其中用户在运行时请求某些匹配子句。使用用户指定的信息,我生成一些SQL语句并执行更新UTab

通过生成的SQL语句,我希望执行一些匹配操作,根据链接字段[MRN]以及[UTab].[SIDate]是否位于[ETab].[ADT][ETab].[SDT]的前后指定天数内更新[UTab].[Enum]。因此,用户可能会指定两个命令,第一个:

MatchCmd:MRN,LSPEC:ETab:UTab:ADT:SDT:SIDate:0:1:'Iteration#1':WHERE [UTab].[Source] = 'OP' AND [UTab].[LSPEC] = [ETab].[LSPEC] 

这段话的意思是,在源表格 ETab 中匹配 MRNLSPEC,并更新目标表格 UTab,其中满足条件: SIDate > ADT - 0 day(s)SIDate < SDT + 1 day(s),以及 [UTab].[Source] = 'OP'。我会用“迭代号” Iteration#1 标记所有符合条件的匹配项。

我的代码会生成以下 SQL:

;WITH cte AS (
    SELECT [ETab].[Enum] AS Enum, 
           [ETab].[MRN] AS Link, 
           [ETab].[ADT] AS ADT, 
           [ETab].[SDT] AS SDT, 
           [UTab].[SIDate] AS DT, 
           [ETab].[MRN] AS [MRN], 
           [ETab].[LSPEC] AS [LSPEC], 
           [ETab].[Source] AS [Source], 
           ROW_NUMBER() OVER (PARTITION BY [UTab].[MRN], [UTab].[LSPEC], [UTab].[SIDate] 
                ORDER BY ABS(DATEDIFF(mi, [UTab].[SIDate], [ETab].[ADT]))) AS Idx, 
           ABS(DATEDIFF(mi, [UTab].[SIDate], [ETab].[ADT])) AS Diff 
    FROM [UTab] 
        LEFT JOIN [ETab] ON [UTab].[MRN] = [ETab].[MRN] 
    WHERE ([UTab].[SIDate] BETWEEN 
        DATEADD(dd, -0, [ETab].[ADT]) AND 
        DATEADD(dd, 1, [ETab].[SDT]) AND [Iteration] IS NULL) 
            AND ETab.Source = 'OP' 
)  SELECT * 
   INTO __Tmp 
   FROM cte; 
GO

为了获取符合条件(在时间窗口内且遵守自定义 where 子句)的所有记录。然后通过另一个生成的查询来更新 [UTab].[Enum][UTab].[Iteration]

UPDATE [UTab] 
SET [ENum] = [__Tmp].[ENum], [Iteration] = N'Iteration#1' 
--SELECT __Tmp.ENum, __Tmp.Link, __Tmp.LSPEC, __Tmp.ADT, __Tmp.SDT, __Tmp.DT, __Tmp.Idx 
FROM [UTab] AS up 
    INNER JOIN [__Tmp] 
        ON [up].[MRN] = [__Tmp].[Link] 
           AND [up].[SIDate] = [__Tmp].[DT] 
           AND [up].[LSPEC] = [__Tmp].[LSPEC] 
WHERE __Tmp.Idx = 1;

看起来这个方法和SQL语句运行得不错,但是有一些问题:

问:我使用的方法/SQL是否有明显的问题?

感谢您的时间。


如果你把这个问题简化一下,我认为你可以得到更好的答案。对我来说,书面要求不太清楚。 - paparazzo
在您的第一个查询中,您需要将where子句中的所有内容移动到on子句中。在更新语句中,最好将AND up.SIDate = __Tmp.DTAND up.LSPEC = __Tmp.LSPEC移动到on子句中以获得语义,但这并不是关键。 - Zohar Peled
如果您编辑了这个问题,Q2现在已经过时,那么是否值得再次编辑以删除该问题? - MJH
3个回答

3
由于您正在搜索符合条件并通过rn进行过滤的任何内容,因此up.L != cte.L.
SELECT cte.E, [Iteration] = N'00-00-00-CA', *
FROM [Up] 
    INNER JOIN cte ON [Up].[M] = [cte].[M] AND [cte].[rn] = 1
WHERE [cte].[E] IS NOT NULL AND (
    [Up].[DTE] BETWEEN 
        DATEADD(dd, -0, [cte].[ADT]) AND 
        DATEADD(dd, 0, [cte].[SDT])) 
            AND [Up].[F] = 'Y' 
            AND [Up].[S] = 'HC' 
            ----comment this line
            --AND [Up].[L] = [cte].[L]; -- <<<<<<<<<<<<<<<<

http://sqlfiddle.com/#!18/d1483/2/0

我修改了一些数据:添加了ZZ,这完全破坏了你的查询。只有两行匹配。

ps 修复了插入问题,包括E列长度和第一个插入中列名的问题。


但是如果删除 AND [Up].[L] = [cte].[L],那么第一个查询就会出错。 - MoonKnight
在cte内部,我猜你确实需要它。但在外部查询中,对我来说没有意义。 - Ivan Starostin

3

对我来说,CTE的用法有点奇怪,因为在随后的查询中,你并没有真正地使用它。我建议将其移至UPDATE语句中。

这个查询似乎并没有真正地关联“Source”。我不确定这是否是意图。如果UTab针对MRN/LSPEC组合有多个来源,可能会导致问题。

因此,我想到了以下的解决方案:

DECLARE @ADT_Adjustment INT = 0;
DECLARE @SDT_Adjustment INT = 1;
DECLARE @Iteration INT = 1;

WITH SequencedJoin AS (
    SELECT
        ETab.MRN, ETab.LSPEC, ETab.ADT, ETab.SDT, UTab.SIDate, ETab.Enum, ETab.[Source], UTab.Enum AS WriteEnum, UTab.Iteration AS WriteIteration
    ,   DENSE_RANK() OVER ( 
            PARTITION BY UTab.MRN, UTab.LSPEC, UTab.[Source], UTab.SIDate 
            ORDER BY ABS( DATEDIFF( MINUTE, UTab.SIDate, ETab.ADT ) ) 
        ) AS Ordinal
    FROM
        @UTab AS UTab
    JOIN    @ETab AS ETab ON (
            ETab.MRN = UTab.MRN
        AND ETab.LSPEC = UTab.LSPEC
        AND ETab.[Source] = UTab.[Source]
        AND UTab.SIDate BETWEEN DATEADD( dd, -@ADT_Adjustment, ETab.ADT ) AND DATEADD( dd, @SDT_Adjustment, ETab.SDT )
        )
    WHERE
        UTab.Iteration IS NULL
)
UPDATE
    SequencedJoin
SET
    WriteEnum = SequencedJoin.Enum
,   WriteIteration = N'Iteration#' + CAST( @Iteration AS VARCHAR( 2 ) )
WHERE
    SequencedJoin.[Source] = 'OP'
AND SequencedJoin.Ordinal = 1

2

虽然不是完整的答案,但这个索引将加速您的CTE:

CREATE INDEX T1 ON UTAB (
    MRN,
    SIDATE
)
INCLUDE
(
    LSPEC,
    Iteration
)

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接