使用SQL计算连续重复字段的数量

4

我有这些数据在myTable表中:

  Date           Status    PersonID
-----------------------------------------
   2018/01/01         2        2015     ┐  2
   2018/01/02         2        2015     ┘
   2018/01/05         2        2015     ┐
   2018/01/06         2        2015       3
   2018/01/07         2        2015     ┘
   2018/01/11         2        2015     - 1
   2018/01/01         2        1018     - 1
   2018/01/03         2        1018     - 1
   2018/01/05         2        1018     ┐ 2
   2018/01/06         2        1018     ┘
   2018/01/08         2        1018     ┐ 2
   2018/01/09         2        1018     ┘
   2018/01/03         2        1625     ┐
   2018/01/04         2        1625       4
   2018/01/05         2        1625     
   2018/01/06         2        1625     ┘
   2018/01/17         2        1625     - 1
   2018/01/29         2        1625     - 1
-----------------------------------

我需要对连续重复的值进行计数,如下所示:

我需要的结果是这样的:

   count    personid
    -----------------
    2        2015
    3        2015
    1        2015
    1        1018
    1        1018
    2        1018
    2        1018
    4        1625
    1        1625
    1        1625

我正在使用SQL Server 2016 - 请帮忙


1
你曾经尝试过什么? - D-Shih
1
为什么要给我点踩,如果我能解决问题就不会在这里问了。 - Alavi
阅读下降投票的替代文本:“这个问题没有显示任何研究努力”。 - Dale K
5个回答

4
这是一个“间隙和岛屿”问题,您可以尝试以下方法。
;with cte 
     as (select *, 
                dateadd(day, -row_number() 
                                over (partition by status, personid 
                                  order by [date] ), [date]) AS grp 
         FROM   @table
     )
     ,cte1 
     AS (select *,row_number() over(partition by  personid, grp,status order by [date]) rn,
                count(*) over(partition by personid, grp) ct 
         from   cte
        ) 

select  ct as count, personid 
from cte1 
where rn=1

在线演示

注意: 由于没有可以用于按照期望输出方式排序的列,因此您可能无法按照相同顺序获取行。


2
这种问题被称为“间隙和岛屿”。您要么识别连续的数据集(岛屿),要么识别两个岛屿之间的值范围(间隙)。有许多不同的方法可以实现这些结果,并且对于大型数据集也表现良好。您可以参考下面写得很好的文章。

https://www.itprotoday.com/sql-server/solving-gaps-and-islands-enhanced-window-functions

https://www.red-gate.com/simple-talk/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/

https://www.sqlshack.com/data-boundaries-finding-gaps-islands-and-more/

这是对你的问题的尝试。
CREATE TABLE #test 
(
     dt DATETIME
    ,Status INT
    ,PersonID INT
)

INSERT INTO #Test (dt, Status, PersonID) VALUES
('2018/01/01', 2, 2015),
('2018/01/02', 2, 2015),
('2018/01/05', 2, 2015),
('2018/01/06', 2, 2015),
('2018/01/07', 2, 2015),
('2018/01/11', 2, 2015),
('2018/01/01', 2, 1018),
('2018/01/03', 2, 1018),
('2018/01/05', 2, 1018),
('2018/01/06', 2, 1018),
('2018/01/08', 2, 1018),
('2018/01/09', 2, 1018),
('2018/01/03', 2, 1625),
('2018/01/04', 2, 1625),
('2018/01/05', 2, 1625),
('2018/01/06', 2, 1625),
('2018/01/17', 2, 1625),
('2018/01/29', 2, 1625)

;with cte_dt_from
AS
(
    SELECT PersonID, MIN(Dt) dt_from_start
    FROM #Test
    GROUP BY PersonID
),
cte_offset_num
AS
(
SELECT      T1.PersonID, T1.dt, DATEDIFF(DAY, T2.dt_from_start, T1.dt) dt_offset
FROM        #test T1
INNER JOIN  cte_dt_from T2 ON T2.PersonID = T1.PersonID
),
cte_starting_point
AS
(
    SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
    FROM cte_offset_num AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM cte_offset_num AS B
        WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset - 1)
)
,
cte_ending_point
AS
(
    SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
    FROM cte_offset_num AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM cte_offset_num AS B
        WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset + 1)
)
SELECT (E.dt_offset - S.dt_offset)  + 1 AS [count], S.PersonID
FROM cte_starting_point AS S
JOIN cte_ending_point AS E ON E.PersonID = S.PersonID AND E.rownum = S.rownum
ORDER BY S.PersonID;

DROP TABLE #Test;

1
WITH T1 AS
(SELECT Date,
       Date - ROW_NUMBER() OVER (PARTITION BY Status, PersonID ORDER BY Date) AS Grp
FROM myTable)
SELECT personid,
       ROW_NUMBER() OVER (PARTITION BY Grp ORDER BY Date) AS Consecutive
FROM T1

在这个结果上,您可以应用 MAX() 函数,以获取每个 personid 的记录数。

请参考 this 问题以获取详细拆分信息。


我们没有名为C的列,无法使用“ORDER BY C”。 - Alavi
不能将日期和数字相减,如下所示:"Date - ROW_NUMBER() OVER (PARTITION BY Status, PersonID ORDER BY Date) AS Grp"。 - Alavi
更正了。那是一个打字错误。 - Tom J Muthirenthi

1

找出两个日期之间的差距是主要挑战,对于每个日期,您可以使用row_number()分析函数和datediff函数来创建该差距

with cte as
(

select '2018-01-01' as d, 2 as id , 2015 as pid
union all
select '2018-01-02',2,2015
union all
select '2018-01-05',2,2015 union all
select '2018-01-06',2,2015 union all
select '2018-01-07',2,2015 
union all
select '2018-01-11',2,2015  


), cte1 as (SELECT *, 
                datediff(day, Row_number() 
                                OVER ( 
                                  partition BY id, pid 
                                  ORDER BY [d] ), [d]) AS dif
         FROM   cte
         ) select distinct pid,count(*) over(partition by pid,dif) as cnt from cte1

0

这是最简单、最小的查询。

 CREATE TABLE #T (
      [Date] date,
      [Status] int,
      PersonId int
    );
    INSERT #T
      VALUES ('2018/01/01', 2, 2015),
      ('2018/01/02', 2, 2015),
      ('2018/01/05', 2, 2015),
      ('2018/01/06', 2, 2015),
      ('2018/01/07', 2, 2015),
      ('2018/01/11', 2, 2015),
      ('2018/01/01', 2, 1018),
      ('2018/01/03', 2, 1018),
      ('2018/01/05', 2, 1018),
      ('2018/01/06', 2, 1018),
      ('2018/01/08', 2, 1018),
      ('2018/01/09', 2, 1018),
      ('2018/01/03', 2, 1625),
      ('2018/01/04', 2, 1625),
      ('2018/01/05', 2, 1625),
      ('2018/01/06', 2, 1625),
      ('2018/01/17', 2, 1625),
      ('2018/01/29', 2, 1625)


    SELECT
      MAX(cnt),
      personid
    FROM (SELECT
      ROW_NUMBER() OVER (PARTITION BY GRP ORDER BY [Date]) AS cnt,
      personid,
      GRP
    FROM (SELECT
      personid,
      [Date],
      DATEDIFF(DAY, '1900-01-01', [Date]) - ROW_NUMBER() OVER (ORDER BY Personid DESC) AS GRP
    FROM #T) A) AS B
    GROUP BY personid,
             GRP
    ORDER BY PersonId DESC

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接