TSQL查询以连接和删除共同前缀

3

我有一些数据

id    ref
==   ==========
1    3536757616
1    3536757617
1    3536757618

我想获取结果

1    3536757616/7/8

因此,基本上数据是按ID聚合的,参考文献被连接在一起,用斜杠“/”分隔,但删除任何公共前缀。如果数据如下:

id    ref
==   ==========
2    3536757628
2    3536757629
2    3536757630

I would want to get the result

2    3536757629/28/30

我知道我可以通过使用简单地连接引用来进行操作。
SELECT distinct
    id,
    stuff ( ( SELECT
                  '/ ' + ref 
              FROM
                  tableA tableA_1
              where tableA_1.id = tableA_2.id
    FOR XML PATH ( '' ) ) , 1 , 2 , '' )
from TableA tableA_2

提供

1   3536757616/ 3536757617/ 3536757618
2   3536757628/ 3536757629/ 3536757630

但是我需要的是删除公共元素的部分......


测试数据的代码:

create table tableA (id int, ref varchar(50))

insert into tableA
select 1, 3536757616
union select 1, 3536757617
union select 1, 3536757618
union select 2, 3536757628
union select 2, 3536757629
union select 2, 3536757630
3个回答

3
WITH hier(cnt) AS
        (
        SELECT  1
        UNION ALL
        SELECT  cnt + 1
        FROM    hier
        WHERE   cnt <= 100
        )
SELECT  CASE WHEN ROW_NUMBER() OVER (ORDER BY id) = 1 THEN ref ELSE ' / ' + SUBSTRING(ref, mc + 1, LEN(ref)) END 
FROM    (
        SELECT  MIN(common) AS mc
        FROM    (
                SELECT  (
                        SELECT  MAX(cnt)
                        FROM    hier
                        WHERE   SUBSTRING(initref, 1, cnt) = SUBSTRING(ref, 1, cnt)
                                AND cnt <= LEN(ref)
                        ) AS common
                FROM    (
                        SELECT  TOP 1 ref AS initref
                        FROM    tableA
                        ) i,
                        tableA
                ) q
        ) q2, tableA
FOR XML PATH('')

---

3536757616 / 17 / 18 / 28 / 29 / 30

同样的道理也适用于组:

WITH hier(cnt) AS
        (
        SELECT  1
        UNION ALL
        SELECT  cnt + 1
        FROM    hier
        WHERE   cnt <= 100
        )
SELECT  (
        SELECT  CASE WHEN ROW_NUMBER() OVER (ORDER BY a2.ref) = 1 THEN ref ELSE ' / ' + SUBSTRING(ref, mc + 1, LEN(ref)) END 
        FROM    tableA a2
        WHERE   a2.id = q2.id
        FOR XML PATH('')
        )
FROM    (
        SELECT  id, MIN(common) AS mc
        FROM    (
                SELECT  a.id,
                        (
                        SELECT  MAX(cnt)
                        FROM    hier
                        WHERE   SUBSTRING(i.initref, 1, cnt) = SUBSTRING(a.ref, 1, cnt)
                                AND cnt <= LEN(ref)
                        ) AS common
                FROM    (
                        SELECT  id, MIN(ref) AS initref
                        FROM    tableA
                        GROUP BY
                                id
                        ) i
                JOIN    tableA a
                ON      i.id = a.id
                ) q
        GROUP BY
                id
        ) q2
---
3536757616 / 7 / 8
3536757628 / 29 / 30

我的头都疼了,试图理解这个问题,但这确实是一种非常好的方法。远远优于其他两个答案(包括我的)。 - Lieven Keersmaekers
@Lieven:将是我明天博客的主题。 - Quassnoi
只需在最终选择中添加id,就可以完美了! - Kev Riley

1

我将我的表命名为#T,并使用以下SELECT语句

select id, number, substring(#t.ref, 1, v.number), count(id)
from master.dbo.spt_values v
inner join #t on v.number <= len(#t.ref)
where v.name is null and v.number > 0 
group by id, number, substring(#t.ref, 1, v.number)
order by id, count(id) desc, number desc

您将获得一个结果集,其中每个ID的第一条记录包含该ID的最大长度和最长初始字符串。

这并不是完整的解决方案,但是是一个很好的起点:迭代ID,并发出SELECT TOP 1以检索最长的字符串,并将每个记录的字符串差与具有相同ID的记录连接起来。


0
declare @tableA table(id int, ref varchar(50))
declare @suffix table(id int, suffix varchar(50))
declare @todo table(id int)

insert into @tableA
select 1, '3536757616'
union select 1, '3536757617'
union select 1, '3536757618'
union select 2, '3536757628'
union select 2, '3536757629'
union select 2, '3536757630'

insert into @suffix
select * from @tableA

insert into @todo
select s1.id
from (
    select id, cnt = count(*)
    from @suffix
    group by id, substring(suffix, 1, 1) 
  ) s1 
  inner join (
    select id, cnt = count(*)
    from @suffix
    group by id
  ) s2 on s2.id = s1.id and s2.cnt = s1.cnt

while exists (select * from @todo)
begin
  update @suffix
  set suffix = substring(suffix, 2, len(suffix) - 1)
  from @suffix s
       inner join @todo t on t.id = s.id

  delete from @todo

  insert into @todo
  select s1.id
  from (
      select id, cnt = count(*)
      from @suffix
      group by id, substring(suffix, 1, 1) 
    ) s1 
    inner join (
      select id, cnt = count(*)
      from @suffix
      group by id
    ) s2 on s2.id = s1.id and s2.cnt = s1.cnt  
end

select * from @suffix

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接