RowID int not null identity(1,1) primary key,
Col1 varchar(20) not null,
Col2 varchar(2048) not null,
Col3 tinyint not null
我该怎么做?
RowID int not null identity(1,1) primary key,
Col1 varchar(20) not null,
Col2 varchar(2048) not null,
Col3 tinyint not null
我该怎么做?
这是一篇关于删除重复数据的好文章。
它讨论了为什么很难做到:“SQL基于关系代数,而在关系代数中不允许出现重复项,因为集合中不允许有重复项。”
文章介绍了使用临时表的解决方法以及两个MySQL示例。
在未来,您将从数据库层面还是应用程序层面预防重复数据?我建议从数据库层面考虑,因为数据库应该负责维护引用完整性,开发人员可能会造成问题;)
另一种方法是创建一个具有相同字段和唯一索引的新表,然后将旧表中的所有数据移动到新表中。SQL Server 会自动忽略重复值(创建唯一索引时还有一个选项,用于指定遇到重复值时要执行的操作:忽略、中断或其他)。这样我们就可以得到没有重复行的同一张表。如果不想保留唯一索引,在数据传输完成之后可以将其删除。
特别是对于较大的表格,您可以使用 DTS(SSIS 数据导入/导出包)快速地将所有数据转移到新的带有唯一索引的表格中。对于 700 万行,只需要几分钟。
以下查询可用于根据单个列或多个列删除重复记录。下面的示例基于两个列进行删除:表名为 testing,列名为 empno 和 empname。
-- Remove duplicate rows from testing, where "duplicate" means the same
-- (empno, empname) pair.  ROW_NUMBER() numbers the rows within each
-- duplicate group; row 1 of each group is kept, every later row is deleted.
--
-- NOTE: the original used "WHERE empno NOT IN (... WHERE ItemNumber > 1)",
-- which deletes the NON-duplicate rows instead of the duplicates, and is
-- also exposed to the NOT IN + NULL trap.  Deleting through the CTE targets
-- exactly the extra copies (SQL Server deletes the underlying table rows).
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY empno, empname  -- both columns define a duplicate
            ORDER BY empno               -- arbitrary: any one row survives
        ) AS item_number
    FROM testing
)
DELETE FROM numbered
WHERE item_number > 1
Create new blank table with the same structure
Execute query like this
-- Copy one (arbitrary) row from each (category_id, application_id) group
-- that occurs more than once into the empty copy of the table.
-- NOTE(review): SELECT * combined with a GROUP BY on only two columns is
-- non-standard SQL; it relies on MySQL running with ONLY_FULL_GROUP_BY
-- disabled, and the values of the non-grouped columns are chosen
-- arbitrarily from the group — confirm this is acceptable before reuse.
INSERT INTO tc_category1
SELECT *
FROM tc_category
GROUP BY category_id, application_id
HAVING count(*) > 1
Then execute this query
-- Copy the rows whose (category_id, application_id) pair occurs exactly
-- once, completing the de-duplicated copy built by the previous INSERT.
-- NOTE(review): same caveat as above — SELECT * with a partial GROUP BY is
-- MySQL-specific behavior (ONLY_FULL_GROUP_BY off); here each group has a
-- single row, so the non-grouped column values are at least deterministic.
INSERT INTO tc_category1
SELECT *
FROM tc_category
GROUP BY category_id, application_id
HAVING count(*) = 1
-- Delete every duplicate row except the one with the smallest UNIQUEFIELD.
-- Rows are "duplicates" when they match on COL1 and COL2; the self-join
-- pairs each row A with any matching row B holding a smaller unique value,
-- so only the row with the minimum UNIQUEFIELD in each group survives.
-- Rewritten from the implicit comma join to an explicit ANSI INNER JOIN
-- with the filter in WHERE ("TABLE" is a placeholder for your table name).
DELETE A
FROM TABLE AS A
INNER JOIN TABLE AS B
    ON  A.COL1 = B.COL1
    AND A.COL2 = B.COL2
WHERE A.UNIQUEFIELD > B.UNIQUEFIELD
-- Delete a limited number of the duplicated rows (here: a single copy,
-- leaving the others in place).
-- NOTE: SET ROWCOUNT with DELETE/INSERT/UPDATE is deprecated in SQL Server;
-- Microsoft's documentation directs DML row-limiting to TOP instead, so the
-- old SET ROWCOUNT 1 ... SET ROWCOUNT 0 sandwich is replaced by DELETE TOP.
DELETE TOP (1) -- or set to the number of rows to be deleted
FROM myTable
WHERE RowId = DuplicatedID
-- Keep only the first physical copy of each duplicate group (Oracle syntax:
-- "rowid" here is Oracle's physical-row-address pseudo-column, not a user
-- column).  For every row, the correlated subquery finds the smallest rowid
-- among all rows sharing the same column_name value; any row with a larger
-- rowid is a later duplicate and is deleted.
DELETE
FROM
table_name T1
WHERE
rowid > (
SELECT
min(rowid)
FROM
table_name T2
WHERE
-- correlate on the column(s) that define what counts as a "duplicate"
T1.column_name = T2.column_name
);
从应用程序层面(不幸的是)来看,我同意防止重复的正确方式是通过使用唯一索引在数据库层面上完成,但在SQL Server 2005中,索引只允许为900个字节,而我的varchar(2048)字段超出了这个限制。
我不知道它的性能如何,但我认为你可以编写一个触发器来强制执行此操作,即使你不能直接使用索引。例如:
-- given a table stories(story_id int not null primary key, story varchar(max) not null)
-- Enforce uniqueness of stories.story via a trigger, for columns too wide
-- for a unique index (SQL Server's 900-byte index key limit).  Fires after
-- INSERT and UPDATE; if any inserted/updated story text already exists on a
-- different story_id, the whole statement is rolled back.
CREATE TRIGGER prevent_plagiarism
ON stories
AFTER INSERT, UPDATE
AS
BEGIN
    SET NOCOUNT ON;  -- suppress extra rowcount messages from the trigger

    -- EXISTS stops at the first match, unlike the original COUNT(*) > 0,
    -- which had to scan every matching row before comparing.
    IF EXISTS (SELECT 1
               FROM stories
               INNER JOIN inserted
                   ON  stories.story = inserted.story
                   AND stories.story_id <> inserted.story_id)
    BEGIN
        RAISERROR('plagiarism detected', 16, 1);
        ROLLBACK TRANSACTION;
    END
END
如果你想预览即将删除的行并控制哪些重复行要保留,请参见http://developer.azurewebsites.net/2014/09/better-sql-group-by-find-duplicate-data/
-- Delete duplicate rows via a deletable CTE: number the rows inside each
-- duplicate-key group, keep row 1 (the oldest by CreatedAt), delete the rest.
WITH duplicate_rows AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY DuplicateKey1
                       , DuplicateKey2 -- optional
            ORDER BY CreatedAt -- the first row among duplicates will be kept, other rows will be removed
        ) AS RN
    FROM MyTable
)
DELETE FROM duplicate_rows
WHERE RN > 1
-- Demo: remove duplicate (PersonId, CarId) pairs from a small sample table,
-- keeping exactly one row per pair.
CREATE TABLE car(Id int identity(1,1), PersonId int, CarId int)

INSERT INTO car(PersonId, CarId)
VALUES (1, 2), (1, 3), (1, 2), (2, 4)

--SELECT * FROM car

-- Number each row within its (personid, carid) group; rows numbered > 1
-- are the extra copies and their Ids drive the delete.
;WITH numbered_cars AS (
    SELECT
        Id,
        PersonID,
        CarId,
        ROW_NUMBER() OVER (
            PARTITION BY personid, carid
            ORDER BY personid, carid
        ) AS rn
    FROM car
)
DELETE FROM car
WHERE Id IN (SELECT Id FROM numbered_cars WHERE rn > 1)
(对于 SQL Server,将 ROWID() 函数替换为 RowID 列即可。) - maf-soft