在GROUP BY中使用LIMIT获取每个组的N个结果?

526

以下查询:

SELECT
year, id, rate
FROM h
WHERE year BETWEEN 2000 AND 2009
AND id IN (SELECT rid FROM table2)
GROUP BY id, year
ORDER BY id, rate DESC
产生:
year    id  rate
2006    p01 8
2003    p01 7.4
2008    p01 6.8
2001    p01 5.9
2007    p01 5.3
2009    p01 4.4
2002    p01 3.9
2004    p01 3.5
2005    p01 2.1
2000    p01 0.8
2001    p02 12.5
2004    p02 12.4
2002    p02 12.2
2003    p02 10.3
2000    p02 8.7
2006    p02 4.6
2007    p02 3.3
我想要的只是每个id的前5个结果:
2006    p01 8
2003    p01 7.4
2008    p01 6.8
2001    p01 5.9
2007    p01 5.3
2001    p02 12.5
2004    p02 12.4
2002    p02 12.2
2003    p02 10.3
2000    p02 8.7

有没有一种类似于LIMIT的修饰符可以在GROUP BY内使用?


14
这可以在MySQL中完成,但并不像添加“LIMIT”子句那么简单。下面是一篇详细解释该问题的文章:如何在SQL中选择每组的第一个/最小/最大行 这是一篇好文章 - 作者介绍了一个优雅而朴素的解决“每组前N个”问题的方法,然后逐步改进了它。 - danben
SELECT * FROM (SELECT year, id, rate FROM h WHERE year BETWEEN 2000 AND 2009 AND id IN (SELECT rid FROM table2) GROUP BY id, year ORDER BY id, rate DESC) LIMIT 5 - Mixcoatl
3
这个问题已经通过引入SQL窗口函数来解决,具体解释可以参考这个答案:https://dev59.com/B2sz5IYBdhLWcg3wpZvy#38854846 - Paramvir Singh Karwal
2
@danben 新链接 - syahid246
14个回答

1

试试这个:

SET @num := 0, @type := '';
SELECT `year`, `id`, `rate`,
    @num := if(@type = `id`, @num + 1, 1) AS `row_number`,
    @type := `id` AS `dummy`
FROM (
    SELECT *
    FROM `h`
    WHERE (
        `year` BETWEEN '2000' AND '2009'
        AND `id` IN (SELECT `rid` FROM `table2`) AS `temp_rid`
    )
    ORDER BY `id`
) AS `temph`
GROUP BY `year`, `id`, `rate`
HAVING `row_number`<='5'
ORDER BY `id`, `rate DESC;

1

请尝试以下存储过程。我已经验证过了。我可以获得正确的结果,但是没有使用groupby

CREATE DEFINER=`ks_root`@`%` PROCEDURE `first_five_record_per_id`()
BEGIN
DECLARE query_string text;
DECLARE datasource1 varchar(24);
DECLARE done INT DEFAULT 0;
DECLARE tenants varchar(50);
DECLARE cur1 CURSOR FOR SELECT rid FROM demo1;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

    SET @query_string='';

      OPEN cur1;
      read_loop: LOOP

      FETCH cur1 INTO tenants ;

      IF done THEN
        LEAVE read_loop;
      END IF;

      SET @datasource1 = tenants;
      SET @query_string = concat(@query_string,'(select * from demo  where `id` = ''',@datasource1,''' order by rate desc LIMIT 5) UNION ALL ');

       END LOOP; 
      close cur1;

    SET @query_string  = TRIM(TRAILING 'UNION ALL' FROM TRIM(@query_string));  
  select @query_string;
PREPARE stmt FROM @query_string;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;

END

1

对于像我这样的人,经常会遇到查询超时的情况。我制作了以下内容,以便通过特定组使用限制和其他任何内容。

DELIMITER $$
CREATE PROCEDURE count_limit200()
BEGIN
    DECLARE a INT Default 0;
    DECLARE stop_loop INT Default 0;
    DECLARE domain_val VARCHAR(250);
    DECLARE domain_list CURSOR FOR SELECT DISTINCT domain FROM db.one;

    OPEN domain_list;

    SELECT COUNT(DISTINCT(domain)) INTO stop_loop 
    FROM db.one;
    -- BEGIN LOOP
    loop_thru_domains: LOOP
        FETCH domain_list INTO domain_val;
        SET a=a+1;

        INSERT INTO db.two(book,artist,title,title_count,last_updated) 
        SELECT * FROM 
        (
            SELECT book,artist,title,COUNT(ObjectKey) AS titleCount, NOW() 
            FROM db.one 
            WHERE book = domain_val
            GROUP BY artist,title
            ORDER BY book,titleCount DESC
            LIMIT 200
        ) a ON DUPLICATE KEY UPDATE title_count = titleCount, last_updated = NOW();

        IF a = stop_loop THEN
            LEAVE loop_thru_domain;
        END IF;
    END LOOP loop_thru_domain;
END $$

它循环遍历域名列表,然后仅插入每个列表的前200个。

0

我刚刚为MYSQL创建了一个顶部操作。代码很简单。

drop table if exists h;
create table h(id varchar(5), year int, rate numeric(8,2), primary key(id,year));
insert into h(year, id, rate) values
(2006,'p01',8),
(2003,'p01',7.4),
(2008,'p01',6.8),
(2001,'p01',5.9),
(2007,'p01',5.3),
(2009,'p01',4.4),
(2002,'p01',3.9),
(2004,'p01',3.5),
(2005,'p01',2.1),
(2000,'p01',0.8),
(2001,'p02',12.5),
(2004,'p02',12.4),
(2002,'p02',12.2),
(2003,'p02',10.3),
(2000,'p02',8.7),
(2006,'p02',4.6),
(2007,'p02',3.3);

select id, year, rate
from 
(
    select id, year, rate, @last, if(@last=id,@top:=@top+1, @top:=0) as ztop, @last:=id update_last
    from h
    order by id, rate desc, year desc
) t2
where ztop<5

你是否检查了关于内部查询的执行计划?在获取前五个项目之前,MySQL是否会扫描整个表以满足内部查询?这听起来像是一个性能陷阱。 - Timo
@Timo 这和最佳答案一样糟糕。 - user19714507

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接