-- Sample fixture: ten (seq_no, val) rows used by the running-sum examples below.
CREATE TABLE public.test (
    seq_no integer,
    val    numeric
);

INSERT INTO public.test (seq_no, val)
VALUES
    (1, 11),
    (2, 6),
    (3, 6),
    (4, 6),
    (5, 13),
    (6, 6),
    (7, 15),
    (8, 6),
    (9, 19),
    (10, 10);
-- Walks public.test in seq_no order and emits one row per input row with a
-- running sum that restarts whenever it reaches the threshold.
--
-- Parameters:
--   arg_threshold  once the running sum is >= this value, the sum is reset to 0
--                  and the bucket number is incremented for the NEXT row. If it
--                  is NULL the comparison is never true, so nothing is reset and
--                  every row stays in bucket 1.
--
-- Returns (one row per row of public.test):
--   seq_number   seq_no of the source row
--   running_val  val of the source row, returned as stored (may be NULL)
--   cum_sum      running sum within the current bucket, including this row
--   bucket       1-based bucket number of this row
--
-- A NULL val is counted as 0; otherwise a single NULL would turn the running
-- sum NULL for every following row and the threshold test would never fire again.
CREATE OR REPLACE FUNCTION public.test_cumolative_sum(arg_threshold integer)
RETURNS TABLE (seq_number integer, running_val NUMERIC, cum_sum NUMERIC, bucket integer)
LANGUAGE plpgsql AS
$$
DECLARE
    var_table   record;
    var_cum_sum NUMERIC := 0;
    var_bucket  integer := 1;
BEGIN
    FOR var_table IN
        SELECT t.seq_no, t.val
        FROM public.test AS t
        ORDER BY t.seq_no
    LOOP
        var_cum_sum := var_cum_sum + COALESCE(var_table.val, 0);

        -- Fill the output columns and emit the row directly; no per-row query needed.
        seq_number  := var_table.seq_no;
        running_val := var_table.val;
        cum_sum     := var_cum_sum;
        bucket      := var_bucket;
        RETURN NEXT;

        -- The row that reaches the threshold still belongs to the current bucket;
        -- the reset only affects the rows that follow it.
        IF var_cum_sum >= arg_threshold THEN
            var_cum_sum := 0;
            var_bucket := var_bucket + 1;
        END IF;
    END LOOP;
END;
$$;
-- Example call: bucket the sample rows with a threshold of 20.
SELECT
    seq_number,
    running_val,
    cum_sum,
    bucket
FROM public.test_cumolative_sum(20);
下面的查询以递归方式计算 running_sum 和 bucket 两列,但每个分组的累计和都不会超过 20。由于 20 是阈值,这样的结果是合理的;如果想得到期望的输出,也可以尝试调整阈值。
-- Running sum with reset, computed recursively over data(seq_no, value).
-- A new bucket starts on the row whose value would push the running sum
-- above 20, so no bucket total exceeds 20 unless a single value does.
-- The walk starts at seq_no = 1 and follows seq_no + 1, so it stops at the
-- first gap in seq_no.
with recursive cte as (
    -- anchor: the first row opens bucket 1
    select
        d.seq_no,
        d.value as running_sum,
        1 as bucket
    from data as d
    where d.seq_no = 1
    union all
    -- step: either extend the current bucket or start the next one
    select
        d.seq_no,
        case
            when cte.running_sum + d.value > 20 then d.value
            else cte.running_sum + d.value
        end,
        case
            when cte.running_sum + d.value > 20 then cte.bucket + 1
            else cte.bucket
        end
    from cte
    inner join data as d
        on d.seq_no = cte.seq_no + 1
)
select
    cte.seq_no,
    cte.running_sum,
    cte.bucket
from cte
-- explicit ordering: row order is not guaranteed without it
order by cte.seq_no;
-- Running sum with reset, computed recursively over your_table(seq_no, value).
-- A new bucket starts on the row whose value would push the running sum
-- above 20; that row's cum_sum restarts at its own value.
-- The walk starts at seq_no = 1 and follows seq_no + 1, so it stops at the
-- first gap in seq_no.
WITH RECURSIVE cte AS (
    -- Anchor: the first row always opens bucket 1 (not bucket = value).
    SELECT
        t.seq_no,
        t.value AS cum_sum,
        1 AS bucket
    FROM your_table AS t
    WHERE t.seq_no = 1
    UNION ALL
    -- Step: cum_sum and bucket switch under the same condition, so the row that
    -- restarts the sum is also the first row of the new bucket.
    SELECT
        t.seq_no,
        CASE
            WHEN cte.cum_sum + t.value > 20 THEN t.value
            ELSE cte.cum_sum + t.value
        END,
        CASE
            WHEN cte.cum_sum + t.value > 20 THEN cte.bucket + 1
            ELSE cte.bucket
        END
    FROM your_table AS t
    INNER JOIN cte
        ON t.seq_no = cte.seq_no + 1
)
SELECT
    cte.seq_no,
    cte.cum_sum,
    cte.bucket
FROM cte
ORDER BY cte.seq_no;