Postgres - 创建表格非常缓慢 - 使用分区和BRIN索引

7

我有一张包含超过2.6亿条记录的表。我已经为该表创建了分区,并对其进行了索引。

CREATE TABLE qt_al_90k
(
    rec_id integer,
    user_id integer,
    user_text text,
    user_number double precision,
    user_date date,
    user_seq integer,
    my_sequence integer
) PARTITION BY RANGE (rec_id);

分区查询:

CREATE TABLE qt_al_90k_rec_id_1 PARTITION OF qt_al_90k FOR VALUES FROM (0) TO (100000);
CREATE TABLE qt_al_90k_rec_id_2 PARTITION OF qt_al_90k FOR VALUES FROM (100000) TO (200000);
CREATE TABLE qt_al_90k_rec_id_3 PARTITION OF qt_al_90k FOR VALUES FROM (200000) TO (300000);
CREATE TABLE qt_al_90k_rec_id_4 PARTITION OF qt_al_90k FOR VALUES FROM (300000) TO (400000);
CREATE TABLE qt_al_90k_rec_id_5 PARTITION OF qt_al_90k FOR VALUES FROM (400000) TO (500000);
CREATE TABLE qt_al_90k_rec_id_6 PARTITION OF qt_al_90k FOR VALUES FROM (500000) TO (600000);
CREATE TABLE qt_al_90k_rec_id_7 PARTITION OF qt_al_90k FOR VALUES FROM (600000) TO (700000);
CREATE TABLE qt_al_90k_rec_id_8 PARTITION OF qt_al_90k FOR VALUES FROM (700000) TO (800000);
CREATE TABLE qt_al_90k_rec_id_9 PARTITION OF qt_al_90k FOR VALUES FROM (800000) TO (900000);
CREATE TABLE qt_al_90k_rec_id_10 PARTITION OF qt_al_90k FOR VALUES FROM (900000) TO (1000000);
CREATE TABLE qt_al_90k_rec_id_11 PARTITION OF qt_al_90k FOR VALUES FROM (1000000) TO (1100000);
CREATE TABLE qt_al_90k_rec_id_12 PARTITION OF qt_al_90k FOR VALUES FROM (1100000) TO (1200000);
CREATE TABLE qt_al_90k_rec_id_13 PARTITION OF qt_al_90k FOR VALUES FROM (1200000) TO (1300000);
CREATE TABLE qt_al_90k_rec_id_14 PARTITION OF qt_al_90k FOR VALUES FROM (1300000) TO (1400000);
CREATE TABLE qt_al_90k_rec_id_15 PARTITION OF qt_al_90k FOR VALUES FROM (1400000) TO (1500000);
CREATE TABLE qt_al_90k_rec_id_16 PARTITION OF qt_al_90k FOR VALUES FROM (1500000) TO (1600000);
CREATE TABLE qt_al_90k_rec_id_17 PARTITION OF qt_al_90k FOR VALUES FROM (1600000) TO (1700000);
CREATE TABLE qt_al_90k_rec_id_18 PARTITION OF qt_al_90k FOR VALUES FROM (1700000) TO (1800000);
CREATE TABLE qt_al_90k_rec_id_19 PARTITION OF qt_al_90k FOR VALUES FROM (1800000) TO (1900000);
CREATE TABLE qt_al_90k_rec_id_20 PARTITION OF qt_al_90k FOR VALUES FROM (1900000) TO (2000000);
CREATE TABLE qt_al_90k_rec_id_21 PARTITION OF qt_al_90k FOR VALUES FROM (2000000) TO (2100000);
CREATE TABLE qt_al_90k_rec_id_22 PARTITION OF qt_al_90k FOR VALUES FROM (2100000) TO (2200000);
CREATE TABLE qt_al_90k_rec_id_23 PARTITION OF qt_al_90k FOR VALUES FROM (2200000) TO (2300000);
CREATE TABLE qt_al_90k_rec_id_24 PARTITION OF qt_al_90k FOR VALUES FROM (2300000) TO (2400000);
CREATE TABLE qt_al_90k_rec_id_25 PARTITION OF qt_al_90k FOR VALUES FROM (2400000) TO (2500000);
CREATE TABLE qt_al_90k_rec_id_26 PARTITION OF qt_al_90k FOR VALUES FROM (2500000) TO (2600000);
CREATE TABLE qt_al_90k_rec_id_27 PARTITION OF qt_al_90k FOR VALUES FROM (2600000) TO (3000000);

索引查询:

CREATE INDEX qt_al_90k_user_id_1 ON qt_al_90k_rec_id_1 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_1 ON qt_al_90k_rec_id_1 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_2 ON qt_al_90k_rec_id_2 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_2 ON qt_al_90k_rec_id_2 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_3 ON qt_al_90k_rec_id_3 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_3 ON qt_al_90k_rec_id_3 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_4 ON qt_al_90k_rec_id_4 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_4 ON qt_al_90k_rec_id_4 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_5 ON qt_al_90k_rec_id_5 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_5 ON qt_al_90k_rec_id_5 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_6 ON qt_al_90k_rec_id_6 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_6 ON qt_al_90k_rec_id_6 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_7 ON qt_al_90k_rec_id_7 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_7 ON qt_al_90k_rec_id_7 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_8 ON qt_al_90k_rec_id_8 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_8 ON qt_al_90k_rec_id_8 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_9 ON qt_al_90k_rec_id_9 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_9 ON qt_al_90k_rec_id_9 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_10 ON qt_al_90k_rec_id_10 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_10 ON qt_al_90k_rec_id_10 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_11 ON qt_al_90k_rec_id_11 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_11 ON qt_al_90k_rec_id_11 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_12 ON qt_al_90k_rec_id_12 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_12 ON qt_al_90k_rec_id_12 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_13 ON qt_al_90k_rec_id_13 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_13 ON qt_al_90k_rec_id_13 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_14 ON qt_al_90k_rec_id_14 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_14 ON qt_al_90k_rec_id_14 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_15 ON qt_al_90k_rec_id_15 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_15 ON qt_al_90k_rec_id_15 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_16 ON qt_al_90k_rec_id_16 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_16 ON qt_al_90k_rec_id_16 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_17 ON qt_al_90k_rec_id_17 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_17 ON qt_al_90k_rec_id_17 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_18 ON qt_al_90k_rec_id_18 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_18 ON qt_al_90k_rec_id_18 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_19 ON qt_al_90k_rec_id_19 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_19 ON qt_al_90k_rec_id_19 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_20 ON qt_al_90k_rec_id_20 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_20 ON qt_al_90k_rec_id_20 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_21 ON qt_al_90k_rec_id_21 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_21 ON qt_al_90k_rec_id_21 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_22 ON qt_al_90k_rec_id_22 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_22 ON qt_al_90k_rec_id_22 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_23 ON qt_al_90k_rec_id_23 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_23 ON qt_al_90k_rec_id_23 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_24 ON qt_al_90k_rec_id_24 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_24 ON qt_al_90k_rec_id_24 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_25 ON qt_al_90k_rec_id_25 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_25 ON qt_al_90k_rec_id_25 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_26 ON qt_al_90k_rec_id_26 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_26 ON qt_al_90k_rec_id_26 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_27 ON qt_al_90k_rec_id_27 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_27 ON qt_al_90k_rec_id_27 USING brin(rec_id);

这是我的检索查询:
create table test123 as
select user_id,user_text, rec_id, user_seq 
 from qt_al_90k 
 where rec_id in (
  1492,1493,1494,1495,1496,1497,1498,1499,1500,1501) and 
 user_id in (
  37
 );

此查询应返回大约50万条记录。然而,创建过程超过11分钟。当我传递100个以上的rec_id时,查询永远不会返回。
我需要处理1000多个rec_id,但是我无法完成。
如何提高创建表语句的性能?请帮忙。
编辑:
这是EXPLAIN(ANALYZE,BUFFERS)的内容。
"Gather  (cost=1607.98..2817973.44 rows=257182 width=29) (actual time=119.210..344297.379 rows=500000 loops=1)"
"  Workers Planned: 2"
"  Workers Launched: 2"
"  Buffers: shared hit=49963 read=43384"
"  I/O Timings: read=509470.717"
"  ->  Parallel Append  (cost=607.98..2791255.24 rows=107159 width=29) (actual time=116.662..343151.672 rows=166667 loops=3)"
"        Buffers: shared hit=49963 read=43384"
"        I/O Timings: read=509470.717"
"        ->  Parallel Bitmap Heap Scan on qt_al_90k_rec_id_1  (cost=607.98..2790719.45 rows=107159 width=29) (actual time=116.661..343137.730 rows=166667 loops=3)"
"              Recheck Cond: ((rec_id = ANY ('{1492,1493,1494,1495,1496,1497,1498,1499,1500,1501}'::integer[])) AND (va_id = 37))"
"              Rows Removed by Index Recheck: 3862086"
"              Heap Blocks: lossy=30808"
"              Buffers: shared hit=49963 read=43384"
"              I/O Timings: read=509470.717"
"              ->  BitmapAnd  (cost=607.98..607.98 rows=4480328 width=0) (actual time=92.004..92.004 rows=0 loops=1)"
"                    Buffers: shared hit=395 read=24"
"                    I/O Timings: read=0.082"
"                    ->  Bitmap Index Scan on qt_al_90k_rec_1  (cost=0.00..116.02 rows=5232168 width=0) (actual time=47.230..47.230 rows=8016640 loops=1)"
"                          Index Cond: (rec_id = ANY ('{1492,1493,1494,1495,1496,1497,1498,1499,1500,1501}'::integer[]))"
"                          Buffers: shared hit=381 read=1"
"                          I/O Timings: read=0.011"
"                    ->  Bitmap Index Scan on qt_al_90k_user_id_1  (cost=0.00..363.11 rows=134190580 width=0) (actual time=44.130..44.130 rows=11761920 loops=1)"
"                          Index Cond: (va_id = 37)"
"                          Buffers: shared hit=14 read=23"
"                          I/O Timings: read=0.071"
"Planning Time: 1.814 ms"
"Execution Time: 344320.891 ms"

请为该查询显示一个 EXPLAIN (ANALYZE, BUFFERS) - jjanes
@jjanes在问题中添加 - dang
3个回答

2

行的物理顺序是否与user_id(或va_id,无论列的真实名称是什么)和rec_id列的值密切相关?如果不是,则您拥有的BRIN索引几乎没有用处。您可以尝试改用BTREE索引。或者更好的是,在分区qt_al_90k_rec_id_1上建立一个多列索引(va_id, rec_id)。为了测试目的,您可以仅在该分区上构建它。

您的数据缓存效果很差。但是,这是因为无法很好地缓存它(您没有足够的内存),还是因为在运行查询时恰好没有缓存?由于大部分时间都在读取表数据,更有效地使用索引可能帮助不大,因为它仍然需要读取表数据(除非您添加要选择的其他列并获得仅索引扫描)

您的IO系统如何?如果您有RAID或JBOD,则增加effective_io_concurrency可能会有所帮助。

我需要通过这个处理1000多个rec_id

它们分别属于多少个不同的分区?

也很想看看此查询的EXPLAIN。由于它从未完成,因此无法执行EXPLAIN(ANALYZE)

此外,您需要多久运行一次此操作?这不像您经常运行的查询。为其构建索引可能不合适,但更好的索引可能对其他查询也有用。


1
这是正确的答案。BRIN索引在两个不同的列上工作的可能性非常小 - 两个列必须完全对齐。索引的选择是错误的。 - Laurenz Albe

2
我猜你的分区本身已经成为了一个负担。如果记录ID是每个分区的最后一个ID,那该怎么办呢?
更好的方法是在同一张表上建立一个rec_id索引,在这种情况下使用exists而不是IN子句进行快速检索。

0

这个表并不是很大,实际上,你可以跳过分区并使用默认的btree双列索引。


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接