PostgreSQL中简单的libpq代码速度太慢？

Question

PostgreSQL中简单的libpq代码速度太慢？

5

我正在使用libpq处理postgresql。下面给出的代码需要很长时间才能运行（时间在代码末尾给出）。

#include "stdafx.h"
#include <stdlib.h>
#include <libpq-fe.h>
#include <windows.h>

/*
 * Shut down the given libpq connection and terminate the process with
 * exit status 1.  Does not return.
 */
static void exit_nicely(PGconn *conn) {
    PQfinish(conn);   /* close the connection held by conn */
    exit(1);
}

/*
 * Benchmark driver: inside one transaction, prepares a two-parameter INSERT,
 * executes it 50000 times, then runs a count query and prints its result
 * together with elapsed times in milliseconds.
 *
 * argv[1], when present, is used as the libpq connection string; otherwise a
 * built-in default is used.  Returns 0 on success.  Any libpq failure closes
 * the connection and terminates the process through exit_nicely().
 *
 * NOTE(review): libpq takes narrow char strings, so the TCHAR/_T() usage
 * below presumably relies on a non-UNICODE build -- confirm project settings.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    const TCHAR *conninfo;
    PGconn     *conn;
    PGresult   *res;
    int nFields, i, j;

    if (argc > 1)
        conninfo = argv[1];
    else
        conninfo = _T("hostaddr=192.168.4.171 port=12345 dbname=mydb user=myname password=mypass");

    conn = PQconnectdb(conninfo);
    if (PQstatus(conn) != CONNECTION_OK)
    {
        fprintf(stderr, "Connection to database failed: %s",
                PQerrorMessage(conn));
        exit_nicely(conn);
    }

    /* Start a transaction block */
    res = PQexec(conn, "BEGIN");
    if (PQresultStatus(res) != PGRES_COMMAND_OK)
    {
        fprintf(stderr, "BEGIN command failed: %s", PQerrorMessage(conn));
        PQclear(res);
        exit_nicely(conn);
    }
    /* The BEGIN result is no longer needed; release it so it is not leaked. */
    PQclear(res);
    res = NULL;

    TCHAR szVal1[200];
    TCHAR szVal2[200];
    TCHAR szBuffer[200];

    TCHAR *paramValues[2];
    int paramLengths[2];
    int paramFormats[2] = {0,0};   /* 0 = both parameters are sent as text */

    /* Counter ticks per millisecond, so tick deltas divide down to ms. */
    LARGE_INTEGER li;
    QueryPerformanceFrequency(&li);
    double dAppFreq = double(li.QuadPart)/1000.0;

    QueryPerformanceCounter(&li);
    LONGLONG siStartCounter = li.QuadPart;

    TCHAR szStmt[512] = {0};
    _tcscpy_s(szStmt, 512, _T("Insert50k"));
    Oid oidTypes[2] = {0,0};       /* 0 = let the server infer parameter types */

    PGresult *pRes =    PQprepare(conn,
                        szStmt,
                        _T("insert into details values($1,$2);"),
                        2,
                        oidTypes);
    QueryPerformanceCounter(&li);
    LONGLONG siEndCounter = li.QuadPart;
    LONGLONG siLoop = 0;

    /* PQresultStatus(NULL) reports PGRES_FATAL_ERROR, so this also covers
     * a NULL result; PQclear(NULL) is a no-op. */
    if (PQresultStatus(pRes) != PGRES_COMMAND_OK)
    {
        fprintf(stderr, "PREPARE failed: %s", PQerrorMessage(conn));
        PQclear(pRes);
        exit_nicely(conn);
    }
    PQclear(pRes);

    double dDiff = (siEndCounter - siStartCounter)/dAppFreq;
    printf("Prepared %.2lf\n", dDiff);

    /* One PQexecPrepared call (one statement execution) per row. */
    for (int row = 0; row < 50000; row++)
    {
        _stprintf_s(szVal1, 200, _T("%d"), row);
        _stprintf_s(szVal2, 200, _T("Detail%d"), row);

        paramValues[0] = szVal1;
        paramValues[1] = szVal2;

        paramLengths[0] = (int)_tcslen(szVal1);
        paramLengths[1] = (int)_tcslen(szVal2);

        /* Each interval starts at the previous end, so the accumulated
         * time covers the whole loop body, not only the libpq call. */
        siStartCounter = siEndCounter;
        pRes = PQexecPrepared(conn,
                         szStmt,
                         2,
                         paramValues,
                         paramLengths,
                         paramFormats,
                         0);
        QueryPerformanceCounter(&li);
        siEndCounter = li.QuadPart;
        siLoop += (siEndCounter - siStartCounter);

        /* Check the result of THIS insert (pRes), not the earlier BEGIN
         * result, and release it every iteration to avoid leaking it. */
        if (PQresultStatus(pRes) != PGRES_COMMAND_OK)
        {
            fprintf(stderr, "INSERT failed: %s", PQerrorMessage(conn));
            PQclear(pRes);
            exit_nicely(conn);
        }
        PQclear(pRes);
    }

    dDiff = siLoop/dAppFreq;
    printf("Inserted %.2lf\n", dDiff);

    siStartCounter = siEndCounter;

    /* NOTE(review): the inserts above target "details" while this query
     * counts "programdetails" -- confirm which table is intended. */
    _tcscpy_s(szBuffer,200, _T("select count(*) from programdetails;"));
    res = PQexec(conn, szBuffer);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        fprintf(stderr, "SELECT failed: %s", PQerrorMessage(conn));
        PQclear(res);
        exit_nicely(conn);
    }

    /* first, print out the attribute names */
    nFields = PQnfields(res);
    for (i = 0; i < nFields; i++)
        printf("%-15s", PQfname(res, i));
    printf("\n\n");

    /* next, print out the rows */
    for (i = 0; i < PQntuples(res); i++)
    {
        for (j = 0; j < nFields; j++)
            printf("%-15s", PQgetvalue(res, i, j));
        printf("\n");
    }

    QueryPerformanceCounter(&li);
    siEndCounter = li.QuadPart;
    dDiff = (siEndCounter - siStartCounter)/dAppFreq;
    printf("Printed %.2lf\n", dDiff);

    /* Release the SELECT result before res is reused for COMMIT. */
    PQclear(res);

    /* end the transaction */
    res = PQexec(conn, "COMMIT");
    if (PQresultStatus(res) != PGRES_COMMAND_OK)
    {
        fprintf(stderr, "COMMIT command failed: %s", PQerrorMessage(conn));
        PQclear(res);
        exit_nicely(conn);
    }
    PQclear(res);

    /* close the connection to the database and cleanup */
    PQfinish(conn);

    return 0;
}

一个样例输出（以毫秒为单位）：

Prepared 0.55
Inserted 5527.52
count

50000
Printed 7.58

这里首先准备查询，然后执行。这个简单的插入操作需要约5.5秒钟的时间。有没有更好的方法来完成相同的操作，或者我在这里做错了什么？

- c0da

2

你只是试图发送50000个请求，这很正常！也许你可以调整库以在同一时间内发送所有请求，这将更快。此外，服务器是否在本地？如果不是，更好的网络也可能会有所不同。 - Geoffroy

1

这真的运行了1.5小时吗？ - vyegorov

@vyegorov 结果以毫秒为单位。 - c0da

@Geoffroy 在这种情况下，服务器在我的本地机器上。但是它也可以远程访问。它连接速度为1Gbps。我需要如何调整lib以在同一时间发送整个请求？有什么建议吗？ - c0da

1

@c0da 尝试搜索批量插入，这是你想要做的。 - Geoffroy

@Geoffroy，在处理小数据包时，1千兆或1兆的带宽并不重要；你主要会遇到延迟而不是吞吐量的问题。每个请求都需要等待回复才能继续工作，这样效率很低；你需要对请求进行分组，同时并行处理多个请求等等。Pg不支持多个并发请求（可惜），但你肯定可以进行分批处理，或在这种情况下使用COPY或多行插入。 - Craig Ringer

2个回答

3

我有一个类似的问题，将我的一系列插入操作转换成为一个多行插入操作。虽然添加了很多字符串处理和strcat调用，但这显著提高了性能：

1000 rows:
Individual Inserts: 22.609s
Multirow Insert: 1.217s

代码在https://gist.github.com/Meekohi/11291680，还展示了将二进制数据插入到一个列中的示例。

- Meekohi

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Daniel Vérité · Accepted Answer

在TCP连接上，每个INSERT都会导致一次到数据库的TCP往返。5.5秒内完成50000个插入意味着一个TCP往返需要约0.1毫秒。您需要将其与网络设备的TCP基准进行比较，但可能不能指望使用此方法更快。

您应该考虑使用 COPY FROM STDIN 而不是逐条执行单独的INSERT。在内部，这将缓冲内容，并且由于与服务器之间的往返次数大大减少，您可能会看到明显的速度提高。

有关与此COPY形式相关的libpq API，请参见http://www.postgresql.org/docs/current/static/libpq-copy.html。