我一直在网上(和stackoverflow)搜索关于一维数组(或向量)是否比它们的二维对应物更快的意见。总的结论似乎是一维数组最快。然而,我写了一个简短的测试程序来自己验证,结果表明二维数组最好。有人能找到我的测试中的错误,或者至少解释为什么我得到这个结果吗?
我用它来存储矩阵,因此需要同时使用行和列索引1维数组。
我得到了以下输出:
我用它来存储矩阵,因此需要同时使用行和列索引1维数组。
#include <iostream>
#include <chrono>
#include <vector>
uint64_t timestamp()
{
namespace sc = std::chrono;
static auto start = sc::high_resolution_clock::now();
return sc::duration_cast<sc::duration<uint64_t, std::micro>>(sc::high_resolution_clock::now() - start).count();
}
int main(int argc, char** argv)
{
if (argc < 3)
return 0;
size_t size = atoi(argv[1]);
size_t repeat = atoi(argv[2]);
int** d2 = (int**)malloc(size*sizeof(int*));
for (size_t i = 0; i < size; ++i)
d2[i] = (int*)malloc(size*sizeof(int));
int* d1 = (int*)malloc(size*size*sizeof(int));
std::vector<std::vector<int> > d2v(size);
for (auto& i : d2v)
i.resize(size);
std::vector<int> d1v(size*size);
uint64_t start, end;
timestamp();
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t r = 0; r < size; ++r)
{
for (size_t c = 0; c < size; ++c)
{
if (r == 0)
d2[r][c] = 0;
else
d2[r][c] = d2[r-1][c] + 1;
}
}
}
end = timestamp();
std::cout << "2D array\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t c = 0; c < size; ++c)
{
for (size_t r = 0; r < size; ++r)
{
if (r == 0)
d2[r][c] = 0;
else
d2[r][c] = d2[r-1][c] + 1;
}
}
}
end = timestamp();
std::cout << "2D array C\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t r = 0; r < size; ++r)
{
for (size_t c = 0; c < size; ++c)
{
if (r == 0)
d1[r + c*size] = 0;
else
d1[r + c*size] = d1[r-1 + c*size] + 1;
}
}
}
end = timestamp();
std::cout << "1D array\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t c = 0; c < size; ++c)
{
for (size_t r = 0; r < size; ++r)
{
if (r == 0)
d1[r + c*size] = 0;
else
d1[r + c*size] = d1[r-1 + c*size] + 1;
}
}
}
end = timestamp();
std::cout << "1D array C\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t r = 0; r < size; ++r)
{
for (size_t c = 0; c < size; ++c)
{
if (r == 0)
d2v[r][c] = 0;
else
d2v[r][c] = d2v[r-1][c] + 1;
}
}
}
end = timestamp();
std::cout << "2D vector\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t c = 0; c < size; ++c)
{
for (size_t r = 0; r < size; ++r)
{
if (r == 0)
d2v[r][c] = 0;
else
d2v[r][c] = d2v[r-1][c] + 1;
}
}
}
end = timestamp();
std::cout << "2D vector C\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t r = 0; r < size; ++r)
{
for (size_t c = 0; c < size; ++c)
{
if (r == 0)
d1v[r + c*size] = 0;
else
d1v[r + c*size] = d1v[r-1 + c*size] + 1;
}
}
}
end = timestamp();
std::cout << "1D vector\t" << size << "\t" << end - start << std::endl;
start = timestamp();
for (size_t n = 0; n < repeat; ++n)
{
for (size_t c = 0; c < size; ++c)
{
for (size_t r = 0; r < size; ++r)
{
if (r == 0)
d1v[r + c*size] = 0;
else
d1v[r + c*size] = d1v[r-1 + c*size] + 1;
}
}
}
end = timestamp();
std::cout << "1D vector C\t" << size << "\t" << end - start << std::endl;
return 0;
}
我得到了以下输出:
user@user-debian64:~/matrix$ ./build/test/index_test 1000 100
2D array 1000 79593
2D array C 1000 326695
1D array 1000 440695
1D array C 1000 262251
2D vector 1000 73648
2D vector C 1000 418287
1D vector 1000 371433
1D vector C 1000 269355
user@user-debian64:~/matrix$ ./build/test/index_test 10000 1
2D array 10000 149748
2D array C 10000 3507346
1D array 10000 2754570
1D array C 10000 257997
2D vector 10000 92041
2D vector C 10000 3791745
1D vector 10000 3384403
1D vector C 10000 266811
-O3
并不总是更快,它会添加一些像-fast-math
这样的东西,可能是不需要的。不要只使用-O3
。你也可以选择优化大小,然后进行优化。-O2
应该是一个合理的默认值。如果需要了解,请进行测量。 - rubenvb