轻松进行C代码基准测试
#include <stdio.h>
#include <time.h>
/*
 * Minimal benchmark skeleton: samples clock() before and after the code
 * under test and prints the processor time used, in seconds.
 *
 * Returns 0 on success, 1 if clock() could not report the processor time.
 */
int main(void) {
    clock_t start_time = clock();

    /* ... code to benchmark goes here ... */

    clock_t end_time = clock();

    /* clock() returns (clock_t)(-1) when the processor time is unavailable
     * or cannot be represented; a difference of such values is meaningless. */
    if (start_time == (clock_t)(-1) || end_time == (clock_t)(-1)) {
        fprintf(stderr, "clock() could not report the processor time\n");
        return 1;
    }

    double elapsed_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;
    printf("Done in %f seconds\n", elapsed_time);
    return 0;
}
多线程C代码的简单基准测试
如果您想对多线程程序进行基准测试,首先需要仔细了解clock()函数的行为:
描述
clock()函数返回程序使用的处理器时间的近似值。
返回值
返回值是以clock_t类型表示的已使用CPU时间; 要得到以秒为单位的时间,需要将其除以CLOCKS_PER_SEC。如果处理器时间不可用,或者其值无法表示,则该函数返回(clock_t)(-1)。
由于clock()统计的是整个程序(即所有线程累计)使用的处理器时间,而不是实际经过的时间,因此在多线程情况下需要将测得的时间除以线程数量,才能近似得到函数的执行时间(这一近似假设所有线程在整个计时期间都处于忙碌状态):
#include <time.h>
#include <omp.h>
/* Thread count: passed to num_threads() and used to scale the measured time below. */
#define THREADS_NB omp_get_max_threads()
/* Fragment only: the loop this directive applies to is not shown here. */
#pragma omp parallel for private(i) num_threads(THREADS_NB)
/* Sample the processor time before and after the measured region. */
clock_t start_time = clock();
double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
/* The processor time is divided by the thread count before printing. */
printf("Done in %f seconds\n", elapsed_time / THREADS_NB);
示例
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <omp.h>
/* Number of elements in each input array; also the bound of both nested loops. */
#define N 20000
/* Expands to a call to omp_get_max_threads() at every point of use. */
#define THREADS_NB omp_get_max_threads()
/*
 * Sets every one of the N elements of both arrays to 1.0.
 *
 * a, b: arrays with room for at least N doubles each.
 *
 * The values are assigned directly. The previous version called
 * memset(a, 0, sizeof(a)), but `a` is a pointer parameter, so sizeof(a) is
 * the size of the pointer and only the first few bytes were cleared; the
 * subsequent `+= 1.0` was correct only if the caller had already zeroed
 * the arrays.
 */
void init_arrays(double *a, double *b) {
    for (int i = 0; i < N; i++) {
        a[i] = 1.0;
        b[i] = 1.0;
    }
}
/*
 * Accumulates the quotient i / j for as long as that quotient is strictly
 * positive, lowering i by 0.1 and j by 0.000003 after each accumulation.
 *
 * Returns the accumulated sum; 0.0 when the first quotient is not positive.
 */
double func2(double i, double j) {
    double sum = 0.0;

    for (;;) {
        const double ratio = i / j;

        /* Written as a negated '>' so a NaN quotient also ends the loop. */
        if (!(ratio > 0.0)) {
            break;
        }

        sum += ratio;
        i -= 0.1;
        j -= 0.000003;
    }

    return sum;
}
/*
 * Sequential reference version: sums func2(a[row], b[col]) over every pair
 * of indices in [0, N) x [0, N), skipping the pairs where row == col.
 *
 * Returns the accumulated sum.
 */
double single_thread(double *a, double *b) {
    double total = 0;

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row != col) {
                total += func2(a[row], b[col]);
            }
        }
    }

    return total;
}
/*
 * OpenMP version of the pairwise sum: the outer loop is split across
 * THREADS_NB threads and the per-thread partial sums are combined with a
 * '+' reduction. Pairs where row == col are skipped.
 *
 * Returns the accumulated sum.
 */
double multi_threads(double *a, double *b) {
    double total = 0;

    /* The inner index is declared inside the parallel loop body, so each
     * thread gets its own copy without an explicit private() clause. */
    #pragma omp parallel for num_threads(THREADS_NB) reduction(+:total)
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row != col) {
                total += func2(a[row], b[col]);
            }
        }
    }

    return total;
}
/*
 * Benchmarks single_thread() against multi_threads() on two N-element
 * arrays and prints each result together with its measured time.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the arrays cannot be allocated.
 */
int main(void) {
    double *a = calloc(N, sizeof *a);
    double *b = calloc(N, sizeof *b);

    if (a == NULL || b == NULL) {
        fprintf(stderr, "Failed to allocate the input arrays\n");
        free(a);   /* free(NULL) is a no-op, so no guard is needed */
        free(b);
        return EXIT_FAILURE;
    }

    /* Initialisation is done before the first clock() sample so that it is
     * not included in either measurement. */
    init_arrays(a, b);

    clock_t start_time = clock();
    double res = single_thread(a, b);
    double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Default: Done with %f in %f sd\n", res, elapsed_time);

    start_time = clock();
    res = multi_threads(a, b);
    elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    /* clock() reports processor time used by the program, so the value is
     * divided by the thread count to estimate the elapsed time.
     * NOTE(review): this is an approximation that assumes every thread was
     * busy for the whole region; a wall-clock timer would avoid it. */
    printf("With OMP: Done with %f in %f sd\n", res, elapsed_time / THREADS_NB);

    free(a);
    free(b);
    return EXIT_SUCCESS;
}
使用以下命令进行编译:
gcc -O3 multithread_benchmark.c -fopenmp && time ./a.out
输出:
Default: Done with 2199909813.614555 in 4.909633 sd
With OMP: Done with 2199909799.377532 in 1.708831 sd
real 0m6.703s (来自time命令)
perf stat
的性能计数器结果,并且意味着您可以使用外部计时工具,例如time ./a.out
,而无需在C代码中加入计时代码。不过,在程序内部计时可以避免把初始化代码的耗时也计入结果,并且更容易从同一个程序中得到多个计时结果。 - Peter Cordes