Segmentation fault when declaring arrays


I have the following code:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

void pgmsize (char *filename, int *nx, int *ny);
void pgmread (char *filename, void *vx, int nx, int ny);
void pgmwrite(char *filename, void *vx, int nx, int ny);

#define FILENAME "edge768x768.pgm"
#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv){

    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    FILE *fp;
    char outname[64];
    sprintf(outname,"out_%d.dat",P);
    fp = fopen( outname, "w" );
    fprintf(fp,"del,iter,avg\n");

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    printf("%d,%d,%d\n",rank,M/Px,N/Py);
    double masterarray[M][N];
    double outarray[M][N];
    double local_array[M/Px][N/Py];
    double local_padded_array[M/Px+2][N/Py+2];
    double old[M/Px+2][N/Py+2];

    printf("%d,%d,%d\n",rank,Px,Py);



    fclose(fp);
    MPI_Finalize();
}

Compiling and running it produces the following error:

mpiexec noticed that process rank 0 with PID 28696 on node My-MacBook-Air exited on signal 11 (Segmentation fault: 11).

However, if I change the declaration of the master array to:
float masterarray[M][N]

the code compiles and runs successfully. Can anyone help? If I declare all of the arrays as floats it also runs fine. Perhaps a better understanding of the difference between the two types would help me see what is going on.
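
For reference, the relevant difference between the two types is simply their size: a double is 8 bytes per element and a float is 4, so a single 768x768 array is roughly 4.7 MB as double but about 2.4 MB as float. A quick standalone check (a separate sketch, not part of the program above):

#include <stdio.h>

#define M 768
#define N 768

int main(void)
{
    /* Per-array footprint of the two element types used above. */
    printf("double[M][N]: %zu bytes\n", sizeof(double) * M * N);  /* 4718592 */
    printf("float [M][N]: %zu bytes\n", sizeof(float)  * M * N);  /* 2359296 */
    return 0;
}

Two such arrays as double already account for more than 9 MB of automatic storage before any of the smaller local arrays are counted.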


Having carried on writing the code with all the arrays as floats, I have now hit another array-related problem. My code is now:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

void pgmsize (char *filename, int *nx, int *ny);
void pgmread (char *filename, void *vx, int nx, int ny);
void pgmwrite(char *filename, void *vx, int nx, int ny);

#define FILENAME "edge768x768.pgm"
#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv){

    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    FILE *fp;
    char outname[64];
    sprintf(outname,"out_%d.dat",P);
    fp = fopen( outname, "w" );
    fprintf(fp,"del,iter,avg\n");

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    float masterarray[M][N];
    float outarray[M][N];
    float local_array[M/Px][N/Py];
    float local_padded_array[M/Px+2][N/Py+2];
    float old[M/Px+2][N/Py+2];
    float new[M/Px+2][N/Py+2];



    if (rank == 0){
        pgmread(FILENAME, masterarray, M, N);
    }

    MPI_Bcast(masterarray,M*N,MPI_FLOAT,0,MPI_COMM_WORLD);

    for(i=0;i<M/Px;i++){
        for(j=0;j<N/Py;j++){

          locali = i + coords[0] * M/Px;
          localj = j + coords[1] * N/Py;


          local_array[i][j] = masterarray[locali][localj];

        }
    }

    for (i = 0;i<M/Px +2;i++){
        for (j = 0;j<N/Py +2;j++){
            local_padded_array[i][j] = 255.0;
        }
    }

    for (i = 1;i<M/Px +1;i++){
        for (j = 1;j<N/Py +1;j++){
            local_padded_array[i][j] = local_array[i-1][j-1];
        }
    }

    for (i = 0;i<M/Px +2;i++){
        for (j = 0;j<N/Py +2;j++){
            old[i][j] = 255.0;
        }
    }

    int down_rank,up_rank,right_rank,left_rank;
    MPI_Cart_shift(cart_comm,0,1,&right_rank,&left_rank);
    MPI_Cart_shift(cart_comm,1,1,&down_rank,&up_rank);

    MPI_Datatype col;
    MPI_Type_vector(M/Px,1,N/Py+2,MPI_FLOAT,&col);
    MPI_Type_commit(&col);

    float globaldel = 1000.0;
    float globalsum = 0.0;
    double time1 = MPI_Wtime();
    for(iter = 0;iter < MAXITER;iter++){

        MPI_Issend(&old[1][N/Py], 1,col,up_rank, 0,cart_comm, &req);
        MPI_Recv(&old[1][0], 1,col, down_rank, 0,cart_comm, &status);

        MPI_Issend(&old[1][1], 1,col ,down_rank, 0,cart_comm, &req);
        MPI_Recv(&old[1][N/Py+1], 1,col ,up_rank, 0,cart_comm, &status);

        MPI_Issend(&old[M/Px][1], N/Py,MPI_FLOAT,left_rank, 0,cart_comm, &req);
        MPI_Recv(&old[0][1], N/Py,MPI_FLOAT, right_rank, 0,cart_comm, &status);

        MPI_Issend(&old[1][1], N/Py,MPI_FLOAT,right_rank, 0,cart_comm, &req);
        MPI_Recv(&old[M/Px+1][1], N/Py,MPI_FLOAT, left_rank, 0,cart_comm, &status);

        for (i = 1;i<M/Px +1;i++){
        for (j = 1;j<N/Py +1;j++){
                new[i][j] = 0.25*(old[i][j-1]+old[i][j+1]+old[i-1][j]+old[i+1][j] - local_padded_array[i][j]);
            }
         }
    }

    printf("%d,%d,%d\n",rank,M/Px,N/Py);
    fclose(fp);
    MPI_Finalize();
}

When this is run, another segmentation fault appears, which seems to go away if I don't set the elements of the new array in the final loop. So there seems to be some major problem with how I am creating my arrays! The code also seems to run fine if I use #define P 3 instead of #define P 2.
1 Answer

You are most likely running out of memory on the stack. If you are not familiar with the difference between the heap and the stack, take a look at: What and where are the stack and heap?. When I switch to the heap instead, everything works fine.
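
As a rough sanity check (a sketch to illustrate the point, assuming a POSIX system for getrlimit and that MPI_Dims_create splits P=2 into a 2x1 grid), you can compare the per-process footprint of those automatic arrays against the stack limit, which commonly defaults to about 8 MiB:

#include <stdio.h>
#include <sys/resource.h>

#define M 768
#define N 768

int main(void)
{
    struct rlimit rl;
    getrlimit(RLIMIT_STACK, &rl);                      /* soft stack limit in bytes */

    int Px = 2, Py = 1;                                /* assumed dims for P = 2    */
    size_t full   = (size_t)M * N;                     /* masterarray, outarray     */
    size_t tile   = (size_t)(M/Px) * (N/Py);           /* local_array               */
    size_t padded = (size_t)(M/Px + 2) * (N/Py + 2);   /* padded, old, new          */
    size_t total  = sizeof(float) * (2*full + tile + 3*padded);

    printf("stack limit: %llu bytes\n", (unsigned long long)rl.rlim_cur);
    printf("float total: %zu bytes\n", total);
    return 0;
}

With float and P=2 the total comes to roughly 9.5 MB, just over a typical 8 MiB default; with P=3 the tiles shrink enough to fit, and with double the two full-size arrays alone exceed the limit, which matches the behaviour described in the question.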
To allocate the arrays on the heap instead, use:
    double *masterarray = malloc(sizeof(double) * M * N);
    double *outarray = malloc(sizeof(double) * M * N);
    double *local_array = malloc(sizeof(double) * M/Px * N/Py);
    double *local_padded_array = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));
    double *old = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));
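
If you would rather keep the two-dimensional arr[i][j] indexing used in the rest of the question's code, a C99 alternative (a sketch, not part of the original answer) is to allocate through pointers to variable-length array rows; the storage still comes from the heap:

    double (*masterarray)[N]             = malloc(sizeof(double[M][N]));
    double (*local_array)[N/Py]          = malloc(sizeof(double[M/Px][N/Py]));
    double (*local_padded_array)[N/Py+2] = malloc(sizeof(double[M/Px+2][N/Py+2]));

    if (!masterarray || !local_array || !local_padded_array) {
        fprintf(stderr, "allocation failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Elements are still addressed as masterarray[i][j], and the pointers can be
       passed directly to MPI calls, e.g. MPI_Bcast(masterarray, M*N, MPI_DOUBLE,
       0, MPI_COMM_WORLD). Remember to free() each block when finished. */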

Here is my version of your original code, which runs fine (I removed some unnecessary cruft such as the file creation, etc.):
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    printf("%d,%d,%d\n",rank,M/Px,N/Py);

    double *masterarray = malloc(sizeof(double) * M * N);
    double *outarray = malloc(sizeof(double) * M * N);
    double *local_array = malloc(sizeof(double) * M/Px * N/Py);
    double *local_padded_array = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));
    double *old = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));

    if (masterarray == NULL) fprintf(stderr, "MASTERARRAY == NULL");
    if (outarray == NULL) fprintf(stderr, "OUTARRAY == NULL");
    if (local_array == NULL) fprintf(stderr, "LOCAL_ARRAY == NULL");
    if (local_padded_array == NULL) fprintf(stderr, "LOCAL_PADDED_ARRAY == NULL");
    if (old == NULL) fprintf(stderr, "OLD == NULL");

    printf("%d,%d,%d\n",rank,Px,Py);

    MPI_Finalize();

    return 0;
}
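
One adaptation that the trimmed version glosses over: with flat double * buffers, two-dimensional subscripts such as masterarray[locali][localj] from the question no longer compile, so the copy loop needs explicit row-major index arithmetic, and each buffer should eventually be released with free(). A sketch of the adapted loop:

for (i = 0; i < M/Px; i++) {
    for (j = 0; j < N/Py; j++) {
        locali = i + coords[0] * (M/Px);
        localj = j + coords[1] * (N/Py);
        /* row-major: element (r,c) of an R x C block sits at index r*C + c */
        local_array[i * (N/Py) + j] = masterarray[locali * N + localj];
    }
}

free(masterarray);
free(outarray);
free(local_array);
free(local_padded_array);
free(old);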

Right. As long as all of your values are declared as macros, that's fine. You can still run out of memory, though, if you use too many processes on a single node. - Wesley Bland
So I'm running this on my Mac, which has 4 cores... It doesn't work with Px=1, Py=2, but it does work with Px=1, Py=3 and Px=2, Py=2. - Josh Greenhalgh
"You can't allocate dynamic arrays in C the way you're doing it." is true for C89. Since C99 there have been variable-length arrays, and the code shown is perfectly valid C. - alk
Fair enough, @alk. I rarely work with code written in C99, so I haven't seen it widely used. I've updated the answer. - Wesley Bland
To confirm Wes's guess about stack usage: gcc will tell us as much if you add the -Wstack-usage flag: "code_bigstack.c:128:1: warning: stack usage might be unbounded [-Wstack-usage=]" - Rob Latham
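
For reference, -Wstack-usage takes a byte threshold, e.g.:

    gcc -Wstack-usage=8388608 -c code_bigstack.c

(or the equivalent through whatever MPI compiler wrapper is in use). Because the arrays here are variable-length, gcc can only report the usage as potentially unbounded, which is the wording in the message above.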
