使用C语言将文件名复制到数组中

3

我正在尝试找出某个目录中所有特定类型(这里硬编码为 tif)的文件,并将它们的文件名复制到一个数组中。代码可以正常编译(gcc -Wall 没有任何错误或警告),但存在内存问题。程序看起来运行正常(没有段错误),但输出的部分文件名是乱码——就像字符串里混入了非 ASCII 字节时看到的那种字符。于是我用 valgrind 运行了程序,它报告了错误(输出见下文),但我无法定位真正的问题所在。在某些目录中,程序在 valgrind 下运行时还会发生段错误(而直接在同一目录中运行程序却一切正常)。

#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <search.h>
#include <string.h>
#include <error.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>



/* Function-like macro yielding the smaller of X and Y.
 * Each argument is expanded twice, so an argument with side effects (or an
 * expensive call such as strlen) is evaluated more than once. */
#define min(X, Y)  ((X) < (Y) ? (X) : (Y))

/* Value returned by main as the process exit status; initialised to 0. */
int exitStatus = 0;

/*------------------------------------------------------------------------------
* array_find
*
* ARGS - key  = string to look for
*        argv = array of strings to search
*        argc = number of elements of argv to examine
*
* RETURN - the index of the first element that "matches" key, or -1 if no
* element matches.
*
* NOTE(review): the comparison only covers the first
* min(strlen(key), strlen(argv[i])) characters, so this is a prefix match in
* both directions: key "-d" matches "-d", "-delete" and "-", and an empty
* element (compared length 0) matches any key.
*-----------------------------------------------------------------------------*/

int array_find(char *key, char *argv[], int argc){
    int i;
    for (i = 0; i < argc; i++)
    {
        /* Optional trace of each comparison.
         * NOTE(review): min(strlen(..), strlen(..)) has type size_t but is
         * printed with %d. */
        #ifdef DEBUG_array_find
        printf("strncmp(%s, %s, %d) = %d\n", key, argv[i], min(strlen(key), strlen(argv[i])), strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))));
        #endif
        /* Compare only as many characters as the shorter string has. */
        if (strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))) == 0)
        {
            return i;
        }
    }
    return -1;
}


/*------------------------------------------------------------------------------
* ends_with
*
* ARGS - str = string to be checked
*        sub = suffix to look for
*
* RETURN - Returns true if str ends with sub or both strings are NULL.
*          Returns false if exactly one of them is NULL, or if str does not
*          end with sub. An empty sub matches every non-NULL str.
*-----------------------------------------------------------------------------*/

bool ends_with(char *str, char *sub){
    if (str == NULL && sub == NULL)
    {
        return true;
    }
    if (str == NULL || sub == NULL)
    {
        return false;
    }
    size_t str_len = strlen(str);
    size_t sub_len = strlen(sub);
    if (sub_len > str_len)
    {
        return false;
    }
    /* Compare the last sub_len bytes of str directly. Anchoring on the last
     * occurrence of sub's first character instead would reject valid matches
     * whenever that character repeats inside the suffix ("aaa" / "aa"). */
    return memcmp(str + (str_len - sub_len), sub, sub_len) == 0;
}

/*------------------------------------------------------------------------------
* main
*
* ARGS - optional "-d <directory>" selects the directory to scan (default:
*        the current working directory); optional "-t <n>" sets a thread
*        count that is parsed but not otherwise used in this function.
*
* Scans the directory twice: the first pass counts the names ending in
* ".tif", the second pass copies names into an array sized by that count,
* and the array is then printed.
*
* RETURN - exitStatus; exits with status 1 if the directory cannot be opened
*          or a name cannot be duplicated.
*-----------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
    /*Parse args*/
    DIR *dir;
    int index = array_find("-d", argv, argc);
    char *dirname;
    if (index >= 0)
    {
        /* NOTE(review): no check that an argument follows "-d"; when "-d" is
         * the last argument, argv[index + 1] is argv[argc], i.e. NULL. */
        dirname = argv[index + 1];
        dir = opendir(dirname);
    }
    else
    {
        /* getcwd(NULL, 0) returns an allocated buffer; it is never freed in
         * this function. */
        dirname = getcwd(NULL, 0);
        if (dirname == NULL)
        {
            perror("Error getting current directory name.");
            exit(1);
        }
        dir = opendir(dirname);
    }
    if (dir == NULL)
    {
        perror(dirname);
        exit(1);
    }

    #ifdef DEBUG_MAIN
        printf("dirname = %s\n", dirname);
    #endif

    int threads = 1;
    index = array_find("-t", argv, argc);
    if (index >= 0)
    {
        /* NOTE(review): same missing-operand hazard as for "-d". */
        threads = atoi(argv[index + 1]);
    }
    #ifdef DEBUG_MAIN
        printf("threads = %d\n", threads);
    #endif

    /* First pass: count the entries whose name ends in ".tif". */
    struct dirent *entry = readdir(dir);
    int num_files = 0;
    while (entry != NULL)
    {
        if (ends_with(entry->d_name, ".tif")){
            #ifdef DEBUG_MAIN
                printf("%s\n", entry->d_name);
            #endif
            num_files++;
        }
        entry = readdir(dir);
    }

    if (closedir(dir) != 0)
    {
        perror("Failed to close directory.");
    }

    #ifdef DEBUG_MAIN
        printf("Num files = %d\n", num_files);
    #endif

    /* Reopen the directory to start the second pass from the beginning. */
    dir = opendir(dirname);
    if (dir == NULL)
    {
        perror(dirname);
        exit(1);
    }

    entry = readdir(dir);

    /* Variable-length array sized by the first-pass count; its elements are
     * not initialised here. */
    char *file_names[num_files];
    int i = 0;
    /* NOTE(review): i is incremented for EVERY directory entry, not only for
     * matching ones. Non-matching entries (".", "..", other files) therefore
     * leave their slot unassigned, and once i reaches num_files the
     * assignment below writes past the end of file_names. */
    for(; entry != NULL; i++)
    {
        if (ends_with(entry->d_name, ".tif")){
            file_names[i] = strdup(entry->d_name);
            if (file_names[i] == NULL)
            {
                perror("Could not create the filename array.\n");
                exit(1);
            }
        }
        entry = readdir(dir);
    }

    /* Prints all num_files slots, including any that the loop above never
     * assigned. The strdup'ed strings are not freed (the free call is
     * commented out). */
/*    #ifdef DEBUG_MAIN*/
        for (i = 0; i < num_files; i++)
        {
            printf("%s\n", file_names[i]);
/*            free(file_names[i]);*/
        }
/*    #endif*/



    /* NOTE(review): dir came from opendir() but is released with free()
     * rather than closedir(). */
    free(dir);
    return exitStatus;
}

Valgrind 输出:

    ==24488== Memcheck, a memory error detector
==24488== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==24488== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==24488== Command: ./myprogram -d /home/chris/Pictures/Catalinas\ with\ Christie/Processed/
==24488== 
dirname = /home/chris/Pictures/Catalinas with Christie/Processed/
threads = 1
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
DSC_1293_lzn.tif
DSC_1631_lzn.tif
DSC_1418_lzn.tif
DSC_1315_2crop_lzn.tif
DSC_1377_lzn2crop.tif
DSC_2167_lzn.tif
1981-1985-HDR3_lzn2.tif
DSC_2129_lzn.tif
DSC_1448_lzn.tif
DSC_1607_lzn.tif
DSC_1564_lzn.tif
DSC_2052-DSC_2072_lzn.tif
DSC_1487_lzn.tif
DSC_1591_2_lzn.tif
DSC_2124_lzn.tif
DSC_1622_lzn.tif
DSC_2157_lzn.tif
DSC_1685_lzn.tif
Num files = 24
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
==24488== Use of uninitialised value of size 8
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488== 
==24488== Invalid read of size 1
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488==  Address 0x0 is not stack'd, malloc'd or (recently) free'd
==24488== 
==24488== 
==24488== Process terminating with default action of signal 11 (SIGSEGV)
==24488==  Access not within mapped region at address 0x0
==24488==    at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488==    by 0x4EA4ECB: puts (ioputs.c:36)
==24488==    by 0x400D52: main (batch-convert.c:161)
==24488==  If you believe this happened as a result of a stack
==24488==  overflow in your program's main thread (unlikely but
==24488==  possible), you can try to increase the size of the
==24488==  main thread stack using the --main-stacksize= flag.
==24488==  The main thread stack size used in this run was 8388608.
==24488== 
==24488== HEAP SUMMARY:
==24488==     in use at exit: 33,243 bytes in 25 blocks
==24488==   total heap usage: 26 allocs, 1 frees, 66,051 bytes allocated
==24488== 
==24488== LEAK SUMMARY:
==24488==    definitely lost: 0 bytes in 0 blocks
==24488==    indirectly lost: 0 bytes in 0 blocks
==24488==      possibly lost: 0 bytes in 0 blocks
==24488==    still reachable: 33,243 bytes in 25 blocks
==24488==         suppressed: 0 bytes in 0 blocks
==24488== Rerun with --leak-check=full to see details of leaked memory
==24488== 
==24488== For counts of detected and suppressed errors, rerun with: -v
==24488== Use --track-origins=yes to see where uninitialised values come from
==24488== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 2 from 2)
Segmentation fault (core dumped)

我已经很久没有使用C语言了,但据我所知 (从手册中了解到),strdup应该使用malloc在堆上分配内存,用于拷贝字符串。在我记起strdup函数之前,我试图手动执行同样的操作,但是遇到了相同的错误。我认为可能是我的代码有缺陷,而且认为strdup函数会处理它,但显然还有其他问题。

有人能告诉我我做错了什么吗?

编辑1: 按照要求,我已添加了程序的完整源代码。另外,对于那些说要检查i是否等于num_files的人,你会看到,事先我就数了tif文件的数量,所以我知道将被复制到数组中的文件的确切数量,因此检查索引是不必要的。

另外,作为一个注释,该程序是使用DEBUG_MAIN定义编译的,因此任何在#ifdef DEBUG_MAIN块中的内容都会运行。没有定义其他调试标志。


entry 的初始值是什么? - user4815162342
3
  1. 请提供一个实际的 SSCCE,而不是只提供片段。
  2. 您的程序能在仅包含 ASCII 文件名的目录中工作吗?
  3. 没有相应的源代码(以及相应的行号),Valgrind 的输出并没有任何帮助。
- thkala
1
另外,你记得用 i 的最终值更新 num_files 吗?你是否将具有自动存储类的 file_names 数组返回给调用者?发布一个完整的示例将解决所有这些问题,而无需询问。 - user4815162342
@user4815162342:同意,可能有无数种问题,从结构体超出范围到多字节字符集问题,再到错误条件没有被正确处理。 - thkala
valgrind输出与本代码无关,我看不到puts的调用。 - Nahuel Fouilleul
显示剩余5条评论
3个回答

1
在你的代码中,这部分 for(; entry != NULL; i++) 太过危险,比如说假设 num_files 的值为1000,如果一个给定目录包含1002个条目,那么就会出问题。 将它替换为 for(; entry != NULL && i < num_files ; i++)

0

应该检查数组的索引:

i<num_files

0
问题在于,如果目录中存在任何与模式不匹配的条目(例如 . 和 .. 条目),数组中对应的位置就会被跳过而保持未初始化。这还意味着您会写到 file_names 数组的边界之外。只有在文件名匹配时才应该递增 i。
使用 getcwd()而不仅仅是使用 . 表示当前目录可以工作,但几乎没有必要。
使用 free(dir) 而不是 closedir(dir) 是一场彻头彻尾的灾难:由 opendir() 返回的 DIR * 必须用 closedir() 来释放。
命令行参数处理方式很不寻常。最初编写时,它将接受 -delete 作为等同于 -d 。这不是好的风格。
#include <assert.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>

bool ends_with(char *str, char *sub);
int array_find(char *key, char *argv[], int argc);

/* Return the index of the first of the argc strings in argv that is exactly
 * equal to key, or -1 when none of them is. */
int array_find(char *key, char *argv[], int argc)
{
    int pos = 0;

    while (pos < argc)
    {
        if (!strcmp(key, argv[pos]))
            return pos;
        pos++;
    }
    return -1;
}

/* Report whether str ends with sub.
 *
 * Returns true when both pointers are NULL, false when exactly one is NULL.
 * Otherwise returns true iff the last strlen(sub) bytes of str equal sub;
 * an empty sub matches every non-NULL str. */
bool ends_with(char *str, char *sub)
{
    if (str == NULL && sub == NULL)
        return true;
    if (str == NULL || sub == NULL)
        return false;
    size_t str_len = strlen(str);
    size_t sub_len = strlen(sub);
    if (sub_len > str_len)
        return false;
    /* Compare the tail directly. Anchoring on the last occurrence of sub's
     * first character instead would reject valid matches whenever that
     * character repeats inside the suffix ("aaa" / "aa"). */
    return memcmp(str + (str_len - sub_len), sub, sub_len) == 0;
}

/*
 * List every entry of a directory whose name ends with the hard-coded suffix.
 *
 * Usage: prog [-d DIRECTORY]
 * Without -d the current working directory is scanned.
 *
 * The directory is read in a single pass and matching names are copied into
 * a heap array that grows on demand. This avoids a zero-length
 * variable-length array when nothing matches, and cannot overflow if the
 * directory contents change while it is being scanned.
 *
 * Returns 0 on success and 1 after reporting an error on stderr.
 */
int main(int argc, char *argv[])
{
    char suffix[] = ".c";
    char *cwd = NULL;            /* buffer owned by us when getcwd() is used */
    char *dirname;
    char **file_names = NULL;    /* growable array of strdup'ed names */
    size_t num_files = 0;        /* slots of file_names currently in use */
    size_t capacity = 0;         /* slots of file_names currently allocated */
    struct dirent *entry;
    int status = 1;

    int index = array_find("-d", argv, argc);
    if (index >= 0)
    {
        /* argv[argc] is NULL, so a trailing "-d" has no operand to use. */
        if (index + 1 >= argc)
        {
            fprintf(stderr, "%s: option -d requires a directory argument\n", argv[0]);
            return 1;
        }
        dirname = argv[index + 1];
    }
    else
    {
        cwd = getcwd(NULL, 0);
        if (cwd == NULL)
        {
            perror("Error getting current directory name.");
            return 1;
        }
        dirname = cwd;
    }

    DIR *dir = opendir(dirname);
    if (dir == NULL)
    {
        perror(dirname);
        free(cwd);
        return 1;
    }

    printf("dirname = %s\n", dirname);

    while ((entry = readdir(dir)) != NULL)
    {
        if (!ends_with(entry->d_name, suffix))
            continue;

        if (num_files == capacity)
        {
            /* Grow through a temporary so the old array is still owned (and
             * freed during cleanup) if realloc fails. */
            size_t new_capacity = capacity ? capacity * 2 : 16;
            char **grown = realloc(file_names, new_capacity * sizeof *grown);
            if (grown == NULL)
            {
                perror("Could not create the filename array");
                goto cleanup;
            }
            file_names = grown;
            capacity = new_capacity;
        }

        /* entry->d_name may be overwritten by the next readdir(), so keep
         * a private copy. Only count the slot once the copy succeeded. */
        file_names[num_files] = strdup(entry->d_name);
        if (file_names[num_files] == NULL)
        {
            perror("Could not create the filename array");
            goto cleanup;
        }
        num_files++;
    }

    printf("Num files = %zu\n", num_files);
    for (size_t i = 0; i < num_files; i++)
        printf("%s\n", file_names[i]);
    status = 0;

cleanup:
    for (size_t i = 0; i < num_files; i++)
        free(file_names[i]);
    free(file_names);
    /* A DIR * from opendir() must be released with closedir(), never free(). */
    if (closedir(dir) != 0)
    {
        perror("Failed to close directory.");
    }
    free(cwd);
    return status;
}

我有点生气,因为我没有意识到我无论是否添加文件都在遍历数组。最近几个月我的调试技巧变得很烂。不管怎样,感谢您的帮助,也感谢其他的建议,我会考虑它们的。 - Chris

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接