使用C语言,我想以一种方式读取文本文件的内容,使得最终得到一个字符串数组,其中第n个字符串表示文本文件的第n行。文件的行可以任意长。
有没有一种优雅的方法来实现这个功能?我知道一些巧妙的技巧可以直接将文本文件读入单个适当大小的缓冲区中,但将其分解成行会使它更加棘手(至少据我所知)。
非常感谢!
将其分成行意味着解析文本并使用0替换所有EOL(我指的是\n和\r)字符。通过这种方式,您实际上可以重复使用缓冲区,并将每行的开头仅存储到单独的char *数组中(只需进行2次遍历即可完成所有操作)。
通过这种方式,您可以为整个文件大小+2个解析执行一次读取,这可能会提高性能。
可以通过循环fgets读取文件的行数,然后创建一个二维数组,第一维是行数+1。然后,将文件重新读入数组中。
不过,您需要定义元素的长度。或者,计算最长行的大小。
示例代码:
inFile = fopen(FILENAME, "r");
lineCount = 0;
while(inputError != EOF) {
inputError = fscanf(inFile, "%s\n", word);
lineCount++;
}
fclose(inFile);
// Above iterates lineCount++ after the EOF to allow for an array
// that matches the line numbers
char names[lineCount][MAX_LINE];
fopen(FILENAME, "r");
for(i = 1; i < lineCount; i++)
fscanf(inFile, "%s", names[i]);
fclose(inFile);
你可以使用这种方式
#include <stdlib.h> /* exit, malloc, realloc, free */
#include <stdio.h> /* fopen, fgetc, fputs, fwrite */
struct line_reader {
/* All members are private. */
FILE *f;
char *buf;
size_t siz;
};
/*
* Initializes a line reader _lr_ for the stream _f_.
*/
void
lr_init(struct line_reader *lr, FILE *f)
{
lr->f = f;
lr->buf = NULL;
lr->siz = 0;
}
/*
* Reads the next line. If successful, returns a pointer to the line,
* and sets *len to the number of characters, at least 1. The result is
* _not_ a C string; it has no terminating '\0'. The returned pointer
* remains valid until the next call to next_line() or lr_free() with
* the same _lr_.
*
* next_line() returns NULL at end of file, or if there is an error (on
* the stream, or with memory allocation).
*/
char *
next_line(struct line_reader *lr, size_t *len)
{
size_t newsiz;
int c;
char *newbuf;
*len = 0; /* Start with empty line. */
for (;;) {
c = fgetc(lr->f); /* Read next character. */
if (ferror(lr->f))
return NULL;
if (c == EOF) {
/*
* End of file is also end of last line,
` * unless this last line would be empty.
*/
if (*len == 0)
return NULL;
else
return lr->buf;
} else {
/* Append c to the buffer. */
if (*len == lr->siz) {
/* Need a bigger buffer! */
newsiz = lr->siz + 4096;
newbuf = realloc(lr->buf, newsiz);
if (newbuf == NULL)
return NULL;
lr->buf = newbuf;
lr->siz = newsiz;
}
lr->buf[(*len)++] = c;
/* '\n' is end of line. */
if (c == '\n')
return lr->buf;
}
}
}
/*
* Frees internal memory used by _lr_.
*/
void
lr_free(struct line_reader *lr)
{
free(lr->buf);
lr->buf = NULL;
lr->siz = 0;
}
/*
* Read a file line by line.
* http://rosettacode.org/wiki/Read_a_file_line_by_line
*/
int
main()
{
struct line_reader lr;
FILE *f;
size_t len;
char *line;
f = fopen("foobar.txt", "r");
if (f == NULL) {
perror("foobar.txt");
exit(1);
}
/*
* This loop reads each line.
* Remember that line is not a C string.
* There is no terminating '\0'.
*/
lr_init(&lr, f);
while (line = next_line(&lr, &len)) {
/*
* Do something with line.
*/
fputs("LINE: ", stdout);
fwrite(line, len, 1, stdout);
}
if (!feof(f)) {
perror("next_line");
exit(1);
}
lr_free(&lr);
return 0;
}
realloc()
和strtok()
进行单次遍历。 - pmgmalloc()
分配数组所需的空间。从缓冲区开始。对于每个'\n',替换为0,并将下一个字符的地址放入数组中。跟踪数组大小;如果它即将溢出,则使用realloc()
重新分配空间。 - David Thornley