从C程序中删除注释

3
以下是我用于删除C程序中注释的代码。但是注释行没有被删除。它可以删除 /* 和 */,但无法删除这两个分隔符之间的内容。
#include <stdio.h>

/*
 * Comment-stripping attempt from the question.
 *
 * Reads argv[1] one character at a time and writes to argv[2]. A space is
 * written for a '/' followed by '*' and for a '*' followed by '/', but no
 * state records that the reader is inside a comment, so the text between
 * the two delimiters is copied through by the final else branch.
 */
main(int argc, char *argv[]) {
    FILE *fp, *ft;
    char ch;    /* NOTE(review): fgetc returns int; a char cannot reliably be compared with EOF */
    if (argc < 3) {
        printf("No file name given");
        /* NOTE(review): execution continues, so argv[1] and argv[2] are still used below */
    }
    fp = fopen(argv[1], "r");
    ft = fopen(argv[2], "w");
    /* NOTE(review): a failed fopen is only reported; the NULL stream is still used afterwards */
    if (fp == NULL)
        printf("Opening error");
    if (ft == NULL)
        printf("Opening error");
    while (1) {
        ch = fgetc(fp);
        if (ch == EOF)
            break;
        if (ch == '/') {
            /* Look at the character after the slash. */
            ch = fgetc(fp);
            if (ch == '*') {
                /* Comment opener: replaced by one space. */
                putc(' ', ft);
            }
            /* NOTE(review): when the next character is not '*', neither the slash nor that character is written */
        } else if (ch == '*') {
            /* Look at the character after the star. */
            ch = fgetc(fp);
            if (ch == '/') {
                /* Comment terminator: replaced by one space. */
                putc(' ', ft);
            }
            /* NOTE(review): when the next character is not '/', neither the star nor that character is written */
        } else {
            /* Every other character is copied, including the text inside a comment. */
            putc(ch, ft);
        }
    }
    fclose(fp);
    fclose(ft); 
}

请帮我删除注释行。


2
你为什么认为那会起作用呢?为什么不在调试器中单步执行,直到 /* 之后。然后问问自己,我需要做什么才能跳过所有字符直到 */ - John Saunders
@Muskaan:你可以通过点击下方灰色勾选符号接受一条回答,并点赞那些帮助了你的回答。 - chqrlie
7个回答

1

你的代码存在多个问题:

  • main函数的返回类型不应该被省略。隐式的int已经过时,不再被C标准所允许。函数原型应该是int main(int argc, char *argv[])
  • 如果没有传递命令行参数给程序,它应该在输出错误信息后退出,错误信息应该被输出到stderr而不是stdout
  • 如果输入文件无法打开,程序不应该创建输出文件。
  • 如果fopen失败,程序应该停止而不是进入未定义行为的领域。
  • 为了使测试(ch == EOF)正确运行,ch必须具有int类型而不是char类型。
  • 你正确地识别了序列 /* 和 */ 并用单个空格替换它们,但你的代码实际上还会删除所有其他出现的 / 和 * 以及紧随其后的那个字符,而且没有任何跳过两个分隔符之间字符的逻辑。
  • main函数应该返回0
请注意,如果序列 /* 或 */ 出现在单行注释、字符串或字符常量中,你的方法可能无法正确识别注释。此外,还应处理转义换行符(行末的 \),因为它们可能出现在 / 和 * 之间,从而隐藏注释的开始或结束序列。
这是一个修改后的版本,可以处理这些情况:
#include <stdio.h>

/*
 * Return the next byte of the C source in `fp`, treating every
 * backslash-newline pair as if it were not there (line splicing).
 *
 * A backslash followed by anything else is returned as '\\' and the
 * following byte is pushed back so the next call sees it.
 * Returns EOF at end of input.
 */
int getcpp(FILE *fp) {
    for (;;) {
        int c = getc(fp);
        if (c != '\\')
            return c;

        /* A backslash: only swallow it when a newline follows. */
        int after = getc(fp);
        if (after != '\n') {
            ungetc(after, fp);
            return '\\';
        }
        /* Escaped newline consumed; keep reading. */
    }
}

/*
 * Copy a string or character constant from `fp` to `ft` unchanged.
 *
 * `cch` is the opening quote that the caller has already read; it is
 * written first and also serves as the terminator to look for. A
 * backslash causes the following byte to be copied without being
 * examined, so an escaped quote does not end the constant.
 *
 * Returns 0 once the closing quote has been written, or EOF if the
 * input ends before the constant is terminated.
 */
int skipstr(int cch, FILE *fp, FILE *ft) {
    int c;

    putc(cch, ft);
    for (;;) {
        c = getcpp(fp);
        if (c == EOF)
            break;
        putc(c, ft);
        if (c == cch)
            return 0;
        if (c != '\\')
            continue;

        /* Escape sequence: copy the escaped byte verbatim. */
        c = getcpp(fp);
        if (c == EOF)
            break;
        putc(c, ft);
    }
    return EOF;
}

/*
 * Copy the C source file argv[1] to argv[2] with comments removed.
 *
 * Block comments are replaced by a single space; line comments are
 * removed up to (not including) the newline that ends them. String and
 * character constants are copied verbatim through skipstr(), so comment
 * delimiters inside them are left alone. Input is read through getcpp(),
 * so escaped newlines never hide a delimiter.
 *
 * Returns 0 on success and 1 on a usage error, a file that cannot be
 * opened or written, or an unterminated comment or constant.
 */
int main(int argc, char *argv[]) {
    FILE *fp, *ft;
    int ch;
    int status = 0;

    if (argc < 3) {
        fprintf(stderr, "Missing arguments. Need input and output filenames\n");
        return 1;
    }
    if ((fp = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Cannot open input file %s\n", argv[1]);
        return 1;
    }
    if ((ft = fopen(argv[2], "w")) == NULL) {
        fprintf(stderr, "Cannot open output file %s\n", argv[2]);
        fclose(fp);     /* do not leak the input stream */
        return 1;
    }
    while ((ch = getcpp(fp)) != EOF) {
        if (ch == '/') {
            ch = getcpp(fp);
            if (ch == '*') {
                /* multi-line comment: skip through the closing delimiter */
                int lastc = 0;
                while ((ch = getcpp(fp)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "unterminated comment\n");
                    status = 1;
                    break;
                }
                /* a comment counts as one space so adjacent tokens stay apart */
                ch = ' ';
            } else if (ch == '/') {
                /* single-line comment: skip to the newline, which is kept */
                while ((ch = getcpp(fp)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF)
                    break;
            } else {
                /* plain slash (e.g. division) */
                putc('/', ft);
                if (ch == EOF)
                    break;      /* never pass EOF to putc */
            }
        }
        /*
         * Checked after the slash handling so that a constant directly
         * following a division operator is still copied by skipstr().
         */
        if (ch == '\'' || ch == '"') {
            if (skipstr(ch, fp, ft)) {
                fprintf(stderr, "unterminated string or character constant\n");
                status = 1;
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    fclose(fp);
    /* fclose flushes buffered output, so a write error can surface here */
    if (fclose(ft) != 0) {
        fprintf(stderr, "Error writing output file %s\n", argv[2]);
        status = 1;
    }
    return status;
}

@Muskaan:如果这个答案对你有帮助,你可以点击下面灰色复选框接受它。 - chqrlie

0
对于这个问题,大多数答案只处理了多行注释(/* ... */),但代码中也可能有单行注释(// ...)。因此,为了处理单行注释,需要对krouis的代码稍作修改。
#include <stdio.h>

/*
 * Copy the C source file argv[1] to argv[2] with comments removed.
 *
 * The input is scanned with a two-character window (ch, nextc). A block
 * comment is replaced by a single space; a line comment is removed up to
 * the newline that ends it, and the newline itself is kept so that the
 * following line is not joined to the current one. An unterminated block
 * comment or a line comment that reaches end of file simply ends the
 * output. String and character constants are not recognised.
 *
 * Returns 0 on success, 1 on a usage or file-opening error.
 */
int main (int argc, char *argv[])
{
  FILE *fp, *ft;
  int ch, nextc;      /* int, so EOF is distinguishable from every byte value */

  if (argc < 3)
  {
       fprintf (stderr, "No file name given");
       return 1;
  }
  fp = fopen (argv[1], "r");
  if (fp == NULL)
  {
       fprintf (stderr, "Opening error");
       return 1;
  }
  ft = fopen (argv[2], "w");
  if (ft == NULL)
  {
       fprintf (stderr, "Opening error");
       fclose (fp);
       return 1;
  }

  nextc = fgetc (fp);
  while (nextc != EOF)
  {
     ch = nextc;
     nextc = fgetc (fp);

     if ((ch == '/') && (nextc == '*'))
     {
        /* Slide the window over the comment body until it holds the terminator. */
        ch = fgetc (fp);
        nextc = fgetc (fp);
        while ((nextc != EOF) && !((ch == '*') && (nextc == '/')))
        {
          ch = nextc;
          nextc = fgetc (fp);
        }
        if (nextc == EOF)
           break;               /* unterminated comment: stop instead of spinning */
        putc (' ', ft);         /* a comment counts as one space */
        nextc = fgetc (fp);     /* first character after the comment */
        continue;
     }
     else if ((ch == '/') && (nextc == '/'))
     {
        /* Skip to the end of the line; nextc is left on '\n' (or EOF). */
        do
        {
           nextc = fgetc (fp);
        }
        while ((nextc != EOF) && (nextc != '\n'));
        continue;
     }
     putc (ch, ft);
  }
  fclose (fp);
  fclose (ft);
  return 0;
}

0

你的最后一个else部分正在写入所有不是'/''*'的字符。我已经改变了你下面的代码,*******附加行*********显示了更改的部分。尝试一下并告诉我结果?祝好运...

#include<stdio.h>

// Variant of the question's program that adds an "inside comment" flag.
// Reads argv[1] character by character and writes to argv[2]; while flag
// is 1 the final putc is skipped, so the text between the comment
// delimiters is not copied.
// NOTE(review): main has no declared return type and returns no value.
main(int argc,char*argv[])
{

   FILE *fp,*ft;
   char ch;      // NOTE(review): fgetc returns int; a char cannot reliably be compared with EOF
   int flag=0;   //**********************additional line********

   if(argc<3)
   {
        printf("No file name given");
        // NOTE(review): execution continues, so argv[1] and argv[2] are still used below
   }
   fp=fopen(argv[1],"r");
   ft=fopen(argv[2],"w");
   // NOTE(review): a failed fopen is only reported; the NULL stream is still used afterwards
   if(fp==NULL)
        printf("Opening error");
   if(ft==NULL)
        printf("Opening error");
   while(1)
   {
        ch=fgetc(fp);
        if(ch==EOF)
                break;
        if(ch=='/')
        {
                // ch now holds the character after the slash
                ch=fgetc(fp);
                if(ch=='*')
                {
                        // comment opener: enter comment state and emit one space
                        flag=1; //**********************additional line********
                        putc(' ',ft);
                }
                // NOTE(review): if the next character is not '*', only that character
                // reaches the putc below; the slash itself is never written
        }
        else if (ch=='*')
        {
                // ch now holds the character after the star
                ch=fgetc(fp);
                if(ch=='/')
                {
                        // comment terminator: leave comment state and emit one space
                        flag=0;//**********************additional line********
                        putc(' ',ft);
                        // NOTE(review): flag is already 0 here, so the putc below also writes this '/'
                }
                // NOTE(review): if the next character is not '/', the star itself is never written
        }
        // copy the current character only while outside a comment
        if(flag==0)   //**********************additional line********
        {
                putc(ch,ft);
        }
   }

   fclose(fp);
   fclose(ft); 


}

请注意,我已将最后的else语句更改为if语句。 - Celik

0

只需要将上述代码中的行

while ((ch != '*') && (nextc != '/'))

改为

while (!((ch == '*') && (nextc == '/')))

即可。


您可以通过在代码前面放置四个空格或选择它并单击编辑框上方的代码按钮来使您的代码出现在代码块中。您还需要在代码和文本之间留一个空行。 - rjp

0
你的代码正确识别了注释开始序列“/*”和注释结束序列“*/”,并将它们删除,但没有删除它们之间的内容(例如,用一个标志就可以做到)。
/*
 *   flag = 1 if comment detected
 *   flag = 0 otherwise
 */

/* Write the current character only while no comment is open. */
if (flag == 0)
  {
    putc (ch, ft);
  }

如果您保留代码不变,它将删除文件中所有'/',而不仅仅是注释中的。我可以想到至少一个坏后果(例如调用头文件如<sys/time.h><sys/stat.h><netinet/in.h>等)。

由于注释的起始和结束序列宽度为两个字符,建议您使用2个“光标”读取fp,就好像每次循环读取2个字符一样。以下是一个示例(尽管它可以工作,但出于简单性和可读性的原因,它无法处理非关闭注释的边缘情况或在关闭注释序列后紧接着EOF的情况)。

#include <stdio.h>

/*
 * Two-cursor comment stripper: copies argv[1] to argv[2], keeping the
 * current character in ch and the one after it in nextc so that the
 * two-character comment opener can be recognised.
 *
 * Returns 0.
 */
int
main (int argc, char *argv[])
{
  FILE *fp, *ft;
  char ch, nextc;   /* NOTE(review): fgetc returns int; a char cannot reliably be compared with EOF */
  if (argc < 3)
   {
      printf ("No file name given");
      /* NOTE(review): execution continues, so argv[1] and argv[2] are still used below */
   }
  fp = fopen (argv[1], "r");
  ft = fopen (argv[2], "w");
  /* NOTE(review): a failed fopen is only reported; the NULL stream is still used afterwards */
  if (fp == NULL)
    printf ("Opening error");
  if (ft == NULL)
    printf ("Opening error");
  nextc = fgetc (fp);
  while (nextc != EOF)
    {
      /* Advance the window by one character. */
      ch = nextc;
      nextc = fgetc (fp);

      if ((ch == '/') && (nextc == '*')) 
        {
          nextc = fgetc (fp);
          /*
           * NOTE(review): this condition stops as soon as EITHER ch is '*'
           * OR nextc is '/', not only when both hold. On entry ch is still
           * the opening slash.
           */
          while ((ch != '*') && (nextc != '/')) /* unroll until the end of comment*/
            {
              ch = nextc;
              nextc = fgetc (fp);
            }
          /* Reload the window with the two characters after the point where the loop stopped. */
          ch = fgetc (fp);
          nextc = fgetc (fp);
          /* NOTE(review): ch is written below without being checked for EOF or for another comment opener */
        }

      putc (ch, ft);
    }
  fclose (fp);
  fclose (ft);
  return 0;
}

希望这能有所帮助。

0

你可以尝试像这样做:

#include <stdio.h>
#include <string.h>

/* Scanner states passed to read_source() in its flag argument. */
#define READ 0                  /* ordinary code; the state main() starts in */
#define SINGLE_LINE_COMMENT 1   /* entered on two consecutive slashes, left at '\n' */
#define MULTILINE_COMMENT 2     /* entered on slash-star, left on star-slash */
#define STRING_READ 3           /* inside a double-quoted string */
#define CHAR_READ 4             /* inside a single-quoted character constant */
/* Position of the character most recently read by read_source(). */
int row = 1;        /* incremented on every '\n' */
int col = 0;        /* reset to 0 on '\n', incremented otherwise */
/* Position recorded when a comment, string or char constant starts; printed in error messages. */
int er_line = 0;
int er_col = 0;
/*
 * Copy the C source read from `src` to `dst`, leaving out comments.
 *
 * Parameters:
 *   src       - input stream, positioned where scanning should start
 *   dst       - output stream that receives the filtered text
 *   flag      - initial scanner state (READ, SINGLE_LINE_COMMENT,
 *               MULTILINE_COMMENT, STRING_READ or CHAR_READ)
 *   prev_char - character assumed to precede the first one read
 *   past_read - number of characters already consumed in the current state
 *
 * A block comment is replaced by one space; a line comment is removed up
 * to the newline that ends it. Diagnostics are printed to stdout using the
 * file-scope row/col/er_line/er_col counters. Both streams are closed
 * before the function returns, whether it stops at end of input or at a
 * reported error.
 *
 * The scanner iterates instead of recursing once per character, so stack
 * use does not grow with the size of the input.
 *
 * Known limits (unchanged heuristics): a string that spans a newline and a
 * character constant longer than the past_read limit (for example a hex or
 * octal escape) are reported as errors and stop the copy.
 */
void read_source(FILE *src, FILE *dst, int flag, char prev_char, int past_read)
{
    if (src == NULL || dst == NULL)
    {
        /* Nothing can be copied; still honour the "streams are closed" contract. */
        if (src != NULL)
        {
            fclose(src);
        }
        if (dst != NULL)
        {
            fclose(dst);
        }
        return;
    }

    for (;;)
    {
        /* int, so EOF is distinguishable from every byte value */
        int ch = fgetc(src);
        int next_ch;

        if (ch == EOF)
        {
            if (flag == STRING_READ)
            {
                printf("Error : non-terminatig string at line :%d col :%d \n", er_line, er_col);
            }
            if (flag == CHAR_READ)
            {
                printf("Error : non-terminatig char constant at line :%d col :%d  \n", er_line, er_col);
            }
            if (flag == MULTILINE_COMMENT)
            {
                printf("Error : comment reach to end of file at line :%d col :%d \n", er_line, er_col);
            }
            break;
        }

        past_read++;
        if (ch == '\n')
        {
            row++;
            col = 0;
        }
        else
        {
            col++;
        }

        switch (ch)
        {
        case '\n':
            if (flag == SINGLE_LINE_COMMENT)
            {
                /* The newline ends the comment and is itself copied below. */
                flag = READ;
                past_read = 0;
            }
            else if (flag == STRING_READ)
            {
                printf("Error : non-terminatig string at line :%d col :%d \n", er_line, er_col);
                goto done;
            }
            else if (flag == CHAR_READ)
            {
                printf("Error : non-terminatig char constant at line :%d col :%d  \n", er_line, er_col);
                goto done;
            }
            break;
        case '/':
            /*
             * Only look ahead in ordinary code. Inside a string, character
             * constant or comment a slash is just data, and consuming the
             * character after it would drop it from the output.
             */
            if (flag != READ)
            {
                break;
            }
            next_ch = fgetc(src);
            if (next_ch == '/')
            {
                flag = SINGLE_LINE_COMMENT;
                er_line = row;
                er_col = col;
                past_read = 0;
            }
            else if (next_ch == '*')
            {
                flag = MULTILINE_COMMENT;
                er_line = row;
                er_col = col;
                past_read = 0;
            }
            else if (next_ch != EOF)
            {
                /* Not a comment: hand the lookahead character back. */
                ungetc(next_ch, src);
            }
            break;
        case '"':
            if (prev_char != '\\')
            {
                if (flag == STRING_READ)
                {
                    flag = READ;
                    past_read = 0;
                }
                else if (flag == READ)
                {
                    flag = STRING_READ;
                    er_line = row;
                    er_col = col;
                    past_read = 0;
                }
            }
            break;
        case '\'':
            if (prev_char != '\\')
            {
                if (flag == CHAR_READ)
                {
                    flag = READ;
                    past_read = 0;
                }
                else if (flag == READ)
                {
                    flag = CHAR_READ;
                    er_line = row;
                    er_col = col;
                    past_read = 0;
                }
            }
            else if (flag == CHAR_READ && past_read > 2)
            {
                /*
                 * Retained heuristic: a quote after a backslash more than
                 * two characters into the constant is taken as its end.
                 */
                flag = READ;
                past_read = 0;
            }
            break;
        case '*':
            if (flag == MULTILINE_COMMENT)
            {
                next_ch = fgetc(src);
                if (next_ch == '/')
                {
                    /* End of comment: it is replaced by a single space. */
                    ch = ' ';
                    flag = READ;
                    past_read = 0;
                }
                else if (next_ch != EOF)
                {
                    ungetc(next_ch, src);
                }
            }
            break;
        default:
            break;
        }

        /* Length heuristic for character constants. */
        if (flag == CHAR_READ && past_read > 3 && (ch == '\\' || prev_char != '\\'))
        {
            printf(" Error : non-terminatig char constant at line :%d col :%d\n", er_line, er_col);
            goto done;
        }

        if (flag != MULTILINE_COMMENT && flag != SINGLE_LINE_COMMENT && ch != '\0')
        {
            fputc(ch, dst);
        }

        /*
         * Remember the previous character for escape detection. A backslash
         * that was itself escaped must not escape the next character, so it
         * is recorded as '\0' rather than as a backslash.
         */
        prev_char = (ch == '\\' && prev_char == '\\') ? '\0' : (char)ch;
    }

done:
    fclose(src);
    fclose(dst);
}

/*
 * Entry point: strip comments from the file named by argv[1] and write the
 * result to the file named by argv[2].
 *
 * Both streams are handed to read_source(), which closes them when it
 * reaches the end of the input.
 *
 * Returns 0 after read_source() has run, 1 when the arguments are missing
 * or either file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fp = NULL;
    FILE *fp2 = NULL;

    /* Without this check argv[1] or argv[2] could be a null pointer. */
    if (argc < 3)
    {
        printf("Usage: %s <source file> <output file>\n", argc > 0 ? argv[0] : "program");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL)
    {
        printf("Unable to open file %s\n", argv[1]);
        return 1;
    }
    fp2 = fopen(argv[2], "w");
    if (fp2 == NULL)
    {
        printf("Unable to open file %s\n", argv[2]);
        fclose(fp);     /* do not leak the input stream */
        return 1;       /* read_source must not be given a NULL stream */
    }
    read_source(fp, fp2, READ, '\0', 0);
    return 0;
}

0
/* This file is to remove all comments from a c/c++ source file */
/* Modified by John Dai 2020-05-06 */

#include <stdio.h>

/*
 * Copy sourceFile to outputFile with line comments and block comments
 * removed. The input is scanned with a two-character window (ch, nextc).
 * Both file names are hard-coded below.
 *
 * Returns 0 on completion, 1 if either file cannot be opened.
 */
int main (void)
{
  char *sourceFile = "D:/Temp/MyCfile.cpp"; //your source code
  char *outputFile = "D:/Temp/MyCfileWoComments.cpp"; //output file

  FILE *fp, *ft;
  char ch, nextc; // NOTE(review): fgetc returns int; a char cannot reliably be compared with EOF

  // Both files are opened before either result is checked, so the output
  // file is created even when the source file cannot be opened.
  fp = fopen (sourceFile, "r");
  ft = fopen (outputFile, "w");
  if (fp == NULL) {printf ("Error in opening source file\n"); return 1;}
  if (ft == NULL) {printf ("Error in opening output file\n"); return 1;}

  nextc = fgetc (fp);
  while (nextc != EOF)
    {
      // Advance the window by one character.
      ch = nextc;
      nextc = fgetc (fp);


      if ((ch == '/') && (nextc == '/'))
      {
          nextc = fgetc (fp);
          // NOTE(review): no EOF test here, so a line comment on the last
          // line without a trailing newline never leaves this loop
          while (nextc != '\n') {// move to the end of line
              nextc = fgetc (fp);
          }
          ch = nextc; //end of line character
          nextc = fgetc(fp); //read 1st character from a new line
      }


      else if ((ch == '/') && (nextc == '*')){
      {
          nextc = fgetc (fp);
          // NOTE(review): no EOF test here either, so an unterminated block
          // comment never leaves this loop
          while (!((ch == '*') && (nextc == '/'))) {/* move to the end of comment*/
              ch = nextc;
              nextc = fgetc (fp);
          }
          ch = fgetc (fp); //read first character after the end of comment block
          nextc = fgetc (fp);
          // NOTE(review): ch is written below without being checked for EOF
          // or for the start of another comment
        }
      }

      // Written for every iteration: an ordinary character, the newline
      // that ended a line comment, or the first character after a block comment.
      putc (ch, ft);

    }

  fclose (fp);
  fclose (ft);
  return 0;
}

1
最好在回答中提供一些解释。 - milad
你的代码存在多个问题:如果文件以 // 注释结尾且后面没有换行符,则会出现无限循环;如果文件中有未终止的 /* 注释,也会出现无限循环。此外,它不支持转义换行符,也不处理字符常量和字符串常量,而这些常量中可能包含 // 或 /* 序列。 - chqrlie

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接