为什么C语言（以及C++）不能优化这个递归实验？

Question

为什么C语言（以及C++）不能优化这个递归实验？

cgcccompiler-optimizationcompile-timecompile-time-constant

4

我在思考函数式编程技术、递归、常量性和链表，所以我进行了两个实验来测试我的编译器。理论上，编译器应该能够优化掉尾递归（我现在意识到这里提供的函数并不是尾递归，并为试图将它们偷偷摸摸地传递得像尾递归一样而道歉，实际的尾递归变体可以在我下面发布的答案中找到），并在编译时产生结果，甚至不需要在运行时首先构建结构。

如下所示的数组变体按预期工作：

/**
 * @file test1.c
 */

static inline int my_array_sum(const int n, const int * const xs) {
    if (n == 0) {
        return n;
    } else {
        return n + my_array_sum(n - 1, xs + 1);
    }
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

在MinGW/MSys 64位环境下，通过以下命令(gcc test1.c -o test1.obj -c -O3, objdump -D test1.obj)生成如下目标代码：

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq

以下是链表的版本（虽然这是个不好的词汇，但它没有使用 malloc，且编译器可以访问关于链表的所有信息）：

/**
 * @file test2.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum(const Cons c) {
    if (c.next == 0) {
        return c.x;
    } else {
        return c.x + cons_sum(*(c.next));
    }
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value (expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

执行以下命令(gcc test2.c -o test2.obj -c -O3, objdump -D test2.obj)，将产生如下目标代码：

在MinGW/MSys 64位操作系统中：

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   b9 02 00 00 00          mov    $0x2,%ecx
  1b:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  20:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  25:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  2a:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  2f:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  34:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  39:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  3e:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  43:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  4a:   00
  4b:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  52:   00 00
  54:   c7 44 24 30 07 00 00    movl   $0x7,0x30(%rsp)
  5b:   00
  5c:   c7 44 24 40 06 00 00    movl   $0x6,0x40(%rsp)
  63:   00
  64:   c7 44 24 50 05 00 00    movl   $0x5,0x50(%rsp)
  6b:   00
  6c:   c7 44 24 60 04 00 00    movl   $0x4,0x60(%rsp)
  73:   00
  74:   c7 44 24 70 03 00 00    movl   $0x3,0x70(%rsp)
  7b:   00
  7c:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  81:   e8 00 00 00 00          callq  86 <main+0x86>
  86:   83 c0 01                add    $0x1,%eax
  89:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  90:   c3                      retq

在Linux Mint 64位操作系统中：

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  5a:   00 
  5b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  62:   00 
  63:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  6a:   00 
  6b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  72:   00 
  73:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  7a:   00 
  7b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  80:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  85:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  8a:   b8 02 00 00 00          mov    $0x2,%eax
  8f:   90                      nop
  90:   89 c6                   mov    %eax,%esi
  92:   8b 02                   mov    (%rdx),%eax
  94:   48 8b 52 08             mov    0x8(%rdx),%rdx
  98:   01 f1                   add    %esi,%ecx
  9a:   48 85 d2                test   %rdx,%rdx
  9d:   75 f1                   jne    90 <main+0x90>
  9f:   01 c8                   add    %ecx,%eax
  a1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  a6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  ad:   00 00 
  af:   75 05                   jne    b6 <main+0xb6>
  b1:   48 83 c4 78             add    $0x78,%rsp
  b5:   c3                      retq   
  b6:   e8 00 00 00 00          callq  bb <main+0xbb>

我尝试在C和C++中编译上述代码，两者都给出了大致相同的反汇编结果。

标准是否保证即使是严格常量的链表变体（在堆栈上，因为我可以看到全局链表不会被优化）也不会像简单的数组那样得到优化，还是说这只是编译器的当前状态，并且在将来可能会被优化？或者我是否忘记了启用此类优化的某种标志？

- Dmytro

5

“标准是否保证”：“标准对此没有任何规定。它只规定程序的可观察行为应该是什么样子，其他所有内容都由编译器决定。” - user17732522

3

在C++中，你可以在变量声明和/或函数声明中使用constexpr/consteval关键字，以请求编译器在编译时进行计算。例如：https://godbolt.org/z/z1c3P33q1 - user17732522

很有趣，我从未考虑过查找链接列表的constexpr，我不确定是否可能，但如果可以并且能够实现我期望的优化，那将非常酷。 - Dmytro

3

这些函数不是尾递归函数，因此这里没有进行尾递归转换或优化。相反，第一个函数只是递归展开，直到可以被常量折叠掉，而第二个则不会。 - Chris Dodd

1

@ChrisDodd：请注意，GCC针对Linux的目标确实内联了递归函数并将递归优化为循环，正如我们在代码的test / jne 90部分所看到的那样。尽管缺少尾递归。将递归转换为迭代是编译器通常非常有用的优化，因为递归对于小函数特别是具有潜在高递归深度的函数来说效率较低。 - Peter Cordes

显示剩余7条评论

2个回答

1

如果您不关心尾递归，请忽略此答案

如上所述，问题最初谈论的是尾递归，而我提供的是常规递归而不是尾递归。

为了完整起见，下面是实际的尾递归函数。

数组版本：

/**
 * @file test1_tail.c
 */

int my_array_sum_tail(const int n, const int * const xs, const int accumulator) {
    if (n == 1) {
        return accumulator + xs[n - 1];
    } else {
        return my_array_sum_tail(n - 1, xs, accumulator + xs[n - 1]);
    }
}

int my_array_sum(const int n, const int * const xs) {
    return my_array_sum_tail(n, xs, 0);
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

生成以下目标代码 (gcc test1_tail.c -o test1_tail.obj -c -O3, objdump -D test1_tail.obj):

在MinGW/MSys 64位中：

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq

列表版本：

/**
 * @file test2_tail.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

static inline int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value(expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

执行以下命令(gcc test2_tail.c -o test2_tail.obj -c -O3, objdump -D test2_tail.obj)会生成下面的目标代码:

在MinGW/MSys 64位环境下:

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   41 b8 01 00 00 00       mov    $0x1,%r8d
  1c:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  21:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  26:   b9 02 00 00 00          mov    $0x2,%ecx
  2b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  30:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  35:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  3a:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  3f:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  44:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  49:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  50:   00
  51:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  58:   00 00
  5a:   48 c7 44 24 30 07 00    movq   $0x7,0x30(%rsp)
  61:   00 00
  63:   48 c7 44 24 40 06 00    movq   $0x6,0x40(%rsp)
  6a:   00 00
  6c:   48 c7 44 24 50 05 00    movq   $0x5,0x50(%rsp)
  73:   00 00
  75:   48 c7 44 24 60 04 00    movq   $0x4,0x60(%rsp)
  7c:   00 00
  7e:   48 c7 44 24 70 03 00    movq   $0x3,0x70(%rsp)
  85:   00 00
  87:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  8c:   e8 00 00 00 00          callq  91 <main+0x91>
  91:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  98:   c3                      retq

在 Linux Mint 64 位系统中：

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 14 00 00 00    movl   $0x0,0x14(%rsp)
  5a:   00 
  5b:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  62:   00 
  63:   c7 44 24 24 00 00 00    movl   $0x0,0x24(%rsp)
  6a:   00 
  6b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  72:   00 
  73:   c7 44 24 34 00 00 00    movl   $0x0,0x34(%rsp)
  7a:   00 
  7b:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  82:   00 
  83:   c7 44 24 44 00 00 00    movl   $0x0,0x44(%rsp)
  8a:   00 
  8b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  92:   00 
  93:   c7 44 24 54 00 00 00    movl   $0x0,0x54(%rsp)
  9a:   00 
  9b:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  a2:   00 
  a3:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  a8:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  ad:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  b2:   b8 02 00 00 00          mov    $0x2,%eax
  b7:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  be:   00 00 
  c0:   89 c6                   mov    %eax,%esi
  c2:   8b 02                   mov    (%rdx),%eax
  c4:   48 8b 52 08             mov    0x8(%rdx),%rdx
  c8:   01 f1                   add    %esi,%ecx
  ca:   48 85 d2                test   %rdx,%rdx
  cd:   75 f1                   jne    c0 <main+0xc0>
  cf:   01 c8                   add    %ecx,%eax
  d1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  d6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  dd:   00 00 
  df:   75 05                   jne    e6 <main+0xe6>
  e1:   48 83 c4 78             add    $0x78,%rsp
  e5:   c3                      retq   
  e6:   e8 00 00 00 00          callq  eb <main+0xeb>

以下是使用c++/constexpr的尾递归版本，与其他答案相同：

/**
 * @file test3_tail.cpp
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

constexpr int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

constexpr int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    static constexpr const Cons s0 {8, 0};
    static constexpr const Cons s1 {7, &s0};
    static constexpr const Cons s2 {6, &s1};
    static constexpr const Cons s3 {5, &s2};
    static constexpr const Cons s4 {4, &s3};
    static constexpr const Cons s5 {3, &s4};
    static constexpr const Cons s6 {2, &s5};
    static constexpr const Cons h {1, &s6};    
    
    // | To produce a return value(expect 36).
    constexpr int sum = cons_sum(h); 

    return sum;
}

执行以下命令 (g++ test3_tail.c -o test3_tail.obj -c -O3, objdump -D test3_tail.obj)，将生成如下目标代码:

在MinGW/MSys 64位系统中：

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp

在Linux Mint 64位操作系统中：

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq

- Dmytro

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Dmytro · Accepted Answer

我通过将所有内容都设为constexpr来在C++中获得了期望的结果（虽然使用static看起来有点丑，但它有效）：

/**
 * @file test3.cpp
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

constexpr int cons_sum(const Cons c) {
    if (c.next == 0) {
        return c.x;
    } else {
        return c.x + cons_sum(*(c.next));
    }
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    static constexpr const Cons s0 {8, 0};
    static constexpr const Cons s1 {7, &s0};
    static constexpr const Cons s2 {6, &s1};
    static constexpr const Cons s3 {5, &s2};
    static constexpr const Cons s4 {4, &s3};
    static constexpr const Cons s5 {3, &s4};
    static constexpr const Cons s6 {2, &s5};
    static constexpr const Cons h {1, &s6};    
    
    // | To produce a return value(expect 36).
    constexpr int sum = cons_sum(h); 

    return sum;
}

生产(g++ test3.cpp -o test3.obj -c -O3, objdump -D test3.obj):

在MinGW/MSys 64位环境中:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中：

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq