为什么C语言(以及C++)不能优化这个递归实验?

4

我在思考函数式编程技术、递归、常量性和链表,所以我进行了两个实验来测试我的编译器。理论上,编译器应该能够优化掉尾递归(我现在意识到这里提供的函数并不是尾递归,并为试图将它们偷偷摸摸地传递得像尾递归一样而道歉,实际的尾递归变体可以在我下面发布的答案中找到),并在编译时产生结果,甚至不需要在运行时首先构建结构。

如下所示的数组变体按预期工作:

/**
 * @file test1.c
 */

static inline int my_array_sum(const int n, const int * const xs) {
    if (n == 0) {
        return n;
    } else {
        return n + my_array_sum(n - 1, xs + 1);
    }
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

在MinGW/MSys 64位环境下,通过以下命令(gcc test1.c -o test1.obj -c -O3, objdump -D test1.obj)生成如下目标代码:
0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq   

以下是链表的版本(虽然这是个不好的词汇,但它没有使用 malloc,且编译器可以访问关于链表的所有信息):

/**
 * @file test2.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum(const Cons c) {
    if (c.next == 0) {
        return c.x;
    } else {
        return c.x + cons_sum(*(c.next));
    }
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value (expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

执行以下命令(gcc test2.c -o test2.obj -c -O3, objdump -D test2.obj),将产生如下目标代码:

在MinGW/MSys 64位操作系统中:

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   b9 02 00 00 00          mov    $0x2,%ecx
  1b:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  20:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  25:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  2a:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  2f:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  34:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  39:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  3e:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  43:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  4a:   00
  4b:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  52:   00 00
  54:   c7 44 24 30 07 00 00    movl   $0x7,0x30(%rsp)
  5b:   00
  5c:   c7 44 24 40 06 00 00    movl   $0x6,0x40(%rsp)
  63:   00
  64:   c7 44 24 50 05 00 00    movl   $0x5,0x50(%rsp)
  6b:   00
  6c:   c7 44 24 60 04 00 00    movl   $0x4,0x60(%rsp)
  73:   00
  74:   c7 44 24 70 03 00 00    movl   $0x3,0x70(%rsp)
  7b:   00
  7c:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  81:   e8 00 00 00 00          callq  86 <main+0x86>
  86:   83 c0 01                add    $0x1,%eax
  89:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  90:   c3                      retq

在Linux Mint 64位操作系统中:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  5a:   00 
  5b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  62:   00 
  63:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  6a:   00 
  6b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  72:   00 
  73:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  7a:   00 
  7b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  80:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  85:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  8a:   b8 02 00 00 00          mov    $0x2,%eax
  8f:   90                      nop
  90:   89 c6                   mov    %eax,%esi
  92:   8b 02                   mov    (%rdx),%eax
  94:   48 8b 52 08             mov    0x8(%rdx),%rdx
  98:   01 f1                   add    %esi,%ecx
  9a:   48 85 d2                test   %rdx,%rdx
  9d:   75 f1                   jne    90 <main+0x90>
  9f:   01 c8                   add    %ecx,%eax
  a1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  a6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  ad:   00 00 
  af:   75 05                   jne    b6 <main+0xb6>
  b1:   48 83 c4 78             add    $0x78,%rsp
  b5:   c3                      retq   
  b6:   e8 00 00 00 00          callq  bb <main+0xbb>

我尝试在C和C++中编译上述代码,两者都给出了大致相同的反汇编结果。

标准是否保证即使是严格常量的链表变体(在堆栈上,因为我可以看到全局链表不会被优化)也不会像简单的数组那样得到优化,还是说这只是编译器的当前状态,并且在将来可能会被优化?或者我是否忘记了启用此类优化的某种标志?


5
“标准是否保证”:“标准对此没有任何规定。它只规定程序的可观察行为应该是什么样子,其他所有内容都由编译器决定。” - user17732522
3
在C++中,你可以在变量声明和/或函数声明中使用constexpr/consteval关键字,以请求编译器在编译时进行计算。例如:https://godbolt.org/z/z1c3P33q1 - user17732522
很有趣,我从未考虑过查找链接列表的constexpr,我不确定是否可能,但如果可以并且能够实现我期望的优化,那将非常酷。 - Dmytro
3
这些函数不是尾递归函数,因此这里没有进行尾递归转换或优化。相反,第一个函数只是递归展开,直到可以被常量折叠掉,而第二个则不会。 - Chris Dodd
1
@ChrisDodd:请注意,GCC针对Linux的目标确实内联了递归函数并将递归优化为循环,正如我们在代码的test / jne 90部分所看到的那样。尽管缺少递归。将递归转换为迭代是编译器通常非常有用的优化,因为递归对于小函数特别是具有潜在高递归深度的函数来说效率较低。 - Peter Cordes
显示剩余7条评论
2个回答

1

我通过将所有内容都设为constexpr来在C++中获得了期望的结果(虽然使用static看起来有点丑,但它有效):

/**
 * @file test3.cpp
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

constexpr int cons_sum(const Cons c) {
    if (c.next == 0) {
        return c.x;
    } else {
        return c.x + cons_sum(*(c.next));
    }
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    static constexpr const Cons s0 {8, 0};
    static constexpr const Cons s1 {7, &s0};
    static constexpr const Cons s2 {6, &s1};
    static constexpr const Cons s3 {5, &s2};
    static constexpr const Cons s4 {4, &s3};
    static constexpr const Cons s5 {3, &s4};
    static constexpr const Cons s6 {2, &s5};
    static constexpr const Cons h {1, &s6};    
    
    // | To produce a return value(expect 36).
    constexpr int sum = cons_sum(h); 

    return sum;
}

生产(g++ test3.cpp -o test3.obj -c -O3, objdump -D test3.obj):

在MinGW/MSys 64位环境中:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中:
0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq 

3
你现在的列表不再位于堆栈中,因为你给它分配了静态存储期。正如我在问题下面的评论中所示,你不需要这个“丑陋”的结构。你只需要将函数声明为 constevalconstexpr(在后一种情况下,返回值需要存储到一个 constexpr 变量中)。不幸的是,你不能将 main 声明为其中任何一个,因此你必须将其移到另一个函数中。此外,你的问题标记为 c,因此似乎不适合 C++-特定的答案。 - user17732522
@user17732522 不是这样,但我一开始就没有关心它在堆栈上的位置,对于我的表述不够清楚我感到抱歉。 我将其放在堆栈上希望编译器更有可能执行此行为。 我想让编译器忽略一切并返回36。 - Dmytro
@user17732522 你说得对,这是一个标记为C的问题,我不确定如何标记它,因为我想要的解决方案可以是C或C++(最好两者都有),但最初的问题本质上是C,而我在编写问题时甚至没有考虑到constexpr,所以我希望解决方案是一个C解决方案。我不确定如何解决这个不一致性,也不确定这是否是一个有效的解决方案,因为我的问题是是否可能让C做到这一点。 - Dmytro

1

如果您不关心尾递归,请忽略此答案

如上所述,问题最初谈论的是尾递归,而我提供的是常规递归而不是尾递归。

为了完整起见,下面是实际的尾递归函数。

数组版本:

/**
 * @file test1_tail.c
 */

int my_array_sum_tail(const int n, const int * const xs, const int accumulator) {
    if (n == 1) {
        return accumulator + xs[n - 1];
    } else {
        return my_array_sum_tail(n - 1, xs, accumulator + xs[n - 1]);
    }
}

int my_array_sum(const int n, const int * const xs) {
    return my_array_sum_tail(n, xs, 0);
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

生成以下目标代码 (gcc test1_tail.c -o test1_tail.obj -c -O3, objdump -D test1_tail.obj):

在MinGW/MSys 64位中:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

在 Linux Mint 64 位系统中:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq 

列表版本:

/**
 * @file test2_tail.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

static inline int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value(expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

执行以下命令(gcc test2_tail.c -o test2_tail.obj -c -O3, objdump -D test2_tail.obj)会生成下面的目标代码:

在MinGW/MSys 64位环境下:

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   41 b8 01 00 00 00       mov    $0x1,%r8d
  1c:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  21:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  26:   b9 02 00 00 00          mov    $0x2,%ecx
  2b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  30:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  35:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  3a:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  3f:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  44:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  49:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  50:   00
  51:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  58:   00 00
  5a:   48 c7 44 24 30 07 00    movq   $0x7,0x30(%rsp)
  61:   00 00
  63:   48 c7 44 24 40 06 00    movq   $0x6,0x40(%rsp)
  6a:   00 00
  6c:   48 c7 44 24 50 05 00    movq   $0x5,0x50(%rsp)
  73:   00 00
  75:   48 c7 44 24 60 04 00    movq   $0x4,0x60(%rsp)
  7c:   00 00
  7e:   48 c7 44 24 70 03 00    movq   $0x3,0x70(%rsp)
  85:   00 00
  87:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  8c:   e8 00 00 00 00          callq  91 <main+0x91>
  91:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  98:   c3                      retq

在 Linux Mint 64 位系统中:
0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 14 00 00 00    movl   $0x0,0x14(%rsp)
  5a:   00 
  5b:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  62:   00 
  63:   c7 44 24 24 00 00 00    movl   $0x0,0x24(%rsp)
  6a:   00 
  6b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  72:   00 
  73:   c7 44 24 34 00 00 00    movl   $0x0,0x34(%rsp)
  7a:   00 
  7b:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  82:   00 
  83:   c7 44 24 44 00 00 00    movl   $0x0,0x44(%rsp)
  8a:   00 
  8b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  92:   00 
  93:   c7 44 24 54 00 00 00    movl   $0x0,0x54(%rsp)
  9a:   00 
  9b:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  a2:   00 
  a3:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  a8:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  ad:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  b2:   b8 02 00 00 00          mov    $0x2,%eax
  b7:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  be:   00 00 
  c0:   89 c6                   mov    %eax,%esi
  c2:   8b 02                   mov    (%rdx),%eax
  c4:   48 8b 52 08             mov    0x8(%rdx),%rdx
  c8:   01 f1                   add    %esi,%ecx
  ca:   48 85 d2                test   %rdx,%rdx
  cd:   75 f1                   jne    c0 <main+0xc0>
  cf:   01 c8                   add    %ecx,%eax
  d1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  d6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  dd:   00 00 
  df:   75 05                   jne    e6 <main+0xe6>
  e1:   48 83 c4 78             add    $0x78,%rsp
  e5:   c3                      retq   
  e6:   e8 00 00 00 00          callq  eb <main+0xeb>

以下是使用c++/constexpr的尾递归版本,与其他答案相同:

/**
 * @file test3_tail.cpp
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

constexpr int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

constexpr int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    static constexpr const Cons s0 {8, 0};
    static constexpr const Cons s1 {7, &s0};
    static constexpr const Cons s2 {6, &s1};
    static constexpr const Cons s3 {5, &s2};
    static constexpr const Cons s4 {4, &s3};
    static constexpr const Cons s5 {3, &s4};
    static constexpr const Cons s6 {2, &s5};
    static constexpr const Cons h {1, &s6};    
    
    // | To produce a return value(expect 36).
    constexpr int sum = cons_sum(h); 

    return sum;
}

执行以下命令 (g++ test3_tail.c -o test3_tail.obj -c -O3, objdump -D test3_tail.obj),将生成如下目标代码:

在MinGW/MSys 64位系统中:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp

在Linux Mint 64位操作系统中:
0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq  

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接