经过多个小时的实验,重新排列浮点表达式,利用融合乘加(FMA)和补偿求和,我得出结论,通过余弦定律进行计算在鲁棒性上不起作用,即使切换到双精度浮点运算(也称为对数运算)也不行。
正如问题中已经指出的那样,单独使用Kahan的数值优化排列是不够的,因为在角度计算开始之前,需要进行平方根运算,这已经引入了数值误差。然而,我观察到,在双精度浮点运算中进行中间计算可以得到一个鲁棒的实现。由于提问者的要求不允许使用双精度计算,这使得我们只能使用双精度浮点计算作为备选方案,当然,这对于具有FMA支持的平台来说,性能影响是显著的。一个“烟雾”测试表明,通过对Kahan算法规范进行直接翻译,可以得到一个能够提供三角形所有角度的实现,相对误差小于2的-23次方。
对于下面的C++11代码,我假设目标平台支持FMA,并且可以加速双精度浮点函数。我的测试框架基于一个非常古老的任意精度库,我已经使用了30年:R. P. Brent的1978年的MP库。我将其配置为250位精度。使用MP库的参考函数使用余弦定理计算角度,以提供稳健的单元测试。这部分代码需要替换为使用常用的现代库。我使用Intel C/C++编译器进行构建,进行了全面优化,并且符合IEEE-754浮点数要求(/fp:strict)。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
typedef struct float2 {
float x;
float y;
} float2;
float2 make_float2 (float head, float tail);
float2 add_dblflt (float2 a, float2 b);
float2 sub_dblflt (float2 a, float2 b);
float2 mul_dblflt (float2 a, float2 b);
float2 div_dblflt (float2 a, float2 b);
float2 sqrt_dblflt (float2 a);
float angle_kahanf (float asq, float bsq, float csq)
{
if (asq < bsq) { float t = bsq; bsq = asq; asq = t; }
float2 a = sqrt_dblflt (make_float2 (asq, 0));
float2 b = sqrt_dblflt (make_float2 (bsq, 0));
float2 c = sqrt_dblflt (make_float2 (csq, 0));
float2 mu = {INFINITY / INFINITY, INFINITY / INFINITY};
if ((bsq >= csq) && (csq >= 0)) mu = sub_dblflt (c, sub_dblflt (a, b));
else if ((csq > 0) && (bsq >= 0)) mu = sub_dblflt (b, sub_dblflt (a, c));
else fprintf (stderr, "angle_kahanf: not a real triangle\n");
float2 fact_0 = add_dblflt (sub_dblflt (a, b), c);
float2 num = mul_dblflt (fact_0, mu);
float2 fact_1 = add_dblflt (a, add_dblflt (b, c));
float2 fact_2 = add_dblflt (b, sub_dblflt (a, c));
float2 den = mul_dblflt (fact_1, fact_2);
float2 ratio = div_dblflt (num, den);
float2 root = sqrt_dblflt (ratio);
float atan_val = atanf (root.y);
float angle = 2.0f * atan_val;
return angle;
}
float2 make_float2 (float head, float tail)
{
float2 r;
r.x = tail;
r.y = head;
return r;
}
float2 add_dblflt (float2 a, float2 b)
{
float2 z;
float t1, t2, t3, t4, t5, e;
t1 = a.y + b.y;
t2 = t1 - a.y;
t3 = (a.y + (t2 - t1)) + (b.y - t2);
t4 = a.x + b.x;
t2 = t4 - a.x;
t5 = (a.x + (t2 - t4)) + (b.x - t2);
t3 = t3 + t4;
t4 = t1 + t3;
t3 = (t1 - t4) + t3;
t3 = t3 + t5;
z.y = e = t4 + t3;
z.x = (t4 - e) + t3;
return z;
}
float2 sub_dblflt (float2 a, float2 b)
{
float2 z;
float t1, t2, t3, t4, t5, e;
t1 = a.y - b.y;
t2 = t1 - a.y;
t3 = (a.y + (t2 - t1)) - (b.y + t2);
t4 = a.x - b.x;
t2 = t4 - a.x;
t5 = (a.x + (t2 - t4)) - (b.x + t2);
t3 = t3 + t4;
t4 = t1 + t3;
t3 = (t1 - t4) + t3;
t3 = t3 + t5;
z.y = e = t4 + t3;
z.x = (t4 - e) + t3;
return z;
}
float2 mul_dblflt (float2 a, float2 b)
{
float2 t, z;
float e;
t.y = a.y * b.y;
t.x = fmaf (a.y, b.y, -t.y);
t.x = fmaf (a.x, b.x, t.x);
t.x = fmaf (a.y, b.x, t.x);
t.x = fmaf (a.x, b.y, t.x);
z.y = e = t.y + t.x;
z.x = (t.y - e) + t.x;
return z;
}
float2 div_dblflt (float2 a, float2 b)
{
float2 t, z;
float e, r;
r = 1.0f / b.y;
t.y = a.y * r;
e = fmaf (b.y, -t.y, a.y);
t.y = fmaf (r, e, t.y);
t.x = fmaf (b.y, -t.y, a.y);
t.x = a.x + t.x;
t.x = fmaf (b.x, -t.y, t.x);
e = r * t.x;
t.x = fmaf (b.y, -e, t.x);
t.x = fmaf (r, t.x, e);
z.y = e = t.y + t.x;
z.x = (t.y - e) + t.x;
return z;
}
float2 sqrt_dblflt (float2 a)
{
float2 t, z;
float e, y, s, r;
r = 1.0f / sqrtf (a.y);
if (a.y == 0.0f) r = 0.0f;
y = a.y * r;
s = fmaf (y, -y, a.y);
r = 0.5f * r;
z.y = e = s + a.x;
z.x = (s - e) + a.x;
t.y = r * z.y;
t.x = fmaf (r, z.y, -t.y);
t.x = fmaf (r, z.x, t.x);
r = y + t.y;
s = (y - r) + t.y;
s = s + t.x;
z.y = e = r + s;
z.x = (r - e) + s;
return z;
}
#include "mpglue.h"
double lawOfCosines_ref (double asq, double bsq, double csq)
{
struct mpNum mpAsq, mpBsq, mpCsq, mpTmp0, mpTmp1;
double angle;
mpDoubleToMp (asq, &mpAsq);
mpDoubleToMp (bsq, &mpBsq);
mpDoubleToMp (csq, &mpCsq);
mpAdd (&mpAsq, &mpBsq, &mpTmp0);
mpSub (&mpTmp0, &mpCsq, &mpTmp0);
mpMul (&mpAsq, &mpBsq, &mpTmp1);
mpSqrt (&mpTmp1, &mpTmp1);
mpMul (mpTwo(), &mpTmp1, &mpTmp1);
mpDiv (&mpTmp0, &mpTmp1, &mpTmp0);
mpAcos (&mpTmp0, &mpTmp0);
mpMpToDouble (&mpTmp0, &angle);
return angle;
}
static uint32_t kiss_z=362436069, kiss_w=521288629;
static uint32_t kiss_jsr=123456789, kiss_jcong=380116160;
#define znew (kiss_z=36969*(kiss_z&65535)+(kiss_z>>16))
#define wnew (kiss_w=18000*(kiss_w&65535)+(kiss_w>>16))
#define MWC ((znew<<16)+wnew )
#define SHR3 (kiss_jsr^=(kiss_jsr<<13),kiss_jsr^=(kiss_jsr>>17), \
kiss_jsr^=(kiss_jsr<<5))
#define CONG (kiss_jcong=69069*kiss_jcong+1234567)
#define KISS ((MWC^CONG)+SHR3)
int randint (int a)
{
return KISS % a;
}
#define MIN(x,y) (fmin(x,y))
#define MAX(x,y) (fmax(x,y))
#define MIN3(x,y,z) MIN(x,MIN(y,z))
#define MAX3(x,y,z) MAX(x,MAX(y,z))
#define MED3(x,y,z) MIN(MAX(MIN(y,z),x),MAX(y,z))
#define ERR_LIMIT (0x1.0p-23)
#define SCALE (0.00001357)
int main (void)
{
mpInit();
unsigned long long int count = 0;
double A_ref = 0, B_ref = 0, C_ref = 0;
double relerrA, relerrB, relerrC;
float A = 0, B = 0, C = 0;
do {
double a, b, c, aa, bb, cc;
float asq, bsq, csq;
if ((count & 0xfff) == 0) printf ("\rcount=%llu", count);
do {
a = (randint (1 << 23) + 1) * SCALE;
b = (randint (1 << 23) + 1) * SCALE;
c = (randint (1 << 23) + 1) * SCALE;
aa = MIN3 (a, b, c);
bb = MED3 (a, b, c);
cc = MAX3 (a, b, c);
} while ((aa + bb) <= (1.000001 * cc));
asq = (float)(a * a);
bsq = (float)(b * b);
csq = (float)(c * c);
A = angle_kahanf (bsq, csq, asq);
B = angle_kahanf (csq, asq, bsq);
C = angle_kahanf (asq, bsq, csq);
A_ref = lawOfCosines_ref ((double)bsq, (double)csq, (double)asq);
B_ref = lawOfCosines_ref ((double)csq, (double)asq, (double)bsq);
C_ref = lawOfCosines_ref ((double)asq, (double)bsq, (double)csq);
relerrA = fabs ((A - A_ref) / A_ref);
relerrB = fabs ((B - B_ref) / B_ref);
relerrC = fabs ((C - C_ref) / C_ref);
if (relerrA > ERR_LIMIT) {
printf ("!!!! asq=%15.8e bsq=%15.8e csq=%15.8e A=%15.8e A_ref=%15.8e relerr=%15.8e\n",
asq, bsq, csq, A, A_ref, relerrA);
}
if (relerrB > ERR_LIMIT) {
printf ("!!!! asq=%15.8e bsq=%15.8e csq=%15.8e B=%15.8e B_ref=%15.8e relerr=%15.8e\n",
asq, bsq, csq, B, B_ref, relerrB);
}
if (relerrC > ERR_LIMIT) {
printf ("!!!! asq=%15.8e bsq=%15.8e csq=%15.8e C=%15.8e C_ref=%15.8e relerr=%15.8e\n",
asq, bsq, csq, C, C_ref, relerrC);
}
count++;
} while (count < 1000000);
return EXIT_SUCCESS;
}
std::acos( (b+a-c) / (2.*std::sqrt(b*a)) );
使用了double
版本的除法和acos()
,因为2.
感染了float
代码,导致了double
代码。再试一次,使用std::acos( (b+a-c) / (2.0f*std::sqrt(b*a)) );
来保持全部都是float
。 - undefinedb+a-c
应该改为max(a,b)-c + min(a,b)
。2)尽可能使用fma()
函数。 - undefineda+b-c
得到的结果是−2u,这是精确的,但max(a,b)-c
得到的结果是−1/2,然后max(a,b)-c + min(a,b)
得到的结果是−(3/2)u,误差很重要。 - undefineda
、b
和c
的值与三角形 "(0,0) (1,1) (1+e,1)" 不对应。为了避免歧义,你应该以十六进制表示法(使用printf
中的%a
)给出a
、b
和c
的精确值,并且在浮点类型上要明确:你的例子混合了float
和double
(1e-7
)。 - undefined