从经验上看,在32位JIT中,开销最好情况下约为270%,而在64位上约为200%(每次“调用”fixed时,开销会变得更糟)。因此,如果性能真的很关键,我会尽量减少您的fixed块。
抱歉,我对fixed / unsafe代码不太熟悉,不知道为什么会出现这种情况。
详细信息:
我还添加了一些TestMore方法,这些方法将您的两个测试方法调用10次,而不是2次,以给出多个方法在您的固定结构上被调用的实际场景。
我使用的代码:
class Program
{
static void Main(string[] args)
{
var someData = new ByteArray();
int iterations = 1000000000;
var multiple = new MultipleFixed();
var single = new SingleFixed();
for (int i = 0; i < 100; i++)
{
multiple.Test(ref someData);
single.Test(ref someData);
multiple.TestMore(ref someData);
single.TestMore(ref someData);
}
if (Debugger.IsAttached)
Console.WriteLine("Debugger is attached!!!!!!!!!! This run is invalid!");
Console.WriteLine("CLR Version: " + Environment.Version);
Console.WriteLine("Pointer size: {0} bytes", IntPtr.Size);
Console.WriteLine("Iterations: " + iterations);
Console.Write("Starting run for Single... ");
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
single.Test(ref someData);
}
sw.Stop();
Console.WriteLine("Completed in {0:N3}ms - {1:N2}/sec", sw.Elapsed.TotalMilliseconds, iterations / sw.Elapsed.TotalSeconds);
Console.Write("Starting run for More Single... ");
sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
single.Test(ref someData);
}
sw.Stop();
Console.WriteLine("Completed in {0:N3}ms - {1:N2}/sec", sw.Elapsed.TotalMilliseconds, iterations / sw.Elapsed.TotalSeconds);
Console.Write("Starting run for Multiple... ");
sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
multiple.Test(ref someData);
}
sw.Stop();
Console.WriteLine("Completed in {0:N3}ms - {1:N2}/sec", sw.Elapsed.TotalMilliseconds, iterations / sw.Elapsed.TotalSeconds);
Console.Write("Starting run for More Multiple... ");
sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
multiple.TestMore(ref someData);
}
sw.Stop();
Console.WriteLine("Completed in {0:N3}ms - {1:N2}/sec", sw.Elapsed.TotalMilliseconds, iterations / sw.Elapsed.TotalSeconds);
Console.ReadLine();
}
}
unsafe struct ByteArray
{
public fixed byte Data[1024];
}
class MultipleFixed
{
unsafe void SetValue(ref ByteArray bytes, int index, byte value)
{
fixed (byte* data = bytes.Data)
{
data[index] = value;
}
}
unsafe bool Validate(ref ByteArray bytes, int index, byte expectedValue)
{
fixed (byte* data = bytes.Data)
{
return data[index] == expectedValue;
}
}
public void Test(ref ByteArray bytes)
{
SetValue(ref bytes, 0, 1);
Validate(ref bytes, 0, 1);
}
public void TestMore(ref ByteArray bytes)
{
SetValue(ref bytes, 0, 1);
Validate(ref bytes, 0, 1);
SetValue(ref bytes, 0, 2);
Validate(ref bytes, 0, 2);
SetValue(ref bytes, 0, 3);
Validate(ref bytes, 0, 3);
SetValue(ref bytes, 0, 4);
Validate(ref bytes, 0, 4);
SetValue(ref bytes, 0, 5);
Validate(ref bytes, 0, 5);
}
}
class SingleFixed
{
unsafe void SetValue(byte* data, int index, byte value)
{
data[index] = value;
}
unsafe bool Validate(byte* data, int index, byte expectedValue)
{
return data[index] == expectedValue;
}
public unsafe void Test(ref ByteArray bytes)
{
fixed (byte* data = bytes.Data)
{
SetValue(data, 0, 1);
Validate(data, 0, 1);
}
}
public unsafe void TestMore(ref ByteArray bytes)
{
fixed (byte* data = bytes.Data)
{
SetValue(data, 0, 1);
Validate(data, 0, 1);
SetValue(data, 0, 2);
Validate(data, 0, 2);
SetValue(data, 0, 3);
Validate(data, 0, 3);
SetValue(data, 0, 4);
Validate(data, 0, 4);
SetValue(data, 0, 5);
Validate(data, 0, 5);
}
}
}
以下是在.NET 4.0、32位JIT下的结果:
CLR Version: 4.0.30319.239
Pointer size: 4 bytes
Iterations: 1000000000
Starting run for Single... Completed in 2,092.350ms - 477,931,580.94/sec
Starting run for More Single... Completed in 2,236.767ms - 447,073,934.63/sec
Starting run for Multiple... Completed in 5,775.922ms - 173,132,528.92/sec
Starting run for More Multiple... Completed in 26,637.862ms - 37,540,550.36/sec
在.NET 4.0中,64位JIT:
CLR Version: 4.0.30319.239
Pointer size: 8 bytes
Iterations: 1000000000
Starting run for Single... Completed in 2,907.946ms - 343,885,316.72/sec
Starting run for More Single... Completed in 2,904.903ms - 344,245,585.63/sec
Starting run for Multiple... Completed in 5,754.893ms - 173,765,185.93/sec
Starting run for More Multiple... Completed in 18,679.593ms - 53,534,358.13/sec