以下是我想到的方法,我结合了你的代码和一些并行代码,测试了大约1600万(2^24)。
public int CompareParallel(ushort[]x, ushort[] y, int len, int segLen)
{
int compareArrLen = ( len / segLen ) + 1;
int [ ] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len < segEnd ? len : segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}
public int CompareSequential ( ushort [ ] x , ushort [ ] y, int len )
{
int pos = 0;
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;
return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );
}
public int Compare( ushort [ ] x , ushort [ ] y )
{
//determined through testing to be the best on my machine
const int cutOff = 4096;
int len = x.Length < y.Length ? x.Length : y.Length;
//check if len is above a specific threshold
//and if first and a number in the middle are equal
//chose equal because we know that there is a chance that more
//then 50% of the list is equal, which would make the overhead
//worth the effort
if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
&& x [ len/2 ] == y [ len/2 ] )
{
//segment length was determined to be best through testing
//at around 8% of the size of the array seemed to have the
//on my machine
return CompareParallel ( x , y , len , (len / 100)*8 );
}
return CompareSequential ( x , y, len );
}
这是我写的测试:
class Program
private static InfoLevel logLevel = InfoLevel.Summary;
private static void LogDetail ( string content )
private static void LogSummary ( string content )
private static void LogInfo ( InfoLevel level , string content )
private static void LogInfo ( InfoLevel level , string format,
params object[] arg )
private static void LogDetail ( string format , params object [ ] arg )
private static void LogSummary ( string format , params object [ ] arg )
const string _randTestResultHeader = "\r\nRandom Array Content\r\n";
const string _equalArrayResultHeader = "Only Length Different\r\n\r\n";
const string _summaryTestResultsHeader =
"Size\t\tOrig Elps\tPara Elps\tComp Elps\r\n";
const string _summaryBodyContent =
"\t\t\t\t\t\t\r\n";
static void Main ( string [ ] args )
%"
+" of Array Size (Comp always at 8%)\r\n" , segPercent);
StringBuilder _aggrRandResults = new StringBuilder ( );
StringBuilder _aggrEqualResults = new StringBuilder ( );
_aggrRandResults.Append ( _randTestResultHeader );
_aggrEqualResults.Append ( _equalArrayResultHeader );
_aggrEqualResults.Append ( _summaryTestResultsHeader );
_aggrRandResults.Append ( _summaryTestResultsHeader );
for ( int i = 10 ; i < 25 ; i++ )
" ,
completelyRandomTestAvg [ 0 ] );
LogDetail ( "Parallel Composite Test Average: " ,
completelyRandomTestAvg [ 1 ] );
_aggrRandResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
completelyRandomTestAvg [ 0 ] ,
completelyRandomTestAvg [ 1 ] ,
completelyRandomTestAvg [ 2 ]);
testName = "Equal Len And Values";
var equalEqualTest = EqualTill ( testName , baseLen ,
baseLen, segLen );
testName = "LHS Larger";
var equalLHSLargerTest = EqualTill ( testName , baseLen + 10 ,
baseLen, segLen );
testName = "RHS Larger";
var equalRHSLargerTest = EqualTill ( testName , baseLen ,
baseLen + 10, segLen );
double [ ] mostlyEqualTestAvg = new double [ 3 ];
for ( int l = 0 ; l < mostlyEqualTestAvg.Length ; l++ )
mostlyEqualTestAvg [ l ] = ( ( equalEqualTest [ l ] +
equalLHSLargerTest [ l ] +
equalRHSLargerTest [ l ] ) / 3 );
LogDetail( "\r\nLength Different Test Results" );
LogDetail( "Original Composite Test Average: " ,
mostlyEqualTestAvg [ 0 ] );
LogDetail( "Parallel Composite Test Average: " ,
mostlyEqualTestAvg [ 1 ] );
_aggrEqualResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
mostlyEqualTestAvg [ 0 ] ,
mostlyEqualTestAvg [ 1 ] ,
mostlyEqualTestAvg [ 2 ]);
}
LogSummary ( _aggrRandResults.ToString() + "\r\n");
LogSummary ( _aggrEqualResults.ToString()+ "\r\n");
}
Console.Out.Flush ( );
}
private const string _testBody =
"\r\n\tOriginal:: Result:, Elapsed:"
+"\r\n\tParallel:: Result:, Elapsed:"
+"\r\n\tComposite:: Result:, Elapsed:";
private const string _testHeader =
"\r\nTesting , Array Lengths: , ";
public static double[] RandomRunTest(string testName, int shortArr1Len,
int shortArr2Len, int parallelSegLen)
LogDetail ( "\r\nAverage Run Time Original: " , avgTimes[0]);
LogDetail ( "Average Run Time Parallel: " , avgTimes[1]);
LogDetail ( "Average Run Time Composite: " , avgTimes [ 2 ] );
return avgTimes;
}
public static double [ ] EqualTill ( string testName, int shortArr1Len ,
int shortArr2Len, int parallelSegLen)
), Array Lengths: , ";
int baseLen = shortArr1Len > shortArr2Len
? shortArr2Len : shortArr1Len;
var shortArr1 = new ushort [ shortArr1Len ];
var shortArr2 = new ushort [ shortArr2Len ];
double [ ] avgTimes = new double [ 3 ];
LogDetail( _testHeader , testName , shortArr1Len , shortArr2Len );
for ( int i = 0 ; i < 10 ; i++ )
LogDetail ( "\r\nAverage Run Time Original: " , avgTimes [ 0 ] );
LogDetail ( "Average Run Time Parallel: " , avgTimes [ 1 ] );
LogDetail ( "Average Run Time Composite: " , avgTimes [ 2 ] );
return avgTimes;
}
static Random rand = new Random ( );
public static void FillCompareArray ( ushort[] compareArray, int length )
public static int CompareParallelOnly ( ushort [ ] x , ushort[] y,
int segLen )
} ) );
int r=compareArrLen-1;
while ( r >= 0 )
return x.Length.CompareTo ( y.Length );
}
public static int Compare ( ushort [ ] x , ushort [ ] y )
public static int CompareParallel ( ushort[] x, ushort[] y, int len,
int segLen )
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
return x.Length.CompareTo ( y.Length );
}
public static int CompareSequential(ushort [ ] x , ushort [ ] y, int len)
public static int CompareComposite ( ushort [ ] x , ushort [ ] y )
}
注意:
确保使用优化过的代码进行构建,如果没有包含此步骤,则结果会有很大不同,它会使并行代码看起来比实际改进要大得多。
我得到的结果是:在非常长的相等数字集上,执行时间减少了约33%。随着输入的增加,它仍然呈线性增长,但增长速度较慢。对于小数据集(少于4092个),它也开始变慢,但通常所需时间足够短(在我的计算机上为0.001毫秒),所以如果您得到大型几乎相等的数组,使用它将是值得的。
Math.Min
:int len = x.Length > y.Length ? y.Length : x.Length;
- Dustin KingenCompareTo
在return
中的使用,并将其编码成长格式,这也有所帮助。有时候最明显的事情是最有效的。 - Jamie Treworgyushort
数组而不是字符串作为键,因为它更加紧凑。使用整个路径作为键让我可以使用部分键从排序集合中获取视图,这对于子集查询提供了高性能。我正在尝试提高构建索引时的性能。https://dev59.com/dVnUa4cB1Zd3GeqPXiBN - Jamie Treworgy