如果有人对最终代码感兴趣,可以使用Apache Commons Math进行统计操作,并使用Trove与基本类型的集合一起使用。
它查找具有最高度数的元素(这个想法基于@Pace和@Aniko的评论,谢谢)。
我认为最终算法的时间复杂度是O(n^2),欢迎提出建议。 假设观察结果符合正态分布,它应该适用于涉及一个定性变量与一个定量变量的任何问题。
import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.map.TIntIntMap;
import gnu.trove.map.hash.TIntIntHashMap;
import gnu.trove.procedure.TIntIntProcedure;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math.MathException;
import org.apache.commons.math.stat.inference.OneWayAnova;
import org.apache.commons.math.stat.inference.OneWayAnovaImpl;
import org.apache.commons.math.stat.inference.TestUtils;
public class TestMath {
private static final double SIGNIFICANCE_LEVEL = 0.001;
public static void main(String[] args) throws MathException {
double[][] observations = {
{150.0, 200.0, 180.0, 230.0, 220.0, 250.0, 230.0, 300.0, 190.0 },
{200.0, 240.0, 220.0, 250.0, 210.0, 190.0, 240.0, 250.0, 190.0 },
{100.0, 130.0, 150.0, 180.0, 140.0, 200.0, 110.0, 120.0, 150.0 },
{200.0, 230.0, 150.0, 230.0, 240.0, 200.0, 210.0, 220.0, 210.0 },
{200.0, 230.0, 150.0, 180.0, 140.0, 200.0, 110.0, 120.0, 150.0 }
};
final List<double[]> classes = new ArrayList<double[]>();
for (int i=0; i<observations.length; i++) {
classes.add(observations[i]);
}
OneWayAnova anova = new OneWayAnovaImpl();
boolean rejectNullHypothesis = anova.anovaTest(classes, SIGNIFICANCE_LEVEL);
System.out.println("reject null hipothesis " + (100 - SIGNIFICANCE_LEVEL * 100) + "% = " + rejectNullHypothesis);
if (rejectNullHypothesis) {
TIntSet aux = new TIntHashSet();
TIntIntMap fraud = new TIntIntHashMap();
for (int i=0; i<observations.length; i++) {
for (int j=i+1; j<observations.length; j++) {
boolean different = TestUtils.tTest(observations[i], observations[j], SIGNIFICANCE_LEVEL);
if (different) {
if (!aux.add(i)) {
if (fraud.increment(i) == false) {
fraud.put(i, 1);
}
}
if (!aux.add(j)) {
if (fraud.increment(j) == false) {
fraud.put(j, 1);
}
}
}
}
}
final int max = fraud.get(0);
fraud.retainEntries(new TIntIntProcedure() {
@Override
public boolean execute(int a, int b) {
return b != max;
}
});
if (fraud.size() > observations.length / 2) {
fraud.clear();
}
TIntIntIterator it = fraud.iterator();
while (it.hasNext()) {
it.advance();
System.out.println("Element " + it.key() + " has significant differences");
}
}
}
}