use of org.apache.commons.math3.stat.inference.ChiSquareTest in project GDSC-SMLM by aherbert.
the class ChiSquaredDistributionTableTest method canPerformChiSquaredTest.
@Test
public void canPerformChiSquaredTest() {
ChiSquareTest test = new ChiSquareTest();
for (int n : new int[] { 10, 50, 100 }) {
double[] x = Utils.newArray(n, 0.5, 1.0);
long[] l = new long[x.length];
RandomDataGenerator rdg = new RandomDataGenerator(new Well19937c(30051977));
for (int i = 0; i < x.length; i++) l[i] = rdg.nextPoisson(x[i]);
double chi2 = test.chiSquare(x, l);
double ep = test.chiSquareTest(x, l);
int df = x.length - 1;
double o = ChiSquaredDistributionTable.computeQValue(chi2, df);
Assert.assertEquals(ep, o, 1e-10);
ChiSquaredDistributionTable upperTable = ChiSquaredDistributionTable.createUpperTailed(o, df);
double upper = chi2 * 1.01;
double lower = chi2 * 0.99;
Assert.assertTrue("Upper did not reject higher", upperTable.reject(upper, df));
Assert.assertFalse("Upper did not reject actual value", upperTable.reject(o, df));
Assert.assertFalse("Upper did not accept lower", upperTable.reject(lower, df));
}
}
use of org.apache.commons.math3.stat.inference.ChiSquareTest in project GDSC-SMLM by aherbert.
the class ChiSquaredDistributionTableTest method canPerformChiSquaredTest.
@SeededTest
void canPerformChiSquaredTest(RandomSeed seed) {
final UniformRandomProvider rng = RngUtils.create(seed.getSeed());
final ChiSquareTest test = new ChiSquareTest();
for (final int n : new int[] { 10, 50, 100 }) {
final double[] x = SimpleArrayUtils.newArray(n, 0.5, 1.0);
final long[] l = new long[x.length];
for (int i = 0; i < x.length; i++) {
l[i] = GdscSmlmTestUtils.createPoissonSampler(rng, x[i]).sample();
}
final double chi2 = test.chiSquare(x, l);
final double ep = test.chiSquareTest(x, l);
final int df = x.length - 1;
final double o = ChiSquaredDistributionTable.computeQValue(chi2, df);
Assertions.assertEquals(ep, o, 1e-10);
final ChiSquaredDistributionTable upperTable = ChiSquaredDistributionTable.createUpperTailed(o, df);
final double upper = chi2 * 1.01;
final double lower = chi2 * 0.99;
Assertions.assertTrue(upperTable.reject(upper, df), "Upper did not reject higher");
Assertions.assertFalse(upperTable.reject(o, df), "Upper did not reject actual value");
Assertions.assertFalse(upperTable.reject(lower, df), "Upper did not accept lower");
}
}
use of org.apache.commons.math3.stat.inference.ChiSquareTest in project vcell by virtualcell.
the class TimeSeriesMultitrialData method chiSquaredTest.
public static double chiSquaredTest(double[] rawData1, double[] rawData2) {
try {
int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));
// rawData2 = ramp(0,10,rawData2.length);
TimeSeriesMultitrialData.MinMaxHelp minMaxHelp1 = new TimeSeriesMultitrialData.MinMaxHelp(rawData1);
TimeSeriesMultitrialData.MinMaxHelp minMaxHelp2 = new TimeSeriesMultitrialData.MinMaxHelp(rawData2);
double min = Math.min(minMaxHelp1.min, minMaxHelp2.min);
double max = Math.max(minMaxHelp1.max, minMaxHelp2.max);
long[] histogram1 = calcHistogram(rawData1, min, max, numBins);
long[] histogram2 = calcHistogram(rawData2, min, max, numBins);
//
// remove histogram indices where both bins are zero
//
ArrayList<Long> histogram1List = new ArrayList<Long>();
ArrayList<Long> histogram2List = new ArrayList<Long>();
for (int i = 0; i < histogram1.length; i++) {
if (histogram1[i] != 0 || histogram2[i] != 0) {
histogram1List.add(histogram1[i]);
histogram2List.add(histogram2[i]);
// }else{
// histogram1List.add(new Long(1));
// histogram2List.add(new Long(1));
}
}
histogram1 = new long[histogram1List.size()];
histogram2 = new long[histogram2List.size()];
for (int i = 0; i < histogram1List.size(); i++) {
histogram1[i] = histogram1List.get(i);
histogram2[i] = histogram2List.get(i);
}
if (histogram1.length == 1) {
return 0.0;
}
ChiSquareTest chiSquareTest = new ChiSquareTest();
return chiSquareTest.chiSquareTestDataSetsComparison(histogram1, histogram2);
} catch (Exception e) {
e.printStackTrace(System.out);
return -1;
}
}
use of org.apache.commons.math3.stat.inference.ChiSquareTest in project hadoop by apache.
the class TestClusterTopology method testChooseRandom.
/**
* Test how well we pick random nodes.
*/
@Test
public void testChooseRandom() {
// create the topology
NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
NodeElement node1 = getNewNode("node1", "/d1/r1");
cluster.add(node1);
NodeElement node2 = getNewNode("node2", "/d1/r2");
cluster.add(node2);
NodeElement node3 = getNewNode("node3", "/d1/r3");
cluster.add(node3);
NodeElement node4 = getNewNode("node4", "/d1/r3");
cluster.add(node4);
// Number of iterations to do the test
int numIterations = 100;
// Pick random nodes
HashMap<String, Integer> histogram = new HashMap<String, Integer>();
for (int i = 0; i < numIterations; i++) {
String randomNode = cluster.chooseRandom(NodeBase.ROOT).getName();
if (!histogram.containsKey(randomNode)) {
histogram.put(randomNode, 0);
}
histogram.put(randomNode, histogram.get(randomNode) + 1);
}
assertEquals("Random is not selecting all nodes", 4, histogram.size());
// Check with 99% confidence (alpha=0.01 as confidence = (100 * (1 - alpha)
ChiSquareTest chiSquareTest = new ChiSquareTest();
double[] expected = new double[histogram.size()];
long[] observed = new long[histogram.size()];
int j = 0;
for (Integer occurrence : histogram.values()) {
expected[j] = 1.0 * numIterations / histogram.size();
observed[j] = occurrence;
j++;
}
boolean chiSquareTestRejected = chiSquareTest.chiSquareTest(expected, observed, 0.01);
// Check that they have the proper distribution
assertFalse("Not choosing nodes randomly", chiSquareTestRejected);
// Pick random nodes excluding the 2 nodes in /d1/r3
histogram = new HashMap<String, Integer>();
for (int i = 0; i < numIterations; i++) {
String randomNode = cluster.chooseRandom("~/d1/r3").getName();
if (!histogram.containsKey(randomNode)) {
histogram.put(randomNode, 0);
}
histogram.put(randomNode, histogram.get(randomNode) + 1);
}
assertEquals("Random is not selecting the nodes it should", 2, histogram.size());
}
use of org.apache.commons.math3.stat.inference.ChiSquareTest in project Gemma by PavlidisLab.
the class BatchConfound method factorBatchConfoundTest.
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
Map<Long, Long> batchMembership = new HashMap<>();
ExperimentalFactor batchFactor = null;
Map<Long, Integer> batchIndexes = new HashMap<>();
for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
if (ExperimentalDesignUtils.isBatch(ef)) {
batchFactor = ef;
Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
if (bmToFv == null) {
log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
continue;
}
int index = 0;
for (FactorValue fv : batchFactor.getFactorValues()) {
batchIndexes.put(fv.getId(), index++);
}
for (Long bmId : bmToFv.keySet()) {
batchMembership.put(bmId, bmToFv.get(bmId).longValue());
}
break;
}
}
Set<BatchConfoundValueObject> result = new HashSet<>();
if (batchFactor == null) {
return result;
}
for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
if (ef.equals(batchFactor))
continue;
Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
int numBioMaterials = bmToFv.keySet().size();
assert numBioMaterials > 0 : "No biomaterials for " + ef;
double p = Double.NaN;
double chiSquare;
int df;
int numBatches = batchFactor.getFactorValues().size();
if (ExperimentalDesignUtils.isContinuous(ef)) {
DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
factorValues.setSize(numBioMaterials);
IntArrayList batches = new IntArrayList(numBioMaterials);
batches.setSize(numBioMaterials);
int j = 0;
for (Long bmId : bmToFv.keySet()) {
assert factorValues.size() > 0 : "Biomaterial to factorValue is empty for " + ef;
factorValues.set(j, bmToFv.get(bmId));
long batch = batchMembership.get(bmId);
batches.set(j, batchIndexes.get(batch));
j++;
}
p = KruskalWallis.test(factorValues, batches);
df = KruskalWallis.dof(factorValues, batches);
chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
} else {
Map<Long, Integer> factorValueIndexes = new HashMap<>();
int index = 0;
for (FactorValue fv : ef.getFactorValues()) {
factorValueIndexes.put(fv.getId(), index++);
}
Map<Long, Long> factorValueMembership = new HashMap<>();
for (Long bmId : bmToFv.keySet()) {
factorValueMembership.put(bmId, bmToFv.get(bmId).longValue());
}
long[][] counts = new long[numBatches][ef.getFactorValues().size()];
for (int i = 0; i < batchIndexes.size(); i++) {
for (int j = 0; j < factorValueIndexes.size(); j++) {
counts[i][j] = 0;
}
}
for (Long bm : bmToFv.keySet()) {
long fv = factorValueMembership.get(bm);
Long batch = batchMembership.get(bm);
if (batch == null) {
log.warn("No batch membership for : " + bm);
continue;
}
int batchIndex = batchIndexes.get(batch);
int factorIndex = factorValueIndexes.get(fv);
counts[batchIndex][factorIndex]++;
}
ChiSquareTest cst = new ChiSquareTest();
try {
chiSquare = cst.chiSquare(counts);
} catch (IllegalArgumentException e) {
log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
chiSquare = Double.NaN;
}
df = (counts.length - 1) * (counts[0].length - 1);
ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
if (!Double.isNaN(chiSquare)) {
p = 1.0 - distribution.cumulativeProbability(chiSquare);
}
log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
}
BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
result.add(summary);
}
return result;
}
Aggregations