Use of org.apache.commons.math3.distribution.ChiSquaredDistribution in project GDSC-SMLM by aherbert — class ChiSquaredDistributionTableTest, method canComputeProbability.
//@formatter:on
@Test
public void canComputeProbability() {
    // Compare the table implementation against the commons-math reference
    // distribution for a couple of representative degrees of freedom.
    for (final int df : new int[] { 5, 10 }) {
        final ChiSquaredDistribution reference = new ChiSquaredDistribution(null, df);

        // Boundary case: chi-square statistic of zero.
        double chi = 0;
        double expected = reference.cumulativeProbability(chi);
        double observed = ChiSquaredDistributionTable.computePValue(chi, df);
        Assert.assertEquals(expected, observed, 1e-10);

        // Doubling series chi = 1, 2, 4, ..., 512: check both the p-value
        // (lower tail) and the q-value (upper tail, 1 - p).
        chi = 1;
        for (int i = 0; i < 10; i++, chi *= 2) {
            expected = reference.cumulativeProbability(chi);
            observed = ChiSquaredDistributionTable.computePValue(chi, df);
            Assert.assertEquals(expected, observed, 1e-10);

            observed = ChiSquaredDistributionTable.computeQValue(chi, df);
            Assert.assertEquals(1 - expected, observed, 1e-10);
        }
    }
}
Use of org.apache.commons.math3.distribution.ChiSquaredDistribution in project incubator-systemml by apache — class ParameterizedBuiltin, method computeFromDistribution.
/**
 * Helper function to compute distribution-specific cdf (both lowertail and uppertail) and inverse cdf.
 *
 * <p>Supported distributions and their parameters: NORMAL ("mean", "sd"), EXP ("rate"),
 * CHISQ ("df"), F ("df1", "df2") and T ("df"). The required "target" entry in {@code params}
 * is the quantile when {@code inverse} is false and the probability when {@code inverse} is true.
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true if inverse
 * @return cdf or inverse cdf
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
// given value is "quantile" when inverse=false, and it is "probability" when inverse=true
double val = Double.parseDouble(params.get("target"));
// R-style lower.tail flag; defaults to the lower-tail cdf when absent
boolean lowertail = true;
if (params.get("lower.tail") != null) {
lowertail = Boolean.parseBoolean(params.get("lower.tail"));
}
// Build the requested distribution; each case validates its own parameters.
AbstractRealDistribution distFunction = null;
switch(dcode) {
case NORMAL:
// default values for mean and sd (standard normal)
double mean = 0.0, sd = 1.0;
String mean_s = params.get("mean"), sd_s = params.get("sd");
if (mean_s != null)
mean = Double.parseDouble(mean_s);
if (sd_s != null)
sd = Double.parseDouble(sd_s);
if (sd <= 0)
throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sd + ")");
distFunction = new NormalDistribution(mean, sd);
break;
case EXP:
// default value for 1/mean or rate
double exp_rate = 1.0;
if (params.get("rate") != null)
exp_rate = Double.parseDouble(params.get("rate"));
if (exp_rate <= 0) {
throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + exp_rate + ")");
}
// For exponential distribution: mean = 1/rate (commons-math is parameterized by the mean)
distFunction = new ExponentialDistribution(1.0 / exp_rate);
break;
case CHISQ:
if (params.get("df") == null) {
throw new DMLRuntimeException("" + "Degrees of freedom must be specified for chi-squared distribution " + "(e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
}
int df = UtilFunctions.parseToInt(params.get("df"));
if (df <= 0) {
throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + df + ")");
}
distFunction = new ChiSquaredDistribution(df);
break;
case F:
if (params.get("df1") == null || params.get("df2") == null) {
throw new DMLRuntimeException("" + "Degrees of freedom must be specified for F distribution " + "(e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
}
int df1 = UtilFunctions.parseToInt(params.get("df1"));
int df2 = UtilFunctions.parseToInt(params.get("df2"));
if (df1 <= 0 || df2 <= 0) {
throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + df1 + "," + df2 + ")");
}
distFunction = new FDistribution(df1, df2);
break;
case T:
if (params.get("df") == null) {
throw new DMLRuntimeException("" + "Degrees of freedom is needed to compute probabilities from t distribution " + "(e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
}
int t_df = UtilFunctions.parseToInt(params.get("df"));
if (t_df <= 0) {
throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + t_df + ")");
}
distFunction = new TDistribution(t_df);
break;
default:
throw new DMLRuntimeException("Invalid distribution code: " + dcode);
}
double ret = Double.NaN;
if (inverse) {
// inverse cdf
ret = distFunction.inverseCumulativeProbability(val);
} else if (lowertail) {
// cdf (lowertail)
ret = distFunction.cumulativeProbability(val);
} else {
// cdf (upper tail)
// TODO: more accurate distribution-specific computation of upper tail probabilities
ret = 1.0 - distFunction.cumulativeProbability(val);
}
return ret;
}
Use of org.apache.commons.math3.distribution.ChiSquaredDistribution in project Gemma by PavlidisLab — class BatchConfound, method factorBatchConfoundTest.
/**
 * Tests each non-batch experimental factor of the experiment for association (confounding)
 * with the batch factor. Continuous factors are tested with a Kruskal-Wallis test; categorical
 * factors with a chi-square test on the batch-by-factor-value contingency table.
 *
 * @param ee the experiment the factors belong to (used for reporting/logging)
 * @param bioMaterialFactorMap per-factor map of biomaterial id to factor value (encoded as Double)
 * @return one BatchConfoundValueObject per non-batch factor; empty if no batch factor was found
 * @throws IllegalArgumentException propagated from the underlying statistical tests
 */
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
// biomaterial id -> batch factor-value id
Map<Long, Long> batchMembership = new HashMap<>();
ExperimentalFactor batchFactor = null;
// batch factor-value id -> dense 0-based index (row of the contingency table)
Map<Long, Integer> batchIndexes = new HashMap<>();
// Locate the batch factor and record each biomaterial's batch.
for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
if (ExperimentalDesignUtils.isBatch(ef)) {
batchFactor = ef;
Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
if (bmToFv == null) {
log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
// NOTE(review): batchFactor remains assigned here, so if no later batch factor is
// found the method proceeds with an empty batchMembership — confirm this is intended.
continue;
}
int index = 0;
for (FactorValue fv : batchFactor.getFactorValues()) {
batchIndexes.put(fv.getId(), index++);
}
for (Long bmId : bmToFv.keySet()) {
batchMembership.put(bmId, bmToFv.get(bmId).longValue());
}
break;
}
}
Set<BatchConfoundValueObject> result = new HashSet<>();
if (batchFactor == null) {
return result;
}
// Test every remaining factor against the batch factor.
for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
if (ef.equals(batchFactor))
continue;
Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
int numBioMaterials = bmToFv.keySet().size();
assert numBioMaterials > 0 : "No biomaterials for " + ef;
double p = Double.NaN;
double chiSquare;
int df;
int numBatches = batchFactor.getFactorValues().size();
if (ExperimentalDesignUtils.isContinuous(ef)) {
// Continuous factor: Kruskal-Wallis of factor values grouped by batch.
DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
factorValues.setSize(numBioMaterials);
IntArrayList batches = new IntArrayList(numBioMaterials);
batches.setSize(numBioMaterials);
int j = 0;
for (Long bmId : bmToFv.keySet()) {
// NOTE(review): this asserts on factorValues (pre-sized above, always non-empty),
// presumably bmToFv was the intended target — verify.
assert factorValues.size() > 0 : "Biomaterial to factorValue is empty for " + ef;
factorValues.set(j, bmToFv.get(bmId));
long batch = batchMembership.get(bmId);
batches.set(j, batchIndexes.get(batch));
j++;
}
p = KruskalWallis.test(factorValues, batches);
df = KruskalWallis.dof(factorValues, batches);
chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
} else {
// Categorical factor: chi-square test on a batch x factor-value count table.
Map<Long, Integer> factorValueIndexes = new HashMap<>();
int index = 0;
for (FactorValue fv : ef.getFactorValues()) {
factorValueIndexes.put(fv.getId(), index++);
}
Map<Long, Long> factorValueMembership = new HashMap<>();
for (Long bmId : bmToFv.keySet()) {
factorValueMembership.put(bmId, bmToFv.get(bmId).longValue());
}
long[][] counts = new long[numBatches][ef.getFactorValues().size()];
// NOTE(review): Java zero-initializes arrays, so this explicit zeroing is redundant.
for (int i = 0; i < batchIndexes.size(); i++) {
for (int j = 0; j < factorValueIndexes.size(); j++) {
counts[i][j] = 0;
}
}
// Tally each biomaterial into its (batch, factor value) cell.
for (Long bm : bmToFv.keySet()) {
long fv = factorValueMembership.get(bm);
Long batch = batchMembership.get(bm);
if (batch == null) {
log.warn("No batch membership for : " + bm);
continue;
}
int batchIndex = batchIndexes.get(batch);
int factorIndex = factorValueIndexes.get(fv);
counts[batchIndex][factorIndex]++;
}
ChiSquareTest cst = new ChiSquareTest();
try {
chiSquare = cst.chiSquare(counts);
} catch (IllegalArgumentException e) {
// e.g. a table dimension < 2; report NaN rather than failing the whole experiment.
log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
chiSquare = Double.NaN;
}
df = (counts.length - 1) * (counts[0].length - 1);
ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
if (!Double.isNaN(chiSquare)) {
// Upper-tail probability of the chi-square statistic.
p = 1.0 - distribution.cumulativeProbability(chiSquare);
}
log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
}
BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
result.add(summary);
}
return result;
}
Use of org.apache.commons.math3.distribution.ChiSquaredDistribution in project systemml by apache — class ParameterizedBuiltin, method computeFromDistribution.
/**
 * Helper function to compute distribution-specific cdf (both lowertail and uppertail) and inverse cdf.
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true if inverse
 * @return cdf or inverse cdf
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
    // "target" is a quantile when inverse=false and a probability when inverse=true.
    final double target = Double.parseDouble(params.get("target"));

    // R-style lower.tail switch; the lower tail is the default.
    final String tailParam = params.get("lower.tail");
    final boolean lowertail = (tailParam == null) || Boolean.parseBoolean(tailParam);

    // Construct the requested distribution, validating its parameters.
    final AbstractRealDistribution dist;
    switch (dcode) {
        case NORMAL: {
            // Defaults: standard normal.
            double mean = 0.0;
            double sd = 1.0;
            final String meanStr = params.get("mean");
            if (meanStr != null) {
                mean = Double.parseDouble(meanStr);
            }
            final String sdStr = params.get("sd");
            if (sdStr != null) {
                sd = Double.parseDouble(sdStr);
            }
            if (sd <= 0) {
                throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sd + ")");
            }
            dist = new NormalDistribution(mean, sd);
            break;
        }
        case EXP: {
            // Default rate (= 1/mean) is 1.
            double rate = 1.0;
            final String rateStr = params.get("rate");
            if (rateStr != null) {
                rate = Double.parseDouble(rateStr);
            }
            if (rate <= 0) {
                throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + rate + ")");
            }
            // Commons-math parameterizes the exponential by its mean = 1/rate.
            dist = new ExponentialDistribution(1.0 / rate);
            break;
        }
        case CHISQ: {
            final String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for chi-squared distribution (e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
            }
            final int df = UtilFunctions.parseToInt(dfStr);
            if (df <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + df + ")");
            }
            dist = new ChiSquaredDistribution(df);
            break;
        }
        case F: {
            if (params.get("df1") == null || params.get("df2") == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for F distribution (e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
            }
            final int df1 = UtilFunctions.parseToInt(params.get("df1"));
            final int df2 = UtilFunctions.parseToInt(params.get("df2"));
            if (df1 <= 0 || df2 <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + df1 + "," + df2 + ")");
            }
            dist = new FDistribution(df1, df2);
            break;
        }
        case T: {
            final String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom is needed to compute probabilities from t distribution (e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
            }
            final int df = UtilFunctions.parseToInt(dfStr);
            if (df <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + df + ")");
            }
            dist = new TDistribution(df);
            break;
        }
        default:
            throw new DMLRuntimeException("Invalid distribution code: " + dcode);
    }

    if (inverse) {
        // inverse cdf
        return dist.inverseCumulativeProbability(target);
    }
    final double cdf = dist.cumulativeProbability(target);
    // TODO: more accurate distribution-specific computation of upper tail probabilities
    return lowertail ? cdf : 1.0 - cdf;
}
Use of org.apache.commons.math3.distribution.ChiSquaredDistribution in project tetrad by cmu-phil — class Mimbuild2, method getCov.
/**
 * Fits the measurement model and returns the estimated covariance matrix over the latents.
 *
 * <p>Initializes the latent covariance (unit variances, .5 off-diagonals), the loadings (.5)
 * and the measure variances (1), runs a quick optimization of the non-measure-variance
 * parameters followed by a full simultaneous optimization, and records the model chi-square
 * p-value in {@code this.pValue} (side effect, along with {@code this.numParams}).
 *
 * @param _measurescov covariance matrix over the measured variables
 * @param latents the latent nodes, one per row of {@code indicators}
 * @param indicators for each latent, the measured indicator nodes loading on it
 * @return the optimized latent covariance matrix
 * @throws IllegalArgumentException if {@code latents.size() != indicators.length}
 */
private TetradMatrix getCov(ICovarianceMatrix _measurescov, List<Node> latents, Node[][] indicators) {
    if (latents.size() != indicators.length) {
        throw new IllegalArgumentException();
    }
    TetradMatrix measurescov = _measurescov.getMatrix();

    // Initial latent covariance: unit variances, symmetric off-diagonals of .5.
    TetradMatrix latentscov = new TetradMatrix(latents.size(), latents.size());
    for (int i = 0; i < latentscov.rows(); i++) {
        for (int j = i; j < latentscov.columns(); j++) {
            if (i == j) {
                latentscov.set(i, j, 1.0);
            } else {
                double v = .5;
                latentscov.set(i, j, v);
                latentscov.set(j, i, v);
            }
        }
    }

    // Initial loadings of .5 for every indicator. (The original code allocated each
    // loadings[i] twice; the first allocation was dead and has been removed.)
    double[][] loadings = new double[indicators.length][];
    for (int i = 0; i < indicators.length; i++) {
        loadings[i] = new double[indicators[i].length];
        for (int j = 0; j < indicators[i].length; j++) {
            loadings[i][j] = .5;
        }
    }

    // Column index of each indicator within the measured covariance matrix.
    int[][] indicatorIndices = new int[indicators.length][];
    List<Node> measures = _measurescov.getVariables();
    for (int i = 0; i < indicators.length; i++) {
        indicatorIndices[i] = new int[indicators[i].length];
        for (int j = 0; j < indicators[i].length; j++) {
            indicatorIndices[i][j] = measures.indexOf(indicators[i][j]);
        }
    }

    // Variances of the measures, initialized to 1.
    double[] delta = new double[measurescov.rows()];
    for (int i = 0; i < delta.length; i++) {
        delta[i] = 1;
    }

    // (The unused numNonMeasureVarianceParams tally from the original has been removed.)
    double[] allParams1 = getAllParams(indicators, latentscov, loadings, delta);
    optimizeNonMeasureVariancesQuick(indicators, measurescov, latentscov, loadings, indicatorIndices);
    this.numParams = allParams1.length;

    // Very slow but could be done alone.
    optimizeAllParamsSimultaneously(indicators, measurescov, latentscov, loadings, indicatorIndices, delta);

    // Model chi-square test: df = free covariance entries minus fitted parameters;
    // statistic is (N - 1) * minimized fit function value.
    double N = _measurescov.getSampleSize();
    int p = _measurescov.getDimension();
    int df = (p) * (p + 1) / 2 - (numParams);
    double x = (N - 1) * minimum;
    // NOTE(review): ChiSquaredDistribution requires df > 0; an over-parameterized model
    // would make df non-positive here — confirm callers guarantee df > 0.
    this.pValue = 1.0 - new ChiSquaredDistribution(df).cumulativeProbability(x);
    return latentscov;
}
Aggregations