use of org.apache.commons.math3.distribution.FDistribution in project knime-core by knime.
the class LeveneTestStatistics method getLeveneTestTwoGroupsCells.
/**
 * Computes the Levene test for the special case of exactly two groups.
 *
 * <p>The statistic is built from the summary statistics stored at index 0 and
 * index 1 of {@code m_denStats} together with the overall mean taken from
 * {@code m_lstats}. The p-value is the upper tail of an F distribution with
 * {@code 1} and {@code n1 + n2 - 2} degrees of freedom.
 *
 * @return a list containing a single row; the row holds, in order, the column
 *         name, the test statistic, the numerator degrees of freedom, the
 *         denominator degrees of freedom and the p-value
 */
public List<List<DataCell>> getLeveneTestTwoGroupsCells() {
    final SummaryStatistics first = m_denStats.get(0);
    final SummaryStatistics second = m_denStats.get(1);

    // sample sizes of the two groups (kept as double for the arithmetic below)
    final double count1 = first.getN();
    final double count2 = second.getN();

    // deviation of each group mean from the overall mean
    final double overallMean = m_lstats.getMean();
    final double deviation1 = first.getMean() - overallMean;
    final double deviation2 = second.getMean() - overallMean;

    // between-group and within-group sums of squares
    final double between = count1 * deviation1 * deviation1 + count2 * deviation2 * deviation2;
    final double within = (count1 - 1) * first.getVariance() + (count2 - 1) * second.getVariance();

    // Levene's statistic
    final double statistic = (count1 + count2 - 2) / within * between;

    final long numeratorDf = 1;
    final long denominatorDf = (long) count1 + (long) count2 - 2;

    // p-value: probability mass of the F distribution above the statistic
    final FDistribution fDist = new FDistribution(numeratorDf, denominatorDf);
    final double pValue = 1 - fDist.cumulativeProbability(statistic);

    final List<DataCell> row = new ArrayList<DataCell>();
    row.add(new StringCell(m_column));
    row.add(new DoubleCell(statistic));
    row.add(new IntCell((int) numeratorDf));
    row.add(new IntCell((int) denominatorDf));
    row.add(new DoubleCell(pValue));
    return Collections.singletonList(row);
}
use of org.apache.commons.math3.distribution.FDistribution in project knime-core by knime.
the class LeveneTestStatistics method getTTestCells.
/**
 * Computes the Levene test over all groups.
 *
 * <p>When {@code m_groups} contains exactly two groups the work is delegated
 * to {@link #getLeveneTestTwoGroupsCells()}. Otherwise the statistic is
 * assembled from the per-group statistics returned by
 * {@code m_levenePre.getLgstats()}, the overall statistics in {@code m_lstats}
 * and the sums of squares of the entries of {@code m_denStats}. The p-value is
 * the upper tail of an F distribution with {@code k - 1} and {@code N - k}
 * degrees of freedom, where {@code k} is the number of groups and {@code N}
 * is the count reported by {@code m_lstats}.
 *
 * @return a list containing a single row; the row holds, in order, the column
 *         name, the test statistic, the numerator degrees of freedom, the
 *         denominator degrees of freedom and the p-value
 */
public List<List<DataCell>> getTTestCells() {
    final int groupCount = m_groups.size();
    if (groupCount == 2) {
        // the two-group case has a dedicated, cheaper implementation
        return getLeveneTestTwoGroupsCells();
    }

    final double overallMean = m_lstats.getMean();

    // numerator: group-size weighted squared deviations of the group means
    double between = 0;
    for (int g = 0; g < groupCount; g++) {
        final SummaryStatistics groupStats = m_levenePre.getLgstats().get(g);
        final double groupSize = groupStats.getN();
        final double deviation = groupStats.getMean() - overallMean;
        between += groupSize * deviation * deviation;
    }

    // denominator: accumulated sums of squares of the per-group statistics
    double within = 0;
    for (int g = 0; g < groupCount; g++) {
        within += m_denStats.get(g).getSumsq();
    }

    final long numeratorDf = groupCount - 1;
    final long denominatorDf = m_lstats.getN() - groupCount;

    final double statistic =
        (m_lstats.getN() - groupCount) / (double) (groupCount - 1) * between / within;

    // p-value: probability mass of the F distribution above the statistic
    final FDistribution fDist = new FDistribution(numeratorDf, denominatorDf);
    final double pValue = 1 - fDist.cumulativeProbability(statistic);

    final List<DataCell> row = new ArrayList<DataCell>();
    row.add(new StringCell(m_column));
    row.add(new DoubleCell(statistic));
    row.add(new IntCell((int) numeratorDf));
    row.add(new IntCell((int) denominatorDf));
    row.add(new DoubleCell(pValue));
    return Collections.singletonList(row);
}
use of org.apache.commons.math3.distribution.FDistribution in project incubator-systemml by apache.
the class ParameterizedBuiltin method computeFromDistribution.
/**
 * Evaluates the cumulative distribution function (lower or upper tail) or the
 * inverse cumulative distribution function of the selected distribution.
 *
 * <p>The value to evaluate is read from the {@code "target"} entry of
 * {@code params}: it is a quantile when {@code inverse} is {@code false} and a
 * probability when {@code inverse} is {@code true}. The optional
 * {@code "lower.tail"} entry (default {@code true}) selects the tail for the
 * non-inverse case only. Distribution parameters are read from
 * {@code "mean"}/{@code "sd"} (normal, defaults 0 and 1), {@code "rate"}
 * (exponential, default 1), {@code "df"} (chi-squared and t, required) and
 * {@code "df1"}/{@code "df2"} (F, required).
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true to compute the inverse cdf, false to compute the cdf
 * @return cdf or inverse cdf
 * @throws DMLRuntimeException if a required parameter is missing, a
 *         distribution parameter is not positive, or the distribution code is
 *         not handled
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
    // "target" is a quantile for the cdf and a probability for the inverse cdf
    double target = Double.parseDouble(params.get("target"));

    String lowerTailStr = params.get("lower.tail");
    boolean lowerTail = (lowerTailStr != null) ? Boolean.parseBoolean(lowerTailStr) : true;

    AbstractRealDistribution dist;
    switch (dcode) {
        case NORMAL: {
            // mean and sd fall back to the standard normal when absent
            String meanStr = params.get("mean");
            String sdStr = params.get("sd");
            double mu = (meanStr != null) ? Double.parseDouble(meanStr) : 0.0;
            double sigma = (sdStr != null) ? Double.parseDouble(sdStr) : 1.0;
            if (sigma <= 0) {
                throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sigma + ")");
            }
            dist = new NormalDistribution(mu, sigma);
            break;
        }
        case EXP: {
            // rate defaults to 1 when absent
            String rateStr = params.get("rate");
            double rate = (rateStr != null) ? Double.parseDouble(rateStr) : 1.0;
            if (rate <= 0) {
                throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + rate + ")");
            }
            // the distribution is parameterized by its mean, which is 1/rate
            dist = new ExponentialDistribution(1.0 / rate);
            break;
        }
        case CHISQ: {
            String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for chi-squared distribution "
                    + "(e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
            }
            int chiDf = UtilFunctions.parseToInt(dfStr);
            if (chiDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + chiDf + ")");
            }
            dist = new ChiSquaredDistribution(chiDf);
            break;
        }
        case F: {
            String numStr = params.get("df1");
            String denStr = params.get("df2");
            if (numStr == null || denStr == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for F distribution "
                    + "(e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
            }
            int numDf = UtilFunctions.parseToInt(numStr);
            int denDf = UtilFunctions.parseToInt(denStr);
            if (numDf <= 0 || denDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + numDf + "," + denDf + ")");
            }
            dist = new FDistribution(numDf, denDf);
            break;
        }
        case T: {
            String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom is needed to compute probabilities from t distribution "
                    + "(e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
            }
            int tDf = UtilFunctions.parseToInt(dfStr);
            if (tDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + tDf + ")");
            }
            dist = new TDistribution(tDf);
            break;
        }
        default:
            throw new DMLRuntimeException("Invalid distribution code: " + dcode);
    }

    if (inverse) {
        // inverse cdf; the tail selection is not consulted here
        return dist.inverseCumulativeProbability(target);
    }

    double lowerTailProbability = dist.cumulativeProbability(target);
    if (lowerTail) {
        return lowerTailProbability;
    }
    // upper tail as the complement of the lower tail
    // TODO: more accurate distribution-specific computation of upper tail probabilities
    return 1.0 - lowerTailProbability;
}
use of org.apache.commons.math3.distribution.FDistribution in project systemml by apache.
the class ParameterizedBuiltin method computeFromDistribution.
/**
 * Evaluates the cumulative distribution function (lower or upper tail) or the
 * inverse cumulative distribution function of the selected distribution.
 *
 * <p>The value to evaluate is read from the {@code "target"} entry of
 * {@code params}: it is a quantile when {@code inverse} is {@code false} and a
 * probability when {@code inverse} is {@code true}. The optional
 * {@code "lower.tail"} entry (default {@code true}) selects the tail for the
 * non-inverse case only. Distribution parameters are read from
 * {@code "mean"}/{@code "sd"} (normal, defaults 0 and 1), {@code "rate"}
 * (exponential, default 1), {@code "df"} (chi-squared and t, required) and
 * {@code "df1"}/{@code "df2"} (F, required).
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true to compute the inverse cdf, false to compute the cdf
 * @return cdf or inverse cdf
 * @throws DMLRuntimeException if a required parameter is missing, a
 *         distribution parameter is not positive, or the distribution code is
 *         not handled
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
    // "target" is a quantile for the cdf and a probability for the inverse cdf
    double target = Double.parseDouble(params.get("target"));

    String lowerTailStr = params.get("lower.tail");
    boolean lowerTail = (lowerTailStr != null) ? Boolean.parseBoolean(lowerTailStr) : true;

    AbstractRealDistribution dist;
    switch (dcode) {
        case NORMAL: {
            // mean and sd fall back to the standard normal when absent
            String meanStr = params.get("mean");
            String sdStr = params.get("sd");
            double mu = (meanStr != null) ? Double.parseDouble(meanStr) : 0.0;
            double sigma = (sdStr != null) ? Double.parseDouble(sdStr) : 1.0;
            if (sigma <= 0) {
                throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sigma + ")");
            }
            dist = new NormalDistribution(mu, sigma);
            break;
        }
        case EXP: {
            // rate defaults to 1 when absent
            String rateStr = params.get("rate");
            double rate = (rateStr != null) ? Double.parseDouble(rateStr) : 1.0;
            if (rate <= 0) {
                throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + rate + ")");
            }
            // the distribution is parameterized by its mean, which is 1/rate
            dist = new ExponentialDistribution(1.0 / rate);
            break;
        }
        case CHISQ: {
            String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for chi-squared distribution "
                    + "(e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
            }
            int chiDf = UtilFunctions.parseToInt(dfStr);
            if (chiDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + chiDf + ")");
            }
            dist = new ChiSquaredDistribution(chiDf);
            break;
        }
        case F: {
            String numStr = params.get("df1");
            String denStr = params.get("df2");
            if (numStr == null || denStr == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for F distribution "
                    + "(e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
            }
            int numDf = UtilFunctions.parseToInt(numStr);
            int denDf = UtilFunctions.parseToInt(denStr);
            if (numDf <= 0 || denDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + numDf + "," + denDf + ")");
            }
            dist = new FDistribution(numDf, denDf);
            break;
        }
        case T: {
            String dfStr = params.get("df");
            if (dfStr == null) {
                throw new DMLRuntimeException("Degrees of freedom is needed to compute probabilities from t distribution "
                    + "(e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
            }
            int tDf = UtilFunctions.parseToInt(dfStr);
            if (tDf <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + tDf + ")");
            }
            dist = new TDistribution(tDf);
            break;
        }
        default:
            throw new DMLRuntimeException("Invalid distribution code: " + dcode);
    }

    if (inverse) {
        // inverse cdf; the tail selection is not consulted here
        return dist.inverseCumulativeProbability(target);
    }

    double lowerTailProbability = dist.cumulativeProbability(target);
    if (lowerTail) {
        return lowerTailProbability;
    }
    // upper tail as the complement of the lower tail
    // TODO: more accurate distribution-specific computation of upper tail probabilities
    return 1.0 - lowerTailProbability;
}
Aggregations