Search in sources:

Example 1 with FDistribution

use of org.apache.commons.math3.distribution.FDistribution in project knime-core by knime.

the class LeveneTestStatistics method getLeveneTestTwoGroupsCells.

/**
 * Computes the Levene test result for the two-group case. This is a
 * shortcut variant that works directly on the mean, variance and count of
 * the first two entries of {@code m_denStats} together with the overall
 * mean taken from {@code m_lstats}.
 *
 * @return a list holding exactly one row with, in order: the column name,
 *         the test statistic, the numerator degrees of freedom, the
 *         denominator degrees of freedom and the upper-tail p-value
 */
public List<List<DataCell>> getLeveneTestTwoGroupsCells() {
    final SummaryStatistics groupA = m_denStats.get(0);
    final SummaryStatistics groupB = m_denStats.get(1);

    // sample counts, held as doubles so the arithmetic below is floating point
    final double countA = groupA.getN();
    final double countB = groupB.getN();

    // offset of each group mean from the overall sample mean
    final double grandMean = m_lstats.getMean();
    final double offsetA = groupA.getMean() - grandMean;
    final double offsetB = groupB.getMean() - grandMean;

    // numerator: count-weighted squared offsets of the group means
    final double between = countA * offsetA * offsetA + countB * offsetB * offsetB;
    // denominator: (n - 1) * variance, summed over both groups
    final double within = (countA - 1) * groupA.getVariance() + (countB - 1) * groupB.getVariance();
    final double statistic = (countA + countB - 2) / within * between;

    final long numeratorDf = 1;
    final long denominatorDf = (long) countA + (long) countB - 2;
    // p-value is the probability mass above the statistic under F(df1, df2)
    final double pValue =
        1 - new FDistribution(numeratorDf, denominatorDf).cumulativeProbability(statistic);

    final List<DataCell> row = new ArrayList<DataCell>();
    row.add(new StringCell(m_column));
    row.add(new DoubleCell(statistic));
    row.add(new IntCell((int) numeratorDf));
    row.add(new IntCell((int) denominatorDf));
    row.add(new DoubleCell(pValue));
    return Collections.singletonList(row);
}
Also used : StringCell(org.knime.core.data.def.StringCell) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) DataCell(org.knime.core.data.DataCell) FDistribution(org.apache.commons.math3.distribution.FDistribution) IntCell(org.knime.core.data.def.IntCell)

Example 2 with FDistribution

use of org.apache.commons.math3.distribution.FDistribution in project knime-core by knime.

the class LeveneTestStatistics method getTTestCells.

/**
 * Computes the Levene test result for the configured groups. Despite its
 * name, this method reports the Levene test; with exactly two groups it
 * delegates to {@link #getLeveneTestTwoGroupsCells()}.
 *
 * @return a list holding exactly one row with, in order: the column name,
 *         the test statistic, the numerator degrees of freedom, the
 *         denominator degrees of freedom and the upper-tail p-value
 */
public List<List<DataCell>> getTTestCells() {
    final int groupCount = m_groups.size();
    if (groupCount == 2) {
        // dedicated two-group variant
        return getLeveneTestTwoGroupsCells();
    }

    final double grandMean = m_lstats.getMean();
    double between = 0;
    double within = 0;
    for (int g = 0; g < groupCount; g++) {
        // numerator: count-weighted squared offset of the group mean from the overall mean
        final SummaryStatistics groupStats = m_levenePre.getLgstats().get(g);
        final double offset = groupStats.getMean() - grandMean;
        between += groupStats.getN() * offset * offset;
        // denominator: sum of squares recorded for the same group
        within += m_denStats.get(g).getSumsq();
    }

    final long numeratorDf = groupCount - 1;
    final long denominatorDf = m_lstats.getN() - groupCount;
    final double statistic = denominatorDf / (double) numeratorDf * between / within;
    // p-value is the probability mass above the statistic under F(df1, df2)
    final double pValue =
        1 - new FDistribution(numeratorDf, denominatorDf).cumulativeProbability(statistic);

    final List<DataCell> row = new ArrayList<DataCell>();
    row.add(new StringCell(m_column));
    row.add(new DoubleCell(statistic));
    row.add(new IntCell((int) numeratorDf));
    row.add(new IntCell((int) denominatorDf));
    row.add(new DoubleCell(pValue));
    return Collections.singletonList(row);
}
Also used : StringCell(org.knime.core.data.def.StringCell) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) DataCell(org.knime.core.data.DataCell) FDistribution(org.apache.commons.math3.distribution.FDistribution) IntCell(org.knime.core.data.def.IntCell)

Example 3 with FDistribution

use of org.apache.commons.math3.distribution.FDistribution in project incubator-systemml by apache.

the class ParameterizedBuiltin method computeFromDistribution.

/**
 * Evaluates a cumulative distribution function (lower or upper tail) or its
 * inverse for the distribution selected by {@code dcode}.
 *
 * <p>Entries read from {@code params}: {@code "target"} (always parsed),
 * {@code "lower.tail"} (optional, lower tail when absent), {@code "mean"} and
 * {@code "sd"} for NORMAL (defaults 0 and 1), {@code "rate"} for EXP
 * (default 1), {@code "df"} for CHISQ and T, {@code "df1"} and {@code "df2"}
 * for F. The tail setting is only consulted when {@code inverse} is false.
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true to compute the inverse cdf, in which case "target" is
 *        a probability; false to compute the cdf, in which case it is a quantile
 * @return cdf or inverse cdf
 * @throws DMLRuntimeException if a required degrees-of-freedom entry is
 *         missing, a distribution parameter is not positive, or the
 *         distribution code is not handled
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
    final double target = Double.parseDouble(params.get("target"));
    final String tailSetting = params.get("lower.tail");
    final boolean useLowerTail = (tailSetting == null) || Boolean.parseBoolean(tailSetting);

    final AbstractRealDistribution dist;
    switch (dcode) {
        case NORMAL: {
            final String meanText = params.get("mean");
            final String sdText = params.get("sd");
            // standard normal unless overridden
            final double mean = (meanText == null) ? 0.0 : Double.parseDouble(meanText);
            final double sd = (sdText == null) ? 1.0 : Double.parseDouble(sdText);
            if (sd <= 0) {
                throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sd + ")");
            }
            dist = new NormalDistribution(mean, sd);
            break;
        }
        case EXP: {
            final String rateText = params.get("rate");
            // rate (1/mean) defaults to one
            final double rate = (rateText == null) ? 1.0 : Double.parseDouble(rateText);
            if (rate <= 0) {
                throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + rate + ")");
            }
            // the distribution is constructed from its mean, i.e. 1/rate
            dist = new ExponentialDistribution(1.0 / rate);
            break;
        }
        case CHISQ: {
            final String dfText = params.get("df");
            if (dfText == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for chi-squared distribution "
                    + "(e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
            }
            final int dof = UtilFunctions.parseToInt(dfText);
            if (dof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + dof + ")");
            }
            dist = new ChiSquaredDistribution(dof);
            break;
        }
        case F: {
            final String numText = params.get("df1");
            final String denText = params.get("df2");
            if (numText == null || denText == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for F distribution "
                    + "(e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
            }
            final int numDof = UtilFunctions.parseToInt(numText);
            final int denDof = UtilFunctions.parseToInt(denText);
            if (numDof <= 0 || denDof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + numDof + "," + denDof + ")");
            }
            dist = new FDistribution(numDof, denDof);
            break;
        }
        case T: {
            final String dfText = params.get("df");
            if (dfText == null) {
                throw new DMLRuntimeException("Degrees of freedom is needed to compute probabilities from t distribution "
                    + "(e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
            }
            final int dof = UtilFunctions.parseToInt(dfText);
            if (dof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + dof + ")");
            }
            dist = new TDistribution(dof);
            break;
        }
        default:
            throw new DMLRuntimeException("Invalid distribution code: " + dcode);
    }

    if (inverse) {
        // inverse cdf: target is a probability
        return dist.inverseCumulativeProbability(target);
    }
    final double lowerTailProbability = dist.cumulativeProbability(target);
    // TODO: more accurate distribution-specific computation of upper tail probabilities
    return useLowerTail ? lowerTailProbability : 1.0 - lowerTailProbability;
}
Also used : AbstractRealDistribution(org.apache.commons.math3.distribution.AbstractRealDistribution) ChiSquaredDistribution(org.apache.commons.math3.distribution.ChiSquaredDistribution) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) ExponentialDistribution(org.apache.commons.math3.distribution.ExponentialDistribution) TDistribution(org.apache.commons.math3.distribution.TDistribution) FDistribution(org.apache.commons.math3.distribution.FDistribution) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 4 with FDistribution

use of org.apache.commons.math3.distribution.FDistribution in project systemml by apache.

the class ParameterizedBuiltin method computeFromDistribution.

/**
 * Evaluates a cumulative distribution function (lower or upper tail) or its
 * inverse for the distribution selected by {@code dcode}.
 *
 * <p>Entries read from {@code params}: {@code "target"} (always parsed),
 * {@code "lower.tail"} (optional, lower tail when absent), {@code "mean"} and
 * {@code "sd"} for NORMAL (defaults 0 and 1), {@code "rate"} for EXP
 * (default 1), {@code "df"} for CHISQ and T, {@code "df1"} and {@code "df2"}
 * for F. The tail setting is only consulted when {@code inverse} is false.
 *
 * @param dcode probability distribution code
 * @param params map of parameters
 * @param inverse true to compute the inverse cdf, in which case "target" is
 *        a probability; false to compute the cdf, in which case it is a quantile
 * @return cdf or inverse cdf
 * @throws DMLRuntimeException if a required degrees-of-freedom entry is
 *         missing, a distribution parameter is not positive, or the
 *         distribution code is not handled
 */
private static double computeFromDistribution(ProbabilityDistributionCode dcode, HashMap<String, String> params, boolean inverse) {
    final double target = Double.parseDouble(params.get("target"));
    final String tailSetting = params.get("lower.tail");
    final boolean useLowerTail = (tailSetting == null) || Boolean.parseBoolean(tailSetting);

    final AbstractRealDistribution dist;
    switch (dcode) {
        case NORMAL: {
            final String meanText = params.get("mean");
            final String sdText = params.get("sd");
            // standard normal unless overridden
            final double mean = (meanText == null) ? 0.0 : Double.parseDouble(meanText);
            final double sd = (sdText == null) ? 1.0 : Double.parseDouble(sdText);
            if (sd <= 0) {
                throw new DMLRuntimeException("Standard deviation for Normal distribution must be positive (" + sd + ")");
            }
            dist = new NormalDistribution(mean, sd);
            break;
        }
        case EXP: {
            final String rateText = params.get("rate");
            // rate (1/mean) defaults to one
            final double rate = (rateText == null) ? 1.0 : Double.parseDouble(rateText);
            if (rate <= 0) {
                throw new DMLRuntimeException("Rate for Exponential distribution must be positive (" + rate + ")");
            }
            // the distribution is constructed from its mean, i.e. 1/rate
            dist = new ExponentialDistribution(1.0 / rate);
            break;
        }
        case CHISQ: {
            final String dfText = params.get("df");
            if (dfText == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for chi-squared distribution "
                    + "(e.g., q=qchisq(0.5, df=20); p=pchisq(target=q, df=1.2))");
            }
            final int dof = UtilFunctions.parseToInt(dfText);
            if (dof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for chi-squared distribution must be positive (" + dof + ")");
            }
            dist = new ChiSquaredDistribution(dof);
            break;
        }
        case F: {
            final String numText = params.get("df1");
            final String denText = params.get("df2");
            if (numText == null || denText == null) {
                throw new DMLRuntimeException("Degrees of freedom must be specified for F distribution "
                    + "(e.g., q = qf(target=0.5, df1=20, df2=30); p=pf(target=q, df1=20, df2=30))");
            }
            final int numDof = UtilFunctions.parseToInt(numText);
            final int denDof = UtilFunctions.parseToInt(denText);
            if (numDof <= 0 || denDof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for F distribution must be positive (" + numDof + "," + denDof + ")");
            }
            dist = new FDistribution(numDof, denDof);
            break;
        }
        case T: {
            final String dfText = params.get("df");
            if (dfText == null) {
                throw new DMLRuntimeException("Degrees of freedom is needed to compute probabilities from t distribution "
                    + "(e.g., q = qt(target=0.5, df=10); p = pt(target=q, df=10))");
            }
            final int dof = UtilFunctions.parseToInt(dfText);
            if (dof <= 0) {
                throw new DMLRuntimeException("Degrees of Freedom for t distribution must be positive (" + dof + ")");
            }
            dist = new TDistribution(dof);
            break;
        }
        default:
            throw new DMLRuntimeException("Invalid distribution code: " + dcode);
    }

    if (inverse) {
        // inverse cdf: target is a probability
        return dist.inverseCumulativeProbability(target);
    }
    final double lowerTailProbability = dist.cumulativeProbability(target);
    // TODO: more accurate distribution-specific computation of upper tail probabilities
    return useLowerTail ? lowerTailProbability : 1.0 - lowerTailProbability;
}
Also used : AbstractRealDistribution(org.apache.commons.math3.distribution.AbstractRealDistribution) ChiSquaredDistribution(org.apache.commons.math3.distribution.ChiSquaredDistribution) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) ExponentialDistribution(org.apache.commons.math3.distribution.ExponentialDistribution) TDistribution(org.apache.commons.math3.distribution.TDistribution) FDistribution(org.apache.commons.math3.distribution.FDistribution) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

FDistribution (org.apache.commons.math3.distribution.FDistribution)4 ArrayList (java.util.ArrayList)2 AbstractRealDistribution (org.apache.commons.math3.distribution.AbstractRealDistribution)2 ChiSquaredDistribution (org.apache.commons.math3.distribution.ChiSquaredDistribution)2 ExponentialDistribution (org.apache.commons.math3.distribution.ExponentialDistribution)2 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)2 TDistribution (org.apache.commons.math3.distribution.TDistribution)2 SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics)2 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)2 DataCell (org.knime.core.data.DataCell)2 DoubleCell (org.knime.core.data.def.DoubleCell)2 IntCell (org.knime.core.data.def.IntCell)2 StringCell (org.knime.core.data.def.StringCell)2