Search in sources :

Example 1 with AndersonDarlingTest

use of edu.cmu.tetrad.data.AndersonDarlingTest in project tetrad by cmu-phil.

the class NormalityTests method runNormalityTests.

/**
 * Builds a readable, plain-text report of normality test results for one variable.
 *
 * <p>The report has two sections: a Kolmogorov-Smirnov table (statistic, critical
 * values and an ACCEPT/FAIL verdict per significance level) followed by the
 * Anderson-Darling statistics (A^2, A^2* and p).
 *
 * <p>As used here, {@code kolmogorovSmirnov} returns an array whose element 0 is the
 * K-S statistic and whose elements 1..5 are the critical values that the statistic
 * is compared against, in the column order of the ".20 .15 .10 .05 .01" header.
 *
 * @param dataSet  the data set containing the variable's column
 * @param variable the continuous variable to test
 * @return the formatted multi-line report
 */
public static String runNormalityTests(DataSet dataSet, ContinuousVariable variable) {
    NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();
    StringBuilder report = new StringBuilder();

    // Title, underlined with one dash per title character.
    String title = "Normality Tests for: " + variable.getName() + " (sample size:" + dataSet.getNumRows() + ")";
    report.append(title).append("\n");
    for (int i = 0; i < title.length(); i++) {
        report.append("-");
    }

    report.append("\n\nKolmogorov Smirnov:\n--------------------------------\n");
    double[] ksResults = kolmogorovSmirnov(dataSet, variable);
    // Round the displayed statistic to seven decimal places.
    double ksStat = Math.round((ksResults[0] * 10000000.0)) / 10000000.0;
    report.append("K-S Statistic: ").append(ksStat).append("\n\n");

    report.append("Significance Levels:\t.20\t.15\t.10\t.05\t.01\nK-S Critical Values:");
    // Critical values live at indices 1..5; index 0 is the statistic itself and must
    // not be printed in this row (the verdict row below compares against 1..5).
    for (int level = 1; level <= 5; level++) {
        report.append("\t").append(nf.format(ksResults[level]));
    }
    report.append("\n");

    // One verdict per significance level: ACCEPT when the statistic is below the
    // corresponding critical value.
    report.append("Test Result:\t\t");
    for (int level = 1; level <= 5; level++) {
        if (level > 1) {
            report.append("\t");
        }
        report.append(ksResults[0] < ksResults[level] ? "ACCEPT" : "FAIL");
    }

    report.append("\n\nH0 = ").append(variable).append(" is Normal.\n");
    report.append("(Normal if ACCEPT.)\n");
    report.append("\n\n");

    report.append("Anderson Darling Test:\n");
    report.append("---------------------\n");
    int column = dataSet.getVariables().indexOf(variable);
    double[] data = dataSet.getDoubleData().getColumn(column).toArray();
    AndersonDarlingTest andersonDarlingTest = new AndersonDarlingTest(data);
    report.append("A^2 = ").append(nf.format(andersonDarlingTest.getASquared())).append("\n");
    report.append("A^2* = ").append(nf.format(andersonDarlingTest.getASquaredStar())).append("\n");
    report.append("p = ").append(nf.format(andersonDarlingTest.getP())).append("\n");
    // NOTE(review): "is Non-normal" contradicts the "(Normal if p > alpha.)" line that
    // follows; the text is left unchanged here pending confirmation of the intended wording.
    report.append("\nH0 = ").append(variable).append(" is Non-normal.");
    report.append("\n(Normal if p > alpha.)\n");
    return report.toString();
}
Also used : AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) NumberFormat(java.text.NumberFormat)

Example 2 with AndersonDarlingTest

use of edu.cmu.tetrad.data.AndersonDarlingTest in project tetrad by cmu-phil.

the class LingamPattern2 method getScore1.

// =============================PRIVATE METHODS=========================//
/**
 * Scores a DAG against several data sets at once.
 *
 * <p>For every node, the node is regressed on its parents in the DAG separately in
 * each data set; the residuals of each regression are standardized (mean-centered,
 * divided by their standard deviation) and replaced by their absolute values. The
 * values from all data sets are concatenated into one column per node. The score is
 * the sum over nodes of the squared difference between the mean of that column and
 * sqrt(2/pi). An Anderson-Darling p-value is also computed for each column.
 *
 * @param dag       the graph whose nodes and parent sets define the regressions
 * @param data      one data matrix per data set
 * @param variables the variables of the data; DAG nodes are mapped to them by name
 * @return the score together with the per-column Anderson-Darling p-values
 */
private Score getScore1(Graph dag, List<TetradMatrix> data, List<Node> variables) {
    List<Regression> regressions = new ArrayList<>();
    for (TetradMatrix _data : data) {
        regressions.add(new RegressionDataset(_data, variables));
    }
    int totalSampleSize = 0;
    for (TetradMatrix _data : data) {
        totalSampleSize += _data.rows();
    }
    int numCols = data.get(0).columns();
    List<Node> nodes = dag.getNodes();
    double score = 0.0;
    double[] pValues = new double[nodes.size()];
    TetradMatrix absoluteStandardizedResiduals = new TetradMatrix(totalSampleSize, numCols);
    for (int i = 0; i < nodes.size(); i++) {
        // Absolute standardized residuals for node i, concatenated across data sets.
        List<Double> _absoluteStandardizedResiduals = new ArrayList<>();
        for (int j = 0; j < data.size(); j++) {
            Node _target = nodes.get(i);
            List<Node> _regressors = dag.getParents(_target);
            Node target = getVariable(variables, _target.getName());
            List<Node> regressors = new ArrayList<>();
            for (Node _regressor : _regressors) {
                Node variable = getVariable(variables, _regressor.getName());
                regressors.add(variable);
            }
            RegressionResult result = regressions.get(j).regress(target, regressors);
            TetradVector residualsColumn = result.getResiduals();
            DoubleArrayList _absoluteStandardizedResidualsColumn = new DoubleArrayList(residualsColumn.toArray());
            double mean = Descriptive.mean(_absoluteStandardizedResidualsColumn);
            double std = Descriptive.standardDeviation(Descriptive.variance(_absoluteStandardizedResidualsColumn.size(), Descriptive.sum(_absoluteStandardizedResidualsColumn), Descriptive.sumOfSquares(_absoluteStandardizedResidualsColumn)));
            for (int i2 = 0; i2 < _absoluteStandardizedResidualsColumn.size(); i2++) {
                double standardized = (_absoluteStandardizedResidualsColumn.get(i2) - mean) / std;
                _absoluteStandardizedResidualsColumn.set(i2, Math.abs(standardized));
            }
            for (int k = 0; k < _absoluteStandardizedResidualsColumn.size(); k++) {
                _absoluteStandardizedResiduals.add(_absoluteStandardizedResidualsColumn.get(k));
            }
        }
        // Store the values in column i BEFORE reading the column back. Reading it first
        // would take the mean of the column's initial contents rather than of the
        // residuals computed above.
        for (int k = 0; k < _absoluteStandardizedResiduals.size(); k++) {
            absoluteStandardizedResiduals.set(k, i, _absoluteStandardizedResiduals.get(k));
        }
        DoubleArrayList absoluteStandardResidualsList = new DoubleArrayList(absoluteStandardizedResiduals.getColumn(i).toArray());
        double _mean = Descriptive.mean(absoluteStandardResidualsList);
        // sqrt(2/pi) is the expected absolute value of a standard normal variable.
        double diff = _mean - Math.sqrt(2.0 / Math.PI);
        score += diff * diff;
    }
    for (int j = 0; j < absoluteStandardizedResiduals.columns(); j++) {
        double[] x = absoluteStandardizedResiduals.getColumn(j).toArray();
        double p = new AndersonDarlingTest(x).getP();
        pValues[j] = p;
    }
    return new Score(score, pValues);
}
Also used : Regression(edu.cmu.tetrad.regression.Regression) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) TetradVector(edu.cmu.tetrad.util.TetradVector) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 3 with AndersonDarlingTest

use of edu.cmu.tetrad.data.AndersonDarlingTest in project tetrad by cmu-phil.

the class LingamPattern method getScore.

// =============================PRIVATE METHODS=========================//
/**
 * Scores a DAG against a single data matrix.
 *
 * <p>Each node is regressed on its parents in the DAG. The raw residuals are stored
 * column by column for a later Anderson-Darling test, while a standardized,
 * absolute-valued copy is used to accumulate the score: the sum over nodes of the
 * squared difference between the mean absolute standardized residual and sqrt(2/pi).
 *
 * @param dag       the graph whose nodes and parent sets define the regressions
 * @param data      the data matrix
 * @param variables the variables of the data; DAG nodes are mapped to them by name
 * @return the score together with one Anderson-Darling p-value per residual column
 */
private Score getScore(Graph dag, TetradMatrix data, List<Node> variables) {
    Regression regression = new RegressionDataset(data, variables);
    List<Node> dagNodes = dag.getNodes();
    double total = 0.0;
    double[] pValues = new double[dagNodes.size()];
    TetradMatrix residuals = new TetradMatrix(data.rows(), data.columns());

    int col = 0;
    for (Node dagNode : dagNodes) {
        // Translate the DAG node and its parents into the data's variables by name.
        List<Node> dagParents = dag.getParents(dagNode);
        Node response = getVariable(variables, dagNode.getName());
        List<Node> predictors = new ArrayList<>();
        for (Node dagParent : dagParents) {
            predictors.add(getVariable(variables, dagParent.getName()));
        }

        TetradVector columnResiduals = regression.regress(response, predictors).getResiduals();
        residuals.assignColumn(col, columnResiduals);

        // Standardize a working copy and take absolute values.
        DoubleArrayList work = new DoubleArrayList(columnResiduals.toArray());
        double mean = Descriptive.mean(work);
        double variance = Descriptive.variance(work.size(), Descriptive.sum(work), Descriptive.sumOfSquares(work));
        double std = Descriptive.standardDeviation(variance);
        for (int r = 0; r < work.size(); r++) {
            work.set(r, Math.abs((work.get(r) - mean) / std));
        }

        double deviation = Descriptive.mean(work) - Math.sqrt(2.0 / Math.PI);
        total += deviation * deviation;
        col++;
    }

    // The p-values come from the raw (unstandardized) residual columns.
    for (int c = 0; c < residuals.columns(); c++) {
        pValues[c] = new AndersonDarlingTest(residuals.getColumn(c).toArray()).getP();
    }
    return new Score(total, pValues);
}
Also used : Regression(edu.cmu.tetrad.regression.Regression) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) TetradVector(edu.cmu.tetrad.util.TetradVector) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 4 with AndersonDarlingTest

use of edu.cmu.tetrad.data.AndersonDarlingTest in project tetrad by cmu-phil.

the class Lofs method andersonDarlingPASquareStarB.

/**
 * Averages the Anderson-Darling A^2* statistic of the regression residuals over the
 * data sets.
 *
 * <p>The node is regressed on the given parents in each data set. A data set whose
 * residuals contain a NaN is skipped. For every other data set the residuals are
 * optionally mean-centered (when {@code isMeanCenterResiduals()} is true) and their
 * A^2* is added to a running sum.
 *
 * @param node    the target node
 * @param parents the regressors
 * @return the sum of the per-data-set A^2* values divided by the total number of data
 *         sets (skipped data sets still count in the denominator)
 */
private double andersonDarlingPASquareStarB(Node node, List<Node> parents) {
    Node target = getVariable(variables, node.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node _regressor : parents) {
        regressors.add(getVariable(variables, _regressor.getName()));
    }
    double sum = 0.0;
    DATASET: for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residualsSingleDataset = result.getResiduals();
        for (int h = 0; h < residualsSingleDataset.size(); h++) {
            if (Double.isNaN(residualsSingleDataset.get(h))) {
                continue DATASET;
            }
        }
        DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray());
        if (isMeanCenterResiduals()) {
            double mean = Descriptive.mean(_residualsSingleDataset);
            for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
                _residualsSingleDataset.set(i2, _residualsSingleDataset.get(i2) - mean);
            }
        }
        // Test THIS data set's residuals. Previously the array was filled from a list
        // that was never populated, so the test always ran on an empty sample.
        double[] _f = new double[_residualsSingleDataset.size()];
        for (int k = 0; k < _f.length; k++) {
            _f[k] = _residualsSingleDataset.get(k);
        }
        sum += new AndersonDarlingTest(_f).getASquaredStar();
    }
    return sum / dataSets.size();
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 5 with AndersonDarlingTest

use of edu.cmu.tetrad.data.AndersonDarlingTest in project tetrad by cmu-phil.

the class Lofs method andersonDarlingPASquareStar.

/**
 * Computes the Anderson-Darling A^2* statistic of the regression residuals pooled
 * across all data sets.
 *
 * <p>The node is regressed on the given parents in each data set. A data set whose
 * residuals contain a NaN contributes nothing. Residuals of the remaining data sets
 * are optionally mean-centered (when {@code isMeanCenterResiduals()} is true) and
 * appended to one pooled sample, on which a single test is run.
 *
 * @param node    the target node
 * @param parents the regressors
 * @return A^2* of the pooled residuals
 */
private double andersonDarlingPASquareStar(Node node, List<Node> parents) {
    List<Double> pooled = new ArrayList<>();
    Node target = getVariable(variables, node.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node parent : parents) {
        regressors.add(getVariable(variables, parent.getName()));
    }

    for (int d = 0; d < dataSets.size(); d++) {
        TetradVector rawResiduals = regressions.get(d).regress(target, regressors).getResiduals();

        // Scan up to the first NaN; a data set with any NaN residual is skipped entirely.
        boolean hasNaN = false;
        for (int r = 0; r < rawResiduals.size() && !hasNaN; r++) {
            hasNaN = Double.isNaN(rawResiduals.get(r));
        }
        if (hasNaN) {
            continue;
        }

        DoubleArrayList current = new DoubleArrayList(rawResiduals.toArray());
        double mean = Descriptive.mean(current);
        // Computed as in the original code but not used below (no scaling is applied).
        double std = Descriptive.standardDeviation(Descriptive.variance(current.size(), Descriptive.sum(current), Descriptive.sumOfSquares(current)));
        for (int r = 0; r < current.size(); r++) {
            if (isMeanCenterResiduals()) {
                current.set(r, current.get(r) - mean);
            }
        }
        for (int r = 0; r < current.size(); r++) {
            pooled.add(current.get(r));
        }
    }

    // Unbox the pooled sample into a primitive array for the test.
    double[] sample = new double[pooled.size()];
    int idx = 0;
    for (Double value : pooled) {
        sample[idx++] = value;
    }
    return new AndersonDarlingTest(sample).getASquaredStar();
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Aggregations

AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)10 DoubleArrayList (cern.colt.list.DoubleArrayList)8 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)8 TetradVector (edu.cmu.tetrad.util.TetradVector)5 ArrayList (java.util.ArrayList)5 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 Regression (edu.cmu.tetrad.regression.Regression)3 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)3 NumberFormat (java.text.NumberFormat)1 Test (org.junit.Test)1