Search in sources :

Example 11 with RegressionResult

use of edu.cmu.tetrad.regression.RegressionResult in project tetrad by cmu-phil.

the class LingamPattern method getScore.

// =============================PRIVATE METHODS=========================//
private Score getScore(Graph dag, TetradMatrix data, List<Node> variables) {
    // System.out.println("Scoring DAG: " + dag);
    Regression regression = new RegressionDataset(data, variables);
    List<Node> nodes = dag.getNodes();
    double score = 0.0;
    double[] pValues = new double[nodes.size()];
    TetradMatrix residuals = new TetradMatrix(data.rows(), data.columns());
    for (int i = 0; i < nodes.size(); i++) {
        Node _target = nodes.get(i);
        List<Node> _regressors = dag.getParents(_target);
        Node target = getVariable(variables, _target.getName());
        List<Node> regressors = new ArrayList<>();
        for (Node _regressor : _regressors) {
            Node variable = getVariable(variables, _regressor.getName());
            regressors.add(variable);
        }
        RegressionResult result = regression.regress(target, regressors);
        TetradVector residualsColumn = result.getResiduals();
        // residuals.viewColumn(i).assign(residualsColumn);
        residuals.assignColumn(i, residualsColumn);
        DoubleArrayList residualsArray = new DoubleArrayList(residualsColumn.toArray());
        double mean = Descriptive.mean(residualsArray);
        double std = Descriptive.standardDeviation(Descriptive.variance(residualsArray.size(), Descriptive.sum(residualsArray), Descriptive.sumOfSquares(residualsArray)));
        for (int i2 = 0; i2 < residualsArray.size(); i2++) {
            residualsArray.set(i2, (residualsArray.get(i2) - mean) / std);
            residualsArray.set(i2, Math.abs(residualsArray.get(i2)));
        }
        double _mean = Descriptive.mean(residualsArray);
        double diff = _mean - Math.sqrt(2.0 / Math.PI);
        score += diff * diff;
    }
    for (int j = 0; j < residuals.columns(); j++) {
        double[] x = residuals.getColumn(j).toArray();
        double p = new AndersonDarlingTest(x).getP();
        pValues[j] = p;
    }
    return new Score(score, pValues);
}
Also used : Regression(edu.cmu.tetrad.regression.Regression) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) TetradVector(edu.cmu.tetrad.util.TetradVector) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 12 with RegressionResult

use of edu.cmu.tetrad.regression.RegressionResult in project tetrad by cmu-phil.

the class Lofs method andersonDarlingPASquareStarB.

private double andersonDarlingPASquareStarB(Node node, List<Node> parents) {
    List<Double> _residuals = new ArrayList<>();
    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node _regressor : _regressors) {
        Node variable = getVariable(variables, _regressor.getName());
        regressors.add(variable);
    }
    double sum = 0.0;
    DATASET: for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residualsSingleDataset = result.getResiduals();
        for (int h = 0; h < residualsSingleDataset.size(); h++) {
            if (Double.isNaN(residualsSingleDataset.get(h))) {
                continue DATASET;
            }
        }
        DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray());
        double mean = Descriptive.mean(_residualsSingleDataset);
        double std = Descriptive.standardDeviation(Descriptive.variance(_residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset)));
        // of the individual columns. http://en.wikipedia.org/wiki/Mixture_distribution#Finite_and_countable_mixtures
        for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
            // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)) / std);
            if (isMeanCenterResiduals()) {
                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean));
            }
        }
        double[] _f = new double[_residuals.size()];
        for (int k = 0; k < _residuals.size(); k++) {
            _f[k] = _residuals.get(k);
        }
        sum += new AndersonDarlingTest(_f).getASquaredStar();
    }
    return sum / dataSets.size();
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 13 with RegressionResult

use of edu.cmu.tetrad.regression.RegressionResult in project tetrad by cmu-phil.

the class Lofs method residual.

private double[] residual(Node node, List<Node> parents) {
    List<Double> _residuals = new ArrayList<>();
    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node _regressor : _regressors) {
        Node variable = getVariable(variables, _regressor.getName());
        regressors.add(variable);
    }
    DATASET: for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residualsSingleDataset = result.getResiduals();
        for (int h = 0; h < residualsSingleDataset.size(); h++) {
            if (Double.isNaN(residualsSingleDataset.get(h))) {
                continue DATASET;
            }
        }
        DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray());
        double mean = Descriptive.mean(_residualsSingleDataset);
        for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
            // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / std);
            _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean));
        // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)));
        }
        for (int k = 0; k < _residualsSingleDataset.size(); k++) {
            _residuals.add(_residualsSingleDataset.get(k));
        }
    }
    double[] _f = new double[_residuals.size()];
    for (int k = 0; k < _residuals.size(); k++) {
        _f[k] = _residuals.get(k);
    }
    return _f;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 14 with RegressionResult

use of edu.cmu.tetrad.regression.RegressionResult in project tetrad by cmu-phil.

the class Lofs method localScoreB.

private double localScoreB(Node node, List<Node> parents) {
    double score = 0.0;
    double maxScore = Double.NEGATIVE_INFINITY;
    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node _regressor : _regressors) {
        Node variable = getVariable(variables, _regressor.getName());
        regressors.add(variable);
    }
    DATASET: for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residualsSingleDataset = result.getResiduals();
        DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray());
        for (int h = 0; h < residualsSingleDataset.size(); h++) {
            if (Double.isNaN(residualsSingleDataset.get(h))) {
                continue DATASET;
            }
        }
        double mean = Descriptive.mean(_residualsSingleDataset);
        double std = Descriptive.standardDeviation(Descriptive.variance(_residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset)));
        for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
            _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / std);
        }
        double[] _f = new double[_residualsSingleDataset.size()];
        for (int k = 0; k < _residualsSingleDataset.size(); k++) {
            _f[k] = _residualsSingleDataset.get(k);
        }
        DoubleArrayList f = new DoubleArrayList(_f);
        for (int k = 0; k < f.size(); k++) {
            f.set(k, Math.abs(f.get(k)));
        }
        double _mean = Descriptive.mean(f);
        double diff = _mean - Math.sqrt(2.0 / Math.PI);
        score += diff * diff;
        if (score > maxScore) {
            maxScore = score;
        }
    }
    double avg = score / dataSets.size();
    return avg;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 15 with RegressionResult

use of edu.cmu.tetrad.regression.RegressionResult in project tetrad by cmu-phil.

the class Lofs method andersonDarlingPASquareStar.

private double andersonDarlingPASquareStar(Node node, List<Node> parents) {
    List<Double> _residuals = new ArrayList<>();
    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<>();
    for (Node _regressor : _regressors) {
        Node variable = getVariable(variables, _regressor.getName());
        regressors.add(variable);
    }
    DATASET: for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residualsSingleDataset = result.getResiduals();
        for (int h = 0; h < residualsSingleDataset.size(); h++) {
            if (Double.isNaN(residualsSingleDataset.get(h))) {
                continue DATASET;
            }
        }
        DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray());
        double mean = Descriptive.mean(_residualsSingleDataset);
        double std = Descriptive.standardDeviation(Descriptive.variance(_residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset)));
        // of the individual columns. http://en.wikipedia.org/wiki/Mixture_distribution#Finite_and_countable_mixtures
        for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
            // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)) / std);
            if (isMeanCenterResiduals()) {
                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean));
            }
        }
        for (int k = 0; k < _residualsSingleDataset.size(); k++) {
            _residuals.add(_residualsSingleDataset.get(k));
        }
    }
    double[] _f = new double[_residuals.size()];
    for (int k = 0; k < _residuals.size(); k++) {
        _f[k] = _residuals.get(k);
    }
    return new AndersonDarlingTest(_f).getASquaredStar();
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) AndersonDarlingTest(edu.cmu.tetrad.data.AndersonDarlingTest) DoubleArrayList(cern.colt.list.DoubleArrayList) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Aggregations

RegressionResult (edu.cmu.tetrad.regression.RegressionResult)33 Regression (edu.cmu.tetrad.regression.Regression)17 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)16 ArrayList (java.util.ArrayList)15 DoubleArrayList (cern.colt.list.DoubleArrayList)11 TetradVector (edu.cmu.tetrad.util.TetradVector)9 AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)8 Node (edu.cmu.tetrad.graph.Node)8 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)8 RegressionCovariance (edu.cmu.tetrad.regression.RegressionCovariance)3 CombinationGenerator (edu.cmu.tetrad.util.CombinationGenerator)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Test (org.junit.Test)2 ColtDataSet (edu.cmu.tetrad.data.ColtDataSet)1 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)1 CovarianceMatrix (edu.cmu.tetrad.data.CovarianceMatrix)1 GeneralAndersonDarlingTest (edu.cmu.tetrad.data.GeneralAndersonDarlingTest)1 ICovarianceMatrix (edu.cmu.tetrad.data.ICovarianceMatrix)1 EdgeListGraph (edu.cmu.tetrad.graph.EdgeListGraph)1 Graph (edu.cmu.tetrad.graph.Graph)1