Search in sources :

Example 1 with RegressionDataset

use of edu.cmu.tetrad.regression.RegressionDataset in project tetrad by cmu-phil.

The method getRegressionResult of the class ScatterPlot.

/**
 * Runs a regression over {@code dataSet} in which the variable looked up with {@code y} is the
 * target and the variable looked up with {@code x} is the only regressor.
 * <p>
 * The result is also printed to standard output before it is returned.
 *
 * @return the result produced by {@link Regression#regress} for the y-on-x regression.
 */
private RegressionResult getRegressionResult() {
    // Look up both variables in the data set: x first, then y.
    final Node predictor = dataSet.getVariable(x);
    final Node response = dataSet.getVariable(y);

    // The regression API expects a list of regressors, even for a single one.
    final List<Node> predictors = new ArrayList<>();
    predictors.add(predictor);

    final Regression engine = new RegressionDataset(dataSet);
    final RegressionResult fit = engine.regress(response, predictors);

    System.out.println(fit);
    return fit;
}
Also used : RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) Node(edu.cmu.tetrad.graph.Node) Regression(edu.cmu.tetrad.regression.Regression) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 2 with RegressionDataset

use of edu.cmu.tetrad.regression.RegressionDataset in project tetrad by cmu-phil.

The method paintComponent of the class ScatterPlotDisplayPanelOld.

/**
 * Paints the scatter plot: the background and bordered plotting area, the min/max axis labels
 * with their tick marks, the origin lines (when zero lies strictly between the minimum and
 * maximum sample), every point in the scatter plot's index set, and optionally the fitted
 * regression line.
 * <p>
 * Note that this method also reassigns the layout fields ({@code PADDINGLEFT},
 * {@code PADDINGOTHER}, {@code HEIGHT}, {@code WIDTH}, {@code SPACE}, {@code DASH}), calls
 * {@code setSize} and clears {@code rectMap} each time it runs.
 *
 * @param graphics the graphics context to draw on; it is cast to {@link Graphics2D}.
 */
public void paintComponent(Graphics graphics) {
    // The same [least, greatest] range is used for both axes.
    double least = this.scatterPlot.getMinSample();
    double greatest = this.scatterPlot.getMaxSample();
    String minStr = format.format(least);
    String maxStr = format.format(greatest);
    Graphics2D g2d = (Graphics2D) graphics;
    FontMetrics fontMetrics = g2d.getFontMetrics();
    int widthMinStr = fontMetrics.stringWidth(minStr);
    int widthMaxStr = fontMetrics.stringWidth(maxStr);
    int maxWidth = Math.max(widthMinStr, widthMaxStr);
    // The left padding is sized to fit the wider of the two axis labels.
    PADDINGLEFT = maxWidth;
    PADDINGOTHER = 50;
    HEIGHT = 600 + PADDINGOTHER;
    WIDTH = 600 + PADDINGLEFT;
    SPACE = 2;
    DASH = 10;
    setSize(new Dimension(WIDTH + 2 * SPACE, HEIGHT));
    // set up variables.
    this.rectMap.clear();
    int height = HEIGHT - PADDINGOTHER;
    // draw background/surrounding box.
    g2d.setColor(this.getBackground());
    g2d.fillRect(0, 0, WIDTH + 2 * SPACE, HEIGHT);
    g2d.setColor(Color.WHITE);
    g2d.fillRect(PADDINGLEFT, 0, (WIDTH + SPACE) - PADDINGLEFT, height);
    // border
    g2d.setColor(LINE_COLOR);
    g2d.drawRect(PADDINGLEFT, 0, (WIDTH + SPACE) - PADDINGLEFT, height - 2 * SPACE);
    // draw the bottom line: min label and tick on the left, max label and tick on the right.
    g2d.setColor(LINE_COLOR);
    g2d.drawString(minStr, PADDINGLEFT + 5, height + 15);
    // NOTE(review): this segment ends at x = PADDINGOTHER, not PADDINGLEFT, so it is only a
    // vertical tick (like the one drawn for the max label below) when the two are equal -- confirm.
    g2d.drawLine(PADDINGLEFT, height + DASH, PADDINGOTHER, height);
    g2d.drawString(maxStr, WIDTH - widthMaxStr, height + 15);
    g2d.drawLine(WIDTH + SPACE, height + DASH, WIDTH + SPACE, height);
    // draw the side line: max label at the top, min label at the bottom.
    g2d.setColor(LINE_COLOR);
    int topY = 0;
    g2d.drawString(maxStr, PADDINGLEFT - fontMetrics.stringWidth(maxStr), topY + 10);
    // NOTE(review): both side ticks run from PADDINGLEFT - DASH to PADDINGOTHER rather than to
    // PADDINGLEFT -- confirm that this is the intended tick length.
    g2d.drawLine(PADDINGLEFT - DASH, topY, PADDINGOTHER, topY);
    g2d.drawString(minStr, PADDINGLEFT - fontMetrics.stringWidth(minStr), height - 2);
    g2d.drawLine(PADDINGLEFT - DASH, height, PADDINGOTHER, height);
    // draw the origin lines if they should go on the screen -- first find out where they exist
    if (this.scatterPlot.getMinSample() < 0 && this.scatterPlot.getMaxSample() > 0) {
        double[] originLeft = plotPoint(least, 0, least, greatest);
        double[] originRight = plotPoint(greatest, 0, least, greatest);
        double[] originTop = plotPoint(0, least, least, greatest);
        double[] originBottom = plotPoint(0, greatest, least, greatest);
        g2d.drawLine((int) originLeft[0] + 2, (int) originLeft[1] + 2, (int) originRight[0] + 2, (int) originRight[1] + 2);
        g2d.drawLine((int) originTop[0] + 2, (int) originTop[1] + 2, (int) originBottom[0] + 2, (int) originBottom[1] + 2);
    }
    g2d.setColor(new Color(255, 0, 0));
    // draw each point in the indexSet from our ScatterPlot as a filled 4x4 ellipse
    for (Object o : scatterPlot.getIndexSet()) {
        int i = (Integer) o;
        double x = this.scatterPlot.getxData()[i];
        double y = this.scatterPlot.getyData()[i];
        double[] result = plotPoint(x, y, least, greatest);
        g2d.fill(new Ellipse2D.Double(result[0], result[1], 4, 4));
    }
    // draw the regression line
    if (scatterPlot.isDrawRegLine()) {
        RegressionDataset regData;
        /*
         * When only a subset of the rows is displayed, the rows in the complement of the
         * indexSet are removed from a copy of the original data set, so that the regression
         * is fitted only to the points that are actually rendered.
         */
        if (scatterPlot.getIndexSet().size() != scatterPlot.getDataSet().getNumRows()) {
            // The copy constructor requires a ColtDataSet; any other data set type fails the cast.
            ColtDataSet newDataSet = new ColtDataSet((ColtDataSet) scatterPlot.getDataSet());
            int[] throwAway = new int[scatterPlot.getComplementIndexSet().size()];
            for (int j = 0; j < throwAway.length; j++) {
                throwAway[j] = (Integer) scatterPlot.getComplementIndexSet().get(j);
            }
            newDataSet.removeRows(throwAway);
            regData = new RegressionDataset(newDataSet);
        } else {
            regData = new RegressionDataset(scatterPlot.getDataSet());
        }
        RegressionResult regResult = regData.regress(scatterPlot.getYVariable(), scatterPlot.getXVariable());
        double[] coef = regResult.getCoef();
        // coef[0] is used as the intercept and coef[1] as the slope; the fitted line is
        // evaluated at both ends of the x range and the two end points are joined.
        double[] regLeft = plotPoint(least, coef[0] + coef[1] * least, least, greatest);
        double[] regRight = plotPoint(greatest, coef[0] + coef[1] * greatest, least, greatest);
        g2d.setColor(LINE_COLOR);
        g2d.drawLine((int) regLeft[0] + 2, (int) regLeft[1] + 2, (int) regRight[0] + 2, (int) regRight[1] + 2);
    }
    // Leave the line color selected (the display string that used to follow is no longer drawn).
    g2d.setColor(LINE_COLOR);
}
Also used : Parameters(edu.cmu.tetrad.util.Parameters) Ellipse2D(java.awt.geom.Ellipse2D) RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) Vector(java.util.Vector) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 3 with RegressionDataset

use of edu.cmu.tetrad.regression.RegressionDataset in project tetrad by cmu-phil.

The method isIndependent of the class IndTestRegression.

/**
 * Determines whether variable x is independent of variable y given a list of conditioning variables z.
 * <p>
 * x is regressed on y and all of z (each looked up in the data set by name), and the p value at
 * index 1 of the result -- presumably the coefficient of y, the first regressor -- is compared
 * against alpha.
 *
 * @param xVar  the one variable being compared.
 * @param yVar  the second variable being compared.
 * @param zList the list of conditioning variables.
 * @return true iff x _||_ y | z, i.e. iff the p value is greater than alpha. If the regression
 * itself throws any exception, false is returned instead of propagating it.
 * @throws NullPointerException if zList is null or contains a null element.
 */
public boolean isIndependent(Node xVar, Node yVar, List<Node> zList) {
    if (zList == null) {
        throw new NullPointerException("zList must not be null");
    }
    for (Node node : zList) {
        if (node == null) {
            throw new NullPointerException("zList must not contain null elements");
        }
    }
    // Resolve every variable against this test's data set by name, so that callers may pass
    // nodes that are not the very same objects the data set holds.
    List<Node> regressors = new ArrayList<>();
    regressors.add(dataSet.getVariable(yVar.getName()));
    for (Node zVar : zList) {
        regressors.add(dataSet.getVariable(zVar.getName()));
    }
    Regression regression = new RegressionDataset(dataSet);
    RegressionResult result;
    try {
        // The target is resolved by name like the regressors. This happens inside the try block
        // so that a failure to resolve it is treated like any other regression failure.
        result = regression.regress(dataSet.getVariable(xVar.getName()), regressors);
    } catch (Exception e) {
        // Deliberate best effort: a regression that cannot be computed is reported as
        // "not independent" rather than aborting the caller.
        return false;
    }
    double p = result.getP()[1];
    boolean independent = p > alpha;
    if (verbose) {
        if (independent) {
            TetradLogger.getInstance().log("independencies", SearchLogUtils.independenceFactMsg(xVar, yVar, zList, p));
        } else {
            TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(xVar, yVar, zList, p));
        }
    }
    return independent;
}
Also used : RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) Node(edu.cmu.tetrad.graph.Node) ArrayList(java.util.ArrayList) Regression(edu.cmu.tetrad.regression.Regression) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 4 with RegressionDataset

use of edu.cmu.tetrad.regression.RegressionDataset in project tetrad by cmu-phil.

The method sumOfArCoefficients of the class TimeSeriesUtils.

/**
 * Regresses each of the first {@code timeSeries.getNumColumns()} columns of the lagged data set
 * on all of its remaining columns and summarizes the fitted coefficients.
 * <p>
 * Despite the name, the value returned is the mean of the squared coefficients: every entry of
 * every regression's coefficient array is squared and summed, and the sum is divided by the
 * number of entries. If no coefficient is produced at all, the result is NaN (0.0 / 0).
 *
 * @param timeSeries the original time series data.
 * @param numLags    the number of lags passed to {@code createLagData}.
 * @return the mean of the squared regression coefficients over all target columns.
 */
public static double sumOfArCoefficients(DataSet timeSeries, int numLags) {
    DataSet timeLags = createLagData(timeSeries, numLags);
    int numVars = timeSeries.getNumColumns();
    // Every column after the first numVars columns of the lagged data is used as a regressor.
    List<Node> regressors = new ArrayList<>();
    for (int i = numVars; i < timeLags.getNumColumns(); i++) {
        regressors.add(timeLags.getVariable(i));
    }
    Regression regression = new RegressionDataset(timeLags);
    double sum = 0.0;
    int n = 0;
    for (int i = 0; i < numVars; i++) {
        Node target = timeLags.getVariable(i);
        double[] coef = regression.regress(target, regressors).getCoef();
        for (double c : coef) {
            sum += c * c;
        }
        n += coef.length;
    }
    return sum / n;
}
Also used : RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) TetradVector(edu.cmu.tetrad.util.TetradVector) ArrayList(java.util.ArrayList) Regression(edu.cmu.tetrad.regression.Regression) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Example 5 with RegressionDataset

use of edu.cmu.tetrad.regression.RegressionDataset in project tetrad by cmu-phil.

The method ar2 of the class TimeSeriesUtils.

/**
 * Builds a data set of regression residuals for a time series.
 * <p>
 * Each of the first {@code timeSeries.getNumColumns()} columns of the lagged data set is
 * regressed on the remaining (lagged) columns, and its residuals become the corresponding
 * column of the result. A column of {@code timeSeries} whose values are all NaN is treated as
 * missing: its residual column is filled with NaN and its lagged copies are left out of every
 * regression.
 * <p>
 * Lagged column {@code c} is attributed to original column {@code c % timeSeries.getNumColumns()},
 * i.e. the lagged data is assumed to repeat the original column order block by block.
 *
 * @param timeSeries the original time series data.
 * @param numLags    the number of lags passed to {@code createLagData}.
 * @return a continuous data set over the variables of {@code timeSeries} holding the residuals.
 */
public static DataSet ar2(DataSet timeSeries, int numLags) {
    int numVars = timeSeries.getNumColumns();
    int numRows = timeSeries.getNumRows();

    // Flag, by column position, every variable that has no non-NaN value at all.
    boolean[] missing = new boolean[numVars];
    for (int col = 0; col < numVars; col++) {
        boolean allNaN = true;
        for (int row = 0; row < numRows; row++) {
            if (!Double.isNaN(timeSeries.getDouble(row, col))) {
                allNaN = false;
                break;
            }
        }
        missing[col] = allNaN;
    }

    DataSet timeLags = createLagData(timeSeries, numLags);
    Regression regression = new RegressionDataset(timeLags);
    TetradMatrix residuals = new TetradMatrix(timeLags.getNumRows(), numVars);
    for (int i = 0; i < numVars; i++) {
        if (missing[i]) {
            // Nothing to regress: the whole residual column for this variable is NaN.
            for (int row = 0; row < residuals.rows(); row++) {
                residuals.set(row, i, Double.NaN);
            }
            continue;
        }
        Node target = timeLags.getVariable(i);
        // Use every lagged column except the lagged copies of missing variables.
        List<Node> regressors = new ArrayList<>();
        for (int lagCol = numVars; lagCol < timeLags.getNumColumns(); lagCol++) {
            if (missing[lagCol % numVars]) {
                continue;
            }
            regressors.add(timeLags.getVariable(lagCol));
        }
        RegressionResult result = regression.regress(target, regressors);
        TetradVector residualsColumn = result.getResiduals();
        residuals.assignColumn(i, residualsColumn);
    }
    return ColtDataSet.makeContinuousData(timeSeries.getVariables(), residuals);
}
Also used : RegressionDataset(edu.cmu.tetrad.regression.RegressionDataset) TetradVector(edu.cmu.tetrad.util.TetradVector) ArrayList(java.util.ArrayList) Regression(edu.cmu.tetrad.regression.Regression) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) RegressionResult(edu.cmu.tetrad.regression.RegressionResult)

Aggregations

RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset): 19, Regression (edu.cmu.tetrad.regression.Regression): 16, RegressionResult (edu.cmu.tetrad.regression.RegressionResult): 16, ArrayList (java.util.ArrayList): 10, TetradMatrix (edu.cmu.tetrad.util.TetradMatrix): 7, TetradVector (edu.cmu.tetrad.util.TetradVector): 7, Node (edu.cmu.tetrad.graph.Node): 4, DoubleArrayList (cern.colt.list.DoubleArrayList): 3, AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest): 3, CombinationGenerator (edu.cmu.tetrad.util.CombinationGenerator): 2, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2, OLSMultipleLinearRegression (org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression): 2, ColtDataSet (edu.cmu.tetrad.data.ColtDataSet): 1, GeneralAndersonDarlingTest (edu.cmu.tetrad.data.GeneralAndersonDarlingTest): 1, EdgeListGraph (edu.cmu.tetrad.graph.EdgeListGraph): 1, RegressionCovariance (edu.cmu.tetrad.regression.RegressionCovariance): 1, Parameters (edu.cmu.tetrad.util.Parameters): 1, Ellipse2D (java.awt.geom.Ellipse2D): 1, Vector (java.util.Vector): 1, NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution): 1