Search in sources :

Example 91 with DataSet

use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.

the class RemoveMissingCasesDataFilter method filter.

/**
 * Returns a new data set containing only the rows of {@code data} in which
 * no column holds a missing value.
 *
 * <p>The input is scanned once; the indices of the complete rows are
 * remembered so that the copy step does not have to repeat the
 * missing-value checks.
 *
 * @param data the data set to filter. Each of its variables is cast to
 *             {@code Variable} in order to test values for missingness.
 * @return a new {@code ColtDataSet} over the same variable list, holding
 *         the complete rows in their original relative order.
 */
public DataSet filter(DataSet data) {
    List<Node> variables = data.getVariables();
    int totalRows = data.getNumRows();
    int totalCols = data.getNumColumns();

    // Source-row indices of the complete cases, in their original order.
    int[] keptRows = new int[totalRows];
    int numKept = 0;
    for (int row = 0; row < totalRows; row++) {
        if (!rowHasMissingValue(data, row, totalCols)) {
            keptRows[numKept++] = row;
        }
    }

    // The result is sized exactly to the number of complete cases.
    DataSet newDataSet = new ColtDataSet(numKept, variables);
    for (int newRow = 0; newRow < numKept; newRow++) {
        int sourceRow = keptRows[newRow];
        for (int col = 0; col < totalCols; col++) {
            newDataSet.setObject(newRow, col, data.getObject(sourceRow, col));
        }
    }
    return newDataSet;
}

/**
 * Reports whether any of the first {@code numCols} columns of the given row
 * holds a value that the column's variable considers missing.
 */
private boolean rowHasMissingValue(DataSet data, int row, int numCols) {
    for (int col = 0; col < numCols; col++) {
        Node variable = data.getVariable(col);
        if (((Variable) variable).isMissingValue(data.getObject(row, col))) {
            return true;
        }
    }
    return false;
}
Also used : ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) Node(edu.cmu.tetrad.graph.Node)

Example 92 with DataSet

use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.

the class EmBayesEstimator method initialize.

/**
 * Sets up the estimator's working state:
 * <ol>
 *   <li>estimates {@code observedIm} from {@code dataSet} using a symmetric
 *       Dirichlet prior (0.5) built on {@code bayesPmObs};</li>
 *   <li>builds {@code mixedData}, a data set with one column per entry of
 *       {@code nodes}: latent nodes get a fresh discrete variable whose
 *       column is filled with -99 (the missing-data marker used here),
 *       all other nodes get their values copied from {@code dataSet};</li>
 *   <li>calls {@code estimateIM(bayesPm, mixedData)};</li>
 *   <li>allocates {@code estimatedCounts}, {@code estimatedCountsDenom} and
 *       {@code condProbs} with the row/column dimensions reported by
 *       {@code estimatedIm} for each node.</li>
 * </ol>
 */
private void initialize() {
    // IM over the measured variables, estimated from a symmetric Dirichlet prior.
    observedIm = DirichletEstimator.estimate(
            DirichletBayesIm.symmetricDirichletIm(bayesPmObs, 0.5), dataSet);

    int caseCount = dataSet.getNumRows();

    // One variable per node, in node order: a new latent variable for latent
    // nodes, the matching data set variable otherwise.
    List<Node> mixedVariables = new LinkedList<>();
    for (int n = 0; n < nodes.length; n++) {
        Node node = nodes[n];
        if (node.getNodeType() != NodeType.LATENT) {
            String observedName = bayesPm.getVariable(node).getName();
            mixedVariables.add(dataSet.getVariable(observedName));
            continue;
        }
        int categoryCount = bayesPm.getNumCategories(node);
        DiscreteVariable latent = new DiscreteVariable(node.getName(), categoryCount);
        latent.setNodeType(NodeType.LATENT);
        mixedVariables.add(latent);
    }

    // Fill the mixed data set column by column.
    DataSet mixed = new ColtDataSet(caseCount, mixedVariables);
    for (int col = 0; col < nodes.length; col++) {
        if (nodes[col].getNodeType() == NodeType.LATENT) {
            // Latent columns carry missing data (-99) for every case.
            for (int row = 0; row < caseCount; row++) {
                mixed.setInt(row, col, -99);
            }
            continue;
        }
        String observedName = bayesPm.getVariable(nodes[col]).getName();
        Node observedVariable = dataSet.getVariable(observedName);
        int sourceCol = dataSet.getColumn(observedVariable);
        for (int row = 0; row < caseCount; row++) {
            mixed.setInt(row, col, dataSet.getInt(row, sourceCol));
        }
    }

    mixedData = mixed;
    allVariables = mixedData.getVariables();

    // Find the bayes net which is parameterized using mixedData or set
    // randomly when that's not possible.
    estimateIM(bayesPm, mixedData);

    // Per-node tables shaped like the conditional probability tables of estimatedIm.
    int nodeCount = nodes.length;
    estimatedCounts = new double[nodeCount][][];
    estimatedCountsDenom = new double[nodeCount][];
    condProbs = new double[nodeCount][][];
    for (int i = 0; i < nodeCount; i++) {
        int rowCount = estimatedIm.getNumRows(i);
        estimatedCounts[i] = new double[rowCount][];
        estimatedCountsDenom[i] = new double[rowCount];
        condProbs[i] = new double[rowCount][];
        for (int j = 0; j < rowCount; j++) {
            int colCount = estimatedIm.getNumColumns(i);
            estimatedCounts[i][j] = new double[colCount];
            condProbs[i][j] = new double[colCount];
        }
    }
}
Also used : DiscreteVariable(edu.cmu.tetrad.data.DiscreteVariable) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) Node(edu.cmu.tetrad.graph.Node) LinkedList(java.util.LinkedList)

Example 93 with DataSet

use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.

the class MlBayesEstimator method estimate.

/**
 * Estimates a Bayes IM using the variables, graph, and parameters in the
 * given Bayes PM and the data columns in the given data set. Each variable
 * in the given Bayes PM must be equal to a variable in the given data set.
 *
 * <p>For every node, parent-value row and category, the cell is set to the
 * conditional probability of that category given the row's parent values,
 * as reported by a {@code DataSetProbs} over the reordered data. If the
 * conditioning proposition admits no combination, the cell is set to
 * {@code Double.NaN}.
 *
 * <p>Side effect: the column subset of {@code dataSet}, taken over the IM's
 * variables, is stored in {@code reorderedDataSetDiscrete}.
 *
 * @param bayesPm the Bayes PM supplying variables, graph and parameters.
 * @param dataSet the data set to estimate from.
 * @return a new {@code MlBayesIm} holding the estimated probabilities.
 * @throws NullPointerException if {@code bayesPm} or {@code dataSet} is null.
 */
public BayesIm estimate(BayesPm bayesPm, DataSet dataSet) {
    if (bayesPm == null) {
        throw new NullPointerException("bayesPm must not be null");
    }
    if (dataSet == null) {
        throw new NullPointerException("dataSet must not be null");
    }
    // Make sure all of the variables in the PM are in the data set;
    // otherwise, estimation is impossible.
    BayesUtils.ensureVarsInData(bayesPm.getVariables(), dataSet);
    // Create a new Bayes IM to store the estimated values.
    BayesIm estimatedIm = new MlBayesIm(bayesPm);
    // Create a subset of the data set with the variables of the IM, in
    // the order of the IM.
    List<Node> variables = estimatedIm.getVariables();
    DataSet columnDataSet2 = dataSet.subsetColumns(variables);
    this.reorderedDataSetDiscrete = columnDataSet2;
    DiscreteProbs discreteProbs = new DataSetProbs(columnDataSet2);
    // We will use the same estimation methods as the updaters, to ensure
    // compatibility.
    Proposition assertion = Proposition.tautology(estimatedIm);
    Proposition condition = Proposition.tautology(estimatedIm);
    Evidence evidence2 = Evidence.tautology(estimatedIm);
    int numNodes = estimatedIm.getNumNodes();
    for (int node = 0; node < numNodes; node++) {
        int numRows = estimatedIm.getNumRows(node);
        int numCols = estimatedIm.getNumColumns(node);
        int[] parents = estimatedIm.getParents(node);
        for (int row = 0; row < numRows; row++) {
            int[] parentValues = estimatedIm.getParentValues(node, row);
            for (int col = 0; col < numCols; col++) {
                // Remove values from the proposition in various ways; if
                // a combination exists in the end, calculate a conditional
                // probability.
                assertion.setToTautology();
                condition.setToTautology();
                // Mirror into the condition whatever the evidence disallows.
                // NOTE(review): evidence2 is created as a tautology, so this
                // presumably removes nothing -- confirm before simplifying.
                for (int i = 0; i < numNodes; i++) {
                    for (int j = 0; j < evidence2.getNumCategories(i); j++) {
                        if (!evidence2.getProposition().isAllowed(i, j)) {
                            condition.removeCategory(i, j);
                        }
                    }
                }
                // Assert "node == col", conditioned on the parents taking
                // this row's values.
                assertion.disallowComplement(node, col);
                for (int k = 0; k < parents.length; k++) {
                    condition.disallowComplement(parents[k], parentValues[k]);
                }
                if (condition.existsCombination()) {
                    double p = discreteProbs.getConditionalProb(assertion, condition);
                    estimatedIm.setProbability(node, row, col, p);
                } else {
                    estimatedIm.setProbability(node, row, col, Double.NaN);
                }
            }
        }
    }
    return estimatedIm;
}
Also used : DataSet(edu.cmu.tetrad.data.DataSet) Node(edu.cmu.tetrad.graph.Node)

Example 94 with DataSet

use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.

the class MlBayesImObs method simulateData.

/**
 * Simulates a sample with the given sample size, using the given seed.
 *
 * <p>The shared {@code RandomUtil} instance is seeded with {@code seed} for
 * the duration of the simulation and its previous seed is reverted
 * afterwards, even if the simulation throws.
 *
 * @param sampleSize      the sample size.
 * @param seed            the random number generator seed; passing in the
 *                        same seed allows you to recreate the simulated
 *                        data (so you don't have to store the sample data).
 * @param latentDataSaved passed through unchanged to
 *                        {@code simulateData(int, boolean)}; presumably
 *                        controls whether data for latent variables is kept
 *                        in the result (confirm against that overload).
 * @return the simulated sample as a DataSet.
 */
public DataSet simulateData(int sampleSize, long seed, boolean latentDataSaved) {
    RandomUtil random = RandomUtil.getInstance();
    long _seed = random.getSeed();
    random.setSeed(seed);
    try {
        return simulateData(sampleSize, latentDataSaved);
    } finally {
        // Always put the previous seed back so a failed simulation does not
        // leave the shared generator on the caller-supplied seed.
        random.revertSeed(_seed);
    }
}
Also used : RandomUtil(edu.cmu.tetrad.util.RandomUtil) DataSet(edu.cmu.tetrad.data.DataSet) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet)

Example 95 with DataSet

use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.

the class MlBayesImObs method simulateDataHelper.

/**
 * Builds an empty data set for the variables to be simulated and delegates
 * the filling of it to {@code constructSample}.
 *
 * <p>A node is included when {@code latentDataSaved} is true or when its
 * node type is {@code NodeType.MEASURED}. Each included node becomes a
 * {@code DiscreteVariable} carrying the node's name and the category names
 * taken from {@code bayesPm}. {@code map[i]} records the index in
 * {@code nodes} of the i-th included variable.
 *
 * @param sampleSize      the sample size (number of rows of the result).
 * @param latentDataSaved if false, nodes that are not measured are left out
 *                        of the result.
 * @return the simulated sample as a DataSet.
 */
private DataSet simulateDataHelper(int sampleSize, boolean latentDataSaved) {
    int[] map = new int[nodes.length];
    List<Node> variables = new LinkedList<>();
    int numIncluded = 0;

    for (int j = 0; j < nodes.length; j++) {
        boolean included = latentDataSaved || nodes[j].getNodeType() == NodeType.MEASURED;
        if (included) {
            int numCategories = bayesPm.getNumCategories(nodes[j]);
            List<String> categories = new LinkedList<>();
            for (int k = 0; k < numCategories; k++) {
                categories.add(bayesPm.getCategory(nodes[j], k));
            }
            variables.add(new DiscreteVariable(nodes[j].getName(), categories));
            // Column numIncluded of the output corresponds to node j.
            map[numIncluded++] = j;
        }
    }

    DataSet dataSet = new ColtDataSet(sampleSize, variables);
    constructSample(sampleSize, numIncluded, dataSet, map);
    return dataSet;
}
Also used : DiscreteVariable(edu.cmu.tetrad.data.DiscreteVariable) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet)

Aggregations

DataSet (edu.cmu.tetrad.data.DataSet)216 Test (org.junit.Test)65 Graph (edu.cmu.tetrad.graph.Graph)64 Node (edu.cmu.tetrad.graph.Node)60 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)48 ArrayList (java.util.ArrayList)45 ColtDataSet (edu.cmu.tetrad.data.ColtDataSet)36 GeneralBootstrapTest (edu.pitt.dbmi.algo.bootstrap.GeneralBootstrapTest)32 EdgeListGraph (edu.cmu.tetrad.graph.EdgeListGraph)29 SemIm (edu.cmu.tetrad.sem.SemIm)28 SemPm (edu.cmu.tetrad.sem.SemPm)28 BootstrapEdgeEnsemble (edu.pitt.dbmi.algo.bootstrap.BootstrapEdgeEnsemble)26 DataModel (edu.cmu.tetrad.data.DataModel)22 Parameters (edu.cmu.tetrad.util.Parameters)22 DiscreteVariable (edu.cmu.tetrad.data.DiscreteVariable)20 File (java.io.File)16 ParseException (java.text.ParseException)16 LinkedList (java.util.LinkedList)14 ICovarianceMatrix (edu.cmu.tetrad.data.ICovarianceMatrix)13 DMSearch (edu.cmu.tetrad.search.DMSearch)10