Use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
The class RemoveMissingCasesDataFilter, method filter.
public DataSet filter(DataSet data) {
    List<Node> variables = data.getVariables();

    // First pass: count the rows that contain no missing values.
    int numRows = 0;

    ROWS:
    for (int row = 0; row < data.getNumRows(); row++) {
        for (int col = 0; col < data.getNumColumns(); col++) {
            Node variable = data.getVariable(col);
            if (((Variable) variable).isMissingValue(data.getObject(row, col))) {
                continue ROWS;
            }
        }
        numRows++;
    }

    DataSet newDataSet = new ColtDataSet(numRows, variables);
    int newRow = 0;

    // Second pass: copy each complete row into the new data set.
    ROWS:
    for (int row = 0; row < data.getNumRows(); row++) {
        for (int col = 0; col < data.getNumColumns(); col++) {
            Node variable = data.getVariable(col);
            if (((Variable) variable).isMissingValue(data.getObject(row, col))) {
                continue ROWS;
            }
        }

        for (int col = 0; col < data.getNumColumns(); col++) {
            newDataSet.setObject(newRow, col, data.getObject(row, col));
        }

        newRow++;
    }

    return newDataSet;
}
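A minimal usage sketch for this filter. The data construction below is an illustrative assumption, not part of the Tetrad source shown here; the class name RemoveMissingCasesExample, the import package for RemoveMissingCasesDataFilter, and its no-argument constructor are assumptions, and -99 is used as the discrete missing-value marker, matching its use in EmBayesEstimator.initialize below.

import edu.cmu.tetrad.data.ColtDataSet;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.DiscreteVariable;
import edu.cmu.tetrad.data.RemoveMissingCasesDataFilter;
import edu.cmu.tetrad.graph.Node;
import java.util.ArrayList;
import java.util.List;

public class RemoveMissingCasesExample {
    public static void main(String[] args) {
        // Build a tiny two-variable discrete data set (illustrative only).
        List<Node> vars = new ArrayList<>();
        vars.add(new DiscreteVariable("X1", 2));
        vars.add(new DiscreteVariable("X2", 2));

        DataSet data = new ColtDataSet(3, vars);
        data.setInt(0, 0, 1);
        data.setInt(0, 1, 0);
        data.setInt(1, 0, -99); // assumed missing-value marker in row 1
        data.setInt(1, 1, 1);
        data.setInt(2, 0, 0);
        data.setInt(2, 1, 1);

        // Drop every row containing at least one missing value.
        DataSet complete = new RemoveMissingCasesDataFilter().filter(data);
        System.out.println(complete.getNumRows()); // expected: 2 (the row with -99 is dropped)
    }
}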
Use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
The class EmBayesEstimator, method initialize.
private void initialize() {
    DirichletBayesIm prior = DirichletBayesIm.symmetricDirichletIm(bayesPmObs, 0.5);
    observedIm = DirichletEstimator.estimate(prior, dataSet);

    // MLBayesEstimator dirichEst = new MLBayesEstimator();
    // observedIm = dirichEst.estimate(bayesPmObs, dataSet);
    // System.out.println("Estimated Bayes IM for Measured Variables: ");
    // System.out.println(observedIm);

    // mixedData should be ddsNm with new columns for the latent variables.
    // Each such column should contain missing data for each case.
    int numFullCases = dataSet.getNumRows();
    List<Node> variables = new LinkedList<>();

    for (Node node : nodes) {
        if (node.getNodeType() == NodeType.LATENT) {
            int numCategories = bayesPm.getNumCategories(node);
            DiscreteVariable latentVar = new DiscreteVariable(node.getName(), numCategories);
            latentVar.setNodeType(NodeType.LATENT);
            variables.add(latentVar);
        } else {
            String name = bayesPm.getVariable(node).getName();
            Node variable = dataSet.getVariable(name);
            variables.add(variable);
        }
    }

    DataSet dsMixed = new ColtDataSet(numFullCases, variables);

    for (int j = 0; j < nodes.length; j++) {
        if (nodes[j].getNodeType() == NodeType.LATENT) {
            for (int i = 0; i < numFullCases; i++) {
                dsMixed.setInt(i, j, -99);
            }
        } else {
            String name = bayesPm.getVariable(nodes[j]).getName();
            Node variable = dataSet.getVariable(name);
            int index = dataSet.getColumn(variable);

            for (int i = 0; i < numFullCases; i++) {
                dsMixed.setInt(i, j, dataSet.getInt(i, index));
            }
        }
    }

    // System.out.println(dsMixed);
    mixedData = dsMixed;
    allVariables = mixedData.getVariables();

    // Find the bayes net which is parameterized using mixedData or set randomly when that's
    // not possible.
    estimateIM(bayesPm, mixedData);

    // The following DEBUG section tests a case specified by P. Spirtes
    // DEBUG TAIL: For use with embayes_l1x1x2x3V3.dat
    /*
    Node l1Node = graph.getNode("L1");
    //int l1Index = bayesImMixed.getNodeIndex(l1Node);
    int l1index = estimatedIm.getNodeIndex(l1Node);
    Node x1Node = graph.getNode("X1");
    //int x1Index = bayesImMixed.getNodeIndex(x1Node);
    int x1Index = estimatedIm.getNodeIndex(x1Node);
    Node x2Node = graph.getNode("X2");
    //int x2Index = bayesImMixed.getNodeIndex(x2Node);
    int x2Index = estimatedIm.getNodeIndex(x2Node);
    Node x3Node = graph.getNode("X3");
    //int x3Index = bayesImMixed.getNodeIndex(x3Node);
    int x3Index = estimatedIm.getNodeIndex(x3Node);
    estimatedIm.setProbability(l1index, 0, 0, 0.5);
    estimatedIm.setProbability(l1index, 0, 1, 0.5);
    //bayesImMixed.setProbability(x1Index, 0, 0, 0.33333);
    //bayesImMixed.setProbability(x1Index, 0, 1, 0.66667);
    estimatedIm.setProbability(x1Index, 0, 0, 0.6); //p(x1 = 0 | l1 = 0)
    estimatedIm.setProbability(x1Index, 0, 1, 0.4); //p(x1 = 1 | l1 = 0)
    estimatedIm.setProbability(x1Index, 1, 0, 0.4); //p(x1 = 0 | l1 = 1)
    estimatedIm.setProbability(x1Index, 1, 1, 0.6); //p(x1 = 1 | l1 = 1)
    //bayesImMixed.setProbability(x2Index, 1, 0, 0.66667);
    //bayesImMixed.setProbability(x2Index, 1, 1, 0.33333);
    estimatedIm.setProbability(x2Index, 1, 0, 0.4); //p(x2 = 0 | l1 = 1)
    estimatedIm.setProbability(x2Index, 1, 1, 0.6); //p(x2 = 1 | l1 = 1)
    estimatedIm.setProbability(x2Index, 0, 0, 0.6); //p(x2 = 0 | l1 = 0)
    estimatedIm.setProbability(x2Index, 0, 1, 0.4); //p(x2 = 1 | l1 = 0)
    //bayesImMixed.setProbability(x3Index, 1, 0, 0.66667);
    //bayesImMixed.setProbability(x3Index, 1, 1, 0.33333);
    estimatedIm.setProbability(x3Index, 1, 0, 0.4); //p(x3 = 0 | l1 = 1)
    estimatedIm.setProbability(x3Index, 1, 1, 0.6); //p(x3 = 1 | l1 = 1)
    estimatedIm.setProbability(x3Index, 0, 0, 0.6); //p(x3 = 0 | l1 = 0)
    estimatedIm.setProbability(x3Index, 0, 1, 0.4); //p(x3 = 1 | l1 = 0)
    */
    // END of TAIL

    // System.out.println("bayes IM estimated by estimateIM");
    // System.out.println(bayesImMixed);
    // System.out.println(estimatedIm);

    estimatedCounts = new double[nodes.length][][];
    estimatedCountsDenom = new double[nodes.length][];
    condProbs = new double[nodes.length][][];

    for (int i = 0; i < nodes.length; i++) {
        // int numRows = bayesImMixed.getNumRows(i);
        int numRows = estimatedIm.getNumRows(i);
        estimatedCounts[i] = new double[numRows][];
        estimatedCountsDenom[i] = new double[numRows];
        condProbs[i] = new double[numRows][];

        // for(int j = 0; j < bayesImMixed.getNumRows(i); j++) {
        for (int j = 0; j < estimatedIm.getNumRows(i); j++) {
            // int numCols = bayesImMixed.getNumColumns(i);
            int numCols = estimatedIm.getNumColumns(i);
            estimatedCounts[i][j] = new double[numCols];
            condProbs[i][j] = new double[numCols];
        }
    }
}
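The first two statements of initialize() show the general Dirichlet-prior estimation pattern over the observed variables. A minimal sketch of that pattern in isolation, assuming a BayesPm over the observed variables and a compatible DataSet already exist (their construction, the sketch's class and method names, and the edu.cmu.tetrad.bayes package locations are assumptions here, not taken from the source above):

import edu.cmu.tetrad.bayes.BayesIm;
import edu.cmu.tetrad.bayes.BayesPm;
import edu.cmu.tetrad.bayes.DirichletBayesIm;
import edu.cmu.tetrad.bayes.DirichletEstimator;
import edu.cmu.tetrad.data.DataSet;

final class DirichletEstimationSketch {

    // Mirror the first two lines of EmBayesEstimator.initialize(): start from a
    // symmetric Dirichlet prior with 0.5 pseudo-counts per cell, then estimate.
    static BayesIm estimateWithPrior(BayesPm pm, DataSet data) {
        DirichletBayesIm prior = DirichletBayesIm.symmetricDirichletIm(pm, 0.5);
        return DirichletEstimator.estimate(prior, data);
    }
}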
Use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
The class MlBayesEstimator, method estimate.
/**
 * Estimates a Bayes IM using the variables, graph, and parameters in the
 * given Bayes PM and the data columns in the given data set. Each variable
 * in the given Bayes PM must be equal to a variable in the given data set.
 */
public BayesIm estimate(BayesPm bayesPm, DataSet dataSet) {
    if (bayesPm == null) {
        throw new NullPointerException();
    }

    if (dataSet == null) {
        throw new NullPointerException();
    }

    // if (DataUtils.containsMissingValue(dataSet)) {
    // throw new IllegalArgumentException("Please remove or impute missing values.");
    // }

    // Make sure all of the variables in the PM are in the data set;
    // otherwise, estimation is impossible.
    BayesUtils.ensureVarsInData(bayesPm.getVariables(), dataSet);

    // Create a new Bayes IM to store the estimated values.
    BayesIm estimatedIm = new MlBayesIm(bayesPm);

    // Create a subset of the data set with the variables of the IM, in
    // the order of the IM.
    List<Node> variables = estimatedIm.getVariables();
    DataSet columnDataSet2 = dataSet.subsetColumns(variables);
    this.reorderedDataSetDiscrete = columnDataSet2;
    DiscreteProbs discreteProbs = new DataSetProbs(columnDataSet2);

    // We will use the same estimation methods as the updaters, to ensure
    // compatibility.
    Proposition assertion = Proposition.tautology(estimatedIm);
    Proposition condition = Proposition.tautology(estimatedIm);
    Evidence evidence2 = Evidence.tautology(estimatedIm);

    int numNodes = estimatedIm.getNumNodes();

    for (int node = 0; node < numNodes; node++) {
        int numRows = estimatedIm.getNumRows(node);
        int numCols = estimatedIm.getNumColumns(node);
        int[] parents = estimatedIm.getParents(node);

        for (int row = 0; row < numRows; row++) {
            int[] parentValues = estimatedIm.getParentValues(node, row);

            for (int col = 0; col < numCols; col++) {

                // Remove values from the proposition in various ways; if
                // a combination exists in the end, calculate a conditional
                // probability.
                assertion.setToTautology();
                condition.setToTautology();

                for (int i = 0; i < numNodes; i++) {
                    for (int j = 0; j < evidence2.getNumCategories(i); j++) {
                        if (!evidence2.getProposition().isAllowed(i, j)) {
                            condition.removeCategory(i, j);
                        }
                    }
                }

                assertion.disallowComplement(node, col);

                for (int k = 0; k < parents.length; k++) {
                    condition.disallowComplement(parents[k], parentValues[k]);
                }

                if (condition.existsCombination()) {
                    double p = discreteProbs.getConditionalProb(assertion, condition);
                    estimatedIm.setProbability(node, row, col, p);
                } else {
                    estimatedIm.setProbability(node, row, col, Double.NaN);
                }
            }
        }
    }

    return estimatedIm;
}
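A hedged usage sketch for this estimator. The BayesPm and the (complete, discrete) DataSet are assumed to come from elsewhere, and the no-argument MlBayesEstimator constructor, the sketch's class name, and the import package locations are assumptions:

import edu.cmu.tetrad.bayes.BayesIm;
import edu.cmu.tetrad.bayes.BayesPm;
import edu.cmu.tetrad.bayes.MlBayesEstimator;
import edu.cmu.tetrad.data.DataSet;

final class MlEstimationSketch {

    // Fit maximum-likelihood conditional probability tables for the given PM.
    // Every variable in pm must also appear in discreteData (estimate() checks
    // this via BayesUtils.ensureVarsInData), and missing values should be
    // removed or imputed beforehand.
    static BayesIm fit(BayesPm pm, DataSet discreteData) {
        return new MlBayesEstimator().estimate(pm, discreteData);
    }
}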
Use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
The class MlBayesImObs, method simulateData.
/**
 * Simulates a sample with the given sample size.
 *
 * @param sampleSize the sample size.
 * @param seed       the random number generator seed; passing in the same
 *                   seed allows you to recreate the simulated data (so you
 *                   don't have to store the sample data).
 * @return the simulated sample as a DataSet.
 */
public DataSet simulateData(int sampleSize, long seed, boolean latentDataSaved) {
    RandomUtil random = RandomUtil.getInstance();
    long _seed = random.getSeed();
    random.setSeed(seed);
    DataSet dataSet = simulateData(sampleSize, latentDataSaved);
    random.revertSeed(_seed);
    return dataSet;
}
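As the javadoc notes, the seed makes the draw reproducible: the method saves the current RandomUtil seed, sets the requested one, simulates, and restores the old seed. A small illustrative sketch, assuming an already-constructed MlBayesImObs (its construction is not shown in this snippet) and hypothetical class and method names:

import edu.cmu.tetrad.bayes.MlBayesImObs;
import edu.cmu.tetrad.data.DataSet;

final class SeededSimulationSketch {

    // Two draws with the same seed should yield identical samples, since
    // simulateData(sampleSize, seed, latentDataSaved) temporarily pins the
    // global RandomUtil seed.
    static void demo(MlBayesImObs im) {
        DataSet a = im.simulateData(500, 42L, false);
        DataSet b = im.simulateData(500, 42L, false);
        // With the same seed, a and b should contain identical values.
        System.out.println(a);
        System.out.println(b);
    }
}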
Use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
The class MlBayesImObs, method simulateDataHelper.
/**
 * Simulates a sample with the given sample size.
 *
 * @param sampleSize      the sample size.
 * @param latentDataSaved if true, latent variables are included in the
 *                        simulated data; otherwise only measured variables
 *                        are kept.
 * @return the simulated sample as a DataSet.
 */
private DataSet simulateDataHelper(int sampleSize, boolean latentDataSaved) {
    int numMeasured = 0;
    int[] map = new int[nodes.length];
    List<Node> variables = new LinkedList<>();

    for (int j = 0; j < nodes.length; j++) {
        if (!latentDataSaved && nodes[j].getNodeType() != NodeType.MEASURED) {
            continue;
        }

        int numCategories = bayesPm.getNumCategories(nodes[j]);
        List<String> categories = new LinkedList<>();

        for (int k = 0; k < numCategories; k++) {
            categories.add(bayesPm.getCategory(nodes[j], k));
        }

        DiscreteVariable var = new DiscreteVariable(nodes[j].getName(), categories);
        variables.add(var);

        // Record which node each retained column corresponds to.
        int index = ++numMeasured - 1;
        map[index] = j;
    }

    DataSet dataSet = new ColtDataSet(sampleSize, variables);
    constructSample(sampleSize, numMeasured, dataSet, map);
    return dataSet;
}
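The latentDataSaved flag controls whether latent columns survive into the simulated data: when it is false, simulateDataHelper skips every non-measured node, and map records which node each retained column came from. A brief sketch of the observable effect, again assuming an already-constructed MlBayesImObs, hypothetical class and method names, and that the two-argument simulateData overload called internally above is also public:

import edu.cmu.tetrad.bayes.MlBayesImObs;
import edu.cmu.tetrad.data.DataSet;

final class LatentColumnsSketch {

    // Compare column counts with and without latent variables saved.
    static void show(MlBayesImObs im) {
        DataSet measuredOnly = im.simulateData(100, false); // latent nodes skipped
        DataSet withLatents = im.simulateData(100, true);   // latent columns kept
        // withLatents has at least as many columns as measuredOnly.
        System.out.println(measuredOnly.getNumColumns() + " <= " + withLatents.getNumColumns());
    }
}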