use of edu.cmu.tetrad.util.RandomUtil in project tetrad by cmu-phil.
the class TestBoxDataSet method testDiscrete.
/**
 * Fills a discrete BoxDataSet with random category indices and checks that a
 * data set built from it via the BoxDataSet copy constructor compares equal.
 */
@Test
public void testDiscrete() {
    final int rows = 10;
    final int cols = 5;

    // Five discrete variables X1..X5, each declared with 3 categories.
    List<Node> nodes = new LinkedList<>();
    for (int index = 1; index <= cols; index++) {
        nodes.add(new DiscreteVariable("X" + index, 3));
    }

    BoxDataSet original = new BoxDataSet(new DoubleDataBox(rows, nodes.size()), nodes);

    // Populate every cell, row by row, with a value drawn from nextInt(3).
    RandomUtil rng = RandomUtil.getInstance();
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            int category = rng.nextInt(3);
            original.setInt(r, c, category);
        }
    }

    BoxDataSet copy = new BoxDataSet(original);
    assertEquals(original, copy);
}
use of edu.cmu.tetrad.util.RandomUtil in project tetrad by cmu-phil.
the class Hsim method hybridsimulate.
// **************Public methods***********************//
/**
 * Resimulates the values of the nodes in {@code simnodes}, one data row at a time.
 *
 * <p>Steps, as performed below:
 * <ol>
 *   <li>collect the Markov blanket of every node in {@code simnodes} (via {@code mb(mydag, node)})
 *       together with the simulated nodes themselves;</li>
 *   <li>take the subgraph of {@code mydag} over those nodes and fit a Dirichlet Bayes IM
 *       to it from {@code data};</li>
 *   <li>for each row, condition on the row's values of the blanket nodes, then for each
 *       simulated node draw a new category from its conditional marginal, write it into
 *       {@code data}, and add the node to the conditioning set for the remaining
 *       simulated nodes of that row.</li>
 * </ol>
 *
 * <p>NOTE: the nodes are resimulated in the iteration order of {@code simnodes}; the original
 * author's comments suggest a causal ordering should be used instead.
 *
 * @return the {@code data} field itself, modified in place.
 * @throws IllegalArgumentException if a simulated node is not found in the evidence
 *                                  (node index -1).
 */
public DataSet hybridsimulate() {
    // TODO: this needs to be made general, rather than only for two specifically named nodes.
    if (verbose) {
        System.out.println("Finding a Markov blanket for resimulated nodes");
    }

    // Union of the Markov blankets of all resimulated nodes.
    Set<Node> mbAll = new HashSet<Node>();
    for (Node node : simnodes) {
        mbAll.addAll(mb(mydag, node));
    }
    // Make sure all the simnodes are in mbAll; a disconnected node could cause errors later otherwise.
    mbAll.addAll(simnodes);
    if (verbose) {
        System.out.println("The Markov Blanket is " + mbAll);
    }

    // Find the subgraph for the resimulated variables and their Markov blanket.
    if (verbose) {
        System.out.println("Finding a subgraph over the Markov Blanket and Resimulated Nodes");
    }
    // The subgraph method needs a List as input, but mbAll is a Set.
    List<Node> mbListAll = new ArrayList<Node>(mbAll);
    Graph subgraph = mydag.subgraph(mbListAll);

    // Learn an instantiated model over the subgraph.
    if (verbose) {
        System.out.println("Learning an instantiated model for the subgraph");
    }
    // Learn a Dirichlet IM for the subgraph using the data set.
    BayesPm subgraphPM = new BayesPm(subgraph);
    DirichletBayesIm subgraphIM = DirichletBayesIm.symmetricDirichletIm(subgraphPM, 1.0);
    DirichletEstimator estimator = new DirichletEstimator();
    DirichletBayesIm fittedsubgraphIM = estimator.estimate(subgraphIM, data);

    // Use the learned instantiated subgraph model to create the resimulated data.
    if (verbose) {
        System.out.println("Starting resimulation loop");
    }

    // The "outer" blanket: every blanket node that is not itself resimulated. This is a copy,
    // so mbAll is no longer mutated as a side effect of building the conditioning sets.
    Set<Node> mbOuter = new HashSet<Node>(mbAll);
    mbOuter.removeAll(simnodes);

    RandomUtil random = RandomUtil.getInstance();

    // Loop through each row of the data set, conditioning and drawing values each time.
    for (int row = 0; row < data.getNumRows(); row++) {
        Evidence evidence = Evidence.tautology(fittedsubgraphIM);

        // Each row starts by conditioning on the outer blanket only; resimulated nodes are
        // added to this per-row copy as they receive their new values.
        Set<Node> conditionNodes = new HashSet<Node>(mbOuter);

        // TODO: this needs to be more careful than a for-each; a causal ordering of the
        // resimulated nodes should probably be used.
        for (Node node : simnodes) {
            // Set the value of every conditioning node in the evidence proposition.
            for (Node conditionNode : conditionNodes) {
                int conditionIndex = evidence.getNodeIndex(conditionNode.getName());
                int conditionColumn = data.getColumn(conditionNode);
                // Relies on DataSet.getInt returning the category index for discrete variables
                // ("For discrete variables, this returns the category index of the datum for
                // the variable at that column.").
                evidence.getProposition().setCategory(conditionIndex, data.getInt(row, conditionColumn));
            }

            // Use the evidence to create the updater.
            RowSummingExactUpdater conditionUpdate = new RowSummingExactUpdater(fittedsubgraphIM, evidence);

            int nodeIndex = evidence.getNodeIndex(node.getName());
            // Complain if no node of that name is found, which makes nodeIndex = -1.
            if (nodeIndex == -1) {
                throw new IllegalArgumentException("Variable " + node.getName() + " was not found.");
            }

            // Categories are assumed to be indexed 0, 1, ..., n-1 where n is the number of
            // categories reported by the evidence.
            int numCat = evidence.getNumCategories(nodeIndex);

            // Generate a random number and count probability mass per category until we hit it.
            double cutoff = random.nextDouble();
            int newValue = drawCategory(conditionUpdate, nodeIndex, numCat, cutoff);

            // Only overwrite the datum when a category could actually be drawn; previously an
            // invalid sentinel (-99) was written when the accumulated mass never reached the cutoff.
            if (newValue >= 0) {
                data.setInt(row, data.getColumn(node), newValue);
            }

            // At the end, add this node to the conditioning set.
            conditionNodes.add(node);
        }
    }
    return data;
}

/**
 * Picks the first category whose cumulative marginal probability reaches {@code cutoff}.
 *
 * <p>If the accumulated mass never reaches the cutoff (e.g. the marginals sum to slightly
 * less than the cutoff because of floating-point rounding), the last category with strictly
 * positive probability is returned instead.
 *
 * @param updater   updater supplying the marginals via {@code getMarginal(nodeIndex, category)}.
 * @param nodeIndex index of the node being resimulated.
 * @param numCat    number of categories to iterate over (0 .. numCat-1).
 * @param cutoff    the random threshold to reach.
 * @return the selected category index, or -1 if no category has positive probability
 *         (including when the marginals are NaN or {@code numCat} is 0).
 */
private static int drawCategory(RowSummingExactUpdater updater, int nodeIndex, int numCat, double cutoff) {
    double sum = 0.0;
    int lastPositive = -1;
    for (int category = 0; category < numCat; category++) {
        double probability = updater.getMarginal(nodeIndex, category);
        // NaN compares false here, so NaN marginals never become the fallback.
        if (probability > 0.0) {
            lastPositive = category;
        }
        sum += probability;
        if (sum >= cutoff) {
            return category;
        }
    }
    return lastPositive;
}
use of edu.cmu.tetrad.util.RandomUtil in project tetrad by cmu-phil.
the class MlBayesImObs method simulateData.
/**
 * Simulates a sample with the given sample size, temporarily seeding the shared
 * {@code RandomUtil} instance with {@code seed}.
 *
 * <p>The seed that was in effect before the call is passed back to
 * {@code RandomUtil.revertSeed} afterwards, even if the simulation throws.
 *
 * @param sampleSize      the sample size.
 * @param seed            the random number generator seed; allows you to recreate the
 *                        simulated data by passing in the same seed (so you don't have
 *                        to store the sample data).
 * @param latentDataSaved passed through unchanged to {@code simulateData(int, boolean)}.
 * @return the simulated sample as a DataSet.
 */
public DataSet simulateData(int sampleSize, long seed, boolean latentDataSaved) {
    RandomUtil random = RandomUtil.getInstance();
    long _seed = random.getSeed();
    random.setSeed(seed);
    try {
        return simulateData(sampleSize, latentDataSaved);
    } finally {
        // Always undo the temporary seeding so a failed simulation does not leave the
        // shared generator stuck on the caller-supplied seed.
        random.revertSeed(_seed);
    }
}
use of edu.cmu.tetrad.util.RandomUtil in project tetrad by cmu-phil.
the class BayesPm method pickNumVals.
/**
 * Picks a number of categories between {@code lowerBound} and {@code upperBound}
 * using the shared {@code RandomUtil} instance.
 *
 * <p>NOTE(review): the result is inclusive of both bounds provided
 * {@code RandomUtil.nextInt(n)} yields values in 0..n-1 — confirm against RandomUtil.
 *
 * @param lowerBound the smallest allowed number of categories; must be at least 2.
 * @param upperBound the largest allowed number of categories; must not be below
 *                   {@code lowerBound}.
 * @return {@code lowerBound} plus the value drawn from
 *         {@code nextInt(upperBound - lowerBound + 1)}.
 * @throws IllegalArgumentException if either bound violates the constraints above.
 */
private static int pickNumVals(int lowerBound, int upperBound) {
    // Guard clauses: reject invalid ranges before touching the generator.
    if (lowerBound < 2) {
        throw new IllegalArgumentException("Lower bound must be >= 2: " + lowerBound);
    }
    if (lowerBound > upperBound) {
        throw new IllegalArgumentException("Upper bound for number of categories must be >= lower bound.");
    }

    int span = upperBound - lowerBound + 1;
    int offset = RandomUtil.getInstance().nextInt(span);
    return lowerBound + offset;
}
use of edu.cmu.tetrad.util.RandomUtil in project tetrad by cmu-phil.
the class DirichletBayesIm method simulateData.
/**
 * Simulates a random sample with the number of cases equal to
 * <code>sampleSize</code>, temporarily seeding the shared {@code RandomUtil}
 * instance with {@code seed}.
 *
 * <p>The seed that was in effect before the call is passed back to
 * {@code RandomUtil.revertSeed} afterwards, even if the simulation throws.
 *
 * @param sampleSize      the sample size.
 * @param seed            the random number generator seed; allows you to recreate the
 *                        simulated data by passing in the same seed (so you don't have
 *                        to store the sample data).
 * @param latentDataSaved true iff data for latent variables should be
 *                        included in the simulated data set.
 * @return the simulated sample as a DataSet.
 */
public DataSet simulateData(int sampleSize, long seed, boolean latentDataSaved) {
    RandomUtil random = RandomUtil.getInstance();
    long _seed = random.getSeed();
    random.setSeed(seed);
    try {
        return simulateData(sampleSize, latentDataSaved);
    } finally {
        // Always undo the temporary seeding so a failed simulation does not leave the
        // shared generator stuck on the caller-supplied seed.
        random.revertSeed(_seed);
    }
}
Aggregations