use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
the class GdistanceTest method main.
/**
 * Builds two random graphs over the same 16 continuous variables, reads a location map from
 * {@code locationMap.txt}, asks {@link Gdistance} for the distances between the two graphs,
 * and writes the result both to standard output and to {@code Gdistances.txt}.
 *
 * <p>Any exception raised while reading the map, computing the distances, or writing the
 * output file is caught and its stack trace is printed; nothing is rethrown.
 *
 * @param args ignored
 */
public static void main(String... args) {
    // first generate a couple random graphs
    int numVars = 16;
    int numEdges = 16;
    List<Node> vars = new ArrayList<>();
    for (int i = 0; i < numVars; i++) {
        vars.add(new ContinuousVariable("X" + i));
    }
    Graph testdag1 = GraphUtils.randomGraphRandomForwardEdges(vars, 0, numEdges, 30, 15, 15, false, true);
    Graph testdag2 = GraphUtils.randomGraphRandomForwardEdges(vars, 0, numEdges, 30, 15, 15, false, true);

    // load the location map; the working directory is printed to help diagnose a missing file
    String workingDirectory = System.getProperty("user.dir");
    System.out.println(workingDirectory);
    Path mapPath = Paths.get("locationMap.txt");
    System.out.println(mapPath);
    TabularDataReader dataReaderMap = new ContinuousTabularDataFileReader(mapPath.toFile(), Delimiter.COMMA);
    try {
        DataSet locationMap = (DataSet) DataConvertUtils.toDataModel(dataReaderMap.readInData());

        // then compare their distance
        double xdist = 2.4;
        double ydist = 2.4;
        double zdist = 2;
        Gdistance gdist = new Gdistance(locationMap, xdist, ydist, zdist);
        List<Double> output = gdist.distances(testdag1, testdag2);
        System.out.println(output);

        // try-with-resources guarantees the file handle is released even if writing fails
        try (PrintWriter writer = new PrintWriter("Gdistances.txt", "UTF-8")) {
            writer.println(output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
the class HsimContinuous method hybridsimulate.
// **************Public methods***********************//
/**
 * Resimulates the columns of {@code data} that belong to {@code simnodes}, one row at a time.
 *
 * <p>Steps, as performed below:
 * <ol>
 *   <li>collect the result of {@code mb(mydag, node)} for every node in {@code simnodes},
 *       plus the nodes themselves;</li>
 *   <li>take the subgraph of {@code mydag} over that node set and estimate a {@link SemIm}
 *       for it from {@code data};</li>
 *   <li>for each row, set the row's values of the non-resimulated nodes as evidence, obtain
 *       the updated SEM IM, draw a single case from it, and overwrite the row's values for
 *       the resimulated nodes with the drawn ones.</li>
 * </ol>
 *
 * @return the same {@code data} object this instance holds, modified in place
 */
public DataSet hybridsimulate() {
    // NOTE: this needs to be made general, rather than only for two specific named nodes.
    if (verbose)
        System.out.println("Finding a Markov blanket for resimulated nodes");
    // union of the Markov blankets of all resimulated nodes
    Set<Node> mbAll = new HashSet<Node>();
    for (Node node : simnodes) {
        mbAll.addAll(mb(mydag, node));
    }
    // make sure all the simnodes are in mbAll! a disconnected node could cause errors later otherwise
    mbAll.addAll(simnodes);
    if (verbose)
        System.out.println("The Markov Blanket is " + mbAll);

    // Find the subgraph for the resimulated variables and their Markov blanket.
    if (verbose)
        System.out.println("Finding a subgraph over the Markov Blanket and Resimulated Nodes");
    // need a List as input for subgraph method, but mbAll is a Set
    List<Node> mbListAll = new ArrayList<Node>(mbAll);
    Graph subgraph = mydag.subgraph(mbListAll);

    // Learn an instantiated (continuous, SEM) model over the subgraph using the data set.
    if (verbose)
        System.out.println("Learning an instantiated model for the subgraph");
    SemPm subgraphPM = new SemPm(subgraph);
    SemEstimator subgraphEstimator = new SemEstimator(data, subgraphPM);
    SemIm subgraphIM = subgraphEstimator.estimate();

    // The conditioning set is the blanket minus the resimulated nodes. It does not depend on
    // the row, so it is computed once, on a copy, instead of stripping mbAll on every row.
    Set<Node> mbOuter = new HashSet<Node>(mbAll);
    mbOuter.removeAll(simnodes);

    // Use the learned instantiated subgraph model to create the resimulated data.
    if (verbose)
        System.out.println("Starting resimulation loop");
    // loop through each row of the data set, conditioning and drawing values each time.
    for (int row = 0; row < data.getNumRows(); row++) {
        // create a new evidence object
        SemEvidence evidence = new SemEvidence(subgraphIM);
        // set the observed value of every conditioned-upon node for this row
        for (Node i : mbOuter) {
            int nodeColumn = data.getColumn(i);
            evidence.getProposition().setValue(i, data.getDouble(row, nodeColumn));
        }
        // use the new Evidence object to create the updater
        SemUpdater conditionUpdate = new SemUpdater(subgraphIM);
        conditionUpdate.setEvidence(evidence);
        SemIm updatedIM = conditionUpdate.getUpdatedSemIm();
        // draw values for the nodes we're resimming
        DataSet newValues = updatedIM.simulateData(1, false);
        // take these new simnodes values and replace the old values in the data set with them
        for (Node node : simnodes) {
            data.setDouble(row, data.getColumn(node), newValues.getDouble(0, newValues.getColumn(node)));
        }
    }
    return data;
}
use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
the class HsimEvalFromData method main.
/**
 * Compares full resimulation (Fsim) with hybrid resimulation (Hsim) as predictors of the
 * accuracy of FGES on a data set loaded from disk.
 *
 * <p>On every iteration the graph is read from {@code graph/graph.1.txt} and the data from
 * {@code data/data.1.txt}; FGES is run on the data and scored against the loaded graph to get
 * the target errors. Fsim and Hsim errors are then computed, and the squared differences from
 * the target errors are collected. After all iterations the collected squared errors are
 * aggregated per parameter setting and written to {@code latexTable.txt} (as a LaTeX table)
 * and {@code HvsF-SimulationEvaluation.txt} (as plain text).
 *
 * <p>Any exception is caught and its stack trace printed; nothing is rethrown.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    long timestart = System.nanoTime();
    System.out.println("Beginning Evaluation");
    String nl = System.lineSeparator();
    String output = "Simulation edu.cmu.tetrad.study output comparing Fsim and Hsim on predicting graph discovery accuracy" + nl;
    int iterations = 100;
    // vars, cases and edgeratio are only used for labelling; vars and cases are overwritten
    // from the loaded data set on every iteration.
    int vars = 20;
    int cases = 500;
    int edgeratio = 3;
    List<Integer> hsimRepeat = Arrays.asList(40);
    List<Integer> fsimRepeat = Arrays.asList(40);

    // one list of squared-error records per Fsim repeat setting
    List<PRAOerrors>[] fsimErrsByPars = new ArrayList[fsimRepeat.size()];
    for (int f = 0; f < fsimErrsByPars.length; f++) {
        fsimErrsByPars[f] = new ArrayList<PRAOerrors>();
    }
    // one list of squared-error records per (resim size, Hsim repeat) setting; only one resim size is used
    List<PRAOerrors>[][] hsimErrsByPars = new ArrayList[1][hsimRepeat.size()];
    for (int h = 0; h < hsimErrsByPars[0].length; h++) {
        hsimErrsByPars[0][h] = new ArrayList<PRAOerrors>();
    }

    try {
        for (int iterate = 0; iterate < iterations; iterate++) {
            System.out.println("iteration " + iterate);

            // ---- load the data and graph ----
            DataSet data1;
            Graph graph1 = GraphUtils.loadGraphTxt(new File("graph/graph.1.txt"));
            Dag odag = new Dag(graph1);
            Set<String> eVars = new HashSet<String>();
            eVars.add("MULT");
            Path dataFile = Paths.get("data/data.1.txt");
            TabularDataReader dataReader = new ContinuousTabularDataFileReader(dataFile.toFile(), Delimiter.TAB);
            data1 = (DataSet) DataConvertUtils.toDataModel(dataReader.readInData(eVars));
            vars = data1.getNumColumns();
            cases = data1.getNumRows();
            edgeratio = 3;

            // ---- calculate target errors ----
            ICovarianceMatrix newcov = new CovarianceMatrixOnTheFly(data1);
            SemBicScore oscore = new SemBicScore(newcov);
            Fges ofgs = new Fges(oscore);
            ofgs.setVerbose(false);
            ofgs.setNumPatternsToStore(0);
            // this is the original FGS output on the data
            Graph oFGSGraph = ofgs.search();
            PRAOerrors oErrors = new PRAOerrors(HsimUtils.errorEval(oFGSGraph, odag), "target errors");

            // ---- step 1: full resim. iterate through the estimator parameters (just repeat num) ----
            for (int whichFrepeat = 0; whichFrepeat < fsimRepeat.size(); whichFrepeat++) {
                ArrayList<PRAOerrors> errorsList = new ArrayList<PRAOerrors>();
                for (int r = 0; r < fsimRepeat.get(whichFrepeat); r++) {
                    PatternToDag pickdag = new PatternToDag(oFGSGraph);
                    Graph fgsDag = pickdag.patternToDagMeek();
                    Dag fgsdag2 = new Dag(fgsDag);
                    // then fit an IM to this dag and the data (GeneralizedSemEstimator seems to bug out)
                    SemPm simSemPm = new SemPm(fgsdag2);
                    SemEstimator simSemEstimator = new SemEstimator(data1, simSemPm);
                    SemIm fittedIM = simSemEstimator.estimate();
                    DataSet simData = fittedIM.simulateData(data1.getNumRows(), false);
                    // after making the full resim data (simData), run FGS on that
                    ICovarianceMatrix simcov = new CovarianceMatrixOnTheFly(simData);
                    SemBicScore simscore = new SemBicScore(simcov);
                    Fges simfgs = new Fges(simscore);
                    simfgs.setVerbose(false);
                    simfgs.setNumPatternsToStore(0);
                    Graph simGraphOut = simfgs.search();
                    PRAOerrors simErrors = new PRAOerrors(HsimUtils.errorEval(simGraphOut, fgsdag2), "Fsim errors " + r);
                    errorsList.add(simErrors);
                }
                PRAOerrors avErrors = new PRAOerrors(errorsList, "Average errors for Fsim at repeat=" + fsimRepeat.get(whichFrepeat));
                // calculate the squared errors of prediction, store all these errors in a list
                double FsimAR2 = (avErrors.getAdjRecall() - oErrors.getAdjRecall()) * (avErrors.getAdjRecall() - oErrors.getAdjRecall());
                double FsimAP2 = (avErrors.getAdjPrecision() - oErrors.getAdjPrecision()) * (avErrors.getAdjPrecision() - oErrors.getAdjPrecision());
                double FsimOR2 = (avErrors.getOrientRecall() - oErrors.getOrientRecall()) * (avErrors.getOrientRecall() - oErrors.getOrientRecall());
                double FsimOP2 = (avErrors.getOrientPrecision() - oErrors.getOrientPrecision()) * (avErrors.getOrientPrecision() - oErrors.getOrientPrecision());
                PRAOerrors Fsim2 = new PRAOerrors(new double[] { FsimAR2, FsimAP2, FsimOR2, FsimOP2 }, "squared errors for Fsim at repeat=" + fsimRepeat.get(whichFrepeat));
                // add the fsim squared errors to the appropriate list
                fsimErrsByPars[whichFrepeat].add(Fsim2);
            }

            // ---- step 2: hybrid sim. iterate through combos of params (repeat num, resimsize) ----
            for (int whichHrepeat = 0; whichHrepeat < hsimRepeat.size(); whichHrepeat++) {
                HsimRepeatAC study = new HsimRepeatAC(data1);
                PRAOerrors HsimErrors = new PRAOerrors(study.run(1, hsimRepeat.get(whichHrepeat)), "Hsim errors" + "at rsize=" + 1 + " repeat=" + hsimRepeat.get(whichHrepeat));
                // calculate the squared errors of prediction
                double HsimAR2 = (HsimErrors.getAdjRecall() - oErrors.getAdjRecall()) * (HsimErrors.getAdjRecall() - oErrors.getAdjRecall());
                double HsimAP2 = (HsimErrors.getAdjPrecision() - oErrors.getAdjPrecision()) * (HsimErrors.getAdjPrecision() - oErrors.getAdjPrecision());
                double HsimOR2 = (HsimErrors.getOrientRecall() - oErrors.getOrientRecall()) * (HsimErrors.getOrientRecall() - oErrors.getOrientRecall());
                double HsimOP2 = (HsimErrors.getOrientPrecision() - oErrors.getOrientPrecision()) * (HsimErrors.getOrientPrecision() - oErrors.getOrientPrecision());
                PRAOerrors Hsim2 = new PRAOerrors(new double[] { HsimAR2, HsimAP2, HsimOR2, HsimOP2 }, "squared errors for Hsim, rsize=" + 1 + " repeat=" + hsimRepeat.get(whichHrepeat));
                hsimErrsByPars[0][whichHrepeat].add(Hsim2);
            }
        }

        // Aggregate the squared errors for each set of fsim/hsim params across all iterations
        PRAOerrors[] fMSE = new PRAOerrors[fsimRepeat.size()];
        PRAOerrors[][] hMSE = new PRAOerrors[1][hsimRepeat.size()];
        // Fsim rows come first in the table, followed by the Hsim rows
        String[][] latexTableArray = new String[1 * hsimRepeat.size() + fsimRepeat.size()][5];
        for (int j = 0; j < fMSE.length; j++) {
            fMSE[j] = new PRAOerrors(fsimErrsByPars[j], "MSE for Fsim at vars=" + vars + " edgeratio=" + edgeratio + " cases=" + cases + " frepeat=" + fsimRepeat.get(j) + " iterations=" + iterations);
            output = output + fMSE[j].allToString() + nl;
            latexTableArray[j] = prelimToPRAOtable(fMSE[j]);
        }
        for (int j = 0; j < hMSE.length; j++) {
            for (int k = 0; k < hMSE[j].length; k++) {
                hMSE[j][k] = new PRAOerrors(hsimErrsByPars[j][k], "MSE for Hsim at vars=" + vars + " edgeratio=" + edgeratio + " cases=" + cases + " rsize=" + 1 + " repeat=" + hsimRepeat.get(k) + " iterations=" + iterations);
                output = output + hMSE[j][k].allToString() + nl;
                latexTableArray[fsimRepeat.size() + j * hMSE[j].length + k] = prelimToPRAOtable(hMSE[j][k]);
            }
        }

        // record all the params, the base error values, and the fsim/hsim mean squared errors
        String latexTable = HsimUtils.makeLatexTable(latexTableArray);
        // try-with-resources releases each file handle even if writing fails
        try (PrintWriter writer = new PrintWriter("latexTable.txt", "UTF-8")) {
            writer.println(latexTable);
        }
        try (PrintWriter writer2 = new PrintWriter("HvsF-SimulationEvaluation.txt", "UTF-8")) {
            writer2.println(output);
        }
        long timestop = System.nanoTime();
        // integer division: the duration is reported in whole seconds
        System.out.println("Evaluation Concluded. Duration: " + (timestop - timestart) / 1000000000 + "s");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
the class HsimRobustCompare method run.
// *************Public Methods*****************//
/**
 * Runs one comparison of FGES error statistics on (a) data simulated from a random DAG,
 * (b) data fully resimulated from a model fitted to the FGES output, and (c) hybrid
 * resimulation via {@link HsimRepeatAutoRun}.
 *
 * <p>The shared {@link RandomUtil} instance is seeded with a fixed value at the start of every
 * call.
 *
 * @param numVars         number of variables in the random DAG
 * @param edgesPerNode    multiplied by {@code numVars} and truncated to get the edge count
 * @param numCases        number of rows to simulate for the original and the resimulated data
 * @param penaltyDiscount penalty discount handed to both FGES runs
 * @param resimSize       first argument passed to {@code HsimRepeatAutoRun.run}
 * @param repeat          second argument passed to {@code HsimRepeatAutoRun.run}
 * @param verbose         when true, prints the FGES output graph and the error arrays
 * @return a list holding, in order, the error arrays for the original data, the full
 *         resimulation, and the hybrid resimulation
 */
public static List<double[]> run(int numVars, double edgesPerNode, int numCases, double penaltyDiscount, int resimSize, int repeat, boolean verbose) {
    // Generate the ground truth: a random DAG and a data set drawn from a random Bayes IM on it.
    RandomUtil.getInstance().setSeed(1450184147770L);
    final int edgeCount = (int) (numVars * edgesPerNode);
    List<Node> nodes = new ArrayList<>();
    for (int idx = 0; idx < numVars; idx++) {
        nodes.add(new ContinuousVariable("X" + idx));
    }
    Graph trueDag = GraphUtils.randomGraphRandomForwardEdges(nodes, 0, edgeCount, 30, 15, 15, false, true);
    BayesPm truePm = new BayesPm(trueDag, 2, 2);
    BayesIm trueIm = new MlBayesIm(truePm, MlBayesIm.RANDOM);
    DataSet originalData = trueIm.simulateData(numCases, false);

    // Run FGES on the original data and score its output against the true DAG.
    Fges originalSearch = new Fges(new BDeuScore(originalData));
    originalSearch.setVerbose(false);
    originalSearch.setNumPatternsToStore(0);
    originalSearch.setPenaltyDiscount(penaltyDiscount);
    Graph originalPattern = originalSearch.search();
    if (verbose) {
        System.out.println(originalPattern);
    }
    double[] originalErrors = HsimUtils.errorEval(originalPattern, trueDag);
    if (verbose) {
        System.out.println(originalErrors[0] + " " + originalErrors[1] + " " + originalErrors[2] + " " + originalErrors[3] + " " + originalErrors[4]);
    }

    // Full resimulation: pick a DAG from the FGES pattern, fit it to the data, simulate anew.
    Graph pickedGraph = new PatternToDag(originalPattern).patternToDagMeek();
    Dag pickedDag = new Dag(pickedGraph);
    BayesPm resimPm = new BayesPm(pickedDag, truePm);
    DirichletBayesIm priorIm = DirichletBayesIm.symmetricDirichletIm(resimPm, 1.0);
    DirichletEstimator dirichletEstimator = new DirichletEstimator();
    DirichletBayesIm fittedIm = dirichletEstimator.estimate(priorIm, originalData);
    DataSet resimData = fittedIm.simulateData(numCases, false);

    // Hybrid resimulation on the original data.
    HsimRepeatAutoRun hybridStudy = new HsimRepeatAutoRun(originalData);
    double[] hybridErrors = hybridStudy.run(resimSize, repeat);

    // FGES on the fully resimulated data, scored against the DAG it was simulated from.
    Fges resimSearch = new Fges(new BDeuScore(resimData));
    resimSearch.setVerbose(false);
    resimSearch.setNumPatternsToStore(0);
    resimSearch.setPenaltyDiscount(penaltyDiscount);
    Graph resimPattern = resimSearch.search();
    double[] resimErrors = HsimUtils.errorEval(resimPattern, pickedDag);

    if (verbose) {
        System.out.println("Original erors are: " + originalErrors[0] + " " + originalErrors[1] + " " + originalErrors[2] + " " + originalErrors[3] + " " + originalErrors[4]);
        System.out.println("Full resim errors are: " + resimErrors[0] + " " + resimErrors[1] + " " + resimErrors[2] + " " + resimErrors[3] + " " + resimErrors[4]);
        System.out.println("HSim errors are: " + hybridErrors[0] + " " + hybridErrors[1] + " " + hybridErrors[2] + " " + hybridErrors[3] + " " + hybridErrors[4]);
    }

    List<double[]> results = new ArrayList<>();
    results.add(originalErrors);
    results.add(resimErrors);
    results.add(hybridErrors);
    return results;
}
use of edu.cmu.tetrad.data.DataSet in project tetrad by cmu-phil.
the class StandardizedSemIm method simulateData.
/**
 * Simulates a data set after temporarily seeding the shared {@link RandomUtil} instance with
 * {@code seed}.
 *
 * <p>The seed in effect before the call is read first and handed to
 * {@code RandomUtil.revertSeed} afterwards. This happens in a {@code finally} block, so the
 * shared generator is also restored when the underlying simulation throws.
 *
 * @param sampleSize      number of cases, passed through to
 *                        {@code simulateData(int, boolean)}
 * @param seed            seed to install for the duration of the simulation
 * @param latentDataSaved passed through to {@code simulateData(int, boolean)}
 * @return the data set produced by {@code simulateData(sampleSize, latentDataSaved)}
 */
@Override
public DataSet simulateData(int sampleSize, long seed, boolean latentDataSaved) {
    RandomUtil random = RandomUtil.getInstance();
    long previousSeed = random.getSeed();
    random.setSeed(seed);
    try {
        return simulateData(sampleSize, latentDataSaved);
    } finally {
        // always restore, so a failed simulation does not leave the global RNG reseeded
        random.revertSeed(previousSeed);
    }
}
Aggregations