use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.
the class MeasurementSimulator method simulate.
/**
* Simulates (optionally) neither, either, or both of two three-dimensionaly
* data sets, rawData and measuredData. For the form of the first set, see
* <code>getRawData</code>; for the form of the second set, see
* <code>getMeasuredData</code>. The idea of the simulation is that cells
* are grown in different dishes and periodically sampled. When they are
* sampled, their DNA is ground up and then pipetted in aggregate onto a
* number of microarray chips. Each indivual cell is simulated separately;
* the simulated cells are grouped into dishes. Expression levels of cells
* in each dish are aggregated and then random noise is added to this
* aggregate expression level representing various sources of noise: sample
* to sample variation, chip to chip variation, and pixellation measurement
* variation. For more information, see the Genetic Simulator spec, which
* this method implements.
*
* @see #getRawData
* @see #getMeasuredData
*/
public void simulate(GeneHistory history) {
setHistory(history);
history.reset();
// Get the update function.
UpdateFunction updateFunction = history.getUpdateFunction();
int numFactors = updateFunction.getNumFactors();
// Set up the dish model.
DishModel dishModel = new DishModel(getNumDishes(), getDishDishVariability());
// Set up the history.
history.setInitSync(isInitSync());
history.setDishModel(dishModel);
// We will use two data cubes to store data for display
// purposes: a measurement cube and a raw data cube. We first
// construct the measurement cube, with two extra columns for
// (a) dish number and (b) chip number.
int numChips = getNumSamplesPerDish();
if (isMeasuredDataSaved()) {
int numSteps = timeSteps.length;
int numSamples = getNumDishes() * numChips;
this.measuredData = new double[numFactors][numSteps][numSamples];
}
// We next construct the raw data cube, with two extra columns
// for (a) individual number and (b) dish number. Raw data is
// saved out only if the 'rawDataSave' parameter is true. If
// it's not true, these objects are not constructed. (They can
// be extremely large.) References to the cube and the initial
// columns need to be saved in this scope for future use (but
// may be null).
this.rawData = null;
if (isRawDataSaved()) {
int numSteps = timeSteps.length;
int numCells = getNumDishes() * getNumCellsPerDish();
this.rawData = new double[numFactors][numSteps][numCells];
}
// There are three sources of error--sample to sample, chip to
// chip, and pixel digitalization. A certain number of
// samples are drawn from each dish, and these are deposited
// on the same number of chips. There is an error associated
// with each chip and with each sample. (Pixel digitalization
// error is drawn later.)
double sampleSd = getSampleSampleVariability();
double chipSd = getChipChipVariability();
double pixelSd = getPixelDigitalization();
Distribution sampleErrorDist = new Normal(0.0, sampleSd);
Distribution chipErrorDist = new Normal(0.0, chipSd);
Distribution pixelErrorDist = new Normal(0.0, pixelSd);
double[][] chipErrors = new double[getNumDishes()][numChips];
double[][] sampleErrors = new double[getNumDishes()][numChips];
for (int d = 0; d < getNumDishes(); d++) {
for (int ch = 0; ch < numChips; ch++) {
chipErrors[d][ch] = chipErrorDist.nextRandom();
sampleErrors[d][ch] = sampleErrorDist.nextRandom();
}
}
// Make two double[] arrays, one to store data generated for a
// particular cell (in a particular dish) and one to store
// aggregations of these data.
double[][] cellData = new double[timeSteps.length][numFactors];
double[][] aggregation = new double[cellData.length][cellData[0].length];
// 'aggregation'. First, iterate over dishes.
for (int d = 0; d < getNumDishes(); d++) {
dishModel.setDishNumber(d);
// Reset the aggregation.
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
Arrays.fill(aggregation[sIndex], 0);
}
// Next, iterate over the cells in a dish.
for (int c = 0; c < getNumCellsPerDish(); c++) {
// 12/01/01
if ((c + 1) % 50 == 0) {
this.dishNumber = d;
this.cellNumber = c;
// System.out.println("Dish # " + (d + 1) + ", Cell # " +
// (c + 1));
}
// Reset the cell data.
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
Arrays.fill(cellData[sIndex], 0);
}
// Initialize the history array.
history.initialize();
// Generate data for one cell, storing only those time
// steps that are indicated in the timeSteps
// array. Note that the timeSteps array is 1-indexed,
// while the 's' variable is 0-indexed. (Might want to
// fix this in a future version.)
int s = -1;
int sIndex = 0;
// for the steps in timeSteps[].
while (++s < getStepsGenerated()) {
// being synchronized.
if (s > 0) {
history.update();
}
if (s == timeSteps[sIndex] - 1) {
double[][] historyArray = history.getHistoryArray();
// the factors (genes).
for (int f = 0; f < numFactors; f++) {
// Copy data from the getModel time step in
// the history to the cellData[][] array.
cellData[sIndex][f] = historyArray[0][f];
// Antilog it if necessary.
if (isAntilogCalculated()) {
cellData[sIndex][f] = Math.exp(cellData[sIndex][f]);
}
// be null.)
if (isRawDataSaved()) {
int row = d * getNumCellsPerDish() + c;
/*this.rawData[f][sIndex][row] =
historyArray[0][f];*/
this.rawData[f][sIndex][row] = cellData[sIndex][f];
}
}
if (++sIndex >= timeSteps.length) {
break;
}
}
// END if (s == timeSteps[sIndex])
}
// Aggregate data.
for (int i = 0; i < cellData.length; i++) {
for (int j = 0; j < cellData[0].length; j++) {
aggregation[i][j] += cellData[i][j];
}
}
}
if (isMeasuredDataSaved()) {
// generated.)
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
for (int f = 0; f < numFactors; f++) {
for (int ch = 0; ch < numChips; ch++) {
double average = aggregation[sIndex][f] / getNumCellsPerDish();
double pixelError = pixelErrorDist.nextRandom();
double measurement = average + sampleErrors[d][ch] + chipErrors[d][ch] + pixelError;
this.measuredData[f][sIndex][d * numChips + ch] = measurement;
}
// END for (int ch = 0; ch < numChips; ch++)
}
// END for (int f = 0; f < history.getNumFacto...
}
// END for (int sIndex = 0; sIndex < timeSteps...
}
// END if (measuredDataSaved)
}
return;
}
use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.
the class TestLingamPattern method test1.
@Test
public void test1() {
RandomUtil.getInstance().setSeed(4938492L);
int sampleSize = 1000;
List<Node> nodes = new ArrayList<>();
for (int i = 0; i < 6; i++) {
nodes.add(new ContinuousVariable("X" + (i + 1)));
}
Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 6, 4, 4, 4, false));
List<Distribution> variableDistributions = new ArrayList<>();
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Uniform(-1, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
SemPm semPm = new SemPm(graph);
SemIm semIm = new SemIm(semPm);
DataSet dataSet = simulateDataNonNormal(semIm, sampleSize, variableDistributions);
Score score = new SemBicScore(new CovarianceMatrixOnTheFly(dataSet));
Graph estPattern = new Fges(score).search();
LingamPattern lingam = new LingamPattern(estPattern, dataSet);
lingam.search();
double[] pvals = lingam.getPValues();
double[] expectedPVals = { 0.18, 0.29, 0.88, 0.00, 0.01, 0.58 };
for (int i = 0; i < pvals.length; i++) {
assertEquals(expectedPVals[i], pvals[i], 0.01);
}
}
use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.
the class QQPlot method testPlot.
// ============================ Private Methods =======================//
/**
* Used to test this class.
*
* Generates a continuous test variable and q-q plots it.
*/
private void testPlot() {
ContinuousVariable c = new ContinuousVariable("test");
if (dataSet.getVariable("test") == null)
dataSet.addVariable(c);
ContinuousVariable c2 = new ContinuousVariable("test2");
if (dataSet.getVariable("test2") == null)
dataSet.addVariable(c2);
this.selectedVariable = c;
int columnIndex = dataSet.getColumn(c);
Normal g = new Normal(1, 1);
Exponential e = new Exponential(1);
double mean = 0.0;
double sd = 0.0;
this.minData = 10000000000000.0;
this.maxData = 0.0;
this.minComparison = 1000000000000.0;
this.maxComparison = 0.0;
for (int i = 0; i < dataSet.getNumRows(); i++) {
double value = g.nextRandom();
double value2 = e.nextRandom();
dataSet.setDouble(i, columnIndex, value);
dataSet.setDouble(i, columnIndex + 1, value2);
mean += value;
if (value < this.minData)
this.minData = value;
if (value > this.maxData)
this.maxData = value;
// System.out.println(value);
// System.out.println(mean);
}
// System.out.println(this.dataSet.getNumRows());
NormalityTests.kolmogorovSmirnov(dataSet, c2);
// sort the dataset
for (int i = 0; i < dataSet.getNumRows(); i++) {
for (int k = i; k < dataSet.getNumRows(); k++) {
if (dataSet.getDouble(i, columnIndex) > dataSet.getDouble(k, columnIndex)) {
double temp = dataSet.getDouble(i, columnIndex);
dataSet.setDouble(i, columnIndex, dataSet.getDouble(k, columnIndex));
dataSet.setDouble(k, columnIndex, temp);
}
}
}
if (mean == 0.0)
mean = 1.0;
else
mean /= dataSet.getNumRows();
for (int i = 0; i < dataSet.getNumRows(); i++) {
sd += (dataSet.getDouble(i, columnIndex) - mean) * (dataSet.getDouble(i, columnIndex) - mean);
// System.out.println(dataSet.getDouble(i, columnIndex));
// System.out.println(sd);
}
if (sd == 0.0) {
sd = 1.0;
} else {
sd /= dataSet.getNumRows() - 1.0;
sd = Math.sqrt(sd);
}
// System.out.println("Mean: " + mean + " SD: " + sd + " Min: " + this.minData + " Max: " + this.maxData);
this.comparison = new cern.jet.random.Normal(mean, sd, new MersenneTwister());
calculateComparisonSet(this.comparison, this.dataSet);
if (this.minData < this.minComparison)
this.min = this.minData;
else
this.min = this.minComparison;
if (this.maxData > this.maxComparison)
this.max = this.maxData;
else
this.max = this.maxComparison;
// end test code
}
use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.
the class SemPm method initializeParams.
private void initializeParams() {
// Note that a distinction between parameterizable and non-parameterizable nodes is being added
// to accomodate time lag graphs. jdramsey 4/14/10.
List<Parameter> parameters = new ArrayList<>();
Set<Edge> edges = graph.getEdges();
Collections.sort(new ArrayList<>(edges), new Comparator<Edge>() {
public int compare(Edge o1, Edge o2) {
int compareFirst = o1.getNode1().getName().compareTo(o2.getNode1().toString());
int compareSecond = o1.getNode2().getName().compareTo(o2.getNode2().toString());
if (compareFirst != 0) {
return compareFirst;
}
if (compareSecond != 0) {
return compareSecond;
}
return 0;
}
});
// aren't error edges *and are into parameterizable node* (the last bit jdramsey 4/14/10).
for (Edge edge : edges) {
if (edge.getNode1() == edge.getNode2()) {
continue;
// throw new IllegalStateException("There should not be any" +
// "edges from a node to itself in a SemGraph: " + edge);
}
if (!SemGraph.isErrorEdge(edge) && edge.getEndpoint1() == Endpoint.TAIL && edge.getEndpoint2() == Endpoint.ARROW) {
if (!graph.isParameterizable(edge.getNode2())) {
continue;
}
Parameter param = new Parameter(newBName(), ParamType.COEF, edge.getNode1(), edge.getNode2());
param.setDistribution(new Split(0.5, 1.5));
// param.setDistribution(new SplitDistributionSpecial(0.5, 1.5));
// param.setDistribution(new UniformDistribution(-0.2, 0.2));
// param.setDistribution(coefDistribution);
parameters.add(param);
}
}
// Add error variance freeParameters for exogenous nodes of all *parameterizable* nodes.
for (Node node : getVariableNodes()) {
if (!graph.isParameterizable(node)) {
continue;
}
Parameter param = new Parameter(newTName(), ParamType.VAR, node, node);
param.setDistribution(new Uniform(1.0, 3.0));
parameters.add(param);
}
// connect exogenous nodes.)
for (Edge edge : edges) {
if (Edges.isBidirectedEdge(edge)) {
Node node1 = edge.getNode1();
Node node2 = edge.getNode2();
// no...
if (!graph.isParameterizable(node1) || !graph.isParameterizable(node2)) {
continue;
}
node1 = getGraph().getVarNode(node1);
node2 = getGraph().getVarNode(node2);
Parameter param = new Parameter(newTName(), ParamType.COVAR, node1, node2);
param.setDistribution(new SingleValue(0.2));
parameters.add(param);
}
}
// Add mean freeParameters for all parameterizable nodes.
for (Node node : getVariableNodes()) {
if (!graph.isParameterizable(node)) {
continue;
}
Parameter mean = new Parameter(newMName(), ParamType.MEAN, node, node);
mean.setDistribution(new Normal(0.0, 1.0));
parameters.add(mean);
}
this.parameters = Collections.unmodifiableList(parameters);
}
Aggregations