Use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
The class ConditionalGaussianOtherLikelihood, method likelihoodJoint.
// The likelihood of the joint over all of these mixedVariables, assuming conditional
// Gaussian over the continuous and discrete variables.
private Ret likelihoodJoint(List<ContinuousVariable> X, List<DiscreteVariable> A, Node target) {
    A = new ArrayList<>(A);
    X = new ArrayList<>(X);

    if (target instanceof DiscreteVariable) {
        for (ContinuousVariable x : new ArrayList<>(X)) {
            // dataSet contains discretized versions of the continuous variables, so a
            // non-null lookup by name yields a DiscreteVariable.
            final Node variable = dataSet.getVariable(x.getName());

            if (variable != null) {
                A.add((DiscreteVariable) variable);
                X.remove(x);
            }
        }
    }

    int k = X.size();

    int[] continuousCols = new int[k];
    for (int j = 0; j < k; j++) continuousCols[j] = nodesHash.get(X.get(j));
    int N = mixedDataSet.getNumRows();

    double c1 = 0, c2 = 0;

    List<List<Integer>> cells = adTree.getCellLeaves(A);

    for (List<Integer> cell : cells) {
        int a = cell.size();
        if (a == 0) continue;

        if (A.size() > 0) {
            c1 += a * multinomialLikelihood(a, N);
        }

        if (X.size() > 0) {
            try {
                // The determinant will be zero if the data are linearly dependent.
                if (a > continuousCols.length + 10) {
                    // Enough rows in this cell to estimate a cell-specific covariance.
                    TetradMatrix cov = cov(getSubsample(continuousCols, cell));
                    c2 += a * gaussianLikelihood(k, cov);
                } else {
                    // Too few rows; fall back to the covariance over all rows ('all' is
                    // the class field listing every row index in the data set).
                    TetradMatrix cov = cov(getSubsample(continuousCols, all));
                    c2 += a * gaussianLikelihood(k, cov);
                }
            } catch (Exception e) {
                // No contribution.
            }
        }
    }

    final double lnL = c1 + c2;
    int p = (int) getPenaltyDiscount();

    // Only count dof for continuous cells that contributed to the likelihood calculation.
    final int dof = p * f(A) * h(X) + f(A);
    return new Ret(lnL, dof);
}
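The helpers multinomialLikelihood and gaussianLikelihood are not shown on this page. A minimal sketch of what they plausibly compute -- the standard per-row maximum-likelihood contributions for a multinomial cell and a k-dimensional Gaussian -- follows; the bodies are assumptions inferred from those formulas, not verified tetrad source.

    // Sketch only: assumed helper bodies, inferred from the standard ML formulas.

    // Each of the a rows in a cell contributes ln(a / N) to the multinomial part.
    private double multinomialLikelihood(int a, int N) {
        return Math.log(a / (double) N);
    }

    // Maximized per-row log density of a k-dimensional Gaussian with ML covariance sigma:
    // -(1/2) ln det(sigma) - (k/2)(1 + ln(2 pi)).
    private double gaussianLikelihood(int k, TetradMatrix sigma) {
        return -0.5 * Math.log(sigma.det()) - 0.5 * k * (1.0 + Math.log(2.0 * Math.PI));
    }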
Use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
The class ConditionalGaussianOtherLikelihood, method likelihoodMixed.
// For cases like P(C | X): this is a ratio of joints, but while the numerator is
// conditional Gaussian, the denominator is a mixture of Gaussians.
private Ret likelihoodMixed(List<ContinuousVariable> X, List<DiscreteVariable> A, DiscreteVariable B) {
    final int k = X.size();
    final double g = Math.pow(2.0 * Math.PI, -0.5 * k) * Math.exp(-0.5 * k);

    int[] continuousCols = new int[k];
    for (int j = 0; j < k; j++) continuousCols[j] = nodesHash.get(X.get(j));
    double lnL = 0.0;

    int N = dataSet.getNumRows();

    List<List<List<Integer>>> cells = adTree.getCellLeaves(A, B);

    TetradMatrix defaultCov = null;

    for (List<List<Integer>> mycells : cells) {
        List<TetradMatrix> x = new ArrayList<>();
        List<TetradMatrix> sigmas = new ArrayList<>();
        List<TetradMatrix> inv = new ArrayList<>();
        List<TetradVector> mu = new ArrayList<>();

        for (List<Integer> cell : mycells) {
            TetradMatrix subsample = getSubsample(continuousCols, cell);

            try {
                // The determinant will be zero if the data are linearly dependent: skip
                // cells with too few rows to yield a nonsingular covariance.
                if (cell.size() <= continuousCols.length) {
                    throw new IllegalArgumentException();
                }

                TetradMatrix cov = cov(subsample);
                TetradMatrix covinv = cov.inverse();

                if (defaultCov == null) {
                    defaultCov = cov;
                }

                x.add(subsample);
                sigmas.add(cov);
                inv.add(covinv);
                mu.add(means(subsample));
            } catch (Exception e) {
                // No contribution.
            }
        }

        double[] factors = new double[x.size()];

        for (int u = 0; u < x.size(); u++) {
            factors[u] = g * Math.pow(sigmas.get(u).det(), -0.5);
        }

        double[] a = new double[x.size()];

        for (int u = 0; u < x.size(); u++) {
            for (int i = 0; i < x.get(u).rows(); i++) {
                for (int v = 0; v < x.size(); v++) {
                    final TetradVector xm = x.get(u).getRow(i).minus(mu.get(v));
                    a[v] = prob(factors[v], inv.get(v), xm);
                }

                double num = a[u] * p(x, u, N);
                double denom = 0.0;

                for (int v = 0; v < x.size(); v++) {
                    denom += a[v] * p(x, v, N);
                }

                // 'log' is statically imported from java.lang.Math.
                lnL += log(num) - log(denom);
            }
        }
    }

    int p = (int) getPenaltyDiscount();

    // Only count dof for continuous cells that contributed to the likelihood calculation.
    int dof = f(A) * B.getNumCategories() + f(A) * p * h(X);
    return new Ret(lnL, dof);
}
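Likewise, prob and p are off-page helpers. Under the mixture-of-Gaussians reading of the denominator, prob evaluates a Gaussian density at the deviation xm (with the normalizing constant precomputed as factor = g * det(sigma)^(-1/2) above) and p is the empirical mixing weight of component u. The bodies below are a sketch under that assumption, not verified tetrad source.

    // Sketch only: assumed helper bodies for the Gaussian mixture terms above.

    // Gaussian density at deviation xm; 'factor' is the precomputed constant from above.
    private double prob(double factor, TetradMatrix inv, TetradVector xm) {
        return factor * Math.exp(-0.5 * xm.dotProduct(inv.times(xm)));
    }

    // Empirical mixing weight: the fraction of the N rows falling in component u.
    private double p(List<TetradMatrix> x, int u, int N) {
        return x.get(u).rows() / (double) N;
    }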
Use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
The class FactorAnalysis, method successiveResidual.
// ================= COMMUNALITY ESTIMATES =================//

/**
 * Successive method with residual matrix.
 * <p>
 * This algorithm makes use of a helper algorithm. Together they solve for an unrotated
 * factor loading matrix.
 * <p>
 * This method calls upon its helper to find column vectors, from which it constructs its
 * factor loading matrix. Upon receiving each successive column vector from the helper, it
 * decides whether to keep the vector or discard it. After keeping a vector, a residual
 * matrix is calculated, on which solving for the next column vector directly depends.
 * <p>
 * We stop looking for new vectors either when we've accounted for close to all of the
 * variance in the original correlation matrix, or when the "d-scalar" for a new vector is
 * less than 1 (the d-scalar is the vector's corresponding diagonal entry in the factor
 * loading matrix -- so when it's less than 1, the vector we've solved for barely accounts
 * for any more variance). At that point we've already "pulled out" all of the variance we
 * can from the residual matrix, and we should stop, since further factors explain little
 * more and only complicate the model.
 * <p>
 * PSEUDO-CODE:
 * <p>
 * 0th residual matrix = original correlation matrix.
 * Ask the helper for the 1st factor (the first column vector in our factor loading matrix).
 * Add the 1st factor's d-scalar (for the i'th factor, call its d-scalar the i'th d-scalar)
 * to a list of d-scalars.
 * <p>
 * While the ratio of the sum of d-scalars to the trace of the original correlation matrix
 * is less than .99 (in other words, while we haven't accounted for practically all of the
 * variance):
 * <p>
 * i'th residual matrix = (i - 1)'th residual matrix MINUS the major product moment of the
 * (i - 1)'th factor loading vector.
 * Ask the helper for the i'th factor.
 * If the i'th factor's d-scalar is less than 1, throw it out and end the loop.
 * Otherwise, add it to the factor loading matrix and continue the loop.
 * <p>
 * END PSEUDO-CODE
 * <p>
 * At the end of the method, the list of column vectors is assembled into a TetradMatrix.
 */
public TetradMatrix successiveResidual() {
    this.factorLoadingVectors = new LinkedList<>();
    this.dValues = new LinkedList<>();

    TetradMatrix residual = covariance.getMatrix().copy();
    TetradMatrix unitVector = new TetradMatrix(residual.rows(), 1);

    for (int i = 0; i < unitVector.rows(); i++) {
        unitVector.set(i, 0, 1);
    }

    for (int i = 0; i < getNumFactors(); i++) {
        boolean found = successiveResidualHelper(residual, unitVector);
        if (!found) break;

        TetradMatrix f = factorLoadingVectors.getLast();

        // Deflate: subtract the "major product moment" f * f' of the new loading vector.
        residual = residual.minus(f.times(f.transpose()));
    }

    factorLoadingVectors.removeFirst();

    // Assemble the retained column vectors into the factor loading matrix.
    TetradMatrix result = new TetradMatrix(residual.rows(), factorLoadingVectors.size());

    for (int i = 0; i < result.rows(); i++) {
        for (int j = 0; j < result.columns(); j++) {
            result.set(i, j, factorLoadingVectors.get(j).get(i, 0));
        }
    }

    this.residual = residual;
    return result;
}
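A hypothetical usage sketch. The constructor and setter below are assumptions inferred from the fields this class references (covariance, getNumFactors()); check the tetrad source for the actual API before relying on them.

    // Assumed API: FactorAnalysis(ICovarianceMatrix) and setNumFactors(int) are guesses.
    ICovarianceMatrix cov = new CovarianceMatrix(dataSet);  // dataSet: a continuous DataSet
    FactorAnalysis analysis = new FactorAnalysis(cov);
    analysis.setNumFactors(2);
    TetradMatrix loadings = analysis.successiveResidual();  // unrotated loading matrix
    System.out.println(loadings);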
Use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
The class FactorAnalysis, method successiveResidualHelper.
// ------------------Private methods-------------------//
/**
 * Helper method for the successive factor method above. Takes a residual matrix and an
 * approximation vector, and finds both the factor loading vector and the "d-scalar" that
 * is used to determine the amount of total variance accounted for so far.
 * <p>
 * The helper starts with the unit vector as its approximation to the factor column
 * vector. With each iteration it approximates a bit more closely -- the d-scalar for each
 * successive step provably converges to a value.
 * <p>
 * Thus, the ratio between the last iteration's d-scalar and this iteration's d-scalar
 * should approach 1. When this ratio gets sufficiently close to 1, the algorithm halts
 * and returns its current approximation.
 * <p>
 * Important to note: the residual matrix stays fixed throughout this algorithm.
 * <p>
 * PSEUDO-CODE:
 * <p>
 * Calculate the 0'th d-scalar with the following few steps:
 * 0'th U vector = residual matrix * approximation vector (just the unit vector for the 0'th step)
 * 0'th L scalar = transpose(approximation vector) * 0'th U vector
 * 0'th d-scalar = square root(0'th L scalar)
 * 0'th approximation to the factor loading (A vector) = 0'th U vector / 0'th d-scalar
 * <p>
 * While the ratio of the new d-scalar to the old is not sufficiently close to 1
 * (capped at 100 iterations as a failsafe):
 * <p>
 * i'th U vector = residual matrix * (i - 1)'th factor loading
 * i'th L scalar = transpose((i - 1)'th factor loading) * i'th U vector
 * i'th d-scalar = square root(i'th L scalar)
 * i'th factor loading = i'th U vector / i'th d-scalar
 * <p>
 * Return the final i'th factor loading as our best approximation.
 */
private boolean successiveResidualHelper(TetradMatrix residual, TetradMatrix approximationVector) {
    // l0 = v' * R * v; if it's negative, no more variance can be extracted.
    TetradMatrix l0 = approximationVector.transpose().times(residual).times(approximationVector);

    if (l0.get(0, 0) < 0) {
        return false;
    }

    double d = Math.sqrt(l0.get(0, 0));
    TetradMatrix f = residual.times(approximationVector).scalarMult(1.0 / d);

    // Iterate u = R*f, l = f'*u, d = sqrt(l), f = u/d until the d-scalars converge,
    // capped at 100 iterations. 'abs' is statically imported from java.lang.Math.
    for (int i = 0; i < 100; i++) {
        TetradMatrix ui = residual.times(f);
        TetradMatrix li = f.transpose().times(ui);
        double di = Math.sqrt(li.get(0, 0));

        if (abs(d - di) <= getThreshold()) {
            break;
        }

        d = di;
        f = ui.scalarMult(1.0 / d);
    }

    this.dValues.add(d);
    this.factorLoadingVectors.add(f);
    return true;
}
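The helper is power iteration on the fixed residual matrix R: at the fixed point, f = sqrt(lambda1) * v1 and d = lambda1, where (lambda1, v1) is R's dominant eigenpair. That is why subtracting f * f' in the caller deflates exactly the variance just extracted. A self-contained illustration with plain arrays (no tetrad classes; the 3x3 matrix is made up):

    public final class PowerIterationDemo {
        public static void main(String[] args) {
            double[][] r = {{1.0, 0.6, 0.5}, {0.6, 1.0, 0.4}, {0.5, 0.4, 1.0}};
            double[] f = {1, 1, 1};  // start from the unit vector, as in the helper
            double d = 0.0;

            for (int i = 0; i < 100; i++) {  // 100-iteration failsafe, as in the helper
                double[] u = multiply(r, f);                          // u_i = R * f_(i-1)
                double l = dot(f, u);                                 // l_i = f' * u_i
                double di = Math.sqrt(l);                             // d_i = sqrt(l_i)
                for (int j = 0; j < f.length; j++) f[j] = u[j] / di;  // f_i = u_i / d_i
                if (Math.abs(di - d) <= 1e-9) break;                  // d-scalars converged
                d = di;
            }

            // d approximates lambda1; f approximates sqrt(lambda1) * v1.
            System.out.println("d = " + d + ", loading = " + java.util.Arrays.toString(f));
        }

        private static double[] multiply(double[][] m, double[] v) {
            double[] out = new double[v.length];
            for (int i = 0; i < m.length; i++)
                for (int j = 0; j < v.length; j++) out[i] += m[i][j] * v[j];
            return out;
        }

        private static double dot(double[] a, double[] b) {
            double s = 0.0;
            for (int i = 0; i < a.length; i++) s += a[i] * b[i];
            return s;
        }
    }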
Use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
The class LingamPattern2, method getScore1.
// =============================PRIVATE METHODS=========================//

private Score getScore1(Graph dag, List<TetradMatrix> data, List<Node> variables) {
    // System.out.println("Scoring DAG: " + dag);

    List<Regression> regressions = new ArrayList<>();

    for (TetradMatrix _data : data) {
        regressions.add(new RegressionDataset(_data, variables));
    }

    int totalSampleSize = 0;

    for (TetradMatrix _data : data) {
        totalSampleSize += _data.rows();
    }

    int numCols = data.get(0).columns();

    List<Node> nodes = dag.getNodes();
    double score = 0.0;
    double[] pValues = new double[nodes.size()];
    TetradMatrix absoluteStandardizedResiduals = new TetradMatrix(totalSampleSize, numCols);

    for (int i = 0; i < nodes.size(); i++) {
        List<Double> _absoluteStandardizedResiduals = new ArrayList<>();

        for (int j = 0; j < data.size(); j++) {
            Node _target = nodes.get(i);
            List<Node> _regressors = dag.getParents(_target);
            Node target = getVariable(variables, _target.getName());
            List<Node> regressors = new ArrayList<>();

            for (Node _regressor : _regressors) {
                Node variable = getVariable(variables, _regressor.getName());
                regressors.add(variable);
            }

            RegressionResult result = regressions.get(j).regress(target, regressors);
            TetradVector residualsColumn = result.getResiduals();

            DoubleArrayList _absoluteStandardizedResidualsColumn =
                    new DoubleArrayList(residualsColumn.toArray());

            double mean = Descriptive.mean(_absoluteStandardizedResidualsColumn);
            double std = Descriptive.standardDeviation(Descriptive.variance(
                    _absoluteStandardizedResidualsColumn.size(),
                    Descriptive.sum(_absoluteStandardizedResidualsColumn),
                    Descriptive.sumOfSquares(_absoluteStandardizedResidualsColumn)));

            // Standardize the residuals, then take absolute values.
            for (int i2 = 0; i2 < _absoluteStandardizedResidualsColumn.size(); i2++) {
                _absoluteStandardizedResidualsColumn.set(i2,
                        (_absoluteStandardizedResidualsColumn.get(i2) - mean) / std);
                _absoluteStandardizedResidualsColumn.set(i2,
                        Math.abs(_absoluteStandardizedResidualsColumn.get(i2)));
            }

            for (int k = 0; k < _absoluteStandardizedResidualsColumn.size(); k++) {
                _absoluteStandardizedResiduals.add(_absoluteStandardizedResidualsColumn.get(k));
            }
        }

        // Copy this node's absolute standardized residuals into column i, then read the
        // filled column back to compute its mean.
        for (int k = 0; k < _absoluteStandardizedResiduals.size(); k++) {
            absoluteStandardizedResiduals.set(k, i, _absoluteStandardizedResiduals.get(k));
        }

        DoubleArrayList absoluteStandardResidualsList =
                new DoubleArrayList(absoluteStandardizedResiduals.getColumn(i).toArray());

        // For standard normal residuals, E|Z| = sqrt(2 / pi); the squared gap from that
        // constant measures non-Gaussianity.
        double _mean = Descriptive.mean(absoluteStandardResidualsList);
        double diff = _mean - Math.sqrt(2.0 / Math.PI);
        score += diff * diff;
    }

    for (int j = 0; j < absoluteStandardizedResiduals.columns(); j++) {
        double[] x = absoluteStandardizedResiduals.getColumn(j).toArray();
        double p = new AndersonDarlingTest(x).getP();
        pValues[j] = p;
    }

    return new Score(score, pValues);
}
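The score's core statistic relies on E|Z| = sqrt(2/pi), about 0.7979, for a standard normal Z, so each node contributes the squared gap between its mean absolute standardized residual and that constant. A self-contained check with simulated Gaussian residuals (plain Java, no tetrad classes):

    public final class MeanAbsResidualDemo {
        public static void main(String[] args) {
            java.util.Random rng = new java.util.Random(42);
            double[] residuals = new double[100000];
            for (int i = 0; i < residuals.length; i++) residuals[i] = rng.nextGaussian();

            // Standardize, then average absolute values, as getScore1 does per node.
            double mean = 0.0;
            for (double r : residuals) mean += r;
            mean /= residuals.length;

            double var = 0.0;
            for (double r : residuals) var += (r - mean) * (r - mean);
            double std = Math.sqrt(var / (residuals.length - 1));

            double meanAbs = 0.0;
            for (double r : residuals) meanAbs += Math.abs((r - mean) / std);
            meanAbs /= residuals.length;

            double diff = meanAbs - Math.sqrt(2.0 / Math.PI);
            System.out.println("score term = " + diff * diff);  // near 0 for Gaussian residuals
        }
    }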