use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
the class IndTestHsic method empiricalHSICincompleteCholesky.
/**
* Empirical unconditional Hilbert-Schmidt Dependence Measure for X and Y given Z using incomplete Cholesky
* decomposition to approximate Gram matrices
*
* @param Gy Choleksy approximate Gram matrix for Y
* @param Gx Choleksy approximate Gram matrix for X
* @param Gz Choleksy approximate Gram matrix for Z
* @param m sample size
*/
public double empiricalHSICincompleteCholesky(TetradMatrix Gy, TetradMatrix Gx, TetradMatrix Gz, int m) {
// centralize Choleksy
int ky = Gy.columns();
int kx = Gx.columns();
int kz = Gz.columns();
TetradMatrix H = KernelUtils.constructH(m);
TetradMatrix Gcy = H.times(Gy);
TetradMatrix Gcx = H.times(Gx);
TetradMatrix Gcz = H.times(Gz);
// multiply gram matrices (first block)
TetradMatrix A = new TetradMatrix(ky, kx);
TetradMatrix Gcyt = Gcy.transpose();
A = Gcyt.times(Gcx);
TetradMatrix B = Gcy.times(A);
TetradMatrix Kyx = new TetradMatrix(m, m);
TetradMatrix Gcxt = new TetradMatrix(kx, m);
Gcxt = Gcx.transpose();
Kyx = B.times(Gcxt);
double empHSIC = 0.0;
double xy = 0.0;
for (int i = 0; i < m; i++) {
empHSIC += matrixProductEntry(B, Gcxt, i, i);
}
// second block
TetradMatrix Gytz = Gcyt.times(Gcz);
TetradMatrix Gczt = Gcz.transpose();
TetradMatrix Gztx = Gczt.times(Gcx);
TetradMatrix Gztz = Gczt.times(Gcz);
TetradMatrix Gztzr = Gztz.copy();
for (int i = 0; i < kz; i++) {
Gztzr.set(i, i, Gztz.get(i, i) + this.regularizer);
}
// invert matrix
TetradMatrix ZI = Gztzr.inverse();
TetradMatrix ZIzt = ZI.times(Gczt);
TetradMatrix Gzr = Gcz.copy();
for (int i = 0; i < m; i++) {
for (int j = 0; j < kz; j++) {
Gzr.set(i, j, Gcz.get(i, j) * (-1.0 / this.regularizer));
}
}
TetradMatrix Zinv = Gzr.times(ZIzt);
for (int i = 0; i < m; i++) {
Zinv.set(i, i, Zinv.get(i, i) + (1.0 / this.regularizer));
}
TetradMatrix Gztzinv = Gczt.times(Zinv);
TetradMatrix Gzinvz = Zinv.times(Gcz);
TetradMatrix Gztinv2z = Gztzinv.times(Gzinvz);
TetradMatrix Gytzztzinv2z = Gytz.times(Gztinv2z);
TetradMatrix Gytzztzinv2zztx = Gytzztzinv2z.times(Gztx);
TetradMatrix Gyytzztzinv2zztx = Gcy.times(Gytzztzinv2zztx);
double second = 0.0;
for (int i = 0; i < m; i++) {
second += matrixProductEntry(Gyytzztzinv2zztx, Gcxt, i, i);
}
empHSIC -= 2 * second;
// third block
TetradMatrix Gxtz = Gcxt.times(Gcz);
TetradMatrix Gxtzztinv2z = Gxtz.times(Gztinv2z);
TetradMatrix Gyytzztzinv2zztxxtzztinv2z = Gyytzztzinv2zztx.times(Gxtzztinv2z);
for (int i = 0; i < m; i++) {
empHSIC += matrixProductEntry(Gyytzztzinv2zztxxtzztinv2z, Gczt, i, i);
}
// beta z estimate
double betaz = 0.0;
for (int i = 0; i < (m - 1); i++) {
for (int j = (i + 1); j < m; j++) {
betaz += Math.pow(matrixProductEntry(Gcz, Gczt, i, j), 2);
betaz += Math.pow(matrixProductEntry(Gcz, Gczt, j, i), 2);
}
}
empHSIC *= (m / (betaz * (m - 1)));
return empHSIC;
}
use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
the class IndTestTrekSep method determines.
/**
* If <code>isDeterminismAllowed()</code>, deters to IndTestFisherZD; otherwise throws
* UnsupportedOperationException.
*/
public boolean determines(List<Node> z, Node x) throws UnsupportedOperationException {
int[] parents = new int[z.size()];
for (int j = 0; j < parents.length; j++) {
parents[j] = covMatrix.getVariables().indexOf(z.get(j));
}
int i = covMatrix.getVariables().indexOf(x);
TetradMatrix matrix2D = covMatrix.getMatrix();
double variance = matrix2D.get(i, i);
if (parents.length > 0) {
// Regress z onto i, yielding regression coefficients b.
TetradMatrix Czz = matrix2D.getSelection(parents, parents);
TetradMatrix inverse;
try {
inverse = Czz.inverse();
} catch (Exception e) {
return true;
}
TetradVector Cyz = matrix2D.getColumn(i);
Cyz = Cyz.viewSelection(parents);
TetradVector b = inverse.times(Cyz);
variance -= Cyz.dotProduct(b);
}
return variance < 1e-20;
}
use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
the class IndTestHsic method isIndependent.
/**
* Determines whether variable x is independent of variable y given a list of conditioning variables z.
*
* @param x the one variable being compared.
* @param y the second variable being compared.
* @param z the list of conditioning variables.
* @return true iff x _||_ y | z.
*/
public boolean isIndependent(Node y, Node x, List<Node> z) {
int m = sampleSize();
// choose kernels using median distance heuristic
Kernel xKernel = new KernelGaussian(1);
Kernel yKernel = new KernelGaussian(1);
List<Kernel> zKernel = new ArrayList<>();
yKernel.setDefaultBw(this.dataSet, y);
xKernel.setDefaultBw(this.dataSet, x);
if (!z.isEmpty()) {
for (int i = 0; i < z.size(); i++) {
Kernel Zi = new KernelGaussian(1);
Zi.setDefaultBw(this.dataSet, z.get(i));
zKernel.add(Zi);
}
}
// consruct Gram matricces
TetradMatrix Ky = null;
TetradMatrix Kx = null;
TetradMatrix Kz = null;
// use incomplete Cholesky to approximate
if (useIncompleteCholesky > 0) {
Ky = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(yKernel), this.dataSet, Arrays.asList(y), useIncompleteCholesky);
Kx = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(xKernel), this.dataSet, Arrays.asList(x), useIncompleteCholesky);
if (!z.isEmpty()) {
Kz = KernelUtils.incompleteCholeskyGramMatrix(zKernel, this.dataSet, z, useIncompleteCholesky);
}
} else // otherwise compute directly
{
Ky = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(yKernel), this.dataSet, Arrays.asList(y));
Kx = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(xKernel), this.dataSet, Arrays.asList(x));
if (!z.isEmpty()) {
Kz = KernelUtils.constructCentralizedGramMatrix(zKernel, this.dataSet, z);
}
}
// get Hilbert-Schmidt dependence measure
if (z.isEmpty()) {
if (useIncompleteCholesky > 0) {
this.hsic = empiricalHSICincompleteCholesky(Ky, Kx, m);
} else {
this.hsic = empiricalHSIC(Ky, Kx, m);
}
} else {
if (useIncompleteCholesky > 0) {
this.hsic = empiricalHSICincompleteCholesky(Ky, Kx, Kz, m);
} else {
this.hsic = empiricalHSIC(Ky, Kx, Kz, m);
}
}
// shuffle data for approximate the null distribution
double[] nullapprox = new double[this.perms];
int[] zind = null;
int ycol = this.dataSet.getColumn(y);
List<List<Integer>> clusterAssign = null;
if (!z.isEmpty()) {
// get clusters for z
KMeans kmeans = KMeans.randomClusters((m / 3));
zind = new int[z.size()];
for (int j = 0; j < z.size(); j++) {
zind[j] = dataSet.getColumn(z.get(j));
}
kmeans.cluster(dataSet.subsetColumns(z).getDoubleData());
clusterAssign = kmeans.getClusters();
}
for (int i = 0; i < this.perms; i++) {
DataSet shuffleData = new ColtDataSet((ColtDataSet) dataSet);
// shuffle data
if (z.isEmpty()) {
List<Integer> indicesList = new ArrayList<>();
for (int j = 0; j < m; j++) {
indicesList.add(j);
}
Collections.shuffle(indicesList);
for (int j = 0; j < m; j++) {
double shuffleVal = dataSet.getDouble(indicesList.get(j), ycol);
shuffleData.setDouble(j, ycol, shuffleVal);
}
} else {
// shuffle data within clusters
for (int j = 0; j < clusterAssign.size(); j++) {
List<Integer> shuffleCluster = new ArrayList<>(clusterAssign.get(j));
Collections.shuffle(shuffleCluster);
for (int k = 0; k < shuffleCluster.size(); k++) {
// first swap y;
double swapVal = dataSet.getDouble(clusterAssign.get(j).get(k), ycol);
shuffleData.setDouble(shuffleCluster.get(k), ycol, swapVal);
// now swap z
for (int zi = 0; zi < z.size(); zi++) {
swapVal = dataSet.getDouble(clusterAssign.get(j).get(k), zind[zi]);
shuffleData.setDouble(shuffleCluster.get(k), zind[zi], swapVal);
}
}
}
}
// reset bandwidths
yKernel.setDefaultBw(shuffleData, y);
for (int j = 0; j < z.size(); j++) {
zKernel.get(j).setDefaultBw(shuffleData, z.get(j));
}
// Gram matrices
TetradMatrix Kyn = null;
if (useIncompleteCholesky > 0) {
Kyn = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(yKernel), shuffleData, Arrays.asList(y), useIncompleteCholesky);
} else {
Kyn = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(yKernel), shuffleData, Arrays.asList(y));
}
TetradMatrix Kzn = null;
if (!z.isEmpty()) {
if (useIncompleteCholesky > 0) {
Kzn = KernelUtils.incompleteCholeskyGramMatrix(zKernel, shuffleData, z, useIncompleteCholesky);
} else {
Kzn = KernelUtils.constructCentralizedGramMatrix(zKernel, shuffleData, z);
}
}
// HSIC
if (z.isEmpty()) {
if (useIncompleteCholesky > 0) {
nullapprox[i] = empiricalHSICincompleteCholesky(Kyn, Kx, m);
} else {
nullapprox[i] = empiricalHSIC(Kyn, Kx, m);
}
} else {
if (useIncompleteCholesky > 0) {
nullapprox[i] = empiricalHSICincompleteCholesky(Kyn, Kx, Kz, m);
} else {
nullapprox[i] = empiricalHSIC(Kyn, Kx, Kz, m);
}
}
}
// permutation test to get p-value
double evalCdf = 0.0;
for (int i = 0; i < this.perms; i++) {
if (nullapprox[i] <= this.hsic) {
evalCdf += 1.0;
}
}
evalCdf /= (double) this.perms;
this.pValue = 1.0 - evalCdf;
// reject if pvalue <= alpha
if (this.pValue <= this.alpha) {
TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
return false;
}
if (verbose) {
TetradLogger.getInstance().log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
}
return true;
}
use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
the class IndTestFisherZPercentIndependent method isIndependent.
public boolean isIndependent(Node x, Node y, List<Node> z) {
int[] all = new int[z.size() + 2];
all[0] = variablesMap.get(x);
all[1] = variablesMap.get(y);
for (int i = 0; i < z.size(); i++) {
all[i + 2] = variablesMap.get(z.get(i));
}
int sampleSize = data.get(0).rows();
List<Double> pValues = new ArrayList<>();
for (int m = 0; m < ncov.size(); m++) {
TetradMatrix _ncov = ncov.get(m).getSelection(all, all);
TetradMatrix inv = _ncov.inverse();
double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1));
double fisherZ = sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));
double pValue;
if (Double.isInfinite(fisherZ)) {
pValue = 0;
} else {
pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ)));
}
pValues.add(pValue);
}
double _cutoff = alpha;
if (fdr) {
_cutoff = StatUtils.fdrCutoff(alpha, pValues, false);
}
Collections.sort(pValues);
int index = (int) round((1.0 - percent) * pValues.size());
this.pValue = pValues.get(index);
// if (this.pValue == 0) {
// System.out.println("Zero pvalue "+ SearchLogUtils.independenceFactMsg(x, y, z, getScore()));
// }
boolean independent = this.pValue > _cutoff;
if (verbose) {
if (independent) {
TetradLogger.getInstance().log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
// System.out.println(SearchLogUtils.independenceFactMsg(x, y, z, getScore()));
} else {
TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
}
}
return independent;
}
use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.
the class GlassoRunner method execute.
// ===================PUBLIC METHODS OVERRIDING ABSTRACT================//
public void execute() {
Object dataModel = getDataModel();
Parameters params = getParams();
if (dataModel instanceof DataSet) {
DataSet dataSet = (DataSet) dataModel;
DoubleMatrix2D cov = new DenseDoubleMatrix2D(dataSet.getCovarianceMatrix().toArray());
Glasso glasso = new Glasso(cov);
glasso.setMaxit((int) params.get("maxit", 10000));
glasso.setIa(params.getBoolean("ia", false));
glasso.setIs(params.getBoolean("is", false));
glasso.setItr(params.getBoolean("itr", false));
glasso.setIpen(params.getBoolean("ipen", false));
glasso.setThr(params.getDouble("thr", 1e-4));
glasso.setRhoAllEqual(1.0);
Glasso.Result result = glasso.search();
TetradMatrix wwi = new TetradMatrix(result.getWwi().toArray());
List<Node> variables = dataSet.getVariables();
Graph resultGraph = new EdgeListGraph(variables);
for (int i = 0; i < variables.size(); i++) {
for (int j = i + 1; j < variables.size(); j++) {
if (wwi.get(i, j) != 0.0 && wwi.get(i, j) != 0.0) {
resultGraph.addUndirectedEdge(variables.get(i), variables.get(j));
}
}
}
setResultGraph(resultGraph);
}
}
Aggregations