Search in sources :

Example 6 with TetradMatrix

use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.

the class IndTestHsic method empiricalHSICincompleteCholesky.

/**
 * Empirical unconditional Hilbert-Schmidt Dependence Measure for X and Y given Z using incomplete Cholesky
 * decomposition to approximate Gram matrices
 *
 * @param Gy Choleksy approximate Gram matrix for Y
 * @param Gx Choleksy approximate Gram matrix for X
 * @param Gz Choleksy approximate Gram matrix for Z
 * @param m  sample size
 */
public double empiricalHSICincompleteCholesky(TetradMatrix Gy, TetradMatrix Gx, TetradMatrix Gz, int m) {
    // centralize Choleksy
    int ky = Gy.columns();
    int kx = Gx.columns();
    int kz = Gz.columns();
    TetradMatrix H = KernelUtils.constructH(m);
    TetradMatrix Gcy = H.times(Gy);
    TetradMatrix Gcx = H.times(Gx);
    TetradMatrix Gcz = H.times(Gz);
    // multiply gram matrices (first block)
    TetradMatrix A = new TetradMatrix(ky, kx);
    TetradMatrix Gcyt = Gcy.transpose();
    A = Gcyt.times(Gcx);
    TetradMatrix B = Gcy.times(A);
    TetradMatrix Kyx = new TetradMatrix(m, m);
    TetradMatrix Gcxt = new TetradMatrix(kx, m);
    Gcxt = Gcx.transpose();
    Kyx = B.times(Gcxt);
    double empHSIC = 0.0;
    double xy = 0.0;
    for (int i = 0; i < m; i++) {
        empHSIC += matrixProductEntry(B, Gcxt, i, i);
    }
    // second block
    TetradMatrix Gytz = Gcyt.times(Gcz);
    TetradMatrix Gczt = Gcz.transpose();
    TetradMatrix Gztx = Gczt.times(Gcx);
    TetradMatrix Gztz = Gczt.times(Gcz);
    TetradMatrix Gztzr = Gztz.copy();
    for (int i = 0; i < kz; i++) {
        Gztzr.set(i, i, Gztz.get(i, i) + this.regularizer);
    }
    // invert matrix
    TetradMatrix ZI = Gztzr.inverse();
    TetradMatrix ZIzt = ZI.times(Gczt);
    TetradMatrix Gzr = Gcz.copy();
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < kz; j++) {
            Gzr.set(i, j, Gcz.get(i, j) * (-1.0 / this.regularizer));
        }
    }
    TetradMatrix Zinv = Gzr.times(ZIzt);
    for (int i = 0; i < m; i++) {
        Zinv.set(i, i, Zinv.get(i, i) + (1.0 / this.regularizer));
    }
    TetradMatrix Gztzinv = Gczt.times(Zinv);
    TetradMatrix Gzinvz = Zinv.times(Gcz);
    TetradMatrix Gztinv2z = Gztzinv.times(Gzinvz);
    TetradMatrix Gytzztzinv2z = Gytz.times(Gztinv2z);
    TetradMatrix Gytzztzinv2zztx = Gytzztzinv2z.times(Gztx);
    TetradMatrix Gyytzztzinv2zztx = Gcy.times(Gytzztzinv2zztx);
    double second = 0.0;
    for (int i = 0; i < m; i++) {
        second += matrixProductEntry(Gyytzztzinv2zztx, Gcxt, i, i);
    }
    empHSIC -= 2 * second;
    // third block
    TetradMatrix Gxtz = Gcxt.times(Gcz);
    TetradMatrix Gxtzztinv2z = Gxtz.times(Gztinv2z);
    TetradMatrix Gyytzztzinv2zztxxtzztinv2z = Gyytzztzinv2zztx.times(Gxtzztinv2z);
    for (int i = 0; i < m; i++) {
        empHSIC += matrixProductEntry(Gyytzztzinv2zztxxtzztinv2z, Gczt, i, i);
    }
    // beta z estimate
    double betaz = 0.0;
    for (int i = 0; i < (m - 1); i++) {
        for (int j = (i + 1); j < m; j++) {
            betaz += Math.pow(matrixProductEntry(Gcz, Gczt, i, j), 2);
            betaz += Math.pow(matrixProductEntry(Gcz, Gczt, j, i), 2);
        }
    }
    empHSIC *= (m / (betaz * (m - 1)));
    return empHSIC;
}
Also used : TetradMatrix(edu.cmu.tetrad.util.TetradMatrix)

Example 7 with TetradMatrix

use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.

the class IndTestTrekSep method determines.

/**
 * If <code>isDeterminismAllowed()</code>, deters to IndTestFisherZD; otherwise throws
 * UnsupportedOperationException.
 */
public boolean determines(List<Node> z, Node x) throws UnsupportedOperationException {
    int[] parents = new int[z.size()];
    for (int j = 0; j < parents.length; j++) {
        parents[j] = covMatrix.getVariables().indexOf(z.get(j));
    }
    int i = covMatrix.getVariables().indexOf(x);
    TetradMatrix matrix2D = covMatrix.getMatrix();
    double variance = matrix2D.get(i, i);
    if (parents.length > 0) {
        // Regress z onto i, yielding regression coefficients b.
        TetradMatrix Czz = matrix2D.getSelection(parents, parents);
        TetradMatrix inverse;
        try {
            inverse = Czz.inverse();
        } catch (Exception e) {
            return true;
        }
        TetradVector Cyz = matrix2D.getColumn(i);
        Cyz = Cyz.viewSelection(parents);
        TetradVector b = inverse.times(Cyz);
        variance -= Cyz.dotProduct(b);
    }
    return variance < 1e-20;
}
Also used : TetradVector(edu.cmu.tetrad.util.TetradVector) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix)

Example 8 with TetradMatrix

use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.

the class IndTestHsic method isIndependent.

/**
 * Determines whether variable x is independent of variable y given a list of conditioning variables z.
 *
 * @param x the one variable being compared.
 * @param y the second variable being compared.
 * @param z the list of conditioning variables.
 * @return true iff x _||_ y | z.
 */
public boolean isIndependent(Node y, Node x, List<Node> z) {
    int m = sampleSize();
    // choose kernels using median distance heuristic
    Kernel xKernel = new KernelGaussian(1);
    Kernel yKernel = new KernelGaussian(1);
    List<Kernel> zKernel = new ArrayList<>();
    yKernel.setDefaultBw(this.dataSet, y);
    xKernel.setDefaultBw(this.dataSet, x);
    if (!z.isEmpty()) {
        for (int i = 0; i < z.size(); i++) {
            Kernel Zi = new KernelGaussian(1);
            Zi.setDefaultBw(this.dataSet, z.get(i));
            zKernel.add(Zi);
        }
    }
    // consruct Gram matricces
    TetradMatrix Ky = null;
    TetradMatrix Kx = null;
    TetradMatrix Kz = null;
    // use incomplete Cholesky to approximate
    if (useIncompleteCholesky > 0) {
        Ky = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(yKernel), this.dataSet, Arrays.asList(y), useIncompleteCholesky);
        Kx = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(xKernel), this.dataSet, Arrays.asList(x), useIncompleteCholesky);
        if (!z.isEmpty()) {
            Kz = KernelUtils.incompleteCholeskyGramMatrix(zKernel, this.dataSet, z, useIncompleteCholesky);
        }
    } else // otherwise compute directly
    {
        Ky = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(yKernel), this.dataSet, Arrays.asList(y));
        Kx = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(xKernel), this.dataSet, Arrays.asList(x));
        if (!z.isEmpty()) {
            Kz = KernelUtils.constructCentralizedGramMatrix(zKernel, this.dataSet, z);
        }
    }
    // get Hilbert-Schmidt dependence measure
    if (z.isEmpty()) {
        if (useIncompleteCholesky > 0) {
            this.hsic = empiricalHSICincompleteCholesky(Ky, Kx, m);
        } else {
            this.hsic = empiricalHSIC(Ky, Kx, m);
        }
    } else {
        if (useIncompleteCholesky > 0) {
            this.hsic = empiricalHSICincompleteCholesky(Ky, Kx, Kz, m);
        } else {
            this.hsic = empiricalHSIC(Ky, Kx, Kz, m);
        }
    }
    // shuffle data for approximate the null distribution
    double[] nullapprox = new double[this.perms];
    int[] zind = null;
    int ycol = this.dataSet.getColumn(y);
    List<List<Integer>> clusterAssign = null;
    if (!z.isEmpty()) {
        // get clusters for z
        KMeans kmeans = KMeans.randomClusters((m / 3));
        zind = new int[z.size()];
        for (int j = 0; j < z.size(); j++) {
            zind[j] = dataSet.getColumn(z.get(j));
        }
        kmeans.cluster(dataSet.subsetColumns(z).getDoubleData());
        clusterAssign = kmeans.getClusters();
    }
    for (int i = 0; i < this.perms; i++) {
        DataSet shuffleData = new ColtDataSet((ColtDataSet) dataSet);
        // shuffle data
        if (z.isEmpty()) {
            List<Integer> indicesList = new ArrayList<>();
            for (int j = 0; j < m; j++) {
                indicesList.add(j);
            }
            Collections.shuffle(indicesList);
            for (int j = 0; j < m; j++) {
                double shuffleVal = dataSet.getDouble(indicesList.get(j), ycol);
                shuffleData.setDouble(j, ycol, shuffleVal);
            }
        } else {
            // shuffle data within clusters
            for (int j = 0; j < clusterAssign.size(); j++) {
                List<Integer> shuffleCluster = new ArrayList<>(clusterAssign.get(j));
                Collections.shuffle(shuffleCluster);
                for (int k = 0; k < shuffleCluster.size(); k++) {
                    // first swap y;
                    double swapVal = dataSet.getDouble(clusterAssign.get(j).get(k), ycol);
                    shuffleData.setDouble(shuffleCluster.get(k), ycol, swapVal);
                    // now swap z
                    for (int zi = 0; zi < z.size(); zi++) {
                        swapVal = dataSet.getDouble(clusterAssign.get(j).get(k), zind[zi]);
                        shuffleData.setDouble(shuffleCluster.get(k), zind[zi], swapVal);
                    }
                }
            }
        }
        // reset bandwidths
        yKernel.setDefaultBw(shuffleData, y);
        for (int j = 0; j < z.size(); j++) {
            zKernel.get(j).setDefaultBw(shuffleData, z.get(j));
        }
        // Gram matrices
        TetradMatrix Kyn = null;
        if (useIncompleteCholesky > 0) {
            Kyn = KernelUtils.incompleteCholeskyGramMatrix(Arrays.asList(yKernel), shuffleData, Arrays.asList(y), useIncompleteCholesky);
        } else {
            Kyn = KernelUtils.constructCentralizedGramMatrix(Arrays.asList(yKernel), shuffleData, Arrays.asList(y));
        }
        TetradMatrix Kzn = null;
        if (!z.isEmpty()) {
            if (useIncompleteCholesky > 0) {
                Kzn = KernelUtils.incompleteCholeskyGramMatrix(zKernel, shuffleData, z, useIncompleteCholesky);
            } else {
                Kzn = KernelUtils.constructCentralizedGramMatrix(zKernel, shuffleData, z);
            }
        }
        // HSIC
        if (z.isEmpty()) {
            if (useIncompleteCholesky > 0) {
                nullapprox[i] = empiricalHSICincompleteCholesky(Kyn, Kx, m);
            } else {
                nullapprox[i] = empiricalHSIC(Kyn, Kx, m);
            }
        } else {
            if (useIncompleteCholesky > 0) {
                nullapprox[i] = empiricalHSICincompleteCholesky(Kyn, Kx, Kz, m);
            } else {
                nullapprox[i] = empiricalHSIC(Kyn, Kx, Kz, m);
            }
        }
    }
    // permutation test to get p-value
    double evalCdf = 0.0;
    for (int i = 0; i < this.perms; i++) {
        if (nullapprox[i] <= this.hsic) {
            evalCdf += 1.0;
        }
    }
    evalCdf /= (double) this.perms;
    this.pValue = 1.0 - evalCdf;
    // reject if pvalue <= alpha
    if (this.pValue <= this.alpha) {
        TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
        return false;
    }
    if (verbose) {
        TetradLogger.getInstance().log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
    }
    return true;
}
Also used : KMeans(edu.cmu.tetrad.cluster.KMeans) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) ArrayList(java.util.ArrayList) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) ArrayList(java.util.ArrayList) List(java.util.List) Kernel(edu.cmu.tetrad.search.kernel.Kernel) KernelGaussian(edu.cmu.tetrad.search.kernel.KernelGaussian)

Example 9 with TetradMatrix

use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.

the class IndTestFisherZPercentIndependent method isIndependent.

public boolean isIndependent(Node x, Node y, List<Node> z) {
    int[] all = new int[z.size() + 2];
    all[0] = variablesMap.get(x);
    all[1] = variablesMap.get(y);
    for (int i = 0; i < z.size(); i++) {
        all[i + 2] = variablesMap.get(z.get(i));
    }
    int sampleSize = data.get(0).rows();
    List<Double> pValues = new ArrayList<>();
    for (int m = 0; m < ncov.size(); m++) {
        TetradMatrix _ncov = ncov.get(m).getSelection(all, all);
        TetradMatrix inv = _ncov.inverse();
        double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1));
        double fisherZ = sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));
        double pValue;
        if (Double.isInfinite(fisherZ)) {
            pValue = 0;
        } else {
            pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ)));
        }
        pValues.add(pValue);
    }
    double _cutoff = alpha;
    if (fdr) {
        _cutoff = StatUtils.fdrCutoff(alpha, pValues, false);
    }
    Collections.sort(pValues);
    int index = (int) round((1.0 - percent) * pValues.size());
    this.pValue = pValues.get(index);
    // if (this.pValue == 0) {
    // System.out.println("Zero pvalue "+ SearchLogUtils.independenceFactMsg(x, y, z, getScore()));
    // }
    boolean independent = this.pValue > _cutoff;
    if (verbose) {
        if (independent) {
            TetradLogger.getInstance().log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
        // System.out.println(SearchLogUtils.independenceFactMsg(x, y, z, getScore()));
        } else {
            TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
        }
    }
    return independent;
}
Also used : TetradMatrix(edu.cmu.tetrad.util.TetradMatrix)

Example 10 with TetradMatrix

use of edu.cmu.tetrad.util.TetradMatrix in project tetrad by cmu-phil.

the class GlassoRunner method execute.

// ===================PUBLIC METHODS OVERRIDING ABSTRACT================//
public void execute() {
    Object dataModel = getDataModel();
    Parameters params = getParams();
    if (dataModel instanceof DataSet) {
        DataSet dataSet = (DataSet) dataModel;
        DoubleMatrix2D cov = new DenseDoubleMatrix2D(dataSet.getCovarianceMatrix().toArray());
        Glasso glasso = new Glasso(cov);
        glasso.setMaxit((int) params.get("maxit", 10000));
        glasso.setIa(params.getBoolean("ia", false));
        glasso.setIs(params.getBoolean("is", false));
        glasso.setItr(params.getBoolean("itr", false));
        glasso.setIpen(params.getBoolean("ipen", false));
        glasso.setThr(params.getDouble("thr", 1e-4));
        glasso.setRhoAllEqual(1.0);
        Glasso.Result result = glasso.search();
        TetradMatrix wwi = new TetradMatrix(result.getWwi().toArray());
        List<Node> variables = dataSet.getVariables();
        Graph resultGraph = new EdgeListGraph(variables);
        for (int i = 0; i < variables.size(); i++) {
            for (int j = i + 1; j < variables.size(); j++) {
                if (wwi.get(i, j) != 0.0 && wwi.get(i, j) != 0.0) {
                    resultGraph.addUndirectedEdge(variables.get(i), variables.get(j));
                }
            }
        }
        setResultGraph(resultGraph);
    }
}
Also used : Parameters(edu.cmu.tetrad.util.Parameters) DataSet(edu.cmu.tetrad.data.DataSet) Node(edu.cmu.tetrad.graph.Node) EdgeListGraph(edu.cmu.tetrad.graph.EdgeListGraph) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix) EdgeListGraph(edu.cmu.tetrad.graph.EdgeListGraph) Graph(edu.cmu.tetrad.graph.Graph) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D)

Aggregations

TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)161 TetradVector (edu.cmu.tetrad.util.TetradVector)46 ArrayList (java.util.ArrayList)43 Node (edu.cmu.tetrad.graph.Node)41 List (java.util.List)12 CovarianceMatrix (edu.cmu.tetrad.data.CovarianceMatrix)10 DepthChoiceGenerator (edu.cmu.tetrad.util.DepthChoiceGenerator)9 SingularMatrixException (org.apache.commons.math3.linear.SingularMatrixException)9 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)8 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)8 Test (org.junit.Test)8 Regression (edu.cmu.tetrad.regression.Regression)7 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)7 SemIm (edu.cmu.tetrad.sem.SemIm)7 Graph (edu.cmu.tetrad.graph.Graph)6 SemPm (edu.cmu.tetrad.sem.SemPm)6 Vector (java.util.Vector)6 DoubleArrayList (cern.colt.list.DoubleArrayList)5 DataSet (edu.cmu.tetrad.data.DataSet)5 ICovarianceMatrix (edu.cmu.tetrad.data.ICovarianceMatrix)5