Search in sources :

Example 1 with GramTask

use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.

In class GramMatrixTest, method testProstate.

/**
 * Parses the prostate test file, runs a {@code GramTask} over the adapted frame and checks
 * that {@code gt._nobs * gram[i][j]} matches {@code exp_result[i][j]} within 1e-5.
 * A second task is then run with the third constructor argument set to {@code false}
 * (presumably the intercept flag -- TODO confirm) and the leading
 * {@code exp_result.length - 1} rows/columns are checked.
 */
@Test
public void testProstate() {
    File f2 = find_test_file("smalldata/glm_test/prostate_cat_replaced.csv");
    Key ikey2 = NFSFileVec.make(f2);
    Key okey2 = Key.make("glm_model2");
    Frame fr2 = null;
    try {
        fr2 = ParseDataset2.parse(okey2, new Key[] { ikey2 });
        DataInfo dinfo = new DataInfo(fr2, 0, true, false, DataInfo.TransformType.NONE);
        GramTask gt = new GramTask(null, dinfo, true, false);
        gt.doAll(dinfo._adaptedFrame);
        double[][] res = gt._gram.getXX();
        System.out.println(Utils.pprint(res));
        for (int i = 0; i < exp_result.length; ++i) {
            for (int j = 0; j < exp_result.length; ++j) {
                assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
            }
        }
        gt = new GramTask(null, dinfo, false, false);
        gt.doAll(dinfo._adaptedFrame);
        // NOTE(review): 'res' still refers to the matrix obtained from the first task; only
        // gt._nobs comes from the second run. If the intent is to validate the second gram,
        // 'res' should be re-read from gt._gram here -- confirm the expected values first.
        for (int i = 0; i < exp_result.length - 1; ++i) {
            for (int j = 0; j < exp_result.length - 1; ++j) {
                assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
            }
        }
    } finally {
        // fr2 is still null when parsing itself failed; guard so that the original
        // exception is not replaced by a NullPointerException.
        if (fr2 != null) {
            fr2.delete();
        }
    }
}
Also used : DataInfo(hex.FrameTask.DataInfo) GramTask(hex.gram.Gram.GramTask) File(java.io.File) Test(org.junit.Test)

Example 2 with GramTask

use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.

In class PCA, method execImpl.

/**
 * Runs the PCA job: drops unusable columns from the selected frame, computes the gram
 * via {@code GramTask}, builds the model from it, and finally copies the job state into
 * the stored model's parameters.
 *
 * @throws IllegalArgumentException if fewer than two columns remain after filtering
 */
@Override
protected void execImpl() {
    Frame fr = selectFrame(source);
    Vec[] columns = fr.vecs();

    // Collect the indices of columns that are constant (min == max) or whose NA count
    // exceeds 20% of the column length. Columns with a non-null domain() are not
    // filtered here.
    ArrayList<Integer> dropped = new ArrayList<Integer>();
    for (int c = 0; c < columns.length; c++) {
        Vec col = columns[c];
        if (col.min() == col.max() || col.naCnt() > col.length() * 0.2) {
            dropped.add(c);
        }
    }

    if (!dropped.isEmpty()) {
        // Unbox into the int[] form expected by Frame.remove.
        int[] idx = new int[dropped.size()];
        int pos = 0;
        for (Integer c : dropped) {
            idx[pos++] = c;
        }
        fr.remove(idx);
    }

    if (fr.numCols() < 2) {
        throw new IllegalArgumentException("Need more than one column to run PCA");
    }

    final DataInfo.TransformType transform;
    if (standardize) {
        transform = DataInfo.TransformType.STANDARDIZE;
    } else {
        transform = DataInfo.TransformType.NONE;
    }
    DataInfo dinfo = new DataInfo(fr, 0, false, false, transform);

    GramTask gramTask = new GramTask(self(), dinfo, false, false).doAll(dinfo._adaptedFrame);
    PCAModel model = buildModel(dinfo, gramTask);
    model.delete_and_lock(self());
    model.unlock(self());

    // Close/remove job
    remove();

    // Read the job state after removal and write it into the stored model's parameters.
    final JobState state = UKV.<Job>get(self()).state;
    new TAtomic<PCAModel>() {

        @Override
        public PCAModel atomic(PCAModel stored) {
            if (stored == null) {
                return null;
            }
            stored.get_params().state = state;
            return stored;
        }
    }.invoke(dest());
}
Also used : DataInfo(hex.FrameTask.DataInfo) Frame(water.fvec.Frame) ArrayList(java.util.ArrayList) Vec(water.fvec.Vec) GramTask(hex.gram.Gram.GramTask)

Example 3 with GramTask

use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.

In class MatrixTest, method testMultiplication.

// Simple, small and dense: compare t(X) %*% X (computed with DMatrix) against the gram
// computed by GramTask. Every frame created along the way is released in a finally block,
// so a failing assertion or a failing intermediate step does not also leave keys behind.
@Test
public void testMultiplication() {
    Key parsed = Key.make("prostate_parsed");
    Futures fs = new Futures();
    Frame raw = getFrameForFile(parsed, "smalldata/glm_test/prostate_cat_replaced.csv");
    Frame fr = null;
    Frame tr = null;
    Frame z = null;
    try {
        raw.remove("RACE").remove(fs);
        Key k = Key.make("rebalanced");
        H2O.submitTask(new RebalanceDataSet(raw, k, 64)).join();
        raw.delete();
        // Mark as released so the finally block does not delete it a second time.
        raw = null;
        fr = DKV.get(k).get();
        tr = DMatrix.transpose(fr);
        tr.reloadVecs();
        z = DMatrix.mmul(tr, fr);
        DataInfo dinfo = new DataInfo(fr, 0, false, false, DataInfo.TransformType.NONE);
        GramTask gt = new GramTask(null, dinfo, false, false).doAll(dinfo._adaptedFrame);
        // Multiply the gram by _nobs before comparing it with the plain matrix product.
        gt._gram.mul(gt._nobs);
        double[][] gram = gt._gram.getDenseXX();
        for (int i = 0; i < gram.length; ++i) {
            for (int j = 0; j < gram[i].length; ++j) {
                assertEquals("position " + i + ", " + j, gram[i][j], z.vec(j).at(i), 1e-4);
            }
        }
    } finally {
        if (raw != null) {
            raw.delete();
        }
        if (fr != null) {
            fr.delete();
        }
        if (tr != null) {
            for (Vec v : tr.vecs()) {
                v.remove(fs);
            }
        }
        if (z != null) {
            for (Vec v : z.vecs()) {
                v.remove(fs);
            }
        }
        fs.blockForPending();
    }
    // Only meaningful once the test body has succeeded and cleanup has completed.
    checkLeakedKeys();
}
Also used : DataInfo(hex.FrameTask.DataInfo) Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) NFSFileVec(water.fvec.NFSFileVec) GramTask(hex.gram.Gram.GramTask) Test(org.junit.Test)

Aggregations

DataInfo (hex.FrameTask.DataInfo): 3, GramTask (hex.gram.Gram.GramTask): 3, Test (org.junit.Test): 2, Frame (water.fvec.Frame): 2, File (java.io.File): 1, ArrayList (java.util.ArrayList): 1, NFSFileVec (water.fvec.NFSFileVec): 1, RebalanceDataSet (water.fvec.RebalanceDataSet): 1, Vec (water.fvec.Vec): 1