Search in sources :

Example 1 with SVDParameters

use of hex.svd.SVDModel.SVDParameters in project h2o-3 by h2oai.

the class SVDTest method testArrestsMissing.

// TODO: This fails GramSVD since JAMA can't handle NaNs in input matrix
/**
 * Trains a Power-method SVD on the USArrests data for a series of missing-value fractions
 * and logs the singular values obtained for each fraction.
 *
 * @throws InterruptedException if waiting on the missing-value inserter or the model build is interrupted
 * @throws ExecutionException if the missing-value inserter or the model build fails
 */
@Test
@Ignore
public void testArrestsMissing() throws InterruptedException, ExecutionException {
    long seed = 1234;
    for (double missing_fraction : new double[] { 0, 0.1, 0.25, 0.5, 0.75, 0.9 }) {
        // Fresh handles per iteration: a failure early in this pass must not make the
        // finally block delete the (already deleted) objects of the previous pass again.
        SVDModel model = null;
        Frame train = null;
        Scope.enter();
        try {
            train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
            // Add missing values to the training data
            if (missing_fraction > 0) {
                Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
                // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
                DKV.put(frtmp._key, frtmp);
                try {
                    FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
                    // MissingInserter is non-blocking, must block here explicitly
                    j.execImpl().get();
                } finally {
                    // Delete the frame header (not the data), even if the inserter failed
                    DKV.remove(frtmp._key);
                }
            }
            SVDParameters parms = new SVDParameters();
            parms._train = train._key;
            parms._nv = train.numCols();
            parms._transform = DataInfo.TransformType.STANDARDIZE;
            parms._svd_method = SVDParameters.Method.Power;
            parms._max_iterations = 1000;
            parms._seed = seed;
            parms._save_v_frame = false;
            model = new SVD(parms).trainModel().get();
            Log.info(100 * missing_fraction + "% missing values: Singular values = " + Arrays.toString(model._output._d));
        } finally {
            if (train != null) {
                train.delete();
            }
            if (model != null) {
                model.delete();
            }
            // Always balance the Scope.enter() above, including when the body threw
            Scope.exit();
        }
    }
}
Also used : FrameUtils(water.util.FrameUtils) Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) SVDParameters(hex.svd.SVDModel.SVDParameters) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 2 with SVDParameters

use of hex.svd.SVDModel.SVDParameters in project h2o-3 by h2oai.

the class SVDTest method testProstateMissingProb.

/**
 * Builds a Randomized SVD with missing-value imputation on the prostate data after a quarter
 * of its entries have been replaced by missing values, then scores the training frame.
 * The test has no explicit assertions; it passes when training and scoring complete without throwing.
 *
 * @throws InterruptedException if waiting on the missing-value inserter or the model build is interrupted
 * @throws ExecutionException if the missing-value inserter or the model build fails
 */
@Test
public void testProstateMissingProb() throws InterruptedException, ExecutionException {
    long seed = 1234;
    Frame train = null, score = null;
    SVDModel model = null;
    try {
        train = parse_test_file(Key.make("prostate.hex"), "smalldata/prostate/prostate_cat.csv");
        // Add missing values to the training data
        Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
        // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
        DKV.put(frtmp._key, frtmp);
        try {
            FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, 0.25);
            // MissingInserter is non-blocking, must block here explicitly
            j.execImpl().get();
        } finally {
            // Delete the frame header (not the data), even if the inserter failed
            DKV.remove(frtmp._key);
        }
        SVDParameters parms = new SVDParameters();
        parms._train = train._key;
        parms._nv = 8;
        parms._only_v = false;
        parms._keep_u = true;
        parms._svd_method = SVDParameters.Method.Randomized;
        parms._impute_missing = true;
        parms._max_iterations = 20;
        parms._save_v_frame = false;
        model = new SVD(parms).trainModel().get();
        score = model.score(train);
    } finally {
        if (train != null) {
            train.delete();
        }
        if (score != null) {
            score.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : FrameUtils(water.util.FrameUtils) Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) SVDParameters(hex.svd.SVDModel.SVDParameters) Test(org.junit.Test)

Example 3 with SVDParameters

use of hex.svd.SVDModel.SVDParameters in project h2o-3 by h2oai.

the class SVDTest method testCatOnlyPUBDEV3988.

/**
 * Make sure POJO works if the model is only built from categorical variables (no numeric columns).
 * Every non-categorical column is dropped from the parsed frame before a Randomized SVD is
 * trained on it; the generated POJO's scores are then compared with the in-cluster scores.
 *
 * @throws InterruptedException if waiting on the model build is interrupted
 * @throws ExecutionException if the model build fails
 */
@Test
public void testCatOnlyPUBDEV3988() throws InterruptedException, ExecutionException {
    SVDModel model = null;
    Frame train = null, score = null;
    try {
        train = parse_test_file(Key.make("prostate_cat.hex"), "smalldata/prostate/prostate_cat.csv");
        // Walk backwards so removals do not shift the indices still to be visited.
        // The bound is i >= 0 so that column 0 is checked as well; otherwise a
        // non-categorical first column would remain in the training frame.
        for (int i = train.numCols() - 1; i >= 0; i--) {
            Vec v = train.vec(i);
            if (v.get_type() != Vec.T_CAT) {
                train.remove(i);
                Vec.remove(v._key);
            }
        }
        // Re-publish the frame so the stored copy reflects the removed columns
        DKV.put(train);
        SVDParameters parms = new SVDParameters();
        parms._train = train._key;
        parms._nv = 2;
        parms._only_v = false;
        parms._keep_u = true;
        parms._svd_method = SVDParameters.Method.Randomized;
        parms._impute_missing = true;
        parms._max_iterations = 20;
        parms._save_v_frame = false;
        model = new SVD(parms).trainModel().get();
        score = model.score(train);
        // Build a POJO, check results with original SVD
        Assert.assertTrue(model.testJavaScoring(train, score, TOLERANCE));
    } finally {
        if (train != null) {
            train.delete();
        }
        if (score != null) {
            score.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) Vec(water.fvec.Vec) SVDParameters(hex.svd.SVDModel.SVDParameters) Test(org.junit.Test)

Aggregations

SplitFrame (hex.SplitFrame): 3, SVDParameters (hex.svd.SVDModel.SVDParameters): 3, Test (org.junit.Test): 3, Frame (water.fvec.Frame): 3, FrameUtils (water.util.FrameUtils): 2, Ignore (org.junit.Ignore): 1, Vec (water.fvec.Vec): 1