Search in sources :

Example 51 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GLRMTest method testSubset.

/**
 * Uploads and parses the ACS census archive, trains a single-iteration GLRM
 * (k = 10, standardized, quadratic loss) on it, and then evaluates a Rapids
 * expression that takes columns [0 1] of the model's representation frame and
 * selects rows by comparing the categorical first column against six literal
 * values. Analogous to pyunit_subset_glrm.py.
 *
 * @throws InterruptedException declared for callers; failures inside the body
 *         are rethrown wrapped in a RuntimeException
 * @throws ExecutionException declared for callers; see above
 */
@Ignore
@Test
public void testSubset() throws InterruptedException, ExecutionException {
    GLRMModel model = null;
    Frame train;
    // try-with-resources guarantees the upload stream is closed, even when readPut throws.
    try (InputStream is = new FileInputStream(FileUtils.getFile("bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip"))) {
        UploadFileVec.ReadPutStats stats = new UploadFileVec.ReadPutStats();
        UploadFileVec.readPut("train", is, stats);
    } catch (Exception e) {
        // Best-effort upload, kept from the original: the failure is only reported here.
        e.printStackTrace();
    }
    ParseDataset.parse(Key.make("train_parsed"), Key.make("train"));
    train = DKV.getGet("train_parsed");
    try {
        Log.info("num chunks: ", train.anyVec().nChunks());
        // Keep a categorical copy of column 0 in its own frame; the Rapids filter below refers to it by key.
        Vec[] acs_zcta_vec = { train.vec(0).toCategoricalVec() };
        Frame acs_zcta_fr = new Frame(Key.<Frame>make("acs_zcta_fr"), new String[] { "name" }, acs_zcta_vec);
        DKV.put(acs_zcta_fr);
        // Column 0 is removed from the training frame before the model is built.
        train.remove(0).remove();
        DKV.put(train);
        GLRMParameters parms = new GLRMParameters();
        parms._train = train._key;
        parms._gamma_x = 0.25;
        parms._gamma_y = 0.5;
        parms._regularization_x = GlrmRegularizer.Quadratic;
        parms._regularization_y = GlrmRegularizer.L1;
        parms._k = 10;
        parms._transform = DataInfo.TransformType.STANDARDIZE;
        parms._max_iterations = 1;
        parms._loss = GlrmLoss.Quadratic;
        try {
            Scope.enter();
            model = new GLRM(parms).trainModel().get();
            String s = "(tmp= py_4 (rows (cols_py " + model._output._representation_key + " [0 1]) (tmp= py_3 (| (| (| (| (| (== (tmp= py_2 " + acs_zcta_fr._key + ") \"10065\") (== py_2 \"11219\")) (== py_2 \"66753\")) (== py_2 \"84104\")) (== py_2 \"94086\")) (== py_2 \"95014\")))))";
            // Only successful evaluation matters; the result value is not inspected.
            Rapids.exec(s);
        } catch (Throwable t) {
            t.printStackTrace();
            throw new RuntimeException(t);
        } finally {
            acs_zcta_fr.delete();
            Scope.exit();
        }
    } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
    } finally {
        if (train != null)
            train.delete();
        if (model != null)
            model.delete();
    }
}
Also used : Val(water.rapids.Val) Frame(water.fvec.Frame) UploadFileVec(water.fvec.UploadFileVec) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) ExecutionException(java.util.concurrent.ExecutionException) Vec(water.fvec.Vec) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 52 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class PartialDependenceTest method prostateBinary.

/**
 * Trains a GBM on the prostate data with CAPSULE as the response, then runs a
 * 10-bin partial dependence computation against that model and frame and logs
 * every resulting table.
 */
@Test
public void prostateBinary() {
    Frame prostate = null;
    GBMModel gbm = null;
    PartialDependence pdp = null;
    try {
        prostate = parse_test_file("smalldata/prostate/prostate.csv");

        // Replace each listed column with its categorical conversion and drop the original vec.
        final String[] categoricalCols = { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" };
        for (int c = 0; c < categoricalCols.length; c++) {
            final String colName = categoricalCols[c];
            final Vec original = prostate.remove(colName);
            final Vec converted = original.toCategoricalVec();
            prostate.add(colName, converted);
            original.remove();
        }
        DKV.put(prostate);

        // Model: ID is ignored, CAPSULE is the response.
        GBMModel.GBMParameters gbmParams = new GBMModel.GBMParameters();
        gbmParams._response_column = "CAPSULE";
        gbmParams._ignored_columns = new String[] { "ID" };
        gbmParams._train = prostate._key;
        GBM builder = new GBM(gbmParams);
        gbm = builder.trainModel().get();

        // Partial dependence; _cols is not set here.
        pdp = new PartialDependence(Key.<PartialDependence>make());
        pdp._nbins = 10;
        pdp._model_id = (Key) gbm._key;
        pdp._frame_id = prostate._key;
        pdp.execImpl().get();
        for (TwoDimTable table : pdp._partial_dependence_data) {
            Log.info(table);
        }
    } finally {
        // Clean up whatever was created, in the order frame, model, partial dependence.
        if (prostate != null) {
            prostate.remove();
        }
        if (gbm != null) {
            gbm.remove();
        }
        if (pdp != null) {
            pdp.remove();
        }
    }
}
Also used : PartialDependence(hex.PartialDependence) Frame(water.fvec.Frame) GBMModel(hex.tree.gbm.GBMModel) GBM(hex.tree.gbm.GBM) TwoDimTable(water.util.TwoDimTable) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 53 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class PartialDependenceTest method prostateRegression.

/**
 * Trains a GBM on the prostate data with AGE as the response, then runs a
 * 10-bin partial dependence computation against that model and frame and logs
 * every resulting table.
 */
@Test
public void prostateRegression() {
    Frame data = null;
    GBMModel trained = null;
    PartialDependence pd = null;
    try {
        data = parse_test_file("smalldata/prostate/prostate.csv");

        // Each of these columns is swapped for its categorical conversion; the source vec is removed afterwards.
        final String[] names = { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" };
        int idx = 0;
        while (idx < names.length) {
            final String name = names[idx];
            final Vec source = data.remove(name);
            data.add(name, source.toCategoricalVec());
            source.remove();
            idx++;
        }
        DKV.put(data);

        // Model: ID is ignored, AGE is the response.
        final GBMModel.GBMParameters settings = new GBMModel.GBMParameters();
        settings._train = data._key;
        settings._response_column = "AGE";
        settings._ignored_columns = new String[] { "ID" };
        trained = new GBM(settings).trainModel().get();

        // Partial dependence over the trained model and the same frame.
        pd = new PartialDependence(Key.<PartialDependence>make());
        pd._frame_id = data._key;
        pd._model_id = (Key) trained._key;
        pd._nbins = 10;
        pd.execImpl().get();
        for (TwoDimTable result : pd._partial_dependence_data) {
            Log.info(result);
        }
    } finally {
        // Remove frame, model and partial dependence object if they were created.
        if (data != null) {
            data.remove();
        }
        if (trained != null) {
            trained.remove();
        }
        if (pd != null) {
            pd.remove();
        }
    }
}
Also used : PartialDependence(hex.PartialDependence) Frame(water.fvec.Frame) GBMModel(hex.tree.gbm.GBMModel) GBM(hex.tree.gbm.GBM) TwoDimTable(water.util.TwoDimTable) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 54 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class SVDTest method testCatOnlyPUBDEV3988.

/*
 * POJO scoring must work when the model is built from categorical variables only
 * (no numeric columns): train a randomized SVD, score the training frame, and
 * compare the generated Java scoring against those predictions within TOLERANCE.
 */
@Test
public void testCatOnlyPUBDEV3988() throws InterruptedException, ExecutionException {
    Frame train = null;
    Frame score = null;
    SVDModel model = null;
    try {
        train = parse_test_file(Key.make("prostate_cat.hex"), "smalldata/prostate/prostate_cat.csv");

        // Walk the columns from the last one down to index 1 and drop every non-categorical column.
        // NOTE(review): column 0 is never examined by this loop — confirm that is intended.
        int col = train.numCols() - 1;
        while (col > 0) {
            final Vec candidate = train.vec(col);
            final boolean isCategorical = candidate.get_type() == Vec.T_CAT;
            if (!isCategorical) {
                train.remove(col);
                Vec.remove(candidate._key);
            }
            col--;
        }
        DKV.put(train);

        final SVDParameters svdParams = new SVDParameters();
        svdParams._train = train._key;
        svdParams._svd_method = SVDParameters.Method.Randomized;
        svdParams._nv = 2;
        svdParams._max_iterations = 20;
        svdParams._impute_missing = true;
        svdParams._only_v = false;
        svdParams._keep_u = true;
        svdParams._save_v_frame = false;

        final SVD builder = new SVD(svdParams);
        model = builder.trainModel().get();
        score = model.score(train);

        // Build a POJO and check its results against the original SVD predictions.
        final boolean pojoAgrees = model.testJavaScoring(train, score, TOLERANCE);
        Assert.assertTrue(pojoAgrees);
    } finally {
        if (train != null) {
            train.delete();
        }
        if (score != null) {
            score.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) Vec(water.fvec.Vec) SVDParameters(hex.svd.SVDModel.SVDParameters) Test(org.junit.Test)

Example 55 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class DRFGridTest method testCollisionOfDRFParamsChecksum.

/**
 * Checks that two DRF parameter sets sharing the same training frame, response
 * column, seed and sample rate, but differing in mtries, max_depth and ntrees,
 * produce different checksums.
 */
@Test
public void testCollisionOfDRFParamsChecksum() {
    // Values shared by both parameter sets, held once so the two sets cannot drift apart.
    final String response = "economy (mpg)";
    final long seed = -4522296119273841674L;
    final float sampleRate = 0.6499997f;
    Frame fr = null;
    try {
        fr = parse_test_file("smalldata/junit/cars.csv");
        fr.remove("name").remove();
        Vec old = fr.remove(response);
        // response to last column
        fr.add(response, old);
        DKV.put(fr);

        DRFModel.DRFParameters params1 = new DRFModel.DRFParameters();
        params1._train = fr._key;
        params1._response_column = response;
        params1._seed = seed;
        params1._mtries = 3;
        params1._max_depth = 15;
        params1._ntrees = 9;
        params1._sample_rate = sampleRate;

        DRFModel.DRFParameters params2 = new DRFModel.DRFParameters();
        params2._train = fr._key;
        params2._response_column = response;
        params2._seed = seed;
        params2._mtries = 1;
        params2._max_depth = 1;
        params2._ntrees = 13;
        params2._sample_rate = sampleRate;

        long csum1 = params1.checksum();
        long csum2 = params2.checksum();
        Assert.assertNotEquals("Checksums should be different", csum1, csum2);
    } finally {
        if (fr != null) {
            fr.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Aggregations

Vec (water.fvec.Vec)280 Frame (water.fvec.Frame)213 Test (org.junit.Test)82 NFSFileVec (water.fvec.NFSFileVec)48 ValFrame (water.rapids.vals.ValFrame)47 Chunk (water.fvec.Chunk)30 Random (java.util.Random)25 NewChunk (water.fvec.NewChunk)23 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)22 Key (water.Key)21 MRTask (water.MRTask)17 Val (water.rapids.Val)14 File (java.io.File)11 ArrayList (java.util.ArrayList)11 Futures (water.Futures)11 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)11 ValNum (water.rapids.vals.ValNum)11 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)10 BufferedString (water.parser.BufferedString)10 AppendableVec (water.fvec.AppendableVec)9