Search in sources :

Example 71 with Key

use of water.Key in project h2o-2 by h2oai.

the class FrameCookbook method frame_001.

/**
 * Cookbook recipe: parse a file into a Frame, then log its column count and column names.
 */
@Test
public void frame_001() {
    //-----------------------------------------------------------
    // Recipe setup.
    //-----------------------------------------------------------
    // Location of the input on the cluster filesystem.
    // On a multi-node H2O cluster this file has to be visible from every H2O node.
    final String inputPath = "../smalldata/airlines/allyears2k_headers.zip";
    // Key under which the parsed result is stored in the H2O DKV (Distributed Key/Value store).
    final Key parsedKey = Key.make("allyears2k_headers.hex");
    //-----------------------------------------------------------
    // Recipe body.
    //-----------------------------------------------------------
    final Key rawFileKey = NFSFileVec.make(new File(inputPath));
    Frame parsed;
    try {
        parsed = ParseDataset2.parse(parsedKey, new Key[] { rawFileKey });
    } finally {
        // The raw-file key is removed whether or not the parse succeeded.
        UKV.remove(rawFileKey);
    }
    // At this point 'parsed' is a valid frame; report a few facts about it.
    Log.info("======================================================================");
    Log.info("Number of columns: " + parsed.numCols());
    final String[] names = parsed.names();
    Log.info("Column names:");
    for (int i = 0; i < names.length; i++) {
        Log.info("    " + names[i]);
    }
    //-----------------------------------------------------------
    // Recipe clean up.
    // The unit test framework will fail a test if it leaks keys.
    //-----------------------------------------------------------
    // To inspect the cluster from a Web Browser (menu: Data->View All),
    // enable the two calls below so the test pauses here.
    //
    // logThisH2OInstanceWebBrowserAddress();
    // sleepForever();
    //
    // UKV (User-visible Key/Value store) is an abstraction over DKV.
    // Removing through UKV also removes the sub-objects referenced by the main
    // Frame object created above; a plain DKV.remove() here would make the test
    // fail with leaked keys.
    parsed.delete();
    UKV.remove(parsedKey);
}
Also used : Frame(water.fvec.Frame) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 72 with Key

use of water.Key in project h2o-2 by h2oai.

the class VecChunkDemo method frame_001.

/**
 * Parses the iris data set, fetches the resulting frame back from the DKV and logs,
 * for every column vector: its summary, length, group, NA count, domain, cardinality,
 * the labels of an enum column, the value at row 50 and chunk information.
 * The parsed frame is deleted at the end so the test does not leave it behind.
 */
@Test
public void frame_001() {
    String fileName = "../smalldata/iris/iris.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        // Look the frame up again by key to demonstrate reading it from the DKV.
        Value v = DKV.get(okey);
        Frame f = v.get();
        Log.info("frame              : " + f);
        int len = f.numCols();
        for (int i = 0; i < len; i++) {
            Log.info("vector                        :" + i);
            // looping through the vectors of a frame and printing specifics
            Vec vv = f.vec(i);
            Log.info("vector     summary                :" + vv);
            Log.info("vector     length                 :" + vv.length());
            Log.info("vector     group                  :" + vv.group());
            Log.info("vector     na count               :" + vv.naCnt());
            // null if not enum; Arrays.toString prints the labels (or "null")
            // instead of the array's identity hash.
            Log.info("vector     domain null if not enum:" + java.util.Arrays.toString(vv.domain()));
            // -1 is treated as "no labels to list" by the check below.
            int cardinality = vv.cardinality();
            Log.info("vector     cardinality            :" + cardinality);
            if (cardinality != -1) {
                for (int j = 0; j < cardinality; j++) {
                    Log.info("labels                    :" + vv.domain(j));
                }
            }
            // gives the element at that row; count starts from 0, so row 50 is at(50).
            Log.info("vector value at row 50            :" + vv.at(50));
            int chunk_count = vv.nChunks();
            Log.info("chunk     count                   :" + chunk_count);
            Chunk c = vv.chunkForRow(100);
            Log.info("chunk     for row 100             :" + c);
        }
    } finally {
        // Delete the parsed frame so the test does not leak it.
        fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) Value(water.Value) Chunk(water.fvec.Chunk) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 73 with Key

use of water.Key in project h2o-3 by h2oai.

the class GBMGridTest method testCarsGrid.

/**
 * Runs a GBM grid search on the cars data set over a hyper space that mixes legal and
 * illegal learn rates, then checks that the grid size, the hyper-parameter names, the
 * parameters of the built models and the parameters of the failed models all match
 * the requested hyper space.
 */
@Test
public void testCarsGrid() {
    Grid<GBMModel.GBMParameters> gridResult = null;
    Frame cars = null;
    Vec numericCylinders = null;
    try {
        cars = parse_test_file("smalldata/junit/cars.csv");
        // Drop the unique id column
        cars.remove("name").remove();
        // Move the response to the last column, as a categorical
        numericCylinders = cars.remove("cylinders");
        cars.add("cylinders", numericCylinders.toCategoricalVec());
        DKV.put(cars);

        // Hyper-parameter search space: learn rates are the legal values plus one illegal value
        final Double[] validLearnRates = new Double[] { 0.01, 0.1, 0.3 };
        final Double[] invalidLearnRates = new Double[] { -1.0 };
        HashMap<String, Object[]> hyperSpace = new HashMap<String, Object[]>() {

            {
                put("_ntrees", new Integer[] { 1, 2 });
                put("_distribution", new DistributionFamily[] { DistributionFamily.multinomial });
                put("_max_depth", new Integer[] { 1, 2, 5 });
                put("_learn_rate", ArrayUtils.join(validLearnRates, invalidLearnRates));
            }
        };

        // Sorted names of the hyper parameters in use, and the size of their cross product
        String[] expectedNames = hyperSpace.keySet().toArray(new String[hyperSpace.size()]);
        Arrays.sort(expectedNames);
        int expectedGridSize = ArrayUtils.crossProductSize(hyperSpace);

        // Launch the grid search and wait for the resulting grid
        GBMModel.GBMParameters baseParams = new GBMModel.GBMParameters();
        baseParams._train = cars._key;
        baseParams._response_column = "cylinders";
        Job<Grid> searchJob = GridSearch.startGridSearch(null, baseParams, hyperSpace);
        gridResult = (Grid<GBMModel.GBMParameters>) searchJob.get();

        // Models plus failures must add up to the size of the hyper space
        Assert.assertEquals("Size of grid (models+failures) should match to size of hyper space",
                expectedGridSize, gridResult.getModelCount() + gridResult.getFailureCount());

        // The grid must report the same hyper-parameter names
        String[] actualNames = gridResult.getHyperNames();
        Arrays.sort(actualNames);
        Assert.assertArrayEquals("Hyper parameters names should match!", expectedNames, actualNames);

        // Collect the hyper-parameter values actually used by the built models
        Key<Model>[] modelKeys = gridResult.getModelKeys();
        Map<String, Set<Object>> seenInModels = GridTestUtils.initMap(expectedNames);
        for (int i = 0; i < modelKeys.length; i++) {
            GBMModel gbm = (GBMModel) modelKeys[i].get();
            System.out.println(
                    gbm._output._scored_train[gbm._output._ntrees]._mse
                    + " "
                    + Arrays.deepToString(ArrayUtils.zip(gridResult.getHyperNames(), gridResult.getHyperValues(gbm._parms))));
            GridTestUtils.extractParams(seenInModels, gbm._parms, expectedNames);
        }

        // Built models are compared against the hyper space with only the legal learn rates
        hyperSpace.put("_learn_rate", validLearnRates);
        GridTestUtils.assertParamsEqual("Grid models parameters have to cover specified hyper space", hyperSpace, seenInModels);

        // Failed models are compared against the hyper space with only the illegal learn rate
        Map<String, Set<Object>> seenInFailures = GridTestUtils.initMap(expectedNames);
        for (Model.Parameters failed : gridResult.getFailedParameters()) {
            GridTestUtils.extractParams(seenInFailures, failed, expectedNames);
        }
        hyperSpace.put("_learn_rate", invalidLearnRates);
        GridTestUtils.assertParamsEqual("Failed model parameters have to correspond to specified hyper space", hyperSpace, seenInFailures);
    } finally {
        // Remove whatever was created before the test ended
        if (numericCylinders != null) {
            numericCylinders.remove();
        }
        if (cars != null) {
            cars.remove();
        }
        if (gridResult != null) {
            gridResult.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Set(java.util.Set) HashMap(java.util.HashMap) Grid(hex.grid.Grid) Vec(water.fvec.Vec) Model(hex.Model) Key(water.Key) Test(org.junit.Test)

Example 74 with Key

use of water.Key in project h2o-3 by h2oai.

the class AstRecAssignTestUtils method seqStrVec.

/**
 * Builds a string Vec from a single chunk containing the consecutive integers
 * "0", "1", "2", ... rendered as strings, emitted in runs.
 * Before each run a null string is added if at least one value has already been emitted.
 *
 * @param runs how many consecutive values to emit for each run
 * @return the Vec produced by closing the appendable vector
 */
static Vec seqStrVec(int... runs) {
    Key vecKey = Vec.VectorGroup.VG_LEN1.addVec();
    Futures pending = new Futures();
    AppendableVec appendable = new AppendableVec(vecKey, Vec.T_STR);
    NewChunk rows = new NewChunk(appendable, 0);
    int next = 0;
    for (int idx = 0; idx < runs.length; idx++) {
        // A null entry is written ahead of the run once any value has been written.
        if (next > 0) {
            rows.addStr(null);
        }
        for (int left = runs[idx]; left > 0; left--) {
            rows.addStr(String.valueOf(next++));
        }
    }
    rows.close(0, pending);
    Vec result = appendable.layout_and_close(pending);
    // Wait for the pending work registered on the Futures before returning the Vec.
    pending.blockForPending();
    return result;
}
Also used : Futures(water.Futures) Vec(water.fvec.Vec) AppendableVec(water.fvec.AppendableVec) Key(water.Key) NewChunk(water.fvec.NewChunk)

Example 75 with Key

use of water.Key in project h2o-2 by h2oai.

the class DdplyTest method testDdplyBig.

// This test is intended to use a file large enough to stripe across multiple
// nodes with multiple groups, to test that all generated groups are both
// built and executed distributed.
// Only the small cars data set is exercised at the moment; the large-file
// part is kept below as commented-out code.
@Test
public void testDdplyBig() {
    Key carsKey = Key.make("cars.hex");
    Key orangeKey = Key.make("orange.hex");
    try {
        Frame cars = parseFrame(carsKey, "smalldata/cars.csv");
        // Simple per-group row count
        checkStr("ddply(cars.hex,c(3),nrow)");
        // More complex multi-return
        checkStr("ddply(cars.hex,c(3),function(x) {cbind(mean(x[,2]),mean(x[,3]))})");
        // A big enough file to distribute across multiple nodes.
        // Trimmed down to run in reasonable time.
        //Frame fr1 = parseFrame(k1,"smalldata/unbalanced/orange_small_train.data.zip");
        //checkStr("ddply(orange.hex,c(7),nrow)");
        //checkStr("ddply(orange.hex,c(206,207),function(x){ cbind( mean(x$Var6), sum(x$Var6+x$Var7) ) })");
        // A more complex ddply that works as of 3/1/2014 but is slow for a junit
        //checkStr("ddply(orange.hex,c(206,207),function(x){"+
        //         "max6 = max(x$Var6);"+
        //         "min6 = min(x$Var6);"+
        //         "len  = max6-min6+1;"+
        //         "tot  = sum(x$Var7);"+
        //         "avg  = tot/len"+
        //         "})");
    } finally {
        // Delete the cars.hex frame key
        Lockable.delete(carsKey);
        // Delete the orange.hex frame key
        Lockable.delete(orangeKey);
    }
}
Also used : Frame(water.fvec.Frame) Key(water.Key) Test(org.junit.Test)

Aggregations

Key (water.Key): 94, Frame (water.fvec.Frame): 56, Test (org.junit.Test): 42, Vec (water.fvec.Vec): 21, File (java.io.File): 18, NFSFileVec (water.fvec.NFSFileVec): 17, Futures (water.Futures): 10, Random (java.util.Random): 7, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 6, ValFrame (water.rapids.vals.ValFrame): 6, DateTimeZone (org.joda.time.DateTimeZone): 5, Model (hex.Model): 4, SplitFrame (hex.SplitFrame): 4, DeepLearning (hex.deeplearning.DeepLearning): 4, DeepLearningModel (hex.deeplearning.DeepLearningModel): 4, AppendableVec (water.fvec.AppendableVec): 4, NewChunk (water.fvec.NewChunk): 4, Grid (hex.grid.Grid): 3, IOException (java.io.IOException): 3, ArrayList (java.util.ArrayList): 3