use of water.Key in project h2o-2 by h2oai.
the class FrameCookbook method frame_001.
/**
 * Cookbook recipe: parse a file into a Frame, then log its column count and
 * every column name.
 */
@Test
public void frame_001() {
  //-----------------------------------------------------------
  // Recipe setup.
  //-----------------------------------------------------------

  // Location of the input on the cluster filesystem.
  // On a multi-node H2O cluster this path has to be visible from every H2O node.
  String fileName = "../smalldata/airlines/allyears2k_headers.zip";

  // Key under which the parsed result is stored in the H2O DKV
  // (Distributed Key/Value store).
  Key parsedKey = Key.make("allyears2k_headers.hex");

  //-----------------------------------------------------------
  // Recipe body.
  //-----------------------------------------------------------

  // Key for the raw, not-yet-parsed file.
  Key rawKey = NFSFileVec.make(new File(fileName));

  Frame fr;
  try {
    fr = ParseDataset2.parse(parsedKey, new Key[] { rawKey });
  } finally {
    // The raw-file key is removed whether or not the parse succeeded.
    UKV.remove(rawKey);
  }

  // fr now refers to the parsed frame. Log a few facts about it.
  Log.info("======================================================================");
  Log.info("Number of columns: " + fr.numCols());
  Log.info("Column names:");
  String[] names = fr.names();
  for (int i = 0; i < names.length; i++) {
    Log.info(" " + names[i]);
  }

  //-----------------------------------------------------------
  // Recipe clean up.
  // The unit test framework will fail a test if it leaks keys.
  //-----------------------------------------------------------

  // Add a sleep if you want to poke around using your Web Browser.
  // From the menu, choose Data->View All
  //
  // logThisH2OInstanceWebBrowserAddress();
  // sleepForever();

  // UKV (User-visible Key/Value store) is an abstraction over DKV.
  //
  // Removing through the UKV also removes the sub-objects referenced by the
  // main Frame object created above.
  //
  // A DKV.remove() here instead of UKV.remove() would make the test fail with
  // leaked keys.
  fr.delete();
  UKV.remove(parsedKey);
}
use of water.Key in project h2o-2 by h2oai.
the class VecChunkDemo method frame_001.
/**
 * Demo: parse the iris CSV into a frame, fetch that frame back from the DKV by
 * its key, and log details of every vector (column) it contains: length, group,
 * NA count, domain/cardinality, a sample value and chunk information.
 *
 * The parsed frame is deleted in a finally block so the test does not leave
 * the frame's keys behind.
 */
@Test
public void frame_001() {
  String fileName = "../smalldata/iris/iris.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  // NOTE(review): fkey is not removed explicitly here; confirm whether the
  // parse consumes the raw-file key or whether it needs its own cleanup.
  Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
  try {
    // Look the frame up again through the DKV using the result key.
    Value v = DKV.get(okey);
    Frame f = v.get();
    Log.info("frame : " + f);
    int len = f.numCols();
    for (int i = 0; i < len; i++) {
      Log.info("vector :" + i);
      // looping through the vectors of a frame and printing specifics
      Vec vv = f.vec(i);
      Log.info("vector summary :" + vv);
      Log.info("vector length :" + vv.length());
      Log.info("vector group :" + vv.group());
      Log.info("vector na count :" + vv.naCnt());
      // null if not enum
      Log.info("vector domain null if not enum:" + vv.domain());
      int cardinality = vv.cardinality();
      Log.info("vector cardianlity :" + cardinality);
      // Labels are only listed when cardinality() is not -1.
      if (cardinality != -1) {
        for (int j = 0; j < cardinality; j++) {
          Log.info("labels :" + vv.domain(j));
        }
      }
      // Gives the element at that row; the row count starts from 0, so the
      // index passed to at() has to match the row number named in the message.
      Log.info("vector value at row 50 :" + vv.at(50));
      int chunk_count = vv.nChunks();
      Log.info("chunk count :" + chunk_count);
      Chunk c = vv.chunkForRow(100);
      Log.info("chunk for row 100 :" + c);
    }
  } finally {
    // Delete the parsed frame even if one of the calls above throws.
    fr.delete();
  }
}
use of water.Key in project h2o-3 by h2oai.
the class GBMGridTest method testCarsGrid.
/**
 * Runs a GBM grid search over the cars data set and checks that:
 * - models plus failures together cover the whole hyper space,
 * - the grid reports the same hyper parameter names that were supplied,
 * - the built models cover exactly the legal hyper parameter values, and
 * - the failed parameter sets correspond to the illegal learn rate.
 *
 * The frame, the detached original "cylinders" vec and the grid are removed
 * in the finally block.
 */
@Test
public void testCarsGrid() {
  Grid<GBMModel.GBMParameters> grid = null;
  Frame fr = null;
  Vec old = null;
  try {
    fr = parse_test_file("smalldata/junit/cars.csv");
    // Remove unique id
    fr.remove("name").remove();
    old = fr.remove("cylinders");
    // response to last column, re-added as a categorical vec
    fr.add("cylinders", old.toCategoricalVec());
    DKV.put(fr);
    // Setup hyperparameter search space
    final Double[] legalLearnRateOpts = new Double[] { 0.01, 0.1, 0.3 };
    final Double[] illegalLearnRateOpts = new Double[] { -1.0 };
    // Plain HashMap filled with put() calls: avoids the anonymous HashMap
    // subclass (and its hidden reference to this test instance) that
    // double-brace initialization would create.
    HashMap<String, Object[]> hyperParms = new HashMap<String, Object[]>();
    hyperParms.put("_ntrees", new Integer[] { 1, 2 });
    hyperParms.put("_distribution", new DistributionFamily[] { DistributionFamily.multinomial });
    hyperParms.put("_max_depth", new Integer[] { 1, 2, 5 });
    hyperParms.put("_learn_rate", ArrayUtils.join(legalLearnRateOpts, illegalLearnRateOpts));
    // Name of used hyper parameters
    String[] hyperParamNames = hyperParms.keySet().toArray(new String[hyperParms.size()]);
    Arrays.sort(hyperParamNames);
    int hyperSpaceSize = ArrayUtils.crossProductSize(hyperParms);
    // Fire off a grid search
    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = fr._key;
    params._response_column = "cylinders";
    // Get the Grid for this modeling class and frame
    Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
    grid = (Grid<GBMModel.GBMParameters>) gs.get();
    // Make sure number of produced models match size of specified hyper space
    Assert.assertEquals("Size of grid (models+failures) should match to size of hyper space", hyperSpaceSize, grid.getModelCount() + grid.getFailureCount());
    //
    // Make sure that names of used parameters match
    //
    String[] gridHyperNames = grid.getHyperNames();
    Arrays.sort(gridHyperNames);
    Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames, gridHyperNames);
    //
    // Make sure that values of used parameters match as well to the specified values
    //
    Key<Model>[] mKeys = grid.getModelKeys();
    Map<String, Set<Object>> usedHyperParams = GridTestUtils.initMap(hyperParamNames);
    for (Key<Model> mKey : mKeys) {
      GBMModel gbm = (GBMModel) mKey.get();
      // Print the training MSE scored at the final tree count next to the
      // model's hyper parameter name/value pairs.
      System.out.println(gbm._output._scored_train[gbm._output._ntrees]._mse + " " + Arrays.deepToString(ArrayUtils.zip(grid.getHyperNames(), grid.getHyperValues(gbm._parms))));
      GridTestUtils.extractParams(usedHyperParams, gbm._parms, hyperParamNames);
    }
    // Remove illegal options
    hyperParms.put("_learn_rate", legalLearnRateOpts);
    GridTestUtils.assertParamsEqual("Grid models parameters have to cover specified hyper space", hyperParms, usedHyperParams);
    // Verify model failure
    Map<String, Set<Object>> failedHyperParams = GridTestUtils.initMap(hyperParamNames);
    for (Model.Parameters failedParams : grid.getFailedParameters()) {
      GridTestUtils.extractParams(failedHyperParams, failedParams, hyperParamNames);
    }
    // Only the illegal learn rate is kept for the comparison against failures
    hyperParms.put("_learn_rate", illegalLearnRateOpts);
    GridTestUtils.assertParamsEqual("Failed model parameters have to correspond to specified hyper space", hyperParms, failedHyperParams);
  } finally {
    if (old != null) {
      old.remove();
    }
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
  }
}
use of water.Key in project h2o-3 by h2oai.
the class AstRecAssignTestUtils method seqStrVec.
/**
 * Builds a single-chunk string Vec holding consecutive integers rendered as
 * strings ("0", "1", "2", ...), grouped into runs.
 *
 * Before each run a null entry is appended, except while the running counter
 * is still 0 (i.e. nothing has been numbered yet).
 *
 * @param runs length of each run of consecutive values
 * @return the closed Vec produced from the appended values
 */
static Vec seqStrVec(int... runs) {
  Key vecKey = Vec.VectorGroup.VG_LEN1.addVec();
  Futures pending = new Futures();
  AppendableVec builder = new AppendableVec(vecKey, Vec.T_STR);
  NewChunk rows = new NewChunk(builder, 0);

  int next = 0;
  for (int runLength : runs) {
    // Null separator ahead of every run once numbering has started.
    if (next > 0) {
      rows.addStr(null);
    }
    int emitted = 0;
    while (emitted < runLength) {
      rows.addStr(String.valueOf(next));
      next++;
      emitted++;
    }
  }

  rows.close(0, pending);
  Vec result = builder.layout_and_close(pending);
  // Wait for the outstanding work tracked by the Futures before handing the Vec out.
  pending.blockForPending();
  return result;
}
use of water.Key in project h2o-2 by h2oai.
the class DdplyTest method testDdplyBig.
// Intended to use a file large enough to stripe across multiple nodes with
// multiple groups, so that all generated groups are both built and executed
// distributed. The large-file part is currently commented out; only the cars
// data set is exercised.
@Test
public void testDdplyBig() {
  final Key carsKey = Key.make("cars.hex");
  final Key orangeKey = Key.make("orange.hex");
  try {
    // Parse cars.csv under carsKey; the returned frame itself is not needed here.
    parseFrame(carsKey, "smalldata/cars.csv");
    checkStr("ddply(cars.hex,c(3),nrow)");
    // More complex multi-return
    checkStr("ddply(cars.hex,c(3),function(x) {cbind(mean(x[,2]),mean(x[,3]))})");
    // A big enough file to distribute across multiple nodes.
    // Trimmed down to run in reasonable time.
    //Frame fr1 = parseFrame(orangeKey,"smalldata/unbalanced/orange_small_train.data.zip");
    //checkStr("ddply(orange.hex,c(7),nrow)");
    //checkStr("ddply(orange.hex,c(206,207),function(x){ cbind( mean(x$Var6), sum(x$Var6+x$Var7) ) })");
    // A more complex ddply that works as of 3/1/2014 but is slow for a junit
    //checkStr("ddply(orange.hex,c(206,207),function(x){"+
    // "max6 = max(x$Var6);"+
    // "min6 = min(x$Var6);"+
    // "len = max6-min6+1;"+
    // "tot = sum(x$Var7);"+
    // "avg = tot/len"+
    // "})");
  } finally {
    // Remove the cars.hex frame key
    Lockable.delete(carsKey);
    // Remove the orange.hex frame key (only populated by the commented-out section)
    Lockable.delete(orangeKey);
  }
}
Aggregations