use of hex.grid.Grid in project h2o-3 by h2oai.
the class GBMGridTest method testRandomCarsGrid.
//@Ignore("PUBDEV-1648")
/**
 * Runs a GBM grid search over a randomly drawn hyperparameter space on the cars data set,
 * checks that the grid holds one model per hyperparameter combination, and then rebuilds one
 * randomly chosen combination as a standalone model.
 *
 * <p>The comparison of the rebuilt model against the grid's own model is still disabled (see
 * the TODO below), so the rebuilt model's MSE is only printed.
 */
@Test
public void testRandomCarsGrid() {
  Grid grid = null;
  GBMModel gbmRebuilt = null;
  Frame fr = null;
  Vec old = null;
  try {
    fr = parse_test_file("smalldata/junit/cars.csv");
    fr.remove("name").remove();
    old = fr.remove("economy (mpg)");
    // Move the response to the last column
    fr.add("economy (mpg)", old);
    DKV.put(fr);

    // A single explicit seed drives every random choice below (dimension sizes, shuffles and
    // the final pick), so a failing run can be replayed from the printed value.
    long seed = System.nanoTime();
    Random rng = new Random(seed);
    System.out.println("Test seed: " + seed);

    // Setup random hyperparameter search space
    HashMap<String, Object[]> hyperParms = new HashMap<>();
    hyperParms.put("_distribution", new DistributionFamily[] { DistributionFamily.gaussian });

    // Each dimension gets between 1 and 4 values
    int ntreesDim = rng.nextInt(4) + 1;
    int maxDepthDim = rng.nextInt(4) + 1;
    int learnRateDim = rng.nextInt(4) + 1;

    // Draw the values of each dimension without replacement: shuffle the candidates and keep
    // the leading entries.
    Integer[] ntreesArr = interval(1, 25);
    ArrayList<Integer> ntreesList = new ArrayList<>(Arrays.asList(ntreesArr));
    Collections.shuffle(ntreesList, rng);
    Integer[] ntreesSpace = ntreesList.subList(0, ntreesDim).toArray(new Integer[ntreesDim]);

    Integer[] maxDepthArr = interval(1, 10);
    ArrayList<Integer> maxDepthList = new ArrayList<>(Arrays.asList(maxDepthArr));
    Collections.shuffle(maxDepthList, rng);
    Integer[] maxDepthSpace = maxDepthList.subList(0, maxDepthDim).toArray(new Integer[maxDepthDim]);

    Double[] learnRateArr = interval(0.01, 1.0, 0.01);
    ArrayList<Double> learnRateList = new ArrayList<>(Arrays.asList(learnRateArr));
    Collections.shuffle(learnRateList, rng);
    Double[] learnRateSpace = learnRateList.subList(0, learnRateDim).toArray(new Double[learnRateDim]);

    hyperParms.put("_ntrees", ntreesSpace);
    hyperParms.put("_max_depth", maxDepthSpace);
    hyperParms.put("_learn_rate", learnRateSpace);

    // Fire off a grid search
    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = fr._key;
    params._response_column = "economy (mpg)";
    // Get the Grid for this modeling class and frame
    Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
    grid = gs.get();
    System.out.println("ntrees search space: " + Arrays.toString(ntreesSpace));
    System.out.println("max_depth search space: " + Arrays.toString(maxDepthSpace));
    System.out.println("learn_rate search space: " + Arrays.toString(learnRateSpace));

    // Check the cardinality of the grid
    Model[] ms = grid.getModels();
    int numModels = ms.length;
    int expectedModels = ntreesDim * maxDepthDim * learnRateDim;
    System.out.println("Grid consists of " + numModels + " models");
    assertTrue("Expected " + expectedModels + " models but grid has " + numModels,
        numModels == expectedModels);

    // Pick a random model from the grid: exactly one value per hyperparameter
    HashMap<String, Object[]> randomHyperParms = new HashMap<>();
    randomHyperParms.put("_distribution", new DistributionFamily[] { DistributionFamily.gaussian });
    Integer ntreeVal = ntreesSpace[rng.nextInt(ntreesSpace.length)];
    randomHyperParms.put("_ntrees", new Integer[] { ntreeVal });
    Integer maxDepthVal = maxDepthSpace[rng.nextInt(maxDepthSpace.length)];
    randomHyperParms.put("_max_depth", new Integer[] { maxDepthVal });
    Double learnRateVal = learnRateSpace[rng.nextInt(learnRateSpace.length)];
    randomHyperParms.put("_learn_rate", new Double[] { learnRateVal });
    //TODO: GBMModel gbmFromGrid = (GBMModel) g2.model(randomHyperParms).get();

    // Rebuild it with its parameters
    params._distribution = DistributionFamily.gaussian;
    params._ntrees = ntreeVal;
    params._max_depth = maxDepthVal;
    params._learn_rate = learnRateVal;
    GBM gbm = new GBM(params);
    gbmRebuilt = gbm.trainModel().get();
    assertTrue(gbm.isStopped());

    // Make sure the MSE metrics match
    //double fromGridMSE = gbmFromGrid._output._scored_train[gbmFromGrid._output._ntrees]._mse;
    double rebuiltMSE = gbmRebuilt._output._scored_train[gbmRebuilt._output._ntrees]._mse;
    //System.out.println("The random grid model's MSE: " + fromGridMSE);
    System.out.println("The rebuilt model's MSE: " + rebuiltMSE);
    //assertEquals(fromGridMSE, rebuiltMSE);
  } finally {
    if (old != null) {
      old.remove();
    }
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
    if (gbmRebuilt != null) {
      gbmRebuilt.remove();
    }
  }
}
use of hex.grid.Grid in project h2o-3 by h2oai.
the class DRFGridTest method testRandomCarsGrid.
//@Ignore("PUBDEV-1648")
/**
 * Runs a DRF grid search over a randomly drawn hyperparameter space on the cars data set,
 * checks that built plus failed models account for the whole hyper space, and then rebuilds
 * one randomly chosen combination as a standalone model.
 *
 * <p>The comparison of the rebuilt model against the grid's own model is still disabled (see
 * the TODO below), so the rebuilt model's MSE is only printed.
 */
@Test
public void testRandomCarsGrid() {
  Grid grid = null;
  DRFModel drfRebuilt = null;
  Frame fr = null;
  try {
    fr = parse_test_file("smalldata/junit/cars.csv");
    fr.remove("name").remove();
    Vec old = fr.remove("economy (mpg)");
    // Move the response to the last column
    fr.add("economy (mpg)", old);
    DKV.put(fr);

    // Setup random hyperparameter search space
    HashMap<String, Object[]> hyperParms = new HashMap<>();

    // A single explicit seed drives every random choice below (dimension sizes, shuffles and
    // the final pick), so a failing run can be replayed from the printed value.
    long seed = System.nanoTime();
    Random rng = new Random(seed);
    System.out.println("Test seed: " + seed);

    // Limit to 1-3 randomly, 4 times. Average total number of models is
    // 2^4, or 16. Max is 81 models.
    int ntreesDim = rng.nextInt(3) + 1;
    int maxDepthDim = rng.nextInt(3) + 1;
    int mtriesDim = rng.nextInt(3) + 1;
    int sampleRateDim = rng.nextInt(3) + 1;

    // Draw the values of each dimension without replacement: shuffle the candidates and keep
    // the leading entries.
    Integer[] ntreesArr = interval(1, 15);
    ArrayList<Integer> ntreesList = new ArrayList<>(Arrays.asList(ntreesArr));
    Collections.shuffle(ntreesList, rng);
    Integer[] ntreesSpace = ntreesList.subList(0, ntreesDim).toArray(new Integer[ntreesDim]);

    Integer[] maxDepthArr = interval(1, 10);
    ArrayList<Integer> maxDepthList = new ArrayList<>(Arrays.asList(maxDepthArr));
    Collections.shuffle(maxDepthList, rng);
    Integer[] maxDepthSpace = maxDepthList.subList(0, maxDepthDim).toArray(new Integer[maxDepthDim]);

    Integer[] mtriesArr = interval(1, 5);
    ArrayList<Integer> mtriesList = new ArrayList<>(Arrays.asList(mtriesArr));
    Collections.shuffle(mtriesList, rng);
    Integer[] mtriesSpace = mtriesList.subList(0, mtriesDim).toArray(new Integer[mtriesDim]);

    Double[] sampleRateArr = interval(0.01, 0.99, 0.01);
    ArrayList<Double> sampleRateList = new ArrayList<>(Arrays.asList(sampleRateArr));
    Collections.shuffle(sampleRateList, rng);
    Double[] sampleRateSpace = sampleRateList.subList(0, sampleRateDim).toArray(new Double[sampleRateDim]);

    hyperParms.put("_ntrees", ntreesSpace);
    hyperParms.put("_max_depth", maxDepthSpace);
    hyperParms.put("_mtries", mtriesSpace);
    hyperParms.put("_sample_rate", sampleRateSpace);

    // Fire off a grid search
    DRFModel.DRFParameters params = new DRFModel.DRFParameters();
    params._train = fr._key;
    params._response_column = "economy (mpg)";
    // Get the Grid for this modeling class and frame
    Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
    grid = gs.get();
    System.out.println("ntrees search space: " + Arrays.toString(ntreesSpace));
    System.out.println("max_depth search space: " + Arrays.toString(maxDepthSpace));
    System.out.println("mtries search space: " + Arrays.toString(mtriesSpace));
    System.out.println("sample_rate search space: " + Arrays.toString(sampleRateSpace));

    // Check the cardinality of the grid: every point of the hyper space must show up either
    // as a built model or as a recorded failure.
    Model[] ms = grid.getModels();
    int numModels = ms.length;
    System.out.println("Grid consists of " + numModels + " models");
    assertEquals("Number of models should match hyper space size",
        ntreesDim * maxDepthDim * sampleRateDim * mtriesDim, numModels + grid.getFailureCount());

    // Pick a random model from the grid: exactly one value per hyperparameter
    HashMap<String, Object[]> randomHyperParms = new HashMap<>();
    Integer ntreeVal = ntreesSpace[rng.nextInt(ntreesSpace.length)];
    randomHyperParms.put("_ntrees", new Integer[] { ntreeVal });
    Integer maxDepthVal = maxDepthSpace[rng.nextInt(maxDepthSpace.length)];
    randomHyperParms.put("_max_depth", new Integer[] { maxDepthVal });
    Integer mtriesVal = mtriesSpace[rng.nextInt(mtriesSpace.length)];
    randomHyperParms.put("_mtries", new Integer[] { mtriesVal });
    Double sampleRateVal = sampleRateSpace[rng.nextInt(sampleRateSpace.length)];
    randomHyperParms.put("_sample_rate", new Double[] { sampleRateVal });
    //TODO: DRFModel drfFromGrid = (DRFModel) g2.model(randomHyperParms).get();

    // Rebuild it with its parameters
    params._ntrees = ntreeVal;
    params._max_depth = maxDepthVal;
    params._mtries = mtriesVal;
    params._sample_rate = sampleRateVal;
    drfRebuilt = new DRF(params).trainModel().get();

    // Make sure the MSE metrics match
    //double fromGridMSE = drfFromGrid._output._scored_train[drfFromGrid._output._ntrees]._mse;
    double rebuiltMSE = drfRebuilt._output._scored_train[drfRebuilt._output._ntrees]._mse;
    //System.out.println("The random grid model's MSE: " + fromGridMSE);
    System.out.println("The rebuilt model's MSE: " + rebuiltMSE);
    //assertEquals(fromGridMSE, rebuiltMSE);
  } finally {
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
    if (drfRebuilt != null) {
      drfRebuilt.remove();
    }
  }
}
use of hex.grid.Grid in project h2o-3 by h2oai.
the class DRFGridTest method testCarsGrid.
/**
 * Runs a DRF grid search on the cars data set (response: {@code cylinders}, converted to a
 * categorical column) over a hyper space that mixes one legal and one illegal sample rate.
 *
 * <p>Verifies that built plus failed models cover the whole hyper space, that the grid reports
 * the same hyperparameter names, that the built models cover the legal part of the space, and
 * that the recorded failures cover the illegal part.
 */
@Test
public void testCarsGrid() {
  Grid<DRFModel.DRFParameters> grid = null;
  Frame fr = null;
  Vec old = null;
  try {
    fr = parse_test_file("smalldata/junit/cars.csv");
    // Remove unique id
    fr.remove("name").remove();
    old = fr.remove("cylinders");
    // Move the response to the last column; the categorical copy is a new Vec owned by the
    // frame, so the original numeric Vec is removed separately in the finally block.
    fr.add("cylinders", old.toCategoricalVec());
    DKV.put(fr);

    // Setup hyperparameter search space
    final Double[] legalSampleRateOpts = new Double[] { 0.5 };
    final Double[] illegalSampleRateOpts = new Double[] { 2.0 };
    // Plain HashMap instead of double-brace initialization: the latter creates an anonymous
    // HashMap subclass that keeps a reference to this test instance.
    HashMap<String, Object[]> hyperParms = new HashMap<>();
    hyperParms.put("_ntrees", new Integer[] { 2, 4 });
    hyperParms.put("_max_depth", new Integer[] { 10, 20 });
    hyperParms.put("_mtries", new Integer[] { -1, 4 });
    hyperParms.put("_sample_rate", ArrayUtils.join(legalSampleRateOpts, illegalSampleRateOpts));

    // Names of the used hyper parameters, sorted for comparison with the grid's view
    String[] hyperParamNames = hyperParms.keySet().toArray(new String[hyperParms.size()]);
    Arrays.sort(hyperParamNames);
    int hyperSpaceSize = ArrayUtils.crossProductSize(hyperParms);

    // Fire off a grid search
    DRFModel.DRFParameters params = new DRFModel.DRFParameters();
    params._train = fr._key;
    params._response_column = "cylinders";
    // Get the Grid for this modeling class and frame
    Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
    grid = (Grid<DRFModel.DRFParameters>) gs.get();

    // Make sure number of produced models match size of specified hyper space
    Assert.assertEquals("Size of grid should match to size of hyper space", hyperSpaceSize, grid.getModelCount() + grid.getFailureCount());

    //
    // Make sure that names of used parameters match
    //
    String[] gridHyperNames = grid.getHyperNames();
    Arrays.sort(gridHyperNames);
    Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames, gridHyperNames);

    //
    // Make sure that values of used parameters match as well to the specified values
    //
    Model[] ms = grid.getModels();
    Map<String, Set<Object>> usedModelParams = GridTestUtils.initMap(hyperParamNames);
    for (Model m : ms) {
      DRFModel drf = (DRFModel) m;
      System.out.println(drf._output._scored_train[drf._output._ntrees]._mse + " " + Arrays.deepToString(ArrayUtils.zip(grid.getHyperNames(), grid.getHyperValues(drf._parms))));
      GridTestUtils.extractParams(usedModelParams, drf._parms, hyperParamNames);
    }
    // Built models are expected to use only the legal sample rate
    hyperParms.put("_sample_rate", legalSampleRateOpts);
    GridTestUtils.assertParamsEqual("Grid models parameters have to cover specified hyper space", hyperParms, usedModelParams);

    // Verify model failure: failed parameter sets are expected to use only the illegal rate
    Map<String, Set<Object>> failedHyperParams = GridTestUtils.initMap(hyperParamNames);
    for (Model.Parameters failedParams : grid.getFailedParameters()) {
      GridTestUtils.extractParams(failedHyperParams, failedParams, hyperParamNames);
    }
    hyperParms.put("_sample_rate", illegalSampleRateOpts);
    GridTestUtils.assertParamsEqual("Failed model parameters have to correspond to specified hyper space", hyperParms, failedHyperParams);
  } finally {
    if (old != null) {
      old.remove();
    }
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
  }
}
use of hex.grid.Grid in project h2o-3 by h2oai.
the class KMeansGridTest method testIrisGrid.
/**
 * Runs a K-means grid search on the iris data set over {@code _k}, {@code _init} and
 * {@code _seed}, where the {@code _k} options mix legal values with one illegal value (0).
 *
 * <p>Verifies that built plus failed models cover the whole hyper space, that the grid reports
 * the same hyperparameter names, that the built models cover the legal part of the space, and
 * that the recorded failures cover the illegal part.
 */
@Test
public void testIrisGrid() {
  Grid<KMeansModel.KMeansParameters> grid = null;
  Frame fr = null;
  try {
    fr = parse_test_file("smalldata/iris/iris_wheader.csv");

    // Hyperparameter search over three parameters: k, init and seed
    HashMap<String, Object[]> hyperParms = new HashMap<>();
    // Range of K's to search over
    Integer[] legalKOpts = { 1, 2, 3, 4, 5 };
    Integer[] illegalKOpts = { 0 };
    hyperParms.put("_k", ArrayUtils.join(legalKOpts, illegalKOpts));
    // Range of the init enum to search over
    KMeans.Initialization[] initOpts = {
        KMeans.Initialization.Random,
        KMeans.Initialization.PlusPlus,
        KMeans.Initialization.Furthest
    };
    hyperParms.put("_init", initOpts);
    // Range of seeds to search over
    Long[] seedOpts = { /* 0L, */ 1L, 123456789L, 987654321L };
    hyperParms.put("_seed", seedOpts);

    // Sorted names of the used hyper parameters and the size of the full cross product
    String[] hyperParamNames = hyperParms.keySet().toArray(new String[hyperParms.size()]);
    Arrays.sort(hyperParamNames);
    int hyperSpaceSize = ArrayUtils.crossProductSize(hyperParms);

    // Default parameters, pointed at the training frame
    KMeansModel.KMeansParameters params = new KMeansModel.KMeansParameters();
    params._train = fr._key;

    // Fire off a grid search and wait for the result
    Job<Grid> searchJob = GridSearch.startGridSearch(null, params, hyperParms);
    grid = (Grid<KMeansModel.KMeansParameters>) searchJob.get();

    // Built models plus failures must account for every point of the hyper space
    int accountedFor = grid.getModelCount() + grid.getFailureCount();
    Assert.assertEquals("Size of grid should match to size of hyper space", hyperSpaceSize, accountedFor);

    // The grid must report the same hyperparameter names
    String[] gridHyperNames = grid.getHyperNames();
    Arrays.sort(gridHyperNames);
    Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames, gridHyperNames);

    // Collect the hyperparameter values actually used by the built models
    Map<String, Set<Object>> usedModelParams = GridTestUtils.initMap(hyperParamNames);
    Model[] models = grid.getModels();
    for (int i = 0; i < models.length; i++) {
      KMeansModel kmm = (KMeansModel) models[i];
      System.out.println(kmm._output._tot_withinss + " " + Arrays.deepToString(ArrayUtils.zip(grid.getHyperNames(), grid.getHyperValues(kmm._parms))));
      GridTestUtils.extractParams(usedModelParams, kmm._parms, hyperParamNames);
    }
    // Built models are expected to use only the legal k values
    hyperParms.put("_k", legalKOpts);
    GridTestUtils.assertParamsEqual("Grid models parameters have to cover specified hyper space", hyperParms, usedModelParams);

    // Failed parameter sets are expected to use only the illegal k value
    Map<String, Set<Object>> failedHyperParams = GridTestUtils.initMap(hyperParamNames);
    for (Model.Parameters failedParams : grid.getFailedParameters()) {
      KMeansModel.KMeansParameters failedKMeansParams = (KMeansModel.KMeansParameters) failedParams;
      GridTestUtils.extractParams(failedHyperParams, failedKMeansParams, hyperParamNames);
    }
    hyperParms.put("_k", illegalKOpts);
    GridTestUtils.assertParamsEqual("Failed model parameters have to correspond to specified hyper space", hyperParms, failedHyperParams);
  } finally {
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
  }
}
use of hex.grid.Grid in project h2o-3 by h2oai.
the class KMeansGridTest method testRandomCarsGrid.
/**
 * Runs a K-means grid search over a randomly drawn hyperparameter space ({@code _k},
 * {@code _init}, {@code _seed}, {@code _standardize}) on the iris data set, checks the number
 * of models produced, and then rebuilds one randomly chosen combination as a standalone model.
 *
 * <p>Currently ignored (PUBDEV-1675). The comparison of the rebuilt model against the grid's
 * own model is still disabled (see the TODO below), so the rebuilt model's betweenss is only
 * printed.
 */
@Ignore("PUBDEV-1675")
public void testRandomCarsGrid() {
  Grid grid = null;
  KMeansModel kmRebuilt = null;
  Frame fr = null;
  // Three user-specified starting points, used when the User initialization is in the space
  Frame init = ArrayUtils.frame(ard(ard(5.0, 3.4, 1.5, 0.2), ard(7.0, 3.2, 4.7, 1.4), ard(6.5, 3.0, 5.8, 2.2)));
  try {
    fr = parse_test_file("smalldata/iris/iris_wheader.csv");
    fr.remove("class").remove();
    DKV.put(fr);

    // Setup random hyperparameter search space
    HashMap<String, Object[]> hyperParms = new HashMap<>();

    // A single explicit seed drives every random choice below (dimension sizes, shuffles and
    // the final pick), so a failing run can be replayed from the printed value.
    long rngSeed = System.nanoTime();
    Random rng = new Random(rngSeed);
    System.out.println("Test seed: " + rngSeed);

    int kDim = rng.nextInt(4) + 1;
    int initDim = rng.nextInt(4) + 1;
    int seedDim = rng.nextInt(4) + 1;
    int standardizeDim = rng.nextInt(2) + 1;

    // Draw the values of each dimension without replacement: shuffle the candidates and keep
    // the leading entries.
    // Candidate k values: 1..50
    Integer[] kArr = new Integer[50];
    for (int i = 0; i < kArr.length; i++) {
      kArr[i] = i + 1;
    }
    ArrayList<Integer> kList = new ArrayList<Integer>(Arrays.asList(kArr));
    Collections.shuffle(kList, rng);
    Integer[] kSpace = kList.subList(0, kDim).toArray(new Integer[kDim]);

    KMeans.Initialization[] initArr = new KMeans.Initialization[] { KMeans.Initialization.Random, KMeans.Initialization.User, KMeans.Initialization.PlusPlus, KMeans.Initialization.Furthest };
    ArrayList<KMeans.Initialization> initList = new ArrayList<KMeans.Initialization>(Arrays.asList(initArr));
    Collections.shuffle(initList, rng);
    KMeans.Initialization[] initSpace = initList.subList(0, initDim).toArray(new KMeans.Initialization[initDim]);

    Long[] seedArr = new Long[] { 0L, 1L, 123456789L, 987654321L };
    ArrayList<Long> seedList = new ArrayList<Long>(Arrays.asList(seedArr));
    Collections.shuffle(seedList, rng);
    Long[] seedSpace = seedList.subList(0, seedDim).toArray(new Long[seedDim]);

    Integer[] standardizeArr = new Integer[] { 1, 0 };
    ArrayList<Integer> standardizeList = new ArrayList<Integer>(Arrays.asList(standardizeArr));
    Collections.shuffle(standardizeList, rng);
    Integer[] standardizeSpace = standardizeList.subList(0, standardizeDim).toArray(new Integer[standardizeDim]);

    hyperParms.put("_k", kSpace);
    hyperParms.put("_init", initSpace);
    hyperParms.put("_seed", seedSpace);
    hyperParms.put("_standardize", standardizeSpace);
    System.out.println("k search space: " + Arrays.toString(kSpace));
    System.out.println("init search space: " + Arrays.toString(initSpace));
    System.out.println("seed search space: " + Arrays.toString(seedSpace));
    System.out.println("standardize search space: " + Arrays.toString(standardizeSpace));

    // Fire off a grid search
    KMeansModel.KMeansParameters params = new KMeansModel.KMeansParameters();
    params._train = fr._key;
    // The user points are only supplied when the User initialization is part of the space
    if (Arrays.asList(initSpace).contains(KMeans.Initialization.User)) {
      params._user_points = init._key;
    }
    // Get the Grid for this modeling class and frame
    Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
    grid = gs.get();

    // Check the cardinality of the grid
    Model[] ms = grid.getModels();
    int numModels = ms.length;
    int expectedModels = kDim * initDim * standardizeDim * seedDim;
    System.out.println("Grid consists of " + numModels + " models");
    assertTrue("Expected " + expectedModels + " models but grid has " + numModels,
        numModels == expectedModels);

    // Pick a random model from the grid: exactly one value per hyperparameter
    HashMap<String, Object[]> randomHyperParms = new HashMap<>();
    Integer kVal = kSpace[rng.nextInt(kSpace.length)];
    randomHyperParms.put("_k", new Integer[] { kVal });
    KMeans.Initialization initVal = initSpace[rng.nextInt(initSpace.length)];
    randomHyperParms.put("_init", new KMeans.Initialization[] { initVal });
    Long seedVal = seedSpace[rng.nextInt(seedSpace.length)];
    randomHyperParms.put("_seed", new Long[] { seedVal });
    Integer standardizeVal = standardizeSpace[rng.nextInt(standardizeSpace.length)];
    randomHyperParms.put("_standardize", new Integer[] { standardizeVal });
    //TODO: KMeansModel kmFromGrid = (KMeansModel) g2.model(randomHyperParms).get();

    // Rebuild it with its parameters
    params._k = kVal;
    params._init = initVal;
    params._seed = seedVal;
    params._standardize = standardizeVal == 1;
    kmRebuilt = new KMeans(params).trainModel().get();

    // Make sure the betweenss metrics match
    //double fromGridBetweenss = kmFromGrid._output._betweenss;
    double rebuiltBetweenss = kmRebuilt._output._betweenss;
    //System.out.println("The random grid model's betweenss: " + fromGridBetweenss);
    System.out.println("The rebuilt model's betweenss: " + rebuiltBetweenss);
    //assertEquals(fromGridBetweenss, rebuiltBetweenss);
  } finally {
    if (fr != null) {
      fr.remove();
    }
    if (grid != null) {
      grid.remove();
    }
    if (kmRebuilt != null) {
      kmRebuilt.remove();
    }
    if (init != null) {
      init.remove();
    }
  }
}
Aggregations