Use of hex.tree.gbm.GBM in project h2o-3 by h2oai: class WorkFlowTest, method testWorkFlow.
// End-to-end workflow test:
// 1- Load a set of files; split into train, test, and holdout
// 2- Light data munging
// 3- Build model on train, using test as validation
// 4- Score on holdout set
//
// If the files are missing, fail silently: the files are large and this is
// not yet a JUnit test.
private void testWorkFlow(String[] files) {
  try {
    Scope.enter();
    // 1- Load datasets
    Frame data = load_files("data.hex", files);
    if (data == null)
      return;

    // -------------------------------------------------
    // 2- Light data munging
    // Convert start time to: Day since the Epoch
    Vec startime = data.vec("starttime");
    data.add(new TimeSplit().doIt(startime));

    // Now do a monster Group-By: count bike starts per-station per-day
    Vec days = data.vec("Days");
    long start = System.currentTimeMillis();
    Frame bph = new CountBikes(days).doAll(days, data.vec("start station name")).makeFrame(Key.make("bph.hex"));
    System.out.println("Groupby took " + (System.currentTimeMillis() - start));
    System.out.println(bph);
    System.out.println(bph.toString(10000, 20));
    data.remove();

    QuantileModel.QuantileParameters quantile_parms = new QuantileModel.QuantileParameters();
    quantile_parms._train = bph._key;
    Job<QuantileModel> job2 = new Quantile(quantile_parms).trainModel();
    QuantileModel quantile = job2.get();
    job2.remove();
    System.out.println(Arrays.deepToString(quantile._output._quantiles));
    quantile.remove();

    // Split into train, test and holdout sets
    Key[] keys = new Key[] { Key.make("train.hex"), Key.make("test.hex"), Key.make("hold.hex") };
    double[] ratios = new double[] { 0.6, 0.3, 0.1 };
    Frame[] frs = ShuffleSplitFrame.shuffleSplitFrame(bph, keys, ratios, 1234567689L);
    Frame train = frs[0];
    Frame test = frs[1];
    Frame hold = frs[2];
    bph.remove();
    System.out.println(train);
    System.out.println(test);

    // -------------------------------------------------
    // 3- Build model on train; using test as validation
    // ---
    // Gradient Boosting Machine
    GBMModel.GBMParameters gbm_parms = new GBMModel.GBMParameters();
    // base Model.Parameters
    gbm_parms._train = train._key;
    gbm_parms._valid = test._key;
    gbm_parms._score_each_iteration = false; // default is false
    // SupervisedModel.Parameters
    gbm_parms._response_column = "bikes";
    // SharedTreeModel.Parameters
    gbm_parms._ntrees = 500;  // default is 50; 1000 trees gives 0.90, 10000 gives 0.91
    gbm_parms._max_depth = 6; // default is 5
    gbm_parms._min_rows = 10; // default
    gbm_parms._nbins = 20;    // default
    // GBMModel.Parameters
    gbm_parms._distribution = DistributionFamily.gaussian; // default
    gbm_parms._learn_rate = 0.1f; // default
    // Train model; block for results
    Job<GBMModel> job = new GBM(gbm_parms).trainModel();
    GBMModel gbm = job.get();
    job.remove();

    // ---
    // Build a GLM model also
    GLMModel.GLMParameters glm_parms = new GLMModel.GLMParameters(GLMModel.GLMParameters.Family.gaussian);
    // base Model.Parameters
    glm_parms._train = train._key;
    glm_parms._valid = test._key;
    glm_parms._score_each_iteration = false; // default is false
    // SupervisedModel.Parameters
    glm_parms._response_column = "bikes";
    // GLMModel.Parameters
    glm_parms._use_all_factor_levels = true;
    // Train model; block for results
    Job<GLMModel> glm_job = new GLM(glm_parms).trainModel();
    GLMModel glm = glm_job.get();
    glm_job.remove();

    // -------------------------------------------------
    // 4- Score on holdout set & report
    gbm.score(train).remove();
    glm.score(train).remove();

    // Cleanup
    train.remove();
    test.remove();
    hold.remove();
  } finally {
    Scope.exit();
  }
}
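Note that step 4 as written scores the training frame. A minimal hedged sketch of scoring the holdout split instead, and reading back the resulting metrics, could look like the fragment below. It reuses the gbm, glm, and hold variables from the test above, and assumes that ModelMetrics.getFromDKV(model, frame) is the right accessor for the metrics registered during scoring.

  // Hedged sketch: score the holdout frame and report metrics.
  // Assumes gbm, glm and hold are still in scope, as in the test above.
  Frame gbmHoldPreds = gbm.score(hold); // per-row predictions on the holdout rows
  Frame glmHoldPreds = glm.score(hold);
  // Scoring registers ModelMetrics in the DKV keyed by (model, frame);
  // getFromDKV is assumed to retrieve them.
  System.out.println(ModelMetrics.getFromDKV(gbm, hold));
  System.out.println(ModelMetrics.getFromDKV(glm, hold));
  gbmHoldPreds.remove();
  glmHoldPreds.remove();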
Use of hex.tree.gbm.GBM in project h2o-3 by h2oai: class XValPredictionsCheck, method testXValPredictions.
@Test
public void testXValPredictions() {
  final int nfolds = 3;
  Frame tfr = null;
  try {
    // Load data, hack frames
    tfr = parse_test_file("smalldata/iris/iris_wheader.csv");
    Frame foldId = new Frame(new String[] { "foldId" }, new Vec[] { AstKFold.kfoldColumn(tfr.vec("class").makeZero(), nfolds, 543216789) });
    tfr.add(foldId);
    DKV.put(tfr);

    // GBM
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._response_column = "class";
    parms._ntrees = 1;
    parms._max_depth = 1;
    parms._fold_column = "foldId";
    parms._distribution = DistributionFamily.multinomial;
    parms._keep_cross_validation_predictions = true;
    GBM job = new GBM(parms);
    GBMModel gbm = job.trainModel().get();
    checkModel(gbm, foldId.anyVec(), 3);

    // DRF
    DRFModel.DRFParameters parmsDRF = new DRFModel.DRFParameters();
    parmsDRF._train = tfr._key;
    parmsDRF._response_column = "class";
    parmsDRF._ntrees = 1;
    parmsDRF._max_depth = 1;
    parmsDRF._fold_column = "foldId";
    parmsDRF._distribution = DistributionFamily.multinomial;
    parmsDRF._keep_cross_validation_predictions = true;
    DRF drfJob = new DRF(parmsDRF);
    DRFModel drf = drfJob.trainModel().get();
    checkModel(drf, foldId.anyVec(), 3);

    // GLM
    GLMModel.GLMParameters parmsGLM = new GLMModel.GLMParameters();
    parmsGLM._train = tfr._key;
    parmsGLM._response_column = "sepal_len";
    parmsGLM._fold_column = "foldId";
    parmsGLM._keep_cross_validation_predictions = true;
    GLM glmJob = new GLM(parmsGLM);
    GLMModel glm = glmJob.trainModel().get();
    checkModel(glm, foldId.anyVec(), 1);

    // DL
    DeepLearningModel.DeepLearningParameters parmsDL = new DeepLearningModel.DeepLearningParameters();
    parmsDL._train = tfr._key;
    parmsDL._response_column = "class";
    parmsDL._hidden = new int[] { 1 };
    parmsDL._epochs = 1;
    parmsDL._fold_column = "foldId";
    parmsDL._keep_cross_validation_predictions = true;
    DeepLearning dlJob = new DeepLearning(parmsDL);
    DeepLearningModel dl = dlJob.trainModel().get();
    checkModel(dl, foldId.anyVec(), 3);
  } finally {
    if (tfr != null)
      tfr.remove();
  }
}
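The body of checkModel is not shown here. A hedged sketch of the kind of check such a helper could perform when _keep_cross_validation_predictions is enabled is below; the field names _cross_validation_predictions and _cross_validation_holdout_predictions_frame_id are assumptions about Model.Output, and the helper is illustrative rather than the project's actual implementation.

  // Hedged sketch of a checkModel-style helper (Output field names are assumptions).
  static void checkCVPredictions(Model m, Vec foldId, int nfolds) {
    // One per-fold prediction frame should have been kept...
    Key<Frame>[] cvPreds = m._output._cross_validation_predictions; // assumed field
    Assert.assertEquals(nfolds, cvPreds.length);
    // ...plus a combined holdout-prediction frame with one row per training row.
    Frame holdout = DKV.getGet(m._output._cross_validation_holdout_predictions_frame_id); // assumed field
    Assert.assertEquals(foldId.length(), holdout.numRows());
  }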
Use of hex.tree.gbm.GBM in project h2o-3 by h2oai: class PartialDependenceTest, method weatherBinary.
@Test
public void weatherBinary() {
  Frame fr = null;
  GBMModel model = null;
  PartialDependence partialDependence = null;
  try {
    // Frame
    fr = parse_test_file("smalldata/junit/weather.csv");

    // Model
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = fr._key;
    parms._ignored_columns = new String[] { "Date", "RISK_MM", "EvapMM" };
    parms._response_column = "RainTomorrow";
    model = new GBM(parms).trainModel().get();

    // PartialDependence
    partialDependence = new PartialDependence(Key.<PartialDependence>make());
    partialDependence._nbins = 33;
    partialDependence._cols = new String[] { "Sunshine", "MaxWindPeriod", "WindSpeed9am" };
    partialDependence._model_id = (Key) model._key;
    partialDependence._frame_id = fr._key;
    partialDependence.execImpl().get();
    for (TwoDimTable t : partialDependence._partial_dependence_data)
      Log.info(t);
  } finally {
    if (fr != null)
      fr.remove();
    if (model != null)
      model.remove();
    if (partialDependence != null)
      partialDependence.remove();
  }
}
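Each entry of _partial_dependence_data is a TwoDimTable, one per column listed in _cols. Instead of just logging the tables, the raw curve can be read out cell by cell; the sketch below assumes the usual TwoDimTable accessors (getRowDim, get) and assumes the first two columns hold the bin value and the mean response.

  // Hedged sketch: read the partial-dependence curve for the first requested column.
  TwoDimTable pd = partialDependence._partial_dependence_data[0];
  for (int r = 0; r < pd.getRowDim(); r++) {
    Object binValue = pd.get(r, 0);     // value of the "Sunshine" bin (assumed column layout)
    Object meanResponse = pd.get(r, 1); // mean predicted response at that bin (assumed column layout)
    Log.info(binValue + " -> " + meanResponse);
  }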
Use of hex.tree.gbm.GBM in project h2o-3 by h2oai: class PartialDependenceTest, method prostateBinaryPickCols.
@Test
public void prostateBinaryPickCols() {
  Frame fr = null;
  GBMModel model = null;
  PartialDependence partialDependence = null;
  try {
    // Frame
    fr = parse_test_file("smalldata/prostate/prostate.csv");
    for (String s : new String[] { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" }) {
      Vec v = fr.remove(s);
      fr.add(s, v.toCategoricalVec());
      v.remove();
    }
    DKV.put(fr);

    // Model
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = fr._key;
    parms._ignored_columns = new String[] { "ID" };
    parms._response_column = "CAPSULE";
    model = new GBM(parms).trainModel().get();

    // PartialDependence
    partialDependence = new PartialDependence(Key.<PartialDependence>make());
    // pick columns manually
    partialDependence._cols = new String[] { "DPROS", "GLEASON" };
    partialDependence._nbins = 10;
    partialDependence._model_id = (Key) model._key;
    partialDependence._frame_id = fr._key;
    partialDependence.execImpl().get();
    for (TwoDimTable t : partialDependence._partial_dependence_data)
      Log.info(t);
    Assert.assertTrue(partialDependence._partial_dependence_data.length == 2);
  } finally {
    if (fr != null)
      fr.remove();
    if (model != null)
      model.remove();
    if (partialDependence != null)
      partialDependence.remove();
  }
}
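Because _cols is set explicitly here, exactly one table per requested column comes back, which is what the length-2 assertion checks. A hedged follow-up check that ties each table back to its column by name is sketched below; getTableHeader() on TwoDimTable, and the convention that the header mentions the column, are assumptions.

  // Hedged sketch: each returned table should correspond to one requested column.
  String[] requested = partialDependence._cols; // { "DPROS", "GLEASON" }
  for (int i = 0; i < requested.length; i++) {
    TwoDimTable t = partialDependence._partial_dependence_data[i];
    // getTableHeader() and the header naming convention are assumptions here.
    Assert.assertTrue(t.getTableHeader().contains(requested[i]));
  }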