use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNfoldsOneVsRest.
/**
 * Cross-validation with one fold per row.
 *
 * <p>Trains two GBM models from the very same parameter object, with
 * {@code _nfolds} set to the number of rows of the training frame and Modulo
 * fold assignment, then checks that the cross-validation AUC, MSE and logloss
 * of both models agree to within 1e-12.
 */
@Test
public void testNfoldsOneVsRest() {
  Frame trainFrame = null;
  GBMModel first = null;
  GBMModel second = null;
  Scope.enter();
  try {
    trainFrame = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(trainFrame);

    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = trainFrame._key;
    params._response_column = "response";
    // Tree shape / boosting settings
    params._ntrees = 3;
    params._max_depth = 2;
    params._min_rows = 1;
    params._learn_rate = 1e-3f;
    params._seed = 12345;
    // One fold per row, assigned deterministically
    params._nfolds = (int) trainFrame.numRows();
    params._fold_assignment = Model.Parameters.FoldAssignmentScheme.Modulo;

    first = new GBM(params).trainModel().get();
    // Note: requesting numRows() + 1 folds for the second model is now an error,
    // so the second model is built with the same fold count as the first.
    second = new GBM(params).trainModel().get();

    ModelMetricsBinomial firstMetrics = (ModelMetricsBinomial) first._output._cross_validation_metrics;
    ModelMetricsBinomial secondMetrics = (ModelMetricsBinomial) second._output._cross_validation_metrics;

    final double tolerance = 1e-12;
    assertEquals(firstMetrics.auc_obj()._auc, secondMetrics.auc_obj()._auc, tolerance);
    assertEquals(firstMetrics.mse(), secondMetrics.mse(), tolerance);
    // r2 is deliberately not compared here.
    assertEquals(firstMetrics.logloss(), secondMetrics.logloss(), tolerance);
    // TODO: add check: the correct number of individual models were built. PUBDEV-1690
  } finally {
    if (trainFrame != null) {
      trainFrame.remove();
    }
    if (first != null) {
      first.deleteCrossValidationModels();
      first.delete();
    }
    if (second != null) {
      second.deleteCrossValidationModels();
      second.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNFold.
/**
 * 2-fold cross-validation with observation weights.
 *
 * <p>Trains a single GBM on {@code weights.csv} using the {@code weight}
 * column as observation weights and {@code _nfolds = 2}, keeping the
 * cross-validation predictions, and compares the cross-validation AUC, MSE
 * and logloss against fixed reference values.
 */
@Test
public void testNFold() {
  Frame tfr = null;
  GBMModel gbm = null;
  Scope.enter();
  try {
    tfr = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(tfr);
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._response_column = "response";
    parms._weights_column = "weight";
    parms._seed = 123;
    parms._min_rows = 1;
    parms._max_depth = 2;
    parms._nfolds = 2;
    parms._ntrees = 3;
    parms._learn_rate = 1e-3f;
    // Kept so the per-fold and holdout prediction frames exist and can be
    // cleaned up explicitly in the finally block below.
    parms._keep_cross_validation_predictions = true;

    // Build the model and check its cross-validation metrics against the
    // expected reference values.
    gbm = new GBM(parms).trainModel().get();
    ModelMetricsBinomial mm = (ModelMetricsBinomial) gbm._output._cross_validation_metrics;
    assertEquals(0.6296296296296297, mm.auc_obj()._auc, 1e-8);
    assertEquals(0.28640022521234304, mm.mse(), 1e-8);
    assertEquals(0.7674117059335286, mm.logloss(), 1e-6);
  } finally {
    if (tfr != null) {
      tfr.remove();
    }
    if (gbm != null) {
      gbm.deleteCrossValidationModels();
      gbm.delete();
      // The in-memory model object still references the prediction keys after
      // the model itself has been deleted; remove those frames as well.
      for (Key k : gbm._output._cross_validation_predictions) k.remove();
      gbm._output._cross_validation_holdout_predictions_frame_id.remove();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testCatEncoding.
/**
 * Runs a small GBM once for every categorical encoding scheme except
 * {@code OneHotInternal}, scores the training frame, and verifies through
 * {@code testJavaScoring} that the generated Java scoring code reproduces the
 * in-cluster predictions to within 1e-15.
 */
@Test
public void testCatEncoding() {
  for (Model.Parameters.CategoricalEncodingScheme scheme : Model.Parameters.CategoricalEncodingScheme.values()) {
    // OneHotInternal is skipped by this test.
    if (scheme == Model.Parameters.CategoricalEncodingScheme.OneHotInternal) {
      continue;
    }

    Frame weather = null;
    Frame scored = null;
    GBMModel model = null;
    try {
      weather = parse_test_file("./smalldata/junit/weather.csv");

      GBMModel.GBMParameters params = new GBMModel.GBMParameters();
      params._train = weather._key;
      // The last column of the frame is used as the response.
      params._response_column = weather.lastVecName();
      params._ntrees = 5;
      params._categorical_encoding = scheme;
      model = new GBM(params).trainModel().get();

      // Model is built; produce a frame of predictions on the training data.
      scored = model.score(weather);

      // Build a POJO, validate same results
      Assert.assertTrue(model.testJavaScoring(weather, scored, 1e-15));
    } finally {
      if (weather != null) {
        weather.delete();
      }
      if (scored != null) {
        scored.delete();
      }
      if (model != null) {
        model.deleteCrossValidationModels();
        model.delete();
      }
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNA.
/**
 * PUBDEV-2822: GBM on a tiny frame whose first column has missing values.
 *
 * <p>Parses a six-row, two-column CSV literal in which the first and last
 * rows have an empty first field, trains a single tree with learn rate 1 and
 * {@code _min_rows = 1} on response {@code C2}, and checks that the
 * predictions match the response values row by row (0, 0, 0, 0, -10, 0) to
 * within 1e-6. Also validates the generated Java scoring code via
 * {@code testJavaScoring}.
 *
 * <p>All created objects are removed in a {@code finally} block so that a
 * failing assertion does not leave the frame, model or predictions behind.
 */
@Test
public void testNA() {
  String xy = ",0\n1,0\n2,0\n3,0\n4,-10\n,0";
  // Expected prediction per row; equals the response column of the data above.
  final double[] expected = {0, 0, 0, 0, -10, 0};
  Key tr = Key.make("train");
  Frame df = null;
  GBMModel gbm = null;
  Frame preds = null;
  try {
    df = ParseDataset.parse(tr, makeByteVec(Key.make("xy"), xy));
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tr;
    parms._response_column = "C2";
    parms._min_rows = 1;
    parms._learn_rate = 1;
    parms._ntrees = 1;
    GBM job = new GBM(parms);
    gbm = job.trainModel().get();
    preds = gbm.score(df);
    Log.info(df);
    Log.info(preds);
    Assert.assertTrue(gbm.testJavaScoring(df, preds, 1e-15));
    for (int row = 0; row < expected.length; row++) {
      // assertEquals with a delta reports the offending row and value on failure.
      Assert.assertEquals("prediction for row " + row, expected[row], preds.vec(0).at(row), 1e-6);
    }
  } finally {
    if (preds != null) {
      preds.remove();
    }
    if (gbm != null) {
      gbm.remove();
    }
    if (df != null) {
      df.remove();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testGBMPredict.
/**
 * Predict with no actual, after training.
 *
 * <p>Trains a multinomial GBM on {@code ecology_model.csv} (with the
 * {@code Site} id column dropped and the {@code Angaus} response converted to
 * categorical), then scores {@code ecology_eval.csv} after removing its
 * {@code Angaus} column, and validates the generated Java scoring code against
 * those predictions via {@code testJavaScoring}.
 *
 * <p>The training frame is held in a local declared outside the
 * {@code try} so that cleanup is null-safe: a failure before the training key
 * is assigned to the parameters neither throws from {@code finally} nor skips
 * the remaining cleanup and {@code Scope.exit()}.
 */
@Test
public void testGBMPredict() {
  GBMModel gbm = null;
  GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
  Frame train = null, pred = null, res = null;
  Scope.enter();
  try {
    train = parse_test_file("smalldata/gbm_test/ecology_model.csv");
    // Remove unique ID
    train.remove("Site").remove();
    int ci = train.find("Angaus");
    // Convert response 'Angaus' to categorical; the value returned by
    // replace(...) is handed to Scope so it is cleaned up on Scope.exit()
    Scope.track(train.replace(ci, train.vecs()[ci].toCategoricalVec()));
    // Update frame after hacking it
    DKV.put(train);
    parms._train = train._key;
    // Train on the outcome
    parms._response_column = "Angaus";
    parms._distribution = DistributionFamily.multinomial;
    gbm = new GBM(parms).trainModel().get();
    pred = parse_test_file("smalldata/gbm_test/ecology_eval.csv");
    // No response column during scoring
    pred.remove("Angaus").remove();
    res = gbm.score(pred);
    // Build a POJO, validate same results
    Assert.assertTrue(gbm.testJavaScoring(pred, res, 1e-15));
  } finally {
    if (train != null) {
      train.remove();
    }
    if (gbm != null) {
      gbm.delete();
    }
    if (pred != null) {
      pred.remove();
    }
    if (res != null) {
      res.remove();
    }
    Scope.exit();
  }
}
Aggregations