use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testGBMRegressionGaussian.
/**
 * Builds a one-tree, depth-one Gaussian GBM on the Mfgdata test file and checks
 * that the mean squared error computed from the scored frame agrees with the
 * MSE and mean residual deviance recorded in the model's training score history.
 */
@Test
public void testGBMRegressionGaussian() {
  Frame trainFrame = null;
  Frame predictions = null;
  GBMModel model = null;
  try {
    trainFrame = parse_test_file("./smalldata/gbm_test/Mfgdata_gaussian_GBM_testing.csv");
    final int columnCount = trainFrame.numCols();

    // Column 0 is the row id, column 1 the response and column 2 the only
    // predictor. Ignore column 0 plus every column from index 3 onwards.
    String[] ignoredColumns = new String[columnCount - 2];
    ignoredColumns[0] = trainFrame._names[0];
    System.arraycopy(trainFrame._names, 3, ignoredColumns, 1, columnCount - 3);

    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = trainFrame._key;
    params._distribution = gaussian;
    params._response_column = trainFrame._names[1];
    params._ignored_columns = ignoredColumns;
    params._ntrees = 1;
    params._max_depth = 1;
    params._min_rows = 1;
    params._nbins = 20;
    params._learn_rate = 1.0f;
    params._score_each_iteration = true;

    GBM builder = new GBM(params);
    model = builder.trainModel().get();
    // HEX-1817: the builder job is expected to be stopped after the model is returned
    Assert.assertTrue(builder.isStopped());

    // Score the training frame; the first output column holds the predictions.
    predictions = model.score(trainFrame);
    // builder.response() could replace trainFrame.vecs()[1], but it has been rebalanced
    double sumSquaredError =
        new MathUtils.SquareError().doAll(trainFrame.vecs()[1], predictions.vecs()[0])._sum;
    double meanSquaredError = sumSquaredError / predictions.numRows();

    final double expected = 79152.12337641386;
    final double tolerance = 0.1;
    assertEquals(expected, meanSquaredError, tolerance);
    assertEquals(expected, model._output._scored_train[1]._mse, tolerance);
    assertEquals(expected, model._output._scored_train[1]._mean_residual_deviance, tolerance);
  } finally {
    if (trainFrame != null) {
      trainFrame.remove();
    }
    if (predictions != null) {
      predictions.remove();
    }
    if (model != null) {
      model.remove();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testKDDTrees.
/**
 * Checks that GBM is reproducible: a reference model and ten further models built
 * with identical parameters must yield equal validation score histories.
 *
 * Test uses big data and is too slow for a pre-push, hence {@code @Ignore}.
 */
@Test
@Ignore
public void testKDDTrees() {
  Frame tfr = null, vfr = null;
  GBMModel gbm1 = null;
  String[] cols = new String[] { "DOB", "LASTGIFT", "TARGET_D" };
  try {
    // Load data, hack frames
    Frame inF1 = parse_test_file("bigdata/laptop/usecases/cup98LRN_z.csv");
    Frame inF2 = parse_test_file("bigdata/laptop/usecases/cup98VAL_z.csv");
    // Just the columns to train on
    tfr = inF1.subframe(cols);
    vfr = inF2.subframe(cols);
    // Toss all the rest away
    inF1.remove(cols).remove();
    inF2.remove(cols).remove();
    // Convert 'DOB' to categorical
    tfr.replace(0, tfr.vec("DOB").toCategoricalVec());
    vfr.replace(0, vfr.vec("DOB").toCategoricalVec());
    DKV.put(tfr);
    DKV.put(vfr);

    // Same parms for all
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._valid = vfr._key;
    parms._response_column = "TARGET_D";
    parms._ntrees = 3;
    parms._distribution = gaussian;

    // Build a first model; all remaining models should be equal
    gbm1 = new GBM(parms).trainModel().get();
    ScoreKeeper[] firstScored = gbm1._output._scored_valid;

    // Build 10 more models, checking for equality
    for (int i = 0; i < 10; i++) {
      GBMModel gbm2 = null;
      try {
        gbm2 = new GBM(parms).trainModel().get();
        ScoreKeeper[] secondScored = gbm2._output._scored_valid;
        // Compare element-wise with equals() (also covers differing lengths and
        // null entries). A reference comparison (!=) would presumably flag every
        // separately built model as different, whatever its scores are.
        if (!java.util.Arrays.equals(firstScored, secondScored)) {
          // Dump both models' trees before failing, to help diagnose the difference
          System.out.println("=== =============== ===");
          System.out.println("=== ORIGINAL MODEL ===");
          for (int t = 0; t < parms._ntrees; t++) {
            System.out.println(gbm1._output.toStringTree(t, 0));
          }
          System.out.println("=== DIFFERENT MODEL ===");
          for (int t = 0; t < parms._ntrees; t++) {
            System.out.println(gbm2._output.toStringTree(t, 0));
          }
          System.out.println("=== =============== ===");
          Assert.assertArrayEquals("GBM should have the exact same MSEs for identical parameters", firstScored, secondScored);
        }
      } finally {
        // Delete the comparison model even when the assertion above fails
        if (gbm2 != null) {
          gbm2.delete();
        }
      }
    }
  } finally {
    if (gbm1 != null) {
      gbm1.delete();
    }
    if (tfr != null) {
      tfr.remove();
    }
    if (vfr != null) {
      vfr.remove();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testModelAdaptMultinomial.
/**
 * Adapt a trained model to a test dataset with different categoricals.
 *
 * Trains a single-tree multinomial GBM on mixcat_train.csv, scores mixcat_test.csv,
 * checks the predicted class indices and validates the generated POJO against the
 * in-cluster predictions.
 */
@Test
public void testModelAdaptMultinomial() {
  GBMModel gbm = null;
  Frame res = null;
  GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
  Scope.enter();
  try {
    Frame v;
    parms._train = (parse_test_file("smalldata/junit/mixcat_train.csv"))._key;
    parms._valid = (v = parse_test_file("smalldata/junit/mixcat_test.csv"))._key;
    // Train on the outcome
    parms._response_column = "Response";
    // Build a CART tree - 1 tree, full learn rate, down to 1 row
    parms._ntrees = 1;
    parms._learn_rate = 1.0f;
    parms._min_rows = 1;
    parms._distribution = DistributionFamily.multinomial;
    gbm = new GBM(parms).trainModel().get();

    res = gbm.score(v);
    int[] ps = new int[(int) v.numRows()];
    Vec.Reader vr = res.vecs()[0].new Reader();
    for (int i = 0; i < ps.length; i++) {
      ps[i] = (int) vr.at8(i);
    }
    // Expected predictions are X,X,Y,Y,X,Y,Z,X,Y
    // Never predicts W, the extra class in the test set.
    // Badly predicts Z because 1 tree does not pick up that feature#2 can also
    // be used to predict Z, and instead relies on factor C which does not appear
    // in the test set.
    // JUnit order is (message, expected, actual) so a failure reports the right sides.
    Assert.assertArrayEquals("", new int[] { 1, 1, 2, 2, 1, 2, 3, 1, 2 }, ps);
    // Validation-metrics lookup kept from the original test; its result is not asserted on.
    hex.ModelMetricsMultinomial.getFromDKV(gbm, parms.valid());
    // Build a POJO, validate same results
    Assert.assertTrue(gbm.testJavaScoring(v, res, 1e-15));
  } finally {
    // Null-guard every cleanup step so a failure early in the test (e.g. while
    // parsing) is not masked by a NullPointerException thrown from here.
    if (res != null) {
      res.remove();
    }
    if (parms._train != null) {
      parms._train.remove();
    }
    if (parms._valid != null) {
      parms._valid.remove();
    }
    if (gbm != null) {
      gbm.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNfoldsInvalidValues.
/**
 * Verifies nfolds validation: nfolds == 0 trains a model, while nfolds == 1 and
 * nfolds == -99 must each be rejected with H2OModelBuilderIllegalArgumentException.
 */
@Test
public void testNfoldsInvalidValues() {
  final int[] invalidFolds = { 1, -99 };
  // Models that were wrongly built for an invalid nfolds value; kept only for cleanup.
  final GBMModel[] wronglyBuilt = new GBMModel[invalidFolds.length];
  Frame trainFrame = null;
  GBMModel baseline = null;
  Scope.enter();
  try {
    trainFrame = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(trainFrame);

    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = trainFrame._key;
    params._response_column = "response";
    params._min_rows = 1;
    params._seed = 12345;
    params._max_depth = 2;
    params._ntrees = 3;
    params._learn_rate = 1e-3f;

    // nfolds == 0 is accepted and produces the baseline model
    params._nfolds = 0;
    baseline = new GBM(params).trainModel().get();

    for (int k = 0; k < invalidFolds.length; k++) {
      params._nfolds = invalidFolds[k];
      try {
        Log.info("Trying nfolds==" + invalidFolds[k] + ".");
        wronglyBuilt[k] = new GBM(params).trainModel().get();
        Assert.fail("Should toss H2OModelBuilderIllegalArgumentException instead of reaching here");
      } catch (H2OModelBuilderIllegalArgumentException expected) {
        // This exception is the outcome the test is looking for.
      }
    }
  } finally {
    if (trainFrame != null) {
      trainFrame.remove();
    }
    if (baseline != null) {
      baseline.delete();
    }
    for (GBMModel stray : wronglyBuilt) {
      if (stray != null) {
        stray.delete();
      }
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testQuantileRegression.
/**
 * Trains a quantile-regression GBM (alpha = 0.4) on the ecology data with row and
 * column sampling, scores the evaluation file, validates the generated POJO against
 * the in-cluster predictions and checks the training mean residual deviance.
 */
@Test
public void testQuantileRegression() {
  GBMModel gbm = null;
  GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
  Frame train = null, pred = null, res = null;
  Scope.enter();
  try {
    train = parse_test_file("smalldata/gbm_test/ecology_model.csv");
    // Remove unique ID
    train.remove("Site").remove();
    // Remove categorical
    train.remove("Method").remove();
    // Update frame after hacking it
    DKV.put(train);
    parms._train = train._key;
    // Train on the outcome
    parms._response_column = "DSDist";
    parms._distribution = DistributionFamily.quantile;
    parms._quantile_alpha = 0.4;
    parms._sample_rate = 0.6f;
    parms._col_sample_rate = 0.8f;
    parms._col_sample_rate_per_tree = 0.8f;
    parms._seed = 1234;
    GBM job = new GBM(parms);
    gbm = job.trainModel().get();

    pred = parse_test_file("smalldata/gbm_test/ecology_eval.csv");
    res = gbm.score(pred);
    // Build a POJO, validate same results
    Assert.assertTrue(gbm.testJavaScoring(pred, res, 1e-15));
    // assertEquals with a delta reports the actual value when the check fails
    Assert.assertEquals(10.69611, ((ModelMetricsRegression) gbm._output._training_metrics)._mean_residual_deviance, 1e-4);
  } finally {
    // Clean up through the local frame reference with a null guard: the original
    // failure is not masked by an NPE when parsing fails, and the frame is still
    // removed if the test fails before parms._train is assigned.
    if (train != null) {
      train.remove();
    }
    if (gbm != null) {
      gbm.delete();
    }
    if (pred != null) {
      pred.remove();
    }
    if (res != null) {
      res.remove();
    }
    Scope.exit();
  }
}
Aggregations