use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testHuberDeltaTiny.
/**
 * Trains a GBM with the huber distribution and a very small {@code _huber_alpha}
 * on the Boston housing file, then compares the training MSE and the mean
 * residual deviance against hard-coded reference values.
 */
@Test
public void testHuberDeltaTiny() {
  Frame tfr = null;
  GBMModel gbm = null;
  try {
    tfr = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._response_column = tfr.lastVecName();
    parms._seed = 0xdecaf;
    parms._distribution = huber;
    // everything is an outlier and we should get laplace loss
    parms._huber_alpha = 1e-2;

    gbm = new GBM(parms).trainModel().get();
    ModelMetricsRegression trainingMetrics = (ModelMetricsRegression) gbm._output._training_metrics;
    Assert.assertEquals(8.05716257, trainingMetrics._MSE, 0.3);

    // Huber loss can be derived from MAE since no obs weights
    final double delta = 0.0047234; // hardcoded from output
    final double MAE = 1.42298;     // see laplace above
    final double expectedDeviance = (2 * MAE - delta) * delta;
    Assert.assertEquals(expectedDeviance, trainingMetrics._mean_residual_deviance, 2e-4);
  } finally {
    // Clean up the parsed frame first, then the model and its CV sub-models.
    if (tfr != null) {
      tfr.delete();
    }
    if (gbm != null) {
      gbm.deleteCrossValidationModels();
      gbm.delete();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testReproducibilityAirline.
// PUBDEV-557 Test dependency on # nodes (for small number of bins, but fixed number of chunks)
/**
 * Rebalances the airlines sample to a fixed number of chunks, drops a set of
 * columns, trains {@code N} DRF models with identical parameters and checks
 * that each training MSE matches a hard-coded reference value.
 *
 * <p>All cleanup (model, frame, scope) runs in {@code finally} blocks so that a
 * failing assertion or training error does not leave keys or an open scope behind.
 */
@Test
public void testReproducibilityAirline() {
  Frame tfr = null;
  final int N = 1;
  double[] mses = new double[N];
  Scope.enter();
  try {
    // Load data, hack frames
    tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

    // rebalance to fixed number of chunks
    Key dest = Key.make("df.rebalanced.hex");
    RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
    H2O.submitTask(rb);
    rb.join();
    tfr.delete();
    // Forget the deleted frame so the finally block never removes it a second
    // time if fetching the rebalanced frame fails.
    tfr = null;
    tfr = DKV.get(dest).get();

    String[] droppedColumns = {
        "DepTime", "ArrTime", "ActualElapsedTime", "AirTime", "ArrDelay", "DepDelay",
        "Cancelled", "CancellationCode", "CarrierDelay", "WeatherDelay", "NASDelay",
        "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
    };
    for (String s : droppedColumns) {
      tfr.remove(s).remove();
    }
    DKV.put(tfr);

    for (int i = 0; i < N; ++i) {
      DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
      parms._train = tfr._key;
      parms._response_column = "IsDepDelayed";
      parms._nbins = 10;
      parms._nbins_cats = 1024;
      parms._ntrees = 7;
      parms._max_depth = 10;
      parms._binomial_double_trees = false;
      parms._mtries = -1;
      parms._min_rows = 1;
      // Simulated sampling with replacement
      parms._sample_rate = 0.632f;
      parms._balance_classes = true;
      parms._seed = (1L << 32) | 2;

      // Build a first model; all remaining models should be equal
      DRFModel drf = null;
      try {
        drf = new DRF(parms).trainModel().get();
        // expected value first, so a failure message reads correctly
        assertEquals(parms._ntrees, drf._output._ntrees);
        mses[i] = drf._output._training_metrics.mse();
      } finally {
        // Delete the model even when the assertion above fails.
        if (drf != null) {
          drf.delete();
        }
      }
    }
  } finally {
    try {
      if (tfr != null) {
        tfr.remove();
      }
    } finally {
      // Always leave the scope, even if training or frame removal threw.
      Scope.exit();
    }
  }
  for (int i = 0; i < mses.length; ++i) {
    Log.info("trial: " + i + " -> MSE: " + mses[i]);
  }
  for (int i = 0; i < mses.length; ++i) {
    //check for the same result on 1 nodes and 5 nodes
    assertEquals(0.20377446328850304, mses[i], 1e-4);
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testNfoldsCVAndValidation.
/**
 * Trains a DRF model with both n-fold cross-validation and an explicit
 * validation frame, and fails the test if the builder rejects that
 * combination with an {@code H2OModelBuilderIllegalArgumentException}.
 */
@Test
public void testNfoldsCVAndValidation() {
  Frame tfr = null;
  Frame vfr = null;
  DRFModel drf = null;
  Scope.enter();
  try {
    // The same file is parsed twice: once for training, once for validation.
    tfr = parse_test_file("smalldata/junit/weights.csv");
    vfr = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(tfr);

    DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
    // data
    parms._train = tfr._key;
    parms._valid = vfr._key;
    parms._response_column = "response";
    // model shape
    parms._ntrees = 3;
    parms._max_depth = 2;
    parms._min_rows = 2;
    // cross-validation and reproducibility
    parms._nfolds = 2;
    parms._seed = 11233;

    try {
      Log.info("Trying N-fold cross-validation AND Validation dataset provided.");
      drf = new DRF(parms).trainModel().get();
    } catch (H2OModelBuilderIllegalArgumentException e) {
      Assert.fail("Should not toss H2OModelBuilderIllegalArgumentException.");
    }
  } finally {
    if (tfr != null) {
      tfr.remove();
    }
    if (vfr != null) {
      vfr.remove();
    }
    if (drf != null) {
      drf.deleteCrossValidationModels();
      drf.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testRowWeightsTiny.
/**
 * Trains a small DRF model using the "weight" column as row weights and
 * compares the training AUC, MSE and log-loss against the expected values
 * {@code _AUC}, {@code _MSE} and {@code _LogLoss}, which are declared outside
 * this method.
 */
@Test
public void testRowWeightsTiny() {
  Frame tfr = null;
  DRFModel drf = null;
  Scope.enter();
  try {
    tfr = parse_test_file("smalldata/junit/weights_all_tiny.csv");
    DKV.put(tfr);

    DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
    parms._train = tfr._key;
    parms._response_column = "response";
    parms._weights_column = "weight";
    parms._seed = 234;
    // in terms of weighted rows
    parms._min_rows = 0.01242;
    parms._max_depth = 2;
    parms._ntrees = 3;

    // Build a first model; all remaining models should be equal
    drf = new DRF(parms).trainModel().get();

    // OOB
    ModelMetricsBinomial mm = (ModelMetricsBinomial) drf._output._training_metrics;
    assertEquals(_AUC, mm.auc_obj()._auc, 1e-8);
    assertEquals(_MSE, mm.mse(), 1e-8);
    assertEquals(_LogLoss, mm.logloss(), 1e-6);
  } finally {
    // No validation frame is created in this test, so only the training
    // frame and the model need cleaning up.
    if (tfr != null) {
      tfr.remove();
    }
    if (drf != null) {
      drf.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testStochasticDRFEquivalent.
/**
 * Trains a DRF regression model on the cars data (response "cylinders",
 * with the "name" column removed) using a 0.5 row sample rate, and checks the
 * training MSE against a hard-coded reference value.
 */
@Test
public void testStochasticDRFEquivalent() {
  Frame tfr = null;
  DRFModel drf = null;
  Scope.enter();
  try {
    tfr = parse_test_file("./smalldata/junit/cars.csv");
    // Drop the "name" column from the frame and remove the detached column.
    tfr.remove("name").remove();
    DKV.put(tfr);

    DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
    parms._train = tfr._key;
    //regression
    parms._response_column = "cylinders";
    parms._seed = 234;
    parms._min_rows = 2;
    parms._max_depth = 5;
    parms._ntrees = 5;
    parms._mtries = 3;
    parms._sample_rate = 0.5f;

    // Build a first model; all remaining models should be equal
    drf = new DRF(parms).trainModel().get();

    ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._training_metrics;
    assertEquals(0.12358322821934015, mm.mse(), 1e-4);
  } finally {
    // No validation frame is created in this test, so only the training
    // frame and the model need cleaning up.
    if (tfr != null) {
      tfr.remove();
    }
    if (drf != null) {
      drf.delete();
    }
    Scope.exit();
  }
}
Aggregations