Use of hex.SplitFrame in project h2o-3 by h2oai.
The class SplitFrameHandler, method run: creates the SplitFrame job from the REST schema, starts it, and returns the job key without blocking.
public SplitFrameV3 run(int version, SplitFrameV3 sf) {
  SplitFrame splitFrame = sf.createAndFillImpl();
  Job job = splitFrame.exec();
  SplitFrameV3 spv3 = new SplitFrameV3(splitFrame);
  spv3.key = new KeyV3.JobKeyV3(job._key);
  return spv3;
}
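The handler starts the split asynchronously and returns only the job key, so a caller that needs the split frames has to block on the job itself, as the tests below do. A minimal sketch of that blocking pattern, assuming a Frame fr already in the DKV (the destination key names here are illustrative):

// Split fr 50/50 and wait for the job to finish before reading the results.
SplitFrame sf = new SplitFrame(fr, new double[] { 0.5, 0.5 },
    new Key[] { Key.make("part1.hex"), Key.make("part2.hex") });
sf.exec().get(); // block until the split job completes
Frame first = DKV.get(sf._destination_frames[0]).get();
Frame second = DKV.get(sf._destination_frames[1]).get();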
Use of hex.SplitFrame in project h2o-3 by h2oai.
The class DRFTest, method sampleRatePerClass: splits the covtype data 50/50 and trains a DRF model with per-class sampling rates.
@Test
public void sampleRatePerClass() {
  Frame tfr = null;
  Key[] ksplits = null;
  DRFModel drf = null;
  try {
    Scope.enter();
    tfr = parse_test_file("smalldata/covtype/covtype.20k.data");
    int resp = 54;
    // tfr = parse_test_file("bigdata/laptop/mnist/train.csv.gz");
    // int resp = 784;
    Scope.track(tfr.replace(resp, tfr.vecs()[resp].toCategoricalVec()));
    DKV.put(tfr);
    SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("valid.hex") });
    // Invoke the job and block until the split is done
    sf.exec().get();
    ksplits = sf._destination_frames;
    // Configure DRF on the split frames
    DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
    parms._train = ksplits[0];
    parms._valid = ksplits[1];
    parms._response_column = tfr.names()[resp];
    parms._min_split_improvement = 1e-5;
    parms._ntrees = 20;
    parms._score_tree_interval = parms._ntrees;
    parms._max_depth = 15;
    parms._seed = 1234;
    parms._sample_rate_per_class = new double[] { 0.1f, 0.1f, 0.2f, 0.4f, 1f, 0.3f, 0.2f };
    DRF job = new DRF(parms);
    drf = job.trainModel().get();
  } finally {
    if (drf != null)
      drf.delete();
    if (tfr != null)
      tfr.delete();
    if (ksplits != null) { // guard against failures before the split ran
      if (ksplits[0] != null)
        ksplits[0].remove();
      if (ksplits[1] != null)
        ksplits[1].remove();
    }
    Scope.exit();
  }
}
Use of hex.SplitFrame in project h2o-3 by h2oai.
The class NaiveBayesTest, method testIrisValidation: splits the iris data 50/50, trains a Naive Bayes model with a validation frame, and verifies that generated Java scoring matches in-H2O predictions.
@Test
public void testIrisValidation() throws InterruptedException, ExecutionException {
  NaiveBayesModel model = null;
  Frame fr = null, fr2 = null;
  Frame tr = null, te = null;
  try {
    fr = parse_test_file("smalldata/iris/iris_wheader.csv");
    SplitFrame sf = new SplitFrame(fr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
    // Invoke the job and block until the split is done
    sf.exec().get();
    Key[] ksplits = sf._destination_frames;
    tr = DKV.get(ksplits[0]).get();
    te = DKV.get(ksplits[1]).get();
    NaiveBayesParameters parms = new NaiveBayesParameters();
    parms._train = ksplits[0];
    parms._valid = ksplits[1];
    // Need Laplace smoothing to avoid zero class-conditional probabilities on the small split
    parms._laplace = 0.01;
    parms._response_column = fr._names[4];
    parms._compute_metrics = true;
    model = new NaiveBayes(parms).trainModel().get();
    // Done building model; produce a score column with class assignments
    fr2 = model.score(te);
    // Generated Java (POJO) scoring must agree with in-H2O predictions to 1e-6
    Assert.assertTrue(model.testJavaScoring(te, fr2, 1e-6));
  } finally {
    if (fr != null)
      fr.delete();
    if (fr2 != null)
      fr2.delete();
    if (tr != null)
      tr.delete();
    if (te != null)
      te.delete();
    if (model != null)
      model.delete();
  }
}
Use of hex.SplitFrame in project h2o-3 by h2oai.
The class DRFTest, method histoTypes: splits the covtype data 50/50 and compares validation logloss across all histogram types.
@Test
public void histoTypes() {
  Frame tfr = null;
  Key[] ksplits = null;
  DRFModel drf = null;
  try {
    Scope.enter();
    tfr = parse_test_file("smalldata/covtype/covtype.20k.data");
    int resp = 54;
    // tfr = parse_test_file("bigdata/laptop/mnist/train.csv.gz");
    // int resp = 784;
    Scope.track(tfr.replace(resp, tfr.vecs()[resp].toCategoricalVec()));
    DKV.put(tfr);
    SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("valid.hex") });
    // Invoke the job and block until the split is done
    sf.exec().get();
    ksplits = sf._destination_frames;
    SharedTreeModel.SharedTreeParameters.HistogramType[] histoType = SharedTreeModel.SharedTreeParameters.HistogramType.values();
    final int N = histoType.length;
    double[] loglosses = new double[N];
    for (int i = 0; i < N; ++i) {
      // Train one DRF model per histogram type on the split frames
      DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
      parms._train = ksplits[0];
      parms._valid = ksplits[1];
      parms._response_column = tfr.names()[resp];
      parms._histogram_type = histoType[i];
      parms._ntrees = 10;
      parms._score_tree_interval = parms._ntrees;
      parms._max_depth = 10;
      parms._seed = 12345;
      parms._nbins = 20;
      parms._nbins_top_level = 20;
      DRF job = new DRF(parms);
      drf = job.trainModel().get();
      loglosses[i] = drf._output._scored_valid[drf._output._scored_valid.length - 1]._logloss;
      drf.delete();
      drf = null; // avoid a second delete in the finally block
    }
    for (int i = 0; i < N; ++i) {
      Log.info("histoType: " + histoType[i] + " -> validation logloss: " + loglosses[i]);
    }
    int idx = ArrayUtils.minIndex(loglosses);
    Log.info("Optimal randomization: " + histoType[idx]);
    // Quantiles are best
    Assert.assertEquals(4, idx);
  } finally {
    if (drf != null)
      drf.delete();
    if (tfr != null)
      tfr.delete();
    if (ksplits != null) { // guard against failures before the split ran
      if (ksplits[0] != null)
        ksplits[0].remove();
      if (ksplits[1] != null)
        ksplits[1].remove();
    }
    Scope.exit();
  }
}
Use of hex.SplitFrame in project h2o-3 by h2oai.
The class DRFTest, method minSplitImprovement: splits the covtype data 50/50 and sweeps min_split_improvement values, comparing validation logloss.
@Test
public void minSplitImprovement() {
  Frame tfr = null;
  Key[] ksplits = null;
  DRFModel drf = null;
  try {
    Scope.enter();
    tfr = parse_test_file("smalldata/covtype/covtype.20k.data");
    int resp = 54;
    // tfr = parse_test_file("bigdata/laptop/mnist/train.csv.gz");
    // int resp = 784;
    Scope.track(tfr.replace(resp, tfr.vecs()[resp].toCategoricalVec()));
    DKV.put(tfr);
    SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("valid.hex") });
    // Invoke the job and block until the split is done
    sf.exec().get();
    ksplits = sf._destination_frames;
    double[] msi = new double[] { 0, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2 };
    final int N = msi.length;
    double[] loglosses = new double[N];
    for (int i = 0; i < N; ++i) {
      // Train one DRF model per min_split_improvement value on the split frames
      DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
      parms._train = ksplits[0];
      parms._valid = ksplits[1];
      parms._response_column = tfr.names()[resp];
      parms._min_split_improvement = msi[i];
      parms._ntrees = 20;
      parms._score_tree_interval = parms._ntrees;
      parms._max_depth = 15;
      parms._seed = 1234;
      DRF job = new DRF(parms);
      drf = job.trainModel().get();
      loglosses[i] = drf._output._scored_valid[drf._output._scored_valid.length - 1]._logloss;
      drf.delete();
      drf = null; // avoid a second delete in the finally block
    }
    for (int i = 0; i < N; ++i) {
      Log.info("min_split_improvement: " + msi[i] + " -> validation logloss: " + loglosses[i]);
    }
    int idx = ArrayUtils.minIndex(loglosses);
    Log.info("Optimal min_split_improvement: " + msi[idx]);
    // A non-zero min_split_improvement should beat the unconstrained default
    Assert.assertTrue(0 != idx);
  } finally {
    if (drf != null)
      drf.delete();
    if (tfr != null)
      tfr.delete();
    if (ksplits != null) { // guard against failures before the split ran
      if (ksplits[0] != null)
        ksplits[0].remove();
      if (ksplits[1] != null)
        ksplits[1].remove();
    }
    Scope.exit();
  }
}
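All three DRF tests repeat the same destination-frame cleanup in their finally blocks. A small hypothetical helper (not part of h2o-3) could factor it out; it assumes the Key[] may be null when the split never ran:

// Hypothetical helper: remove split destination frames, tolerating an early failure.
private static void removeSplits(Key[] ksplits) {
  if (ksplits == null)
    return; // the split job never produced destinations
  for (Key k : ksplits)
    if (k != null)
      k.remove();
}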