use of water.fvec.Vec in project h2o-3 by h2oai.
the class ModelMetricsBinomial method make.
/**
 * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
 * <p>
 * Both Vecs are null-checked before anything is dereferenced, and the Scope entered here is exited on every path,
 * including all validation failures.
 *
 * @param targetClassProbs A Vec containing target class probabilities
 * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
 * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given);
 *               if null, the domain of the categorical version of actualLabels is used
 * @return ModelMetrics object
 * @throws IllegalArgumentException if either Vec is null, if the probabilities are not numeric or fall outside [0, 1],
 *                                  or if the given or adapted domain does not have exactly 2 class labels
 */
public static ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels, String[] domain) {
  // Reject missing inputs up front: the conversion below dereferences actualLabels, so checking
  // afterwards would surface a NullPointerException instead of this message.
  if (actualLabels == null || targetClassProbs == null)
    throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
  Scope.enter();
  try {
    Vec _labels = actualLabels.toCategoricalVec();
    if (_labels == null)
      throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
    if (domain == null)
      domain = _labels.domain();
    if (!targetClassProbs.isNumeric())
      throw new IllegalArgumentException("Predicted probabilities must be numeric per-class probabilities for binomial metrics.");
    if (targetClassProbs.min() < 0 || targetClassProbs.max() > 1)
      throw new IllegalArgumentException("Predicted probabilities must be between 0 and 1 for binomial metrics.");
    if (domain.length != 2)
      throw new IllegalArgumentException("Domain must have 2 class labels, but is " + Arrays.toString(domain) + " for binomial metrics.");
    _labels = _labels.adaptTo(domain);
    if (_labels.cardinality() != 2)
      throw new IllegalArgumentException("Adapted domain must have 2 class labels, but is " + Arrays.toString(_labels.domain()) + " for binomial metrics.");
    // Probabilities plus a "labels" column form the input of the metric-building task.
    Frame predsLabel = new Frame(targetClassProbs);
    predsLabel.add("labels", _labels);
    MetricBuilderBinomial mb = new BinomialMetrics(_labels.domain()).doAll(predsLabel)._mb;
    // The adapted label Vec is dropped as soon as the builder has been computed.
    _labels.remove();
    Frame preds = new Frame(targetClassProbs);
    ModelMetricsBinomial mm = (ModelMetricsBinomial) mb.makeModelMetrics(null, predsLabel, null, preds);
    mm._description = "Computed on user-given predictions and labels, using F1-optimal threshold: " + mm.auc_obj().defaultThreshold() + ".";
    return mm;
  } finally {
    // Always balance Scope.enter(), also when one of the checks above throws.
    Scope.exit();
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class Word2VecTest method testTransformAggregate.
/**
 * Checks Word2Vec sentence aggregation with {@code AggregateMethod.AVERAGE}: a tiny model is trained on the
 * words "a" and "b", its word vectors are overwritten with fixed values, and the averaged output for a
 * null-delimited sequence of words laid out over two chunks is compared against hand-computed columns.
 */
@Test
public void testTransformAggregate() {
  Scope.enter();
  try {
    Vec words = Scope.track(svec("a", "b"));
    Frame trainFrame = Scope.track(new Frame(Key.<Frame>make(), new String[] { "Words" }, new Vec[] { words }));
    DKV.put(trainFrame);

    // Train a throw-away w2v model, then replace what it learned with known vectors.
    Word2VecModel.Word2VecParameters params = new Word2VecModel.Word2VecParameters();
    params._train = trainFrame._key;
    params._min_word_freq = 0;
    params._epochs = 1;
    params._vec_size = 2;
    Word2Vec builder = new Word2Vec(params);
    Word2VecModel model = (Word2VecModel) Scope.track_generic(builder.trainModel().get());
    model._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
    DKV.put(model);

    // A null entry closes a sentence. The first ten entries fill the first chunk;
    // the final sentence ("b") has no closing null.
    String[] sentences = {
        "a", "b", null, "a", "c", null, "c", null, "a", "a",
        "a", "b", null,
        "b"
    };
    Frame data = new TestFrameBuilder()
        .withName("data")
        .withColNames("Sentences")
        .withVecTypes(Vec.T_STR)
        .withDataForCol(0, sentences)
        .withChunkLayout(10, 4)
        .build();
    Frame aggregated = Scope.track(model.transform(data.vec(0), Word2VecModel.AggregateMethod.AVERAGE));

    Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
    Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));
    // The vocabulary maps each word to the output column holding its component.
    int colOfA = model._output._vocab.get(new BufferedString("a"));
    assertVecEquals(expectedAs, aggregated.vec(colOfA), 0.0001);
    int colOfB = model._output._vocab.get(new BufferedString("b"));
    assertVecEquals(expectedBs, aggregated.vec(colOfB), 0.0001);
  } finally {
    Scope.exit();
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class GBMTest method testNfoldsColumn.
/**
 * Trains a GBM on the cars data set using the categorical version of the "cylinders" column as the
 * fold column and expects exactly 5 cross-validation models.
 * <p>
 * All keys created here (the frame, the detached numeric "cylinders" Vec, the model, its CV models and
 * the kept fold-assignment frame) are released in the finally block so a failing run does not leave them behind.
 */
@Test
public void testNfoldsColumn() {
  Frame tfr = null;
  Vec old = null;
  GBMModel gbm1 = null;
  try {
    tfr = parse_test_file("smalldata/junit/cars_20mpg.csv");
    // Remove unique id
    tfr.remove("name").remove();
    DKV.put(tfr);
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._response_column = "economy_20mpg";
    parms._fold_column = "cylinders";
    // Swap the numeric fold column for its categorical version; the detached original
    // is no longer owned by the frame and has to be removed separately.
    old = tfr.remove("cylinders");
    tfr.add("cylinders", old.toCategoricalVec());
    DKV.put(tfr);
    parms._ntrees = 10;
    parms._keep_cross_validation_fold_assignment = true;
    GBM job1 = new GBM(parms);
    gbm1 = job1.trainModel().get();
    Assert.assertEquals(5, gbm1._output._cross_validation_models.length);
  } finally {
    // Cleanup lives here (not in the try body) so the detached Vec is freed even when
    // training or the assertion fails.
    if (old != null)
      old.remove();
    if (tfr != null)
      tfr.remove();
    if (gbm1 != null) {
      gbm1.deleteCrossValidationModels();
      gbm1.delete();
      gbm1._output._cross_validation_fold_assignment_frame_id.remove();
    }
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AggregatorTest method testDomains.
/**
 * Converts three weather columns to categoricals, runs the Aggregator with a target of 17 exemplars,
 * and checks that the output frame has at most 17 rows and that at least one categorical column ends
 * up with a domain of different length than in the input frame.
 * <p>
 * The input frame, the output frame and the model are removed in a finally block so a failed
 * assertion or a training error does not leave them behind.
 */
@Test
public void testDomains() {
  final int targetExemplars = 17;
  Frame frame = null;
  Frame output = null;
  AggregatorModel agg = null;
  try {
    frame = parse_test_file("smalldata/junit/weather.csv");
    for (String s : new String[] { "MaxWindSpeed", "RelHumid9am", "Cloud9am" }) {
      // Replace the column with its categorical version and drop the original Vec.
      Vec v = frame.vec(s);
      Vec newV = v.toCategoricalVec();
      frame.remove(s);
      frame.add(s, newV);
      v.remove();
    }
    DKV.put(frame);
    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._target_num_exemplars = targetExemplars;
    agg = new Aggregator(parms).trainModel().get();
    output = agg._output._output_frame.get();
    Assert.assertTrue(output.numRows() <= targetExemplars);
    // Stop at the first categorical column whose domain length differs between input and output;
    // 'same' stays true only if no such column exists.
    boolean same = true;
    for (int i = 0; i < frame.numCols(); ++i) {
      if (frame.vec(i).isCategorical()) {
        same = (frame.domains()[i].length == output.domains()[i].length);
        if (!same)
          break;
      }
    }
    Assert.assertFalse(same);
  } finally {
    if (frame != null)
      frame.remove();
    if (output != null)
      output.remove();
    if (agg != null)
      agg.remove();
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class GBMTest method testNfoldsColumnNumbersFrom0.
@Test
public void testNfoldsColumnNumbersFrom0() {
Frame tfr = null;
Vec old = null;
GBMModel gbm1 = null;
try {
tfr = parse_test_file("smalldata/junit/cars_20mpg.csv");
// Remove unique id
tfr.remove("name").remove();
new MRTask() {
@Override
public void map(Chunk c) {
for (int i = 0; i < c.len(); ++i) {
if (c.at8(i) == 3)
c.set(i, 0);
if (c.at8(i) == 4)
c.set(i, 1);
if (c.at8(i) == 5)
c.set(i, 2);
if (c.at8(i) == 6)
c.set(i, 3);
if (c.at8(i) == 8)
c.set(i, 4);
}
}
}.doAll(tfr.vec("cylinders"));
DKV.put(tfr);
GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
parms._train = tfr._key;
parms._response_column = "economy_20mpg";
parms._fold_column = "cylinders";
parms._ntrees = 10;
GBM job1 = new GBM(parms);
gbm1 = job1.trainModel().get();
Assert.assertTrue(gbm1._output._cross_validation_models.length == 5);
} finally {
if (tfr != null)
tfr.remove();
if (old != null)
old.remove();
if (gbm1 != null) {
gbm1.deleteCrossValidationModels();
gbm1.delete();
}
}
}
Aggregations