use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testRowWeights.
/**
 * Trains a small GBM on {@code smalldata/junit/weights.csv} with the {@code weight} column as
 * row weights and checks AUC, MSE and log-loss against the expected constants
 * ({@code _AUC}, {@code _MSE}, {@code _LogLoss}, declared outside this method), first on the
 * training metrics stored in the model output and then on metrics obtained by scoring the
 * training frame again.
 */
@Test
public void testRowWeights() {
  Frame tfr = null;
  Frame pred = null;
  GBMModel gbm = null;
  Scope.enter();
  try {
    tfr = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(tfr);

    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tfr._key;
    parms._response_column = "response";
    parms._weights_column = "weight";
    parms._seed = 0xdecaf;
    parms._min_rows = 1;
    parms._max_depth = 2;
    parms._ntrees = 3;
    parms._learn_rate = 1e-3f;

    // Build the model and verify the metrics recorded during training.
    gbm = new GBM(parms).trainModel().get();
    ModelMetricsBinomial mm = (ModelMetricsBinomial) gbm._output._training_metrics;
    assertEquals(_AUC, mm.auc_obj()._auc, 1e-8);
    assertEquals(_MSE, mm.mse(), 1e-8);
    assertEquals(_LogLoss, mm.logloss(), 1e-6);

    // Score the training frame and verify that the metrics looked up for it match as well.
    pred = gbm.score(parms.train());
    hex.ModelMetricsBinomial mm2 = hex.ModelMetricsBinomial.getFromDKV(gbm, parms.train());
    assertEquals(_AUC, mm2.auc_obj()._auc, 1e-8);
    assertEquals(_MSE, mm2.mse(), 1e-8);
    assertEquals(_LogLoss, mm2.logloss(), 1e-6);
  } finally {
    // The prediction frame is released here (not at the end of the try body) so that it is
    // cleaned up even when one of the assertions above fails.
    if (pred != null) {
      pred.remove();
    }
    if (tfr != null) {
      tfr.remove();
    }
    if (gbm != null) {
      gbm.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class ModelMetricsBinomial method make.
/**
 * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
 * @param targetClassProbs A Vec containing target class probabilities
 * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
 * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given);
 *               if {@code null}, the domain of the categorical version of {@code actualLabels} is used
 * @return ModelMetrics object
 * @throws IllegalArgumentException if either Vec is {@code null}, the probabilities are not numeric or
 *         fall outside [0, 1], or the (given or adapted) domain does not have exactly 2 class labels
 */
public static ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels, String[] domain) {
  // Validate the inputs before touching them: dereferencing a null actualLabels first would
  // surface as a NullPointerException instead of the intended IllegalArgumentException.
  if (actualLabels == null || targetClassProbs == null) {
    throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
  }
  Scope.enter();
  try {
    Vec labels = actualLabels.toCategoricalVec();
    if (domain == null) {
      domain = labels.domain();
    }
    if (!targetClassProbs.isNumeric()) {
      throw new IllegalArgumentException("Predicted probabilities must be numeric per-class probabilities for binomial metrics.");
    }
    if (targetClassProbs.min() < 0 || targetClassProbs.max() > 1) {
      throw new IllegalArgumentException("Predicted probabilities must be between 0 and 1 for binomial metrics.");
    }
    if (domain.length != 2) {
      throw new IllegalArgumentException("Domain must have 2 class labels, but is " + Arrays.toString(domain) + " for binomial metrics.");
    }
    labels = labels.adaptTo(domain);
    if (labels.cardinality() != 2) {
      throw new IllegalArgumentException("Adapted domain must have 2 class labels, but is " + Arrays.toString(labels.domain()) + " for binomial metrics.");
    }

    // Frame holding the probabilities plus the adapted labels, as consumed by the metric builder task.
    Frame predsLabel = new Frame(targetClassProbs);
    predsLabel.add("labels", labels);
    MetricBuilderBinomial mb = new BinomialMetrics(labels.domain()).doAll(predsLabel)._mb;
    labels.remove();

    Frame preds = new Frame(targetClassProbs);
    ModelMetricsBinomial mm = (ModelMetricsBinomial) mb.makeModelMetrics(null, predsLabel, null, preds);
    mm._description = "Computed on user-given predictions and labels, using F1-optimal threshold: " + mm.auc_obj().defaultThreshold() + ".";
    return mm;
  } finally {
    // Always leave the scope, including when one of the validation checks above throws.
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class Word2VecTest method testTransformAggregate.
/**
 * Checks {@code Word2VecModel.transform} with {@code AggregateMethod.AVERAGE}: the word vectors
 * of a tiny model are overwritten with fixed values ("a" = (1, 0), "b" = (0, 1)) and the
 * per-sentence averages are compared with hand-computed expectations. Sentences are separated
 * by {@code null} entries and the input is laid out in two chunks (10 and 4 rows) so that one
 * sentence spans the chunk boundary.
 */
@Test
public void testTransformAggregate() {
  Scope.enter();
  try {
    Vec v = Scope.track(svec("a", "b"));
    Frame fr = Scope.track(new Frame(Key.<Frame>make(), new String[] { "Words" }, new Vec[] { v }));
    DKV.put(fr);

    // build an arbitrary w2v model & overwrite the learned vector with fixed values
    Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
    p._train = fr._key;
    p._min_word_freq = 0;
    p._epochs = 1;
    p._vec_size = 2;
    Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
    w2vm._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
    DKV.put(w2vm);

    String[] sentences = { "a", "b", null, "a", "c", null, "c", null, "a", "a", /*chunk end*/
        "a", "b", null, // no terminator at the end
        "b" };
    // Track the input frame like every other resource in this test (and like the builder frame
    // in testW2V_pretrained) so that it is released by Scope.exit() instead of being left behind.
    Frame f = Scope.track(new TestFrameBuilder()
        .withName("data")
        .withColNames("Sentences")
        .withVecTypes(Vec.T_STR)
        .withDataForCol(0, sentences)
        .withChunkLayout(10, 4)
        .build());

    Frame result = Scope.track(w2vm.transform(f.vec(0), Word2VecModel.AggregateMethod.AVERAGE));
    Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
    Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));
    assertVecEquals(expectedAs, result.vec(w2vm._output._vocab.get(new BufferedString("a"))), 0.0001);
    assertVecEquals(expectedBs, result.vec(w2vm._output._vocab.get(new BufferedString("b"))), 0.0001);
  } finally {
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class Word2VecTest method testW2V_pretrained.
/**
 * Builds a Word2Vec model from a pre-trained frame of 1000 words with 2-dimensional vectors
 * ({@code V1 = i / 1000}, {@code V2 = 1 - V1}) spread over several chunks, and checks that
 * {@code transform(word)} returns the supplied vector for every word.
 */
@Test
public void testW2V_pretrained() {
  String[] words = new String[1000];
  double[] v1 = new double[words.length];
  double[] v2 = new double[words.length];
  for (int i = 0; i < words.length; i++) {
    words[i] = "word" + i;
    v1[i] = i / (float) words.length;
    v2[i] = 1 - v1[i];
  }

  Scope.enter();
  try {
    // The frame is built inside the try block so that Scope.exit() still runs
    // if the builder itself fails after the scope has been entered.
    Frame pretrained = Scope.track(new TestFrameBuilder()
        .withName("w2v-pretrained")
        .withColNames("Word", "V1", "V2")
        .withVecTypes(Vec.T_STR, Vec.T_NUM, Vec.T_NUM)
        .withDataForCol(0, words)
        .withDataForCol(1, v1)
        .withDataForCol(2, v2)
        .withChunkLayout(100, 100, 20, 80, 100, 100, 100, 100, 100, 100, 100)
        .build());

    Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
    p._vec_size = 2;
    p._pre_trained = pretrained._key;
    Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());

    for (int i = 0; i < words.length; i++) {
      float[] wordVector = w2vm.transform(words[i]);
      assertArrayEquals("wordvec " + i, new float[] { (float) v1[i], (float) v2[i] }, wordVector, 0.0001f);
    }
  } finally {
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class ModelSerializationTest method prepareDRFModel.
/**
 * Parses the given test dataset and trains a DRF model on it, scoring on each iteration.
 * The parsed frame is deleted before this method returns, whether or not training succeeds.
 *
 * @param dataset        path of the test file handed to {@code parse_test_file}
 * @param ignoredColumns columns assigned to the model's {@code _ignored_columns}
 * @param response       name of the response column
 * @param classification if {@code true} and the response column is not yet categorical,
 *                       it is replaced by its categorical version before training
 * @param ntrees         number of trees to build
 * @return the trained DRF model
 */
private DRFModel prepareDRFModel(String dataset, String[] ignoredColumns, String response, boolean classification, int ntrees) {
  Frame f = parse_test_file(dataset);
  try {
    // The response column is only inspected when a classification model was requested.
    boolean convertResponse = classification && !f.vec(response).isCategorical();
    if (convertResponse) {
      int responseIdx = f.find(response);
      // replace() hands back the column that was swapped out; remove it, then republish the frame.
      f.replace(responseIdx, f.vec(response).toCategoricalVec()).remove();
      DKV.put(f._key, f);
    }

    DRFModel.DRFParameters settings = new DRFModel.DRFParameters();
    settings._train = f._key;
    settings._response_column = response;
    settings._ignored_columns = ignoredColumns;
    settings._ntrees = ntrees;
    settings._score_each_iteration = true;

    DRFModel trained = new DRF(settings).trainModel().get();
    return trained;
  } finally {
    if (f != null) {
      f.delete();
    }
  }
}
Aggregations