Search in sources :

Example 36 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testRowWeights.

/**
 * Trains a small GBM on a dataset that carries a row-weights column and verifies that
 * the training metrics stored on the model match the reference values
 * ({@code _AUC}, {@code _MSE}, {@code _LogLoss} - defined elsewhere in the test class),
 * and that re-scoring the training frame reproduces the same metrics.
 */
@Test
public void testRowWeights() {
    Frame tfr = null, pred = null;
    GBMModel gbm = null;
    Scope.enter();
    try {
        tfr = parse_test_file("smalldata/junit/weights.csv");
        DKV.put(tfr);
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "response";
        parms._weights_column = "weight";
        parms._seed = 0xdecaf;
        parms._min_rows = 1;
        parms._max_depth = 2;
        parms._ntrees = 3;
        parms._learn_rate = 1e-3f;
        // Train the model and check the metrics recorded during training
        gbm = new GBM(parms).trainModel().get();
        ModelMetricsBinomial mm = (ModelMetricsBinomial) gbm._output._training_metrics;
        assertEquals(_AUC, mm.auc_obj()._auc, 1e-8);
        assertEquals(_MSE, mm.mse(), 1e-8);
        assertEquals(_LogLoss, mm.logloss(), 1e-6);
        // Re-score the training frame; the metrics fetched from DKV must agree with the training metrics
        pred = gbm.score(parms.train());
        hex.ModelMetricsBinomial mm2 = hex.ModelMetricsBinomial.getFromDKV(gbm, parms.train());
        assertEquals(_AUC, mm2.auc_obj()._auc, 1e-8);
        assertEquals(_MSE, mm2.mse(), 1e-8);
        assertEquals(_LogLoss, mm2.logloss(), 1e-6);
    } finally {
        // Clean up in finally so a failed assertion does not leave the prediction frame behind
        if (pred != null)
            pred.remove();
        if (tfr != null)
            tfr.remove();
        if (gbm != null)
            gbm.delete();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) hex(hex) Test(org.junit.Test)

Example 37 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class ModelMetricsBinomial method make.

/**
   * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
   * @param targetClassProbs A Vec containing target class probabilities
   * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
   * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given);
   *               if null, the domain of the categorical version of actualLabels is used
   * @return ModelMetrics object
   * @throws IllegalArgumentException if either Vec is null, if the probabilities are not numeric or not within [0, 1],
   *                                  or if the (adapted) domain does not have exactly 2 class labels
   */
public static ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels, String[] domain) {
    // Reject missing inputs up front: the labels Vec is dereferenced immediately below
    if (actualLabels == null || targetClassProbs == null)
        throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
    Scope.enter();
    try {
        Vec labels = actualLabels.toCategoricalVec();
        if (labels == null)
            throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
        if (domain == null)
            domain = labels.domain();
        if (!targetClassProbs.isNumeric())
            throw new IllegalArgumentException("Predicted probabilities must be numeric per-class probabilities for binomial metrics.");
        if (targetClassProbs.min() < 0 || targetClassProbs.max() > 1)
            throw new IllegalArgumentException("Predicted probabilities must be between 0 and 1 for binomial metrics.");
        if (domain.length != 2)
            throw new IllegalArgumentException("Domain must have 2 class labels, but is " + Arrays.toString(domain) + " for binomial metrics.");
        // Re-map the labels onto the requested domain so that level 1 is the target class
        labels = labels.adaptTo(domain);
        if (labels.cardinality() != 2)
            throw new IllegalArgumentException("Adapted domain must have 2 class labels, but is " + Arrays.toString(labels.domain()) + " for binomial metrics.");
        // Frame layout for the metric builder: column 0 = probabilities, column 1 = labels
        Frame predsLabel = new Frame(targetClassProbs);
        predsLabel.add("labels", labels);
        MetricBuilderBinomial mb = new BinomialMetrics(labels.domain()).doAll(predsLabel)._mb;
        labels.remove();
        Frame preds = new Frame(targetClassProbs);
        ModelMetricsBinomial mm = (ModelMetricsBinomial) mb.makeModelMetrics(null, predsLabel, null, preds);
        mm._description = "Computed on user-given predictions and labels, using F1-optimal threshold: " + mm.auc_obj().defaultThreshold() + ".";
        return mm;
    } finally {
        // Always balance Scope.enter(), including when validation throws
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 38 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class Word2VecTest method testTransformAggregate.

/**
 * Checks sentence aggregation ({@code AggregateMethod.AVERAGE}) of a Word2Vec model.
 * The learned vectors are overwritten with fixed values so the expected averages are known;
 * the input column uses {@code null} entries between word sequences and is laid out in two
 * chunks (10 + 4 rows) so that one sequence spans the chunk boundary.
 */
@Test
public void testTransformAggregate() {
    Scope.enter();
    try {
        Vec v = Scope.track(svec("a", "b"));
        Frame fr = Scope.track(new Frame(Key.<Frame>make(), new String[] { "Words" }, new Vec[] { v }));
        DKV.put(fr);
        // build an arbitrary w2v model & overwrite the learned vector with fixed values
        Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
        p._train = fr._key;
        p._min_word_freq = 0;
        p._epochs = 1;
        p._vec_size = 2;
        Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
        w2vm._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
        DKV.put(w2vm);
        String[] sentences = { "a", "b", null, "a", "c", null, "c", null, "a", "a", /*chunk end*/
        "a", "b", null, // no terminator at the end
        "b" };
        // Track the input frame as well, so that it is cleaned up by Scope.exit() like the other temporaries
        Frame f = Scope.track(new TestFrameBuilder().withName("data").withColNames("Sentences").withVecTypes(Vec.T_STR).withDataForCol(0, sentences).withChunkLayout(10, 4).build());
        Frame result = Scope.track(w2vm.transform(f.vec(0), Word2VecModel.AggregateMethod.AVERAGE));
        // One expected row per sequence; NaN for the sequence consisting only of "c"
        Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
        Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));
        assertVecEquals(expectedAs, result.vec(w2vm._output._vocab.get(new BufferedString("a"))), 0.0001);
        assertVecEquals(expectedBs, result.vec(w2vm._output._vocab.get(new BufferedString("b"))), 0.0001);
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) Vec(water.fvec.Vec) BufferedString(water.parser.BufferedString)

Example 39 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class Word2VecTest method testW2V_pretrained.

/**
 * Builds a Word2Vec model from a pre-trained frame (word, V1, V2) and verifies that
 * transforming each word returns exactly the two vector components supplied for it.
 */
@Test
public void testW2V_pretrained() {
    String[] words = new String[1000];
    double[] v1 = new double[words.length];
    double[] v2 = new double[words.length];
    for (int i = 0; i < words.length; i++) {
        words[i] = "word" + i;
        v1[i] = i / (float) words.length;
        v2[i] = 1 - v1[i];
    }
    Scope.enter();
    try {
        // Build the frame inside the try block so Scope.exit() still runs if construction fails
        Frame pretrained = new TestFrameBuilder()
                .withName("w2v-pretrained")
                .withColNames("Word", "V1", "V2")
                .withVecTypes(Vec.T_STR, Vec.T_NUM, Vec.T_NUM)
                .withDataForCol(0, words)
                .withDataForCol(1, v1)
                .withDataForCol(2, v2)
                .withChunkLayout(100, 100, 20, 80, 100, 100, 100, 100, 100, 100, 100)
                .build();
        Scope.track(pretrained);
        Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
        p._vec_size = 2;
        p._pre_trained = pretrained._key;
        Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
        for (int i = 0; i < words.length; i++) {
            float[] wordVector = w2vm.transform(words[i]);
            assertArrayEquals("wordvec " + i, new float[] { (float) v1[i], (float) v2[i] }, wordVector, 0.0001f);
        }
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) BufferedString(water.parser.BufferedString)

Example 40 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class ModelSerializationTest method prepareDRFModel.

/**
 * Parses the given test dataset and trains a DRF model on it.
 * The parsed frame is deleted before returning, whether or not training succeeds.
 *
 * @param dataset        path of the test file handed to {@code parse_test_file}
 * @param ignoredColumns columns the model should ignore
 * @param response       name of the response column
 * @param classification when true, a non-categorical response column is converted to categorical first
 * @param ntrees         number of trees to build
 * @return the trained DRF model
 */
private DRFModel prepareDRFModel(String dataset, String[] ignoredColumns, String response, boolean classification, int ntrees) {
    Frame trainingFrame = parse_test_file(dataset);
    try {
        boolean responseNeedsConversion = classification && !trainingFrame.vec(response).isCategorical();
        if (responseNeedsConversion) {
            // Swap in a categorical copy of the response, drop the replaced column, and publish the updated frame
            int responseIdx = trainingFrame.find(response);
            trainingFrame.replace(responseIdx, trainingFrame.vec(response).toCategoricalVec()).remove();
            DKV.put(trainingFrame._key, trainingFrame);
        }
        DRFModel.DRFParameters settings = new DRFModel.DRFParameters();
        settings._score_each_iteration = true;
        settings._ntrees = ntrees;
        settings._response_column = response;
        settings._ignored_columns = ignoredColumns;
        settings._train = trainingFrame._key;
        DRF builder = new DRF(settings);
        return builder.trainModel().get();
    } finally {
        if (trainingFrame != null)
            trainingFrame.delete();
    }
}
Also used : Frame(water.fvec.Frame) DRFModel(hex.tree.drf.DRFModel) DRF(hex.tree.drf.DRF)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16