Search in sources :

Example 21 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class ModelMetricsBinomial method make.

/**
 * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain
 * for both labels (domain[1] is the target class).
 *
 * @param targetClassProbs A Vec containing target class probabilities
 * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
 * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given);
 *               when null, the domain of the categorical-converted labels is used
 * @return ModelMetrics object
 * @throws IllegalArgumentException if either Vec is null, the probabilities are not numeric or lie outside [0, 1],
 *                                  or the given/adapted domain does not have exactly 2 class labels
 */
public static ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels, String[] domain) {
    // Validate before dereferencing: a null actualLabels used to fail with a NullPointerException
    // on toCategoricalVec() before the intended IllegalArgumentException could be raised.
    if (actualLabels == null || targetClassProbs == null)
        throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!");
    Scope.enter();
    try {
        Vec _labels = actualLabels.toCategoricalVec();
        if (domain == null)
            domain = _labels.domain();
        if (!targetClassProbs.isNumeric())
            throw new IllegalArgumentException("Predicted probabilities must be numeric per-class probabilities for binomial metrics.");
        if (targetClassProbs.min() < 0 || targetClassProbs.max() > 1)
            throw new IllegalArgumentException("Predicted probabilities must be between 0 and 1 for binomial metrics.");
        if (domain.length != 2)
            throw new IllegalArgumentException("Domain must have 2 class labels, but is " + Arrays.toString(domain) + " for binomial metrics.");
        _labels = _labels.adaptTo(domain);
        if (_labels.cardinality() != 2)
            throw new IllegalArgumentException("Adapted domain must have 2 class labels, but is " + Arrays.toString(_labels.domain()) + " for binomial metrics.");
        // Score on a two-column frame: the probabilities plus the adapted labels.
        Frame predsLabel = new Frame(targetClassProbs);
        predsLabel.add("labels", _labels);
        MetricBuilderBinomial mb = new BinomialMetrics(_labels.domain()).doAll(predsLabel)._mb;
        _labels.remove();
        Frame preds = new Frame(targetClassProbs);
        ModelMetricsBinomial mm = (ModelMetricsBinomial) mb.makeModelMetrics(null, predsLabel, null, preds);
        mm._description = "Computed on user-given predictions and labels, using F1-optimal threshold: " + mm.auc_obj().defaultThreshold() + ".";
        return mm;
    } finally {
        // Always balance Scope.enter(), including when one of the validation checks above throws.
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 22 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class Word2VecTest method testTransformAggregate.

/**
 * Trains a tiny Word2Vec model on the words "a" and "b", overwrites its learned vectors with fixed
 * values, and checks the per-sentence averages produced by
 * {@code transform(..., AggregateMethod.AVERAGE)} on a two-chunk string column.
 */
@Test
public void testTransformAggregate() {
    Scope.enter();
    try {
        Vec words = Scope.track(svec("a", "b"));
        Frame trainFrame = Scope.track(new Frame(Key.<Frame>make(), new String[] { "Words" }, new Vec[] { words }));
        DKV.put(trainFrame);

        // Train a throwaway w2v model, then replace the learned vectors with known values
        Word2VecModel.Word2VecParameters params = new Word2VecModel.Word2VecParameters();
        params._train = trainFrame._key;
        params._min_word_freq = 0;
        params._epochs = 1;
        params._vec_size = 2;
        Word2Vec trainer = new Word2Vec(params);
        Word2VecModel model = (Word2VecModel) Scope.track_generic(trainer.trainModel().get());
        model._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
        DKV.put(model);

        // The first 10 entries land in the first chunk, the remaining 4 in the second (see withChunkLayout below)
        String[] sentences = {
            "a", "b", null,
            "a", "c", null,
            "c", null,
            "a", "a", /*chunk end*/
            "a", "b", null,
            // no terminator at the end
            "b"
        };
        Frame sentenceFrame = new TestFrameBuilder().withName("data").withColNames("Sentences").withVecTypes(Vec.T_STR).withDataForCol(0, sentences).withChunkLayout(10, 4).build();
        Frame aggregated = Scope.track(model.transform(sentenceFrame.vec(0), Word2VecModel.AggregateMethod.AVERAGE));

        Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
        Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));

        Vec actualAs = aggregated.vec(model._output._vocab.get(new BufferedString("a")));
        assertVecEquals(expectedAs, actualAs, 0.0001);
        Vec actualBs = aggregated.vec(model._output._vocab.get(new BufferedString("b")));
        assertVecEquals(expectedBs, actualBs, 0.0001);
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) Vec(water.fvec.Vec) BufferedString(water.parser.BufferedString)

Example 23 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GBMTest method testNfoldsColumn.

/**
 * Trains a GBM using the categorical-converted "cylinders" column as the fold column and checks
 * that 5 cross-validation models are produced.
 */
@Test
public void testNfoldsColumn() {
    Frame tfr = null;
    // Held outside the try so the detached numeric column is cleaned up even if training fails.
    Vec old = null;
    GBMModel gbm1 = null;
    try {
        tfr = parse_test_file("smalldata/junit/cars_20mpg.csv");
        // Remove unique id
        tfr.remove("name").remove();
        DKV.put(tfr);
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "economy_20mpg";
        parms._fold_column = "cylinders";
        // Swap the original "cylinders" column for its categorical version
        old = tfr.remove("cylinders");
        tfr.add("cylinders", old.toCategoricalVec());
        DKV.put(tfr);
        parms._ntrees = 10;
        parms._keep_cross_validation_fold_assignment = true;
        GBM job1 = new GBM(parms);
        gbm1 = job1.trainModel().get();
        Assert.assertEquals(5, gbm1._output._cross_validation_models.length);
    } finally {
        if (tfr != null)
            tfr.remove();
        if (old != null)
            old.remove();
        if (gbm1 != null) {
            gbm1.deleteCrossValidationModels();
            gbm1.delete();
            gbm1._output._cross_validation_fold_assignment_frame_id.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) FVecTest.makeByteVec(water.fvec.FVecTest.makeByteVec) Test(org.junit.Test)

Example 24 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AggregatorTest method testDomains.

/**
 * Converts three weather columns to categoricals, runs the Aggregator with a target of 17 exemplars,
 * and checks that the output has at most 17 rows and that at least one categorical column has a
 * different domain length in the output frame than in the input frame.
 */
@Test
public void testDomains() {
    Frame frame = null;
    Frame output = null;
    AggregatorModel agg = null;
    try {
        frame = parse_test_file("smalldata/junit/weather.csv");
        for (String s : new String[] { "MaxWindSpeed", "RelHumid9am", "Cloud9am" }) {
            Vec v = frame.vec(s);
            Vec newV = v.toCategoricalVec();
            frame.remove(s);
            frame.add(s, newV);
            v.remove();
        }
        DKV.put(frame);
        AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
        parms._train = frame._key;
        parms._target_num_exemplars = 17;
        agg = new Aggregator(parms).trainModel().get();
        output = agg._output._output_frame.get();
        Assert.assertTrue(output.numRows() <= 17);
        // Stop at the first categorical column whose domain length differs between input and output
        boolean same = true;
        for (int i = 0; i < frame.numCols(); ++i) {
            if (frame.vec(i).isCategorical()) {
                same = (frame.domains()[i].length == output.domains()[i].length);
                if (!same)
                    break;
            }
        }
        Assert.assertFalse(same);
    } finally {
        // Clean up even when training throws or an assertion fails, so nothing is left behind.
        if (frame != null)
            frame.remove();
        if (output != null)
            output.remove();
        if (agg != null)
            agg.remove();
    }
}
Also used : CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) Aggregator(hex.aggregator.Aggregator) AggregatorModel(hex.aggregator.AggregatorModel) Test(org.junit.Test)

Example 25 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GBMTest method testNfoldsColumnNumbersFrom0.

/**
 * Rewrites the "cylinders" column in place so its values 3, 4, 5, 6, 8 become 0, 1, 2, 3, 4,
 * trains a GBM using that column as the fold column, and checks that 5 cross-validation models
 * are produced.
 */
@Test
public void testNfoldsColumnNumbersFrom0() {
    Frame tfr = null;
    GBMModel gbm1 = null;
    try {
        tfr = parse_test_file("smalldata/junit/cars_20mpg.csv");
        // Remove unique id
        tfr.remove("name").remove();
        new MRTask() {

            @Override
            public void map(Chunk c) {
                for (int i = 0; i < c.len(); ++i) {
                    // Read once and pick a single branch, so a freshly written value can never be remapped again
                    long cylinders = c.at8(i);
                    if (cylinders == 3)
                        c.set(i, 0);
                    else if (cylinders == 4)
                        c.set(i, 1);
                    else if (cylinders == 5)
                        c.set(i, 2);
                    else if (cylinders == 6)
                        c.set(i, 3);
                    else if (cylinders == 8)
                        c.set(i, 4);
                }
            }
        }.doAll(tfr.vec("cylinders"));
        DKV.put(tfr);
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "economy_20mpg";
        parms._fold_column = "cylinders";
        parms._ntrees = 10;
        GBM job1 = new GBM(parms);
        gbm1 = job1.trainModel().get();
        Assert.assertEquals(5, gbm1._output._cross_validation_models.length);
    } finally {
        if (tfr != null)
            tfr.remove();
        if (gbm1 != null) {
            gbm1.deleteCrossValidationModels();
            gbm1.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) FVecTest.makeByteVec(water.fvec.FVecTest.makeByteVec) Chunk(water.fvec.Chunk) Test(org.junit.Test)

Aggregations

Vec (water.fvec.Vec) 280, Frame (water.fvec.Frame) 213, Test (org.junit.Test) 82, NFSFileVec (water.fvec.NFSFileVec) 48, ValFrame (water.rapids.vals.ValFrame) 47, Chunk (water.fvec.Chunk) 30, Random (java.util.Random) 25, NewChunk (water.fvec.NewChunk) 23, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters) 22, Key (water.Key) 21, MRTask (water.MRTask) 17, Val (water.rapids.Val) 14, File (java.io.File) 11, ArrayList (java.util.ArrayList) 11, Futures (water.Futures) 11, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException) 11, ValNum (water.rapids.vals.ValNum) 11, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame) 10, BufferedString (water.parser.BufferedString) 10, AppendableVec (water.fvec.AppendableVec) 9