Search in sources:

Example 81 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testReproducibility.

//
/**
 * Checks that DRF training is reproducible: the same data, parameters and seed are used
 * to train {@code N} single-tree models, and the final training MSE of every model must
 * match that of the first one (tolerance 1e-15).
 *
 * The input frame is rebalanced to 256 chunks before training.
 */
@Test
public void testReproducibility() {
    Frame tfr = null;
    final int N = 5;
    double[] mses = new double[N];
    Scope.enter();
    try {
        // Load data, hack frames
        tfr = parse_test_file("smalldata/covtype/covtype.20k.data");
        // rebalance to 256 chunks
        Key dest = Key.make("df.rebalanced.hex");
        RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
        H2O.submitTask(rb);
        rb.join();
        // Swap the original frame for its rebalanced copy
        tfr.delete();
        tfr = DKV.get(dest).get();
        for (int i = 0; i < N; ++i) {
            DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
            parms._train = tfr._key;
            parms._response_column = "C55";
            parms._nbins = 1000;
            parms._ntrees = 1;
            parms._max_depth = 8;
            parms._mtries = -1;
            parms._min_rows = 10;
            // Identical seed on every trial; all models should therefore be equal
            parms._seed = 1234;
            DRFModel drf = null;
            try {
                drf = new DRF(parms).trainModel().get();
                // JUnit convention: expected value first, actual value second
                assertEquals(parms._ntrees, drf._output._ntrees);
                mses[i] = drf._output._scored_train[drf._output._scored_train.length - 1]._mse;
            } finally {
                // Delete the model even when the assertion above fails
                if (drf != null) {
                    drf.delete();
                }
            }
        }
    } finally {
        if (tfr != null) {
            tfr.remove();
        }
        // Always leave the scope, including on failure, so later tests start clean
        Scope.exit();
    }
    for (int i = 0; i < mses.length; ++i) {
        Log.info("trial: " + i + " -> MSE: " + mses[i]);
    }
    // The first trial is the reference value for all others
    for (double mse : mses) {
        assertEquals(mses[0], mse, 1e-15);
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) RebalanceDataSet(water.fvec.RebalanceDataSet) Test(org.junit.Test)

Example 82 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testRowWeights.

/**
 * Trains a small DRF model (3 trees, depth 2) on {@code smalldata/junit/weights.csv}
 * using the {@code weight} column as row weights, then checks AUC, MSE and logloss
 * both for the training metrics stored on the model and for metrics obtained by
 * scoring the training frame again.
 */
@Test
public void testRowWeights() {
    Frame tfr = null;
    Frame pred = null;
    DRFModel drf = null;
    Scope.enter();
    try {
        tfr = parse_test_file("smalldata/junit/weights.csv");
        DKV.put(tfr);
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        parms._response_column = "response";
        parms._weights_column = "weight";
        parms._seed = 234;
        parms._min_rows = 1;
        parms._max_depth = 2;
        parms._ntrees = 3;
        // Train the weighted model
        drf = new DRF(parms).trainModel().get();
        // OOB
        // Reduced number of rows changes the row sampling -> results differ
        ModelMetricsBinomial mm = (ModelMetricsBinomial) drf._output._training_metrics;
        assertEquals(1.0, mm.auc_obj()._auc, 1e-8);
        assertEquals(0.05823863636363636, mm.mse(), 1e-8);
        assertEquals(0.21035264541934587, mm.logloss(), 1e-6);
        // test set scoring (on the same dataset, but without normalizing the weights)
        pred = drf.score(parms.train());
        hex.ModelMetricsBinomial mm2 = hex.ModelMetricsBinomial.getFromDKV(drf, parms.train());
        // Non-OOB
        assertEquals(1.0, mm2.auc_obj()._auc, 1e-8);
        assertEquals(0.0154320987654321, mm2.mse(), 1e-8);
        assertEquals(0.08349430638608361, mm2.logloss(), 1e-8);
    } finally {
        // Clean up in finally so a failing assertion does not leak the prediction frame
        if (pred != null) {
            pred.remove();
        }
        if (tfr != null) {
            tfr.remove();
        }
        if (drf != null) {
            drf.delete();
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) ModelMetricsBinomial(hex.ModelMetricsBinomial) ModelMetricsBinomial(hex.ModelMetricsBinomial) Test(org.junit.Test)

Example 83 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class FramesV3 method fillFromImpl.

/**
 * Populates this schema from the given {@code Frames} implementation object.
 *
 * Copies the frame id, the requested column and the {@code find_compatible_models}
 * flag, and, when the source carries frames, wraps each one in a {@code FrameV3}
 * built with the source's row offset and row count.
 *
 * @param f source object to copy from
 * @return this schema instance
 */
@Override
public FramesV3 fillFromImpl(Frames f) {
    this.frame_id = new KeyV3.FrameKeyV3(f.frame_id);
    // NOTE: this is needed for request handling, but isn't really part of state
    this.column = f.column;
    this.find_compatible_models = f.find_compatible_models;
    if (f.frames == null) {
        // Nothing to wrap; this.frames is left as it was
        return this;
    }
    this.frames = new FrameV3[f.frames.length];
    for (int idx = 0; idx < f.frames.length; idx++) {
        Frame frame = f.frames[idx];
        this.frames[idx] = new FrameV3(frame, f.row_offset, f.row_count);
    }
    return this;
}
Also used : Frame(water.fvec.Frame)

Example 84 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class ModelMetricsBaseV3 method fillFromImpl.

/**
 * Populates this schema from a {@code ModelMetrics} object.
 *
 * The model key, category and checksum are set when the metrics reference a model;
 * the frame key and checksum are set when they reference a frame. The remaining
 * properties are copied via {@code PojoUtils.copyProperties}, and {@code RMSE} is
 * taken from {@code modelMetrics.rmse()}.
 *
 * @param modelMetrics source metrics to copy from
 * @return this schema instance, cast to {@code S}
 */
@Override
public S fillFromImpl(ModelMetrics modelMetrics) {
    // Model reference: needs a key schema plus category and checksum.
    final Model sourceModel = modelMetrics.model();
    if (sourceModel != null) {
        this.model = new ModelKeyV3(sourceModel._key);
        this.model_category = sourceModel._output.getModelCategory();
        this.model_checksum = sourceModel.checksum();
    }
    // Frame reference: needs a key schema plus checksum.
    final Frame sourceFrame = modelMetrics.frame();
    if (sourceFrame != null) {
        this.frame = new FrameKeyV3(sourceFrame._key);
        this.frame_checksum = sourceFrame.checksum();
    }
    // Fields already populated by hand above; presumably these are skipped by
    // copyProperties -- confirm against PojoUtils.
    final String[] handledFields = new String[] { "model", "model_category", "model_checksum", "frame", "frame_checksum" };
    PojoUtils.copyProperties(this, modelMetrics, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, handledFields);
    RMSE = modelMetrics.rmse();
    return (S) this;
}
Also used : Frame(water.fvec.Frame) Model(hex.Model) ModelKeyV3(water.api.schemas3.KeyV3.ModelKeyV3) FrameKeyV3(water.api.schemas3.KeyV3.FrameKeyV3)

Example 85 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstCut method apply.

/**
 * Bins a single numeric column into intervals and returns the result as a one-column
 * frame whose domain holds the interval labels.
 *
 * Arguments are read positionally from {@code asts}:
 * <ul>
 *   <li>{@code asts[1]} - the frame to cut; must have exactly one non-categorical column</li>
 *   <li>{@code asts[2]} - the cut points (sorted here), or a single value giving the number
 *       of equal-width pieces to cut the column's [min, max] range into</li>
 *   <li>{@code asts[3]} - optional labels; when non-null their count must equal the number of bins</li>
 *   <li>{@code asts[4]} - "lowest" flag (true when the value equals 1): the first interval
 *       includes its lower bound</li>
 *   <li>{@code asts[5]} - "right" flag (true when the value equals 1): intervals include
 *       their upper bound instead of their lower bound</li>
 *   <li>{@code asts[6]} - number of decimal digits the cut points are rounded to, capped at 12</li>
 * </ul>
 *
 * Rows that are NaN or fall outside the outermost cut points produce NaN.
 *
 * @return a {@code ValFrame} wrapping the binned column, named like the input column
 * @throws IllegalArgumentException if the input is not a single numeric column, if a
 *         single cut value is below 2, or if the labels do not match the number of bins
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    double[] cuts = check(asts[2]);
    Arrays.sort(cuts);
    String[] labels = check2(asts[3]);
    final boolean lowest = asts[4].exec(env).getNum() == 1;
    final boolean rite = asts[5].exec(env).getNum() == 1;
    // cap at 12
    final int digits = Math.min((int) asts[6].exec(env).getNum(), 12);
    if (fr.vecs().length != 1 || fr.vecs()[0].isCategorical())
        throw new IllegalArgumentException("First argument must be a numeric column vector");
    double fmin = fr.anyVec().min();
    double fmax = fr.anyVec().max();
    // c(0,10,100) -> 2 bins (0,10] U (10, 100]
    int nbins = cuts.length - 1;
    // Only assigned in the single-value branch below; otherwise referenced solely by the
    // commented-out constant-vector check.
    double width;
    if (nbins == 0) {
        // A single value is interpreted as the number of pieces rather than a cut point.
        if (cuts[0] < 2)
            throw new IllegalArgumentException("The number of cuts must be >= 2. Got: " + cuts[0]);
        // in this case, cut the vec into _cuts[0] many pieces of equal length
        nbins = (int) Math.floor(cuts[0]);
        width = (fmax - fmin) / nbins;
        // NOTE(review): nbins breakpoints delimit only nbins - 1 intervals, while the domain
        // below is sized nbins -- confirm whether this array should hold nbins + 1 entries.
        cuts = new double[nbins];
        // Outer breakpoints are pushed 0.1% of the range beyond min/max; the inner ones are
        // spaced `width` apart starting from fmin.
        cuts[0] = fmin - 0.001 * (fmax - fmin);
        for (int i = 1; i < cuts.length; ++i) cuts[i] = (i == cuts.length - 1) ? (fmax + 0.001 * (fmax - fmin)) : (fmin + i * width);
    }
    // if(width == 0) throw new IllegalArgumentException("Data vector is constant!");
    if (labels != null && labels.length != nbins)
        throw new IllegalArgumentException("`labels` vector does not match the number of cuts.");
    // Construct domain names from _labels or bin intervals if _labels is null
    // cutz is a final alias of the same array (needed for capture by the anonymous MRTask),
    // so the in-place rounding below is also what the task sees.
    final double[] cutz = cuts;
    // first round _cuts to dig.lab decimals: example floor(2.676*100 + 0.5) / 100
    for (int i = 0; i < cuts.length; ++i) cuts[i] = Math.floor(cuts[i] * Math.pow(10, digits) + 0.5) / Math.pow(10, digits);
    String[][] domains = new String[1][nbins];
    if (labels == null) {
        // Interval labels have the form <open>lo,hi<close>; left(..)/rite(..) presumably
        // return the opening/closing bracket for the chosen closure -- verify in the helpers.
        // With `lowest` set, the first interval always opens with "[".
        domains[0][0] = (lowest ? "[" : left(rite)) + cuts[0] + "," + cuts[1] + rite(rite);
        for (int i = 1; i < (cuts.length - 1); ++i) domains[0][i] = left(rite) + cuts[i] + "," + cuts[i + 1] + rite(rite);
    } else
        domains[0] = labels;
    Frame fr2 = new MRTask() {

        // Emits, for each row, the zero-based index of the interval containing the value,
        // or NaN when the value is missing or out of range.
        @Override
        public void map(Chunk c, NewChunk nc) {
            int rows = c._len;
            for (int r = 0; r < rows; ++r) {
                double x = c.atd(r);
                // NaN cases: missing value; below the first cut (or equal to it unless
                // `lowest`); above the last cut (or equal to it unless `rite`).
                if (Double.isNaN(x) || (lowest && x < cutz[0]) || (!lowest && (x < cutz[0] || MathUtils.equalsWithinOneSmallUlp(x, cutz[0]))) || (rite && x > cutz[cutz.length - 1]) || (!rite && (x > cutz[cutz.length - 1] || MathUtils.equalsWithinOneSmallUlp(x, cutz[cutz.length - 1]))))
                    nc.addNum(Double.NaN);
                else {
                    // Linear scan for the first upper bound that contains x; the bound is
                    // inclusive when `rite` is set and exclusive otherwise.
                    for (int i = 1; i < cutz.length; ++i) {
                        if (rite) {
                            if (x <= cutz[i]) {
                                nc.addNum(i - 1);
                                break;
                            }
                        } else if (x < cutz[i]) {
                            nc.addNum(i - 1);
                            break;
                        }
                    }
                }
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr.names(), domains);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Aggregations

Frame (water.fvec.Frame): 782, Test (org.junit.Test): 435, Vec (water.fvec.Vec): 215, ValFrame (water.rapids.vals.ValFrame): 132, NFSFileVec (water.fvec.NFSFileVec): 66, Val (water.rapids.Val): 65, SplitFrame (hex.SplitFrame): 59, Key (water.Key): 56, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 54, Chunk (water.fvec.Chunk): 50, NewChunk (water.fvec.NewChunk): 37, MRTask (water.MRTask): 33, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 31, Ignore (org.junit.Ignore): 28, Random (java.util.Random): 26, File (java.io.File): 25, BufferedString (water.parser.BufferedString): 21, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 19, HashMap (java.util.HashMap): 17, hex (hex): 16