Search in sources :

Example 1 with ModelMetricsRegression

use of hex.ModelMetricsRegression in project h2o-3 by h2oai.

the class DRFTest method testStochasticDRFEquivalent.

/**
 * Trains one small regression DRF on the cars data set with row sampling enabled
 * ({@code _sample_rate = 0.5}) and a fixed seed, then checks the training MSE against a
 * previously recorded reference value.
 */
@Test
public void testStochasticDRFEquivalent() {
    Frame tfr = null;
    DRFModel drf = null;
    Scope.enter();
    try {
        tfr = parse_test_file("./smalldata/junit/cars.csv");
        // Drop the "name" column from the frame (and free its Vec) before training
        tfr.remove("name").remove();
        // Re-publish the modified frame so the builder sees the updated column set
        DKV.put(tfr);
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        // regression
        parms._response_column = "cylinders";
        // fixed seed so the sampled trees (and therefore the MSE) are reproducible
        parms._seed = 234;
        parms._min_rows = 2;
        parms._max_depth = 5;
        parms._ntrees = 5;
        parms._mtries = 3;
        parms._sample_rate = 0.5f;
        // Build the model and compare its training MSE with the recorded reference value
        drf = new DRF(parms).trainModel().get();
        ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._training_metrics;
        assertEquals(0.12358322821934015, mm.mse(), 1e-4);
    } finally {
        // Clean up everything this test created, even if training or the assertion failed
        if (tfr != null) {
            tfr.remove();
        }
        if (drf != null) {
            drf.delete();
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) ModelMetricsRegression(hex.ModelMetricsRegression) Test(org.junit.Test)

Example 2 with ModelMetricsRegression

use of hex.ModelMetricsRegression in project h2o-3 by h2oai.

the class DeepLearningGradientCheck method gradientCheck.

/**
 * Numerical gradient check for DeepLearning back-propagation.
 *
 * <p>Setup: loads {@code cancar_logIn.csv}, converts "Merit" and "Class" to categorical columns and
 * adds a categorical "Binary" column that is set to 1 wherever the "Class" chunk value equals 1.
 *
 * <p>For every combination of distribution, activation, response column and adaptive-rate flag
 * (incompatible distribution/response pairs are skipped) a model is trained. Then, for every row
 * of the frame, every layer and every row of that layer's weight matrix:
 * <ul>
 *   <li>the bias gradient recorded by back-propagation (via the static
 *       {@code DeepLearningModelInfo.gradientCheck} hook) is compared with a central
 *       finite-difference estimate {@code (loss(b+eps) - loss(b-eps)) / (2*eps)};</li>
 *   <li>the same comparison is done for a random sample of the weights in that row
 *       (each column is checked with probability {@code SAMPLE_RATE}).</li>
 * </ul>
 * The test passes if the largest relative error stays below {@code MAX_TOLERANCE} and at most
 * {@code MAX_FAILED_COUNT} individual checks exceeded the tolerance.
 *
 * <p>The many {@code assert} statements (checksum invariants) are only evaluated when the JVM runs
 * with assertions enabled ({@code -ea}).
 *
 * <p>NOTE(review): {@code maxRelErr} is only updated in the weight loop, not in the bias check, so
 * bias errors influence the outcome only through {@code failedcount}.
 * <p>NOTE(review): {@code count} is incremented before the "both gradients tiny" early
 * {@code continue}, so the logged mean relative error divides by more checks than were summed.
 * <p>NOTE(review): the static {@code DeepLearningModelInfo.gradientCheck} is not reset anywhere in
 * this method after the loops finish.
 */
@Test
public void gradientCheck() {
    Frame tfr = null;
    DeepLearningModel dl = null;
    try {
        tfr = parse_test_file("smalldata/glm_test/cancar_logIn.csv");
        // Replace "Merit" and "Class" with categorical versions of themselves
        for (String s : new String[] { "Merit", "Class" }) {
            Vec f = tfr.vec(s).toCategoricalVec();
            tfr.remove(s).remove();
            tfr.add(s, f);
        }
        DKV.put(tfr);
        // Add an all-zero "Binary" column, then set it to 1 wherever the "Class" chunk value is 1
        tfr.add("Binary", tfr.anyVec().makeZero());
        new MRTask() {

            // c[0] is the "Class" chunk, c[1] the "Binary" chunk (order given to doAll below)
            public void map(Chunk[] c) {
                for (int i = 0; i < c[0]._len; ++i) if (c[0].at8(i) == 1)
                    c[1].set(i, 1);
            }
        }.doAll(tfr.vecs(new String[] { "Class", "Binary" }));
        // Turn "Binary" into a categorical column as well
        Vec cv = tfr.vec("Binary").toCategoricalVec();
        tfr.remove("Binary").remove();
        tfr.add("Binary", cv);
        DKV.put(tfr);
        // Fixed seed: drives the random hyper-parameters and the weight sampling below
        Random rng = new Random(0xDECAF);
        // number of gradient comparisons performed
        int count = 0;
        // number of comparisons whose relative error exceeded MAX_TOLERANCE
        int failedcount = 0;
        double maxRelErr = 0;
        // running SUM of relative errors; divided by count only when logged at the end
        double meanRelErr = 0;
        for (DistributionFamily dist : new DistributionFamily[] { DistributionFamily.gaussian, DistributionFamily.laplace, DistributionFamily.quantile, DistributionFamily.huber, // DistributionFamily.modified_huber,
        DistributionFamily.gamma, DistributionFamily.poisson, DistributionFamily.AUTO, DistributionFamily.tweedie, DistributionFamily.multinomial, DistributionFamily.bernoulli }) {
            for (DeepLearningParameters.Activation act : new DeepLearningParameters.Activation[] { //            DeepLearningParameters.Activation.ExpRectifier,
            DeepLearningParameters.Activation.Tanh, DeepLearningParameters.Activation.Rectifier }) {
                for (String response : new String[] { //binary classification
                "Binary", //multi-class
                "Class", //regression
                "Cost" }) {
                    for (boolean adaptive : new boolean[] { true, false }) {
                        for (int miniBatchSize : new int[] { 1 }) {
                            // Skip distribution/response combinations that do not belong together
                            if (response.equals("Class")) {
                                if (dist != DistributionFamily.multinomial && dist != DistributionFamily.AUTO)
                                    continue;
                            } else if (response.equals("Binary")) {
                                if (dist != DistributionFamily.modified_huber && dist != DistributionFamily.bernoulli && dist != DistributionFamily.AUTO)
                                    continue;
                            } else {
                                if (dist == DistributionFamily.multinomial || dist == DistributionFamily.modified_huber || dist == DistributionFamily.bernoulli)
                                    continue;
                            }
                            DeepLearningParameters parms = new DeepLearningParameters();
                            // Randomized distribution parameters (reproducible through the seeded rng)
                            parms._huber_alpha = rng.nextDouble() + 0.1;
                            parms._tweedie_power = 1.01 + rng.nextDouble() * 0.9;
                            parms._quantile_alpha = 0.05 + rng.nextDouble() * 0.9;
                            parms._train = tfr._key;
                            //converge to a reasonable model to avoid too large gradients
                            parms._epochs = 100;
                            parms._l1 = 1e-3;
                            parms._l2 = 1e-3;
                            parms._force_load_balance = false;
                            parms._hidden = new int[] { 10, 10, 10 };
                            //otherwise we introduce small bprop errors
                            parms._fast_mode = false;
                            parms._response_column = response;
                            parms._distribution = dist;
                            parms._max_w2 = 10;
                            parms._seed = 0xaaabbb;
                            parms._activation = act;
                            parms._adaptive_rate = adaptive;
                            parms._rate = 1e-4;
                            parms._momentum_start = 0.9;
                            parms._momentum_stable = 0.99;
                            parms._mini_batch_size = miniBatchSize;
                            //                DeepLearningModelInfo.gradientCheck = null;
                            //tell it what gradient to collect
                            DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(0, 0, 0);
                            // Train the model for this parameter combination
                            DeepLearning job = new DeepLearning(parms);
                            try {
                                dl = job.trainModel().get();
                                boolean classification = response.equals("Class") || response.equals("Binary");
                                if (!classification) {
                                    // Regression only: score the training frame and log the mean residual deviance
                                    Frame p = dl.score(tfr);
                                    hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(dl, tfr);
                                    double resdev = ((ModelMetricsRegression) mm)._mean_residual_deviance;
                                    Log.info("Mean residual deviance: " + resdev);
                                    p.delete();
                                }
                                //golden version
                                DeepLearningModelInfo modelInfo = IcedUtils.deepCopy(dl.model_info());
                                //                Log.info(modelInfo.toStringAll());
                                // checksum of the trained model; used below to verify when the model is (un)modified
                                long before = dl.model_info().checksum_impl();
                                float meanLoss = 0;
                                // loop over every row in the dataset and check the gradients for that row's mini-batch
                                for (int rId = 0; rId < tfr.numRows(); rId += 1) /*miniBatchSize*/
                                {
                                    // start from scratch - with a clean model
                                    dl.set_model_info(IcedUtils.deepCopy(modelInfo));
                                    final DataInfo di = dl.model_info().data_info();
                                    // populate miniBatch (consecutive rows)
                                    final DataInfo.Row[] rowsMiniBatch = new DataInfo.Row[miniBatchSize];
                                    for (int i = 0; i < rowsMiniBatch.length; ++i) {
                                        // entries past the end of the frame stay null and are skipped later
                                        if (0 <= rId + i && rId + i < tfr.numRows()) {
                                            rowsMiniBatch[i] = new FrameTask.ExtractDenseRow(di, rId + i).doAll(di._adaptedFrame)._row;
                                        }
                                    }
                                    // loss at weight
                                    long cs = dl.model_info().checksum_impl();
                                    double loss = dl.meanLoss(rowsMiniBatch);
                                    // computing the loss must not modify the model
                                    assert (cs == before);
                                    assert (before == dl.model_info().checksum_impl());
                                    meanLoss += loss;
                                    // layer index runs over all hidden layers plus the output layer
                                    for (int layer = 0; layer <= parms._hidden.length; ++layer) {
                                        int rows = dl.model_info().get_weights(layer).rows();
                                        assert (dl.model_info().get_biases(layer).size() == rows);
                                        for (int row = 0; row < rows; ++row) {
                                            //check bias
                                            if (true) {
                                                // start from scratch - with a clean model
                                                dl.set_model_info(IcedUtils.deepCopy(modelInfo));
                                                // do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
                                                Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
                                                double[] responses = new double[miniBatchSize];
                                                double[] offsets = new double[miniBatchSize];
                                                // n counts the non-null rows actually fed into the mini-batch
                                                int n = 0;
                                                for (DataInfo.Row myRow : rowsMiniBatch) {
                                                    if (myRow == null)
                                                        continue;
                                                    ((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
                                                    responses[n] = myRow.response(0);
                                                    offsets[n] = myRow.offset;
                                                    n++;
                                                }
                                                DeepLearningTask.fpropMiniBatch(-1, /*seed doesn't matter*/
                                                neurons, dl.model_info(), null, true, /*training*/
                                                responses, offsets, n);
                                                // check that we didn't change the model's weights/biases
                                                long after = dl.model_info().checksum_impl();
                                                assert (after == before);
                                                // record the gradient since gradientChecking is enabled
                                                //tell it what gradient to collect
                                                // (column index -1 is used here for the bias check)
                                                DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, -1);
                                                //update the weights and biases
                                                DeepLearningTask.bpropMiniBatch(neurons, n);
                                                assert (before != dl.model_info().checksum_impl());
                                                // reset the model back to the trained model
                                                dl.set_model_info(IcedUtils.deepCopy(modelInfo));
                                                assert (before == dl.model_info().checksum_impl());
                                                double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
                                                // FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
                                                //                    double actualResponse=myRow.response[0];
                                                //                    double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
                                                //                    if (di._normRespMul != null) {
                                                //                      bpropGradient /= di._normRespMul[0]; //no shift for gradient
                                                //                      actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
                                                //                      predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
                                                //                    }
                                                //                    bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
                                                final double bias = dl.model_info().get_biases(layer).get(row);
                                                //don't make the weight deltas too small, or the float weights "won't notice"
                                                double eps = 1e-4 * Math.abs(bias);
                                                if (eps == 0)
                                                    eps = 1e-6;
                                                // loss at bias + eps
                                                dl.model_info().get_biases(layer).set(row, bias + eps);
                                                double up = dl.meanLoss(rowsMiniBatch);
                                                // loss at bias - eps
                                                dl.model_info().get_biases(layer).set(row, bias - eps);
                                                double down = dl.meanLoss(rowsMiniBatch);
                                                if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
                                                    //relative change in loss function is too small -> skip
                                                    continue;
                                                }
                                                // central finite-difference estimate of d(loss)/d(bias)
                                                double gradient = ((up - down) / (2. * eps));
                                                // symmetric relative error between the numerical and the back-propagated gradient
                                                double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
                                                count++;
                                                // if either gradient is tiny, check if both are tiny
                                                if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
                                                    //all good
                                                    if (Math.abs(bpropGradient - gradient) < 1e-7)
                                                        continue;
                                                }
                                                meanRelErr += relError;
                                                if (relError > MAX_TOLERANCE) {
                                                    Log.info("\nDistribution: " + dl._parms._distribution);
                                                    Log.info("\nRow: " + rId);
                                                    Log.info("bias (layer " + layer + ", row " + row + "): " + bias + " +/- " + eps);
                                                    Log.info("loss: " + loss);
                                                    Log.info("losses up/down: " + up + " / " + down);
                                                    Log.info("=> Finite differences gradient: " + gradient);
                                                    Log.info("=> Back-propagation gradient  : " + bpropGradient);
                                                    Log.info("=> Relative error             : " + PrettyPrint.formatPct(relError));
                                                    failedcount++;
                                                }
                                                // NOTE(review): unlike the weight check below, maxRelErr is not updated here
                                            }
                                            // check a random sample of the weights in this row
                                            int cols = dl.model_info().get_weights(layer).cols();
                                            for (int col = 0; col < cols; ++col) {
                                                // only check each weight with probability SAMPLE_RATE
                                                if (rng.nextFloat() >= SAMPLE_RATE)
                                                    continue;
                                                // start from scratch - with a clean model
                                                dl.set_model_info(IcedUtils.deepCopy(modelInfo));
                                                // do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
                                                Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
                                                double[] responses = new double[miniBatchSize];
                                                double[] offsets = new double[miniBatchSize];
                                                int n = 0;
                                                for (DataInfo.Row myRow : rowsMiniBatch) {
                                                    if (myRow == null)
                                                        continue;
                                                    ((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
                                                    responses[n] = myRow.response(0);
                                                    offsets[n] = myRow.offset;
                                                    n++;
                                                }
                                                DeepLearningTask.fpropMiniBatch(-1, /*seed doesn't matter*/
                                                neurons, dl.model_info(), null, true, /*training*/
                                                responses, offsets, n);
                                                // check that we didn't change the model's weights/biases
                                                long after = dl.model_info().checksum_impl();
                                                assert (after == before);
                                                // record the gradient since gradientChecking is enabled
                                                //tell it what gradient to collect
                                                DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, col);
                                                //update the weights
                                                DeepLearningTask.bpropMiniBatch(neurons, n);
                                                assert (before != dl.model_info().checksum_impl());
                                                // reset the model back to the trained model
                                                dl.set_model_info(IcedUtils.deepCopy(modelInfo));
                                                assert (before == dl.model_info().checksum_impl());
                                                double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
                                                // FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
                                                //                    double actualResponse=myRow.response[0];
                                                //                    double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
                                                //                    if (di._normRespMul != null) {
                                                //                      bpropGradient /= di._normRespMul[0]; //no shift for gradient
                                                //                      actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
                                                //                      predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
                                                //                    }
                                                //                    bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
                                                final float weight = dl.model_info().get_weights(layer).get(row, col);
                                                //don't make the weight deltas too small, or the float weights "won't notice"
                                                double eps = 1e-4 * Math.abs(weight);
                                                if (eps == 0)
                                                    eps = 1e-6;
                                                // loss at weight + eps
                                                dl.model_info().get_weights(layer).set(row, col, (float) (weight + eps));
                                                double up = dl.meanLoss(rowsMiniBatch);
                                                // loss at weight - eps
                                                dl.model_info().get_weights(layer).set(row, col, (float) (weight - eps));
                                                double down = dl.meanLoss(rowsMiniBatch);
                                                if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
                                                    //relative change in loss function is too small -> skip
                                                    continue;
                                                }
                                                // central finite-difference estimate of d(loss)/d(weight)
                                                double gradient = ((up - down) / (2. * eps));
                                                double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
                                                count++;
                                                // if either gradient is tiny, check if both are tiny
                                                if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
                                                    //all good
                                                    if (Math.abs(bpropGradient - gradient) < 1e-7)
                                                        continue;
                                                }
                                                meanRelErr += relError;
                                                if (relError > MAX_TOLERANCE) {
                                                    Log.info("\nDistribution: " + dl._parms._distribution);
                                                    Log.info("\nRow: " + rId);
                                                    Log.info("weight (layer " + layer + ", row " + row + ", col " + col + "): " + weight + " +/- " + eps);
                                                    Log.info("loss: " + loss);
                                                    Log.info("losses up/down: " + up + " / " + down);
                                                    Log.info("=> Finite differences gradient: " + gradient);
                                                    Log.info("=> Back-propagation gradient  : " + bpropGradient);
                                                    Log.info("=> Relative error             : " + PrettyPrint.formatPct(relError));
                                                    failedcount++;
                                                }
                                                //                          Assert.assertTrue(failedcount==0);
                                                maxRelErr = Math.max(maxRelErr, relError);
                                                assert (!Double.isNaN(maxRelErr));
                                            }
                                        }
                                    }
                                }
                                meanLoss /= tfr.numRows();
                                Log.info("Mean loss: " + meanLoss);
                            //                  // FIXME: re-enable this
                            //                  if (parms._l1 == 0 && parms._l2 == 0) {
                            //                    assert(Math.abs(meanLoss-resdev)/Math.abs(resdev) < 1e-5);
                            //                  }
                            } catch (RuntimeException ex) {
                                // A runtime failure is tolerated only if the stored model reports itself
                                // unstable, or (when no model is stored) the job is stopped
                                dl = DKV.getGet(job.dest());
                                if (dl != null)
                                    Assert.assertTrue(dl.model_info().isUnstable());
                                else
                                    Assert.assertTrue(job.isStopped());
                            } finally {
                                if (dl != null)
                                    dl.delete();
                            }
                        }
                    }
                }
            }
        }
        Log.info("Number of tests: " + count);
        Log.info("Number of failed tests: " + failedcount);
        Log.info("Mean. relative error: " + meanRelErr / count);
        Log.info("Max. relative error: " + PrettyPrint.formatPct(maxRelErr));
        Assert.assertTrue("Error too large: " + maxRelErr + " >= " + MAX_TOLERANCE, maxRelErr < MAX_TOLERANCE);
        Assert.assertTrue("Failed count too large: " + failedcount + " > " + MAX_FAILED_COUNT, failedcount <= MAX_FAILED_COUNT);
    } finally {
        if (tfr != null)
            tfr.remove();
    }
}
Also used : Frame(water.fvec.Frame) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) ModelMetricsRegression(hex.ModelMetricsRegression) Random(java.util.Random) FrameTask(hex.FrameTask) DataInfo(hex.DataInfo) DistributionFamily(hex.genmodel.utils.DistributionFamily) Chunk(water.fvec.Chunk) PrettyPrint(water.util.PrettyPrint) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 3 with ModelMetricsRegression

use of hex.ModelMetricsRegression in project h2o-3 by h2oai.

the class DRFTest method testColSamplingPerTree.

/**
 * Splits the ecology data set 50/50 into train and validation frames, trains a small regression
 * DRF for every combination of row sample rate, column sample rate (mapped to {@code _mtries})
 * and per-tree column sample rate, and logs the validation MSE of each combination in ascending
 * order.
 *
 * <p>The final ordering assertions are currently disabled, so the test only fails if training
 * itself fails.
 */
@Test
public void testColSamplingPerTree() {
    Frame tfr = null;
    Key[] ksplits = new Key[0];
    try {
        tfr = parse_test_file("./smalldata/gbm_test/ecology_model.csv");
        SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
        // Invoke the job
        sf.exec().get();
        ksplits = sf._destination_frames;
        float[] sample_rates = new float[] { 0.2f, 0.4f, 0.6f, 0.8f, 1.0f };
        float[] col_sample_rates = new float[] { 0.4f, 0.6f, 0.8f, 1.0f };
        float[] col_sample_rates_per_tree = new float[] { 0.4f, 0.6f, 0.8f, 1.0f };
        // Sorted by validation MSE; combinations with an identical MSE overwrite each other
        Map<Double, Triple<Float>> hm = new TreeMap<>();
        for (float sample_rate : sample_rates) {
            for (float col_sample_rate : col_sample_rates) {
                for (float col_sample_rate_per_tree : col_sample_rates_per_tree) {
                    // Fresh reference per build: if this build fails before assignment, the cleanup
                    // below must not delete the previous iteration's (already deleted) model again
                    DRFModel drf = null;
                    Scope.enter();
                    try {
                        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
                        parms._train = ksplits[0];
                        parms._valid = ksplits[1];
                        //regression
                        parms._response_column = "Angaus";
                        parms._seed = 12345;
                        parms._min_rows = 1;
                        parms._max_depth = 15;
                        parms._ntrees = 2;
                        // Translate the column sample rate into a column count (excluding one column
                        // for the response), but never fewer than one column
                        parms._mtries = Math.max(1, (int) (col_sample_rate * (tfr.numCols() - 1)));
                        parms._col_sample_rate_per_tree = col_sample_rate_per_tree;
                        parms._sample_rate = sample_rate;
                        // Build the model for this parameter combination
                        DRF job = new DRF(parms);
                        drf = job.trainModel().get();
                        // too slow, but passes (now)
                        //            // Build a POJO, validate same results
                        //            Frame pred = drf.score(tfr);
                        //            Assert.assertTrue(drf.testJavaScoring(tfr,pred,1e-15));
                        //            pred.remove();
                        ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._validation_metrics;
                        hm.put(mm.mse(), new Triple<>(sample_rate, col_sample_rate, col_sample_rate_per_tree));
                    } finally {
                        if (drf != null) {
                            drf.delete();
                        }
                        Scope.exit();
                    }
                }
            }
        }
        // Kept for the disabled assertions below: ends up holding the worst (highest MSE) combination
        Triple<Float> last = null;
        // iterate over results (min to max MSE) - best to worst
        for (Map.Entry<Double, Triple<Float>> n : hm.entrySet()) {
            Log.info("MSE: " + n.getKey() + ", row sample: " + n.getValue().v1 + ", col sample: " + n.getValue().v2 + ", col sample per tree: " + n.getValue().v3);
            last = n.getValue();
        }
    // worst validation MSE should belong to the most overfit case (1.0, 1.0, 1.0)
    //      Assert.assertTrue(last.v1==sample_rates[sample_rates.length-1]);
    //      Assert.assertTrue(last.v2==col_sample_rates[col_sample_rates.length-1]);
    //      Assert.assertTrue(last.v3==col_sample_rates_per_tree[col_sample_rates_per_tree.length-1]);
    } finally {
        // Remove the parsed frame and both split frames regardless of the outcome
        if (tfr != null) {
            tfr.remove();
        }
        for (Key k : ksplits) {
            if (k != null) {
                k.remove();
            }
        }
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) ModelMetricsRegression(hex.ModelMetricsRegression) Triple(water.util.Triple) Test(org.junit.Test)

Example 4 with ModelMetricsRegression

use of hex.ModelMetricsRegression in project h2o-3 by h2oai.

the class DRFTest method testAutoRebalance.

/**
 * Manual timing benchmark (ignored by default): trains a DRF regression model on a
 * locally stored data set for every combination of max depth / ntrees / rebalance flag /
 * chunk setting, records the wall-clock time of each run, and writes the results to a CSV file.
 *
 * <p>Both the input data set and the output file are hard-coded absolute paths on a
 * developer machine, so this test cannot run elsewhere without editing them.
 *
 * <p>The {@code _rebalance_me} / {@code _nchunks} parameters are commented out below, so the
 * {@code chunk} and {@code rebalanceMe} loop variables currently only control how many
 * times each configuration is repeated and what is written to the output.
 */
@Ignore
@Test
public void testAutoRebalance() {
    final String trainFile = "/Users/ludirehak/Downloads/train.csv.zip";
    // Shared between the model parameters and the CSV output so the two cannot drift apart.
    final int nbins = 1000;
    final int minRows = 10;

    //First pass to warm up
    boolean warmUp = true;
    if (warmUp) {
        int[] warmUpChunks = { 1, 2, 3, 4, 5 };
        for (int chunk : warmUpChunks) {
            Frame tfr = null;
            DRFModel drf = null;
            Scope.enter();
            try {
                // Load data, hack frames
                tfr = parse_test_file(trainFile);
                DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
                parms._train = tfr._key;
                parms._response_column = "Sales";
                parms._nbins = nbins;
                parms._ntrees = 10;
                parms._max_depth = 20;
                parms._mtries = -1;
                parms._min_rows = minRows;
                parms._seed = 1234;
                //          parms._rebalance_me = true;
                //          parms._nchunks = 22;
                DRF job = new DRF(parms);
                drf = job.trainModel().get();
            } finally {
                // Clean up even when training fails, and always leave the scope we entered.
                if (drf != null) {
                    drf.delete();
                }
                if (tfr != null) {
                    tfr.remove();
                }
                Scope.exit();
            }
        }
    }
    int[] max_depths = { 2, 5, 10, 15, 20 };
    int[] chunks = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };
    boolean[] rebalanceMes = { true };
    int[] ntrees = { 10 };
    int totalLength = chunks.length * max_depths.length * rebalanceMes.length * ntrees.length;
    double[] executionTimes = new double[totalLength];
    int[] outputchunks = new int[totalLength];
    int[] outputdepths = new int[totalLength];
    boolean[] outputrebalanceme = new boolean[totalLength];
    int[] outputntrees = new int[totalLength];
    int c = 0;
    for (int max_depth : max_depths) {
        for (int ntree : ntrees) {
            for (boolean rebalanceMe : rebalanceMes) {
                for (int chunk : chunks) {
                    long startTime = System.currentTimeMillis();
                    Frame tfr = null;
                    DRFModel drf = null;
                    int actualChunk;
                    int builtTrees;
                    Scope.enter();
                    try {
                        // Load data, hack frames
                        tfr = parse_test_file(trainFile);
                        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
                        parms._train = tfr._key;
                        parms._response_column = "Sales";
                        parms._nbins = nbins;
                        parms._mtries = -1;
                        parms._min_rows = minRows;
                        parms._seed = 1234;
                        parms._ntrees = ntree;
                        parms._max_depth = max_depth;
                        //            parms._rebalance_me = rebalanceMe;
                        //            parms._nchunks = chunk;
                        // Build a first model
                        DRF job = new DRF(parms);
                        drf = job.trainModel().get();
                        // expected value first, actual value second
                        assertEquals(parms._ntrees, drf._output._ntrees);
                        actualChunk = job.train().anyVec().nChunks();
                        // Capture before the model is deleted in the finally block.
                        builtTrees = drf._output._ntrees;
                    } finally {
                        if (drf != null) {
                            drf.delete();
                        }
                        if (tfr != null) {
                            tfr.remove();
                        }
                        Scope.exit();
                    }
                    // Timing deliberately includes parsing and cleanup, as in the original measurement.
                    executionTimes[c] = (System.currentTimeMillis() - startTime) / 1000d;
                    if (!rebalanceMe) {
                        assert actualChunk == 22;
                    }
                    outputchunks[c] = actualChunk;
                    outputdepths[c] = max_depth;
                    outputrebalanceme[c] = rebalanceMe;
                    outputntrees[c] = builtTrees;
                    Log.info("Iteration " + (c + 1) + " out of " + executionTimes.length);
                    Log.info(" DEPTH: " + outputdepths[c] + " NTREES: " + outputntrees[c] + " CHUNKS: " + outputchunks[c] + " EXECUTION TIME: " + executionTimes[c] + " Rebalanced: " + rebalanceMe + " WarmedUp: " + warmUp);
                    c++;
                }
            }
        }
    }
    String fileName = "/Users/ludirehak/Desktop/DRFTestRebalance3.txt";
    // Leftover R plotting snippet, presumably for charting this CSV:
    // legend('topright', legend= c('max_depth',unique(max_depth)),col = 0:length(unique(max_depth)),pch=1);
    // try-with-resources closes the writer (and the underlying FileWriter) even if a write fails.
    try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(fileName))) {
        bufferedWriter.write("max_depth,ntrees,nbins,min_rows,chunks,execution_time,rebalanceMe,warmUp");
        bufferedWriter.newLine();
        for (int i = 0; i < executionTimes.length; i++) {
            // Exactly one field per header column (8 in total).
            bufferedWriter.write(outputdepths[i] + "," + outputntrees[i] + "," + nbins + "," + minRows + "," + outputchunks[i] + "," + executionTimes[i] + "," + (outputrebalanceme[i] ? 1 : 0) + "," + (warmUp ? 1 : 0));
            bufferedWriter.newLine();
        }
    } catch (Exception e) {
        // Best effort: a failed write must not fail the benchmark, but report why it failed.
        Log.info("Failed to write " + fileName + ": " + e);
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) FileWriter(java.io.FileWriter) ModelMetricsRegression(hex.ModelMetricsRegression) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) BufferedWriter(java.io.BufferedWriter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

ModelMetricsRegression (hex.ModelMetricsRegression): 4, Test (org.junit.Test): 4, Frame (water.fvec.Frame): 4, SplitFrame (hex.SplitFrame): 3, DataInfo (hex.DataInfo): 1, FrameTask (hex.FrameTask): 1, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 1, DistributionFamily (hex.genmodel.utils.DistributionFamily): 1, BufferedWriter (java.io.BufferedWriter): 1, FileWriter (java.io.FileWriter): 1, Random (java.util.Random): 1, Ignore (org.junit.Ignore): 1, H2OModelBuilderIllegalArgumentException (water.exceptions.H2OModelBuilderIllegalArgumentException): 1, Chunk (water.fvec.Chunk): 1, Vec (water.fvec.Vec): 1, PrettyPrint (water.util.PrettyPrint): 1, Triple (water.util.Triple): 1