
Example 16 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GLRMModel method scoreMetricsOnly.

public ModelMetricsGLRM scoreMetricsOnly(Frame frame) {
    if (frame == null)
        return null;
    int ncols = _output._names.length;
    // Need [A,X] where A = adapted test frame, X = loading frame
    // Note: A is adapted to original training frame
    Frame adaptedFr = new Frame(frame);
    adaptTestForTrain(adaptedFr, true, false);
    assert ncols == adaptedFr.numCols();
    // Append loading frame X for calculating XY
    Frame fullFrm = new Frame(adaptedFr);
    Frame loadingFrm = DKV.get(_output._representation_key).get();
    fullFrm.add(loadingFrm);
    GLRMScore gs = new GLRMScore(ncols, _parms._k, false).doAll(fullFrm);
    // save error metrics based on imputed data
    ModelMetrics mm = gs._mb.makeModelMetrics(GLRMModel.this, frame, null, null);
    return (ModelMetricsGLRM) mm;
}
Also used : Frame(water.fvec.Frame)
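
For orientation, here is a minimal sketch of how scoreMetricsOnly might be called from a JUnit-style test. It assumes an H2O cloud is already running (as in the project's test harness) and uses hypothetical dataset paths; the GLRM parameter values are only illustrative.

import hex.ModelMetrics;
import hex.glrm.GLRM;
import hex.glrm.GLRMModel;
import water.TestUtil;
import water.fvec.Frame;

public class GLRMScoreMetricsOnlySketch {
    public static void run() {
        Frame train = null, test = null;
        GLRMModel model = null;
        try {
            // Hypothetical dataset paths, for illustration only
            train = TestUtil.parse_test_file("smalldata/glrm_test/train.csv");
            test = TestUtil.parse_test_file("smalldata/glrm_test/test.csv");
            GLRMModel.GLRMParameters parms = new GLRMModel.GLRMParameters();
            parms._train = train._key;
            parms._k = 4; // rank of the low-rank decomposition
            model = new GLRM(parms).trainModel().get();
            // Reconstruction-error metrics on the test frame, without keeping a prediction frame around
            ModelMetrics mm = model.scoreMetricsOnly(test);
        } finally {
            if (model != null) model.delete();
            if (train != null) train.remove();
            if (test != null) test.remove();
        }
    }
}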

Example 17 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class Score method makeModelMetrics.

// Run after the doAll scoring to convert the MetricsBuilder to a ModelMetrics
ModelMetricsSupervised makeModelMetrics(SharedTreeModel model, Frame fr) {
    // Full predictions are only materialized when the metrics need them: gains/lift for binomial models, or the Huber loss metric
    Frame preds = (model._output.nclasses() == 2 && _computeGainsLift) || model._parms._distribution == DistributionFamily.huber
            ? model.score(fr) : null;
    ModelMetricsSupervised mms = (ModelMetricsSupervised) _mb.makeModelMetrics(model, fr, null, preds);
    if (preds != null)
        preds.remove();
    return mms;
}
Also used : Frame(water.fvec.Frame)
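
The same idea recurs in several h2o-3 models (see the DeepWater example further down): only materialize a prediction frame when the metrics actually require one. Below is a hedged sketch of that pattern from the caller's side, assuming Model.score, Model.scoreMetrics, and ModelMetrics.getFromDKV behave as in the snippets on this page; the needPreds flag stands in for the binomial-gains-lift / Huber check.

import hex.Model;
import hex.ModelMetrics;
import water.fvec.Frame;

final class MetricsWithoutPredictionsSketch {
    // Compute metrics for 'model' on 'fr'. 'needPreds' is true when the metrics require a full
    // prediction frame (e.g. gains/lift for binomial models, or the Huber loss metric).
    static ModelMetrics metricsFor(Model<?, ?, ?> model, Frame fr, boolean needPreds) {
        if (needPreds) {
            Frame preds = model.score(fr); // scoring stores the metrics in the DKV as a side effect
            try {
                return ModelMetrics.getFromDKV(model, fr);
            } finally {
                preds.remove(); // the prediction frame itself is no longer needed
            }
        }
        // Cheaper path: stream a metric builder over the frame, no prediction frame is allocated
        return model.scoreMetrics(fr).makeModelMetrics(model, fr, fr, null);
    }
}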

Example 18 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class Storage method toFrame.

/**
   * Helper to convert a Matrix into a Frame
   * @param m Matrix
   * @param key Key for output Frame
   * @return Reference to Frame (which is also in DKV)
   */
static Frame toFrame(Matrix m, Key key) {
    // Fewer rows per chunk when there are more columns, so the data volume per chunk stays roughly constant
    final int log_rows_per_chunk = Math.max(1, FileVec.DFLT_LOG2_CHUNK_SIZE - (int) Math.floor(Math.log(m.cols()) / Math.log(2.)));
    Vec[] v = new Vec[m.cols()];
    for (int i = 0; i < m.cols(); ++i) {
        v[i] = makeCon(0, m.rows(), log_rows_per_chunk);
    }
    Frame f = new FrameFiller(m).doAll(new Frame(key, v, true))._fr;
    DKV.put(key, f);
    return f;
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) FileVec(water.fvec.FileVec)
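
A rough usage sketch for toFrame, assuming it is called from within the same package (the method is package-private), that the matrix is a Storage.DenseRowMatrix as used elsewhere in hex.deeplearning, and that the Matrix interface exposes a set(row, col, value) mutator; every name beyond those in the snippet above is an assumption.

import water.DKV;
import water.Key;
import water.fvec.Frame;

class ToFrameUsageSketch {
    static Frame dummyMatrixAsFrame() {
        // Assumed: Storage.DenseRowMatrix(rows, cols) and Matrix.set(row, col, value) are available here
        Storage.DenseRowMatrix m = new Storage.DenseRowMatrix(1000, 8);
        for (int r = 0; r < m.rows(); ++r)
            for (int c = 0; c < m.cols(); ++c)
                m.set(r, c, (float) Math.random()); // fill with dummy values
        Key key = Key.make("matrix_frame"); // hypothetical key name
        Frame f = Storage.toFrame(m, key);  // the frame is also published to the DKV
        assert DKV.get(key) != null;
        return f;
    }
}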

Example 19 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DeepWaterModel method doScoring.

/**
   * Score this DeepWater model
   * @param fTrain potentially downsampled training data for scoring
   * @param fValid  potentially downsampled validation data for scoring
   * @param jobKey key of the owning job
   * @param iteration Map/Reduce iteration count
   * @return true if model building is ongoing
   */
boolean doScoring(Frame fTrain, Frame fValid, Key<Job> jobKey, int iteration, boolean finalScoring) {
    final long now = System.currentTimeMillis();
    final double time_since_last_iter = now - _timeLastIterationEnter;
    updateTiming(jobKey);
    _timeLastIterationEnter = now;
    epoch_counter = (double) model_info().get_processed_total() / training_rows;
    boolean keep_running;
    // Auto-tuning: when train_samples_per_iteration == -2 (and after the first iteration), adjust the
    // parameter 'actual_train_samples_per_iteration' such that the targeted ratio of comm to comp is achieved.
    if (get_params()._train_samples_per_iteration == -2 && iteration > 1) {
        Log.debug("Auto-tuning train_samples_per_iteration.");
        if (time_for_iteration_overhead_ms > 10) {
            Log.debug("  Time taken for per-iteration comm overhead: " + PrettyPrint.msecs(time_for_iteration_overhead_ms, true));
            Log.debug("  Time taken for Map/Reduce iteration: " + PrettyPrint.msecs((long) time_since_last_iter, true));
            final double comm_to_work_ratio = time_for_iteration_overhead_ms / time_since_last_iter;
            Log.debug("  Ratio of per-iteration comm overhead to computation: " + String.format("%.5f", comm_to_work_ratio));
            Log.debug("  target_comm_to_work: " + get_params()._target_ratio_comm_to_comp);
            Log.debug("Old value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
            double correction = get_params()._target_ratio_comm_to_comp / comm_to_work_ratio;
            //it's ok to train up to 2x more training rows per iteration, but not fewer than half.
            correction = Math.max(0.5, Math.min(2, correction));
            if (Math.abs(correction) < 0.8 || Math.abs(correction) > 1.2) {
                //don't correct unless it's significant (avoid slow drift)
                actual_train_samples_per_iteration /= correction;
                actual_train_samples_per_iteration = Math.max(1, actual_train_samples_per_iteration);
                Log.debug("New value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
            } else {
                Log.debug("Keeping value of train_samples_per_iteration the same (would deviate too little from previous value): " + actual_train_samples_per_iteration);
            }
        } else {
            Log.debug("Iteration overhead is faster than 10 ms. Not modifying train_samples_per_iteration: " + actual_train_samples_per_iteration);
        }
    }
    keep_running = (epoch_counter < get_params()._epochs) && !stopped_early;
    final long sinceLastScore = now - _timeLastScoreStart;
    // this is potentially slow - only do every so often
    if (!keep_running || get_params()._score_each_iteration
            || (sinceLastScore > get_params()._score_interval * 1000 // don't score too often
                && (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore < get_params()._score_duty_cycle)) { // stay within the scoring duty cycle
        Log.info(logNvidiaStats());
        jobKey.get().update(0, "Scoring on " + fTrain.numRows() + " training samples" + (fValid != null ? (", " + fValid.numRows() + " validation samples") : ""));
        final boolean printme = !get_params()._quiet_mode;
        _timeLastScoreStart = System.currentTimeMillis();
        DeepWaterScoringInfo scoringInfo = new DeepWaterScoringInfo();
        scoringInfo.time_stamp_ms = _timeLastScoreStart;
        updateTiming(jobKey);
        scoringInfo.total_training_time_ms = total_training_time_ms;
        scoringInfo.total_scoring_time_ms = total_scoring_time_ms;
        scoringInfo.total_setup_time_ms = total_setup_time_ms;
        scoringInfo.epoch_counter = epoch_counter;
        scoringInfo.iterations = iterations;
        scoringInfo.training_samples = (double) model_info().get_processed_total();
        scoringInfo.validation = fValid != null;
        scoringInfo.score_training_samples = fTrain.numRows();
        scoringInfo.score_validation_samples = get_params()._score_validation_samples;
        scoringInfo.is_classification = _output.isClassifier();
        scoringInfo.is_autoencoder = _output.isAutoencoder();
        if (printme)
            Log.info("Scoring the model.");
        // compute errors
        final String m = model_info().toString();
        if (m.length() > 0)
            Log.info(m);
        // For GainsLift and Huber, we need the full predictions to compute the model metrics
        boolean needPreds = _output.nclasses() == 2 /* gains/lift table requires predictions */
                || get_params()._distribution == DistributionFamily.huber;
        // Scoring on training data
        ModelMetrics mtrain;
        Frame preds = null;
        if (needPreds) {
            // allocate predictions since they are needed
            preds = score(fTrain);
            mtrain = ModelMetrics.getFromDKV(this, fTrain);
        } else {
            // no need to allocate predictions
            ModelMetrics.MetricBuilder mb = scoreMetrics(fTrain);
            mtrain = mb.makeModelMetrics(this, fTrain, fTrain, null);
        }
        if (preds != null)
            preds.remove();
        _output._training_metrics = mtrain;
        scoringInfo.scored_train = new ScoreKeeper(mtrain);
        ModelMetricsSupervised mm1 = (ModelMetricsSupervised) mtrain;
        if (mm1 instanceof ModelMetricsBinomial) {
            ModelMetricsBinomial mm = (ModelMetricsBinomial) (mm1);
            scoringInfo.training_AUC = mm._auc;
        }
        if (fTrain.numRows() != training_rows) {
            _output._training_metrics._description = "Metrics reported on temporary training frame with " + fTrain.numRows() + " samples";
        } else if (fTrain._key != null && fTrain._key.toString().contains("chunks")) {
            _output._training_metrics._description = "Metrics reported on temporary (load-balanced) training frame";
        } else {
            _output._training_metrics._description = "Metrics reported on full training frame";
        }
        // Scoring on validation data
        ModelMetrics mvalid;
        if (fValid != null) {
            preds = null;
            if (needPreds) {
                // allocate predictions since they are needed
                preds = score(fValid);
                mvalid = ModelMetrics.getFromDKV(this, fValid);
            } else {
                // no need to allocate predictions
                ModelMetrics.MetricBuilder mb = scoreMetrics(fValid);
                mvalid = mb.makeModelMetrics(this, fValid, fValid, null);
            }
            if (preds != null)
                preds.remove();
            _output._validation_metrics = mvalid;
            scoringInfo.scored_valid = new ScoreKeeper(mvalid);
            if (mvalid != null) {
                if (mvalid instanceof ModelMetricsBinomial) {
                    ModelMetricsBinomial mm = (ModelMetricsBinomial) mvalid;
                    scoringInfo.validation_AUC = mm._auc;
                }
                if (fValid.numRows() != validation_rows) {
                    _output._validation_metrics._description = "Metrics reported on temporary validation frame with " + fValid.numRows() + " samples";
                } else if (fValid._key != null && fValid._key.toString().contains("chunks")) {
                    _output._validation_metrics._description = "Metrics reported on temporary (load-balanced) validation frame";
                } else {
                    _output._validation_metrics._description = "Metrics reported on full validation frame";
                }
            }
        }
        //      if (get_params()._variable_importances) {
        //        if (!get_params()._quiet_mode) Log.info("Computing variable importances.");
        //        throw H2O.unimpl();
        //        final float[] vi = model_info().computeVariableImportances();
        //        scoringInfo.variable_importances = new VarImp(vi, Arrays.copyOfRange(model_info().data_info().coefNames(), 0, vi.length));
        //      }
        _timeLastScoreEnd = System.currentTimeMillis();
        long scoringTime = _timeLastScoreEnd - _timeLastScoreStart;
        total_scoring_time_ms += scoringTime;
        updateTiming(jobKey);
        // update the scoringInfo object to report proper speed
        scoringInfo.total_training_time_ms = total_training_time_ms;
        scoringInfo.total_scoring_time_ms = total_scoring_time_ms;
        scoringInfo.this_scoring_time_ms = scoringTime;
        // enlarge the error array by one, push latest score back
        if (this.scoringInfo == null) {
            this.scoringInfo = new DeepWaterScoringInfo[] { scoringInfo };
        } else {
            DeepWaterScoringInfo[] err2 = new DeepWaterScoringInfo[this.scoringInfo.length + 1];
            System.arraycopy(this.scoringInfo, 0, err2, 0, this.scoringInfo.length);
            err2[err2.length - 1] = scoringInfo;
            this.scoringInfo = err2;
        }
        _output.errors = last_scored();
        _output._scoring_history = DeepWaterScoringInfo.createScoringHistoryTable(this.scoringInfo, (null != get_params()._valid), false, _output.getModelCategory(), _output.isAutoencoder());
        _output._variable_importances = calcVarImp(last_scored().variable_importances);
        _output._model_summary = model_info.createSummaryTable();
        // always keep a copy of the best model so far (based on the following criterion)
        if (!finalScoring) {
            // If a best model is already in the DKV, compare against its loss (unless it is a different model,
            // as judged by the network size); otherwise compare against our own _bestLoss.
            if (actual_best_model_key != null && get_params()._overwrite_with_best_model
                    && ((DKV.get(actual_best_model_key) != null && !(loss() >= DKV.get(actual_best_model_key).<DeepWaterModel>get().loss()))
                        || (DKV.get(actual_best_model_key) == null && loss() < _bestLoss))) {
                _bestLoss = loss();
                model_info.nativeToJava();
                putMeAsBestModel(actual_best_model_key);
            }
            // print the freshly scored model to ASCII
            if (keep_running && printme)
                Log.info(toString());
            if (ScoreKeeper.stopEarly(ScoringInfo.scoreKeepers(scoring_history()), get_params()._stopping_rounds, _output.isClassifier(), get_params()._stopping_metric, get_params()._stopping_tolerance, "model's last", true)) {
                Log.info("Convergence detected based on simple moving average of the loss function for the past " + get_params()._stopping_rounds + " scoring events. Model building completed.");
                stopped_early = true;
            }
            if (printme)
                Log.info("Time taken for scoring and diagnostics: " + PrettyPrint.msecs(scoringInfo.this_scoring_time_ms, true));
        }
    }
    if (stopped_early) {
        // pretend as if we finished all epochs to get the progress bar pretty (especially for N-fold and grid-search)
        ((Job) DKV.getGet(jobKey)).update((long) (get_params()._epochs * training_rows));
        update(jobKey);
        return false;
    }
    progressUpdate(jobKey, keep_running);
    //update(jobKey);
    return keep_running;
}
Also used : Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString)
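
The auto-tuning branch near the top of doScoring is easiest to follow with concrete numbers. The standalone sketch below restates just that arithmetic (it is not h2o-3 API): with 50 ms of per-iteration communication overhead against 400 ms of computation and a target ratio of 0.05, the raw correction is 0.4, the clamp raises it to 0.5, and because 0.5 falls outside [0.8, 1.2] the samples-per-iteration value is divided by it, i.e. doubled.

final class TrainSamplesAutoTuneSketch {
    // Distilled restatement of the correction logic in doScoring above
    static long adjust(long samplesPerIteration, double overheadMs, double iterationMs, double targetCommToComp) {
        double commToWork = overheadMs / iterationMs;        // e.g. 50 / 400 = 0.125
        double correction = targetCommToComp / commToWork;   // e.g. 0.05 / 0.125 = 0.4
        correction = Math.max(0.5, Math.min(2, correction)); // at most halve or double per adjustment
        if (correction < 0.8 || correction > 1.2)            // only act on significant changes (avoid slow drift)
            samplesPerIteration = Math.max(1, (long) (samplesPerIteration / correction));
        return samplesPerIteration;
    }

    public static void main(String[] args) {
        System.out.println(adjust(10_000, 50, 400, 0.05)); // prints 20000
    }
}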

Example 20 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class KMeansModel method predictScoreImpl.

@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
    if (!_parms._pred_indicator) {
        return super.predictScoreImpl(orig, adaptedFr, destination_key, j, computeMetrics);
    } else {
        final int len = _output._k[_output._k.length - 1];
        String prefix = "cluster_";
        Frame adaptFrm = new Frame(adaptedFr);
        for (int c = 0; c < len; c++) adaptFrm.add(prefix + Double.toString(c + 1), adaptFrm.anyVec().makeZero());
        new MRTask() {

            @Override
            public void map(Chunk[] chks) {
                if (isCancelled() || j != null && j.stop_requested())
                    return;
                double[] tmp = new double[_output._names.length];
                double[] preds = new double[len];
                for (int row = 0; row < chks[0]._len; row++) {
                    Arrays.fill(preds, 0);
                    double[] p = score_indicator(chks, row, tmp, preds);
                    for (int c = 0; c < preds.length; c++) chks[_output._names.length + c].set(row, p[c]);
                }
                if (j != null)
                    j.update(1);
            }
        }.doAll(adaptFrm);
        // Return the predicted columns
        int x = _output._names.length, y = adaptFrm.numCols();
        // this will call vec_impl() and we cannot call the delete() below just yet
        Frame f = adaptFrm.extractFrame(x, y);
        f = new Frame(Key.<Frame>make(destination_key), f.names(), f.vecs());
        DKV.put(f);
        makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
        return f;
    }
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk)
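
A minimal sketch of how the _pred_indicator branch might be exercised, assuming the standard hex.kmeans API and an illustrative dataset path; with the flag set, score() yields one cluster_N indicator column per cluster instead of a single predict column.

import hex.kmeans.KMeans;
import hex.kmeans.KMeansModel;
import water.TestUtil;
import water.fvec.Frame;

public class KMeansIndicatorSketch {
    public static void run() {
        Frame fr = null, preds = null;
        KMeansModel model = null;
        try {
            fr = TestUtil.parse_test_file("smalldata/iris/iris_wheader.csv"); // illustrative dataset
            KMeansModel.KMeansParameters parms = new KMeansModel.KMeansParameters();
            parms._train = fr._key;
            parms._k = 3;
            parms._pred_indicator = true; // request one 0/1 indicator column per cluster
            model = new KMeans(parms).trainModel().get();
            preds = model.score(fr);      // columns named cluster_1.0, cluster_2.0, cluster_3.0 (per the prefix above)
        } finally {
            if (preds != null) preds.remove();
            if (model != null) model.delete();
            if (fr != null) fr.remove();
        }
    }
}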

Aggregations

Frame (water.fvec.Frame): 782
Test (org.junit.Test): 435
Vec (water.fvec.Vec): 215
ValFrame (water.rapids.vals.ValFrame): 132
NFSFileVec (water.fvec.NFSFileVec): 66
Val (water.rapids.Val): 65
SplitFrame (hex.SplitFrame): 59
Key (water.Key): 56
DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 54
Chunk (water.fvec.Chunk): 50
NewChunk (water.fvec.NewChunk): 37
MRTask (water.MRTask): 33
ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 31
Ignore (org.junit.Ignore): 28
Random (java.util.Random): 26
File (java.io.File): 25
BufferedString (water.parser.BufferedString): 21
H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 19
HashMap (java.util.HashMap): 17
hex (hex): 16