use of water.fvec.Frame in project h2o-3 by h2oai.
the class GLRMModel method scoreMetricsOnly.
/**
 * Computes the GLRM model metrics for the given frame and returns them.
 *
 * The scoring task is run over [A,X], where A is a copy of {@code frame} adapted to the
 * original training frame and X is the loading frame fetched from the DKV under
 * {@code _output._representation_key}.
 *
 * @param frame frame to score; may be null
 * @return the metrics built by the scoring task, or null when {@code frame} is null
 */
public ModelMetricsGLRM scoreMetricsOnly(Frame frame) {
  if (frame == null) {
    return null;
  }
  final int numFeatures = _output._names.length;

  // A: work on a copy so that adapting to the training layout leaves the caller's frame alone
  Frame adapted = new Frame(frame);
  adaptTestForTrain(adapted, true, false);
  assert numFeatures == adapted.numCols();

  // [A,X]: append the loading frame X, which is needed for calculating XY
  Frame combined = new Frame(adapted);
  Frame loading = DKV.get(_output._representation_key).get();
  combined.add(loading);

  GLRMScore scorer = new GLRMScore(numFeatures, _parms._k, false).doAll(combined);
  // save error metrics based on imputed data
  return (ModelMetricsGLRM) scorer._mb.makeModelMetrics(GLRMModel.this, frame, null, null);
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class Score method makeModelMetrics.
// Run after the doAll scoring to convert the MetricsBuilder to a ModelMetrics.
// A prediction frame is computed via model.score(fr) only when it is needed: for a
// two-class model with _computeGainsLift set, or when the distribution is huber.
// The temporary prediction frame is always removed before returning, including when
// building the metrics throws, so it is never left behind.
ModelMetricsSupervised makeModelMetrics(SharedTreeModel model, Frame fr) {
  final boolean needPreds = (model._output.nclasses() == 2 && _computeGainsLift)
      || model._parms._distribution == DistributionFamily.huber;
  final Frame preds = needPreds ? model.score(fr) : null;
  try {
    return (ModelMetricsSupervised) _mb.makeModelMetrics(model, fr, null, preds);
  } finally {
    // preds is only an intermediate; release it on both the normal and the exceptional path
    if (preds != null) {
      preds.remove();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class Storage method toFrame.
/**
 * Helper to convert a Matrix into a Frame.
 * One constant-zero Vec is created per matrix column, the FrameFiller task is run over
 * the resulting Frame, and the task's frame is stored in the DKV under the given key.
 * @param m Matrix
 * @param key Key for output Frame
 * @return Reference to Frame (which is also in DKV)
 */
static Frame toFrame(Matrix m, Key key) {
  final int numCols = m.cols();
  // The log2 chunk size is reduced by floor(log2(#columns)), but never drops below 1
  final double log2Cols = Math.log(numCols) / Math.log(2.);
  final int logRowsPerChunk = Math.max(1, FileVec.DFLT_LOG2_CHUNK_SIZE - (int) Math.floor(log2Cols));
  final Vec[] columns = new Vec[numCols];
  for (int col = 0; col < columns.length; col++) {
    columns[col] = makeCon(0, m.rows(), logRowsPerChunk);
  }
  final Frame filled = new FrameFiller(m).doAll(new Frame(key, columns, true))._fr;
  DKV.put(key, filled);
  return filled;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DeepWaterModel method doScoring.
/**
 * Score this DeepWater model.
 * Called once per Map/Reduce iteration: updates timing and the epoch counter, optionally
 * auto-tunes the number of training samples per iteration, scores on the given frames when
 * the scoring conditions are met, appends the result to the scoring history, and decides
 * whether training should continue.
 * @param fTrain potentially downsampled training data for scoring
 * @param fValid potentially downsampled validation data for scoring (may be null)
 * @param jobKey key of the owning job
 * @param iteration Map/Reduce iteration count
 * @param finalScoring when true, the best-model bookkeeping, model printout, early-stopping
 *                     check and scoring-time log at the end of a scoring pass are skipped
 * @return true if model building is ongoing; false once early stopping has triggered or the
 *         requested number of epochs has been reached
 */
boolean doScoring(Frame fTrain, Frame fValid, Key<Job> jobKey, int iteration, boolean finalScoring) {
final long now = System.currentTimeMillis();
// wall-clock time elapsed since this method was last entered
final double time_since_last_iter = now - _timeLastIterationEnter;
updateTiming(jobKey);
_timeLastIterationEnter = now;
// epochs completed so far = processed samples / number of training rows
epoch_counter = (double) model_info().get_processed_total() / training_rows;
boolean keep_running;
// Auto-tuning is selected by _train_samples_per_iteration == -2 and only runs after the first iteration;
// then adjust the auto-tuning parameter 'actual_train_samples_per_iteration' such that the targeted ratio of comm to comp is achieved
if (get_params()._train_samples_per_iteration == -2 && iteration > 1) {
Log.debug("Auto-tuning train_samples_per_iteration.");
if (time_for_iteration_overhead_ms > 10) {
Log.debug(" Time taken for per-iteration comm overhead: " + PrettyPrint.msecs(time_for_iteration_overhead_ms, true));
Log.debug(" Time taken for Map/Reduce iteration: " + PrettyPrint.msecs((long) time_since_last_iter, true));
final double comm_to_work_ratio = time_for_iteration_overhead_ms / time_since_last_iter;
Log.debug(" Ratio of per-iteration comm overhead to computation: " + String.format("%.5f", comm_to_work_ratio));
Log.debug(" target_comm_to_work: " + get_params()._target_ratio_comm_to_comp);
Log.debug("Old value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
double correction = get_params()._target_ratio_comm_to_comp / comm_to_work_ratio;
//it's ok to train up to 2x more training rows per iteration, but not fewer than half.
// (correction is clamped to [0.5, 2] and the sample count is divided by it below)
correction = Math.max(0.5, Math.min(2, correction));
// NOTE(review): after the clamp above correction is always positive, so Math.abs() has no effect here
if (Math.abs(correction) < 0.8 || Math.abs(correction) > 1.2) {
//don't correct unless it's significant (avoid slow drift)
actual_train_samples_per_iteration /= correction;
// never go below one sample per iteration
actual_train_samples_per_iteration = Math.max(1, actual_train_samples_per_iteration);
Log.debug("New value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
} else {
Log.debug("Keeping value of train_samples_per_iteration the same (would deviate too little from previous value): " + actual_train_samples_per_iteration);
}
} else {
Log.debug("Iteration overhead is faster than 10 ms. Not modifying train_samples_per_iteration: " + actual_train_samples_per_iteration);
}
}
// keep training while fewer than _epochs epochs are done and early stopping has not triggered
keep_running = (epoch_counter < get_params()._epochs) && !stopped_early;
final long sinceLastScore = now - _timeLastScoreStart;
// this is potentially slow - only do every so often
// Score when training is about to stop, when scoring on each iteration is requested, or when
// more than _score_interval * 1000 ms have passed since the last scoring started AND the share
// of that time spent in the last scoring pass is below _score_duty_cycle.
if (!keep_running || get_params()._score_each_iteration || (//don't score too often
sinceLastScore > get_params()._score_interval * 1000 && (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore < get_params()._score_duty_cycle)) {
//duty cycle
Log.info(logNvidiaStats());
jobKey.get().update(0, "Scoring on " + fTrain.numRows() + " training samples" + (fValid != null ? (", " + fValid.numRows() + " validation samples") : ""));
final boolean printme = !get_params()._quiet_mode;
_timeLastScoreStart = System.currentTimeMillis();
// Collect a snapshot of the current training state for the scoring history
DeepWaterScoringInfo scoringInfo = new DeepWaterScoringInfo();
scoringInfo.time_stamp_ms = _timeLastScoreStart;
// refresh the timing fields before copying them into the snapshot
updateTiming(jobKey);
scoringInfo.total_training_time_ms = total_training_time_ms;
scoringInfo.total_scoring_time_ms = total_scoring_time_ms;
scoringInfo.total_setup_time_ms = total_setup_time_ms;
scoringInfo.epoch_counter = epoch_counter;
scoringInfo.iterations = iterations;
scoringInfo.training_samples = (double) model_info().get_processed_total();
scoringInfo.validation = fValid != null;
scoringInfo.score_training_samples = fTrain.numRows();
// NOTE(review): taken from the parameter rather than fValid.numRows(), unlike the training count above - confirm intended
scoringInfo.score_validation_samples = get_params()._score_validation_samples;
scoringInfo.is_classification = _output.isClassifier();
scoringInfo.is_autoencoder = _output.isAutoencoder();
if (printme)
Log.info("Scoring the model.");
// compute errors
final String m = model_info().toString();
if (m.length() > 0)
Log.info(m);
// For GainsLift and Huber, we need the full predictions to compute the model metrics
boolean needPreds = _output.nclasses() == 2 || /* gains/lift table requires predictions */
get_params()._distribution == DistributionFamily.huber;
// Scoring on training data
ModelMetrics mtrain;
Frame preds = null;
if (needPreds) {
// allocate predictions since they are needed
preds = score(fTrain);
// presumably score() stores the metrics for (this, fTrain) in the DKV as a side effect - confirm
mtrain = ModelMetrics.getFromDKV(this, fTrain);
} else {
// no need to allocate predictions
ModelMetrics.MetricBuilder mb = scoreMetrics(fTrain);
mtrain = mb.makeModelMetrics(this, fTrain, fTrain, null);
}
// only the metrics are kept; the prediction frame itself is discarded
if (preds != null)
preds.remove();
_output._training_metrics = mtrain;
scoringInfo.scored_train = new ScoreKeeper(mtrain);
// NOTE(review): unconditional cast - throws ClassCastException if mtrain is non-null and not a
// ModelMetricsSupervised; the validation branch below uses instanceof on mvalid directly instead
ModelMetricsSupervised mm1 = (ModelMetricsSupervised) mtrain;
if (mm1 instanceof ModelMetricsBinomial) {
ModelMetricsBinomial mm = (ModelMetricsBinomial) (mm1);
scoringInfo.training_AUC = mm._auc;
}
// Describe which training data the metrics were computed on
if (fTrain.numRows() != training_rows) {
_output._training_metrics._description = "Metrics reported on temporary training frame with " + fTrain.numRows() + " samples";
} else if (fTrain._key != null && fTrain._key.toString().contains("chunks")) {
_output._training_metrics._description = "Metrics reported on temporary (load-balanced) training frame";
} else {
_output._training_metrics._description = "Metrics reported on full training frame";
}
// Scoring on validation data
ModelMetrics mvalid;
if (fValid != null) {
preds = null;
if (needPreds) {
// allocate predictions since they are needed
preds = score(fValid);
mvalid = ModelMetrics.getFromDKV(this, fValid);
} else {
// no need to allocate predictions
ModelMetrics.MetricBuilder mb = scoreMetrics(fValid);
mvalid = mb.makeModelMetrics(this, fValid, fValid, null);
}
if (preds != null)
preds.remove();
_output._validation_metrics = mvalid;
scoringInfo.scored_valid = new ScoreKeeper(mvalid);
// NOTE(review): mvalid is null-checked only here, after it was already passed to ScoreKeeper above
if (mvalid != null) {
if (mvalid instanceof ModelMetricsBinomial) {
ModelMetricsBinomial mm = (ModelMetricsBinomial) mvalid;
scoringInfo.validation_AUC = mm._auc;
}
// Describe which validation data the metrics were computed on
if (fValid.numRows() != validation_rows) {
_output._validation_metrics._description = "Metrics reported on temporary validation frame with " + fValid.numRows() + " samples";
} else if (fValid._key != null && fValid._key.toString().contains("chunks")) {
_output._validation_metrics._description = "Metrics reported on temporary (load-balanced) validation frame";
} else {
_output._validation_metrics._description = "Metrics reported on full validation frame";
}
}
}
// if (get_params()._variable_importances) {
// if (!get_params()._quiet_mode) Log.info("Computing variable importances.");
// throw H2O.unimpl();
// final float[] vi = model_info().computeVariableImportances();
// scoringInfo.variable_importances = new VarImp(vi, Arrays.copyOfRange(model_info().data_info().coefNames(), 0, vi.length));
// }
// Account for the time spent in this scoring pass
_timeLastScoreEnd = System.currentTimeMillis();
long scoringTime = _timeLastScoreEnd - _timeLastScoreStart;
total_scoring_time_ms += scoringTime;
updateTiming(jobKey);
// update the scoringInfo object to report proper speed
scoringInfo.total_training_time_ms = total_training_time_ms;
scoringInfo.total_scoring_time_ms = total_scoring_time_ms;
scoringInfo.this_scoring_time_ms = scoringTime;
// enlarge the error array by one, push latest score back
if (this.scoringInfo == null) {
this.scoringInfo = new DeepWaterScoringInfo[] { scoringInfo };
} else {
DeepWaterScoringInfo[] err2 = new DeepWaterScoringInfo[this.scoringInfo.length + 1];
System.arraycopy(this.scoringInfo, 0, err2, 0, this.scoringInfo.length);
err2[err2.length - 1] = scoringInfo;
this.scoringInfo = err2;
}
// Refresh the model output from the extended scoring history
_output.errors = last_scored();
_output._scoring_history = DeepWaterScoringInfo.createScoringHistoryTable(this.scoringInfo, (null != get_params()._valid), false, _output.getModelCategory(), _output.isAutoencoder());
_output._variable_importances = calcVarImp(last_scored().variable_importances);
_output._model_summary = model_info.createSummaryTable();
// always keep a copy of the best model so far (based on the following criterion)
if (!finalScoring) {
if (actual_best_model_key != null && get_params()._overwrite_with_best_model && (// if we have a best_model in DKV, then compare against its error() (unless it's a different model as judged by the network size)
(DKV.get(actual_best_model_key) != null && !(loss() >= DKV.get(actual_best_model_key).<DeepWaterModel>get().loss())) || // otherwise, compare against our own _bestError
(DKV.get(actual_best_model_key) == null && loss() < _bestLoss))) {
_bestLoss = loss();
model_info.nativeToJava();
putMeAsBestModel(actual_best_model_key);
}
// print the freshly scored model to ASCII
if (keep_running && printme)
Log.info(toString());
// Early stopping: evaluated on the score keepers of the scoring history
if (ScoreKeeper.stopEarly(ScoringInfo.scoreKeepers(scoring_history()), get_params()._stopping_rounds, _output.isClassifier(), get_params()._stopping_metric, get_params()._stopping_tolerance, "model's last", true)) {
Log.info("Convergence detected based on simple moving average of the loss function for the past " + get_params()._stopping_rounds + " scoring events. Model building completed.");
stopped_early = true;
}
if (printme)
Log.info("Time taken for scoring and diagnostics: " + PrettyPrint.msecs(scoringInfo.this_scoring_time_ms, true));
}
}
// stopped_early may have been set just above or during an earlier call
if (stopped_early) {
// pretend as if we finished all epochs to get the progress bar pretty (especially for N-fold and grid-search)
((Job) DKV.getGet(jobKey)).update((long) (get_params()._epochs * training_rows));
update(jobKey);
return false;
}
progressUpdate(jobKey, keep_running);
//update(jobKey);
return keep_running;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class KMeansModel method predictScoreImpl.
/**
 * Scores the adapted frame. Unless {@code _parms._pred_indicator} is set this simply
 * delegates to the superclass. Otherwise one zero-initialised column per cluster is
 * appended to a copy of the adapted frame, each row is filled with the values returned by
 * {@code score_indicator}, and only the appended columns are returned as a new Frame stored
 * in the DKV under {@code destination_key}.
 */
@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
  if (!_parms._pred_indicator) {
    return super.predictScoreImpl(orig, adaptedFr, destination_key, j, computeMetrics);
  }

  // number of output columns: the last entry of _output._k
  final int len = _output._k[_output._k.length - 1];
  final Frame scoringFrm = new Frame(adaptedFr);
  for (int cluster = 1; cluster <= len; cluster++) {
    scoringFrm.add("cluster_" + Double.toString(cluster), scoringFrm.anyVec().makeZero());
  }

  new MRTask() {
    @Override
    public void map(Chunk[] chks) {
      if (isCancelled() || (j != null && j.stop_requested())) {
        return;
      }
      // the appended output columns start right after the model's named columns
      final int firstOut = _output._names.length;
      final double[] scratch = new double[firstOut];
      final double[] indicators = new double[len];
      final int rowCount = chks[0]._len;
      for (int row = 0; row < rowCount; row++) {
        Arrays.fill(indicators, 0);
        final double[] scored = score_indicator(chks, row, scratch, indicators);
        for (int c = 0; c < indicators.length; c++) {
          chks[firstOut + c].set(row, scored[c]);
        }
      }
      if (j != null) {
        j.update(1);
      }
    }
  }.doAll(scoringFrm);

  // Return the predicted columns
  final int firstPred = _output._names.length;
  final int endPred = scoringFrm.numCols();
  // this will call vec_impl() and we cannot call the delete() below just yet
  final Frame extracted = scoringFrm.extractFrame(firstPred, endPred);
  final Frame result = new Frame(Key.<Frame>make(destination_key), extracted.names(), extracted.vecs());
  DKV.put(result);
  makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
  return result;
}
Aggregations