use of water.fvec.Frame in project h2o-3 by h2oai.
the class PCAModel method predictScoreImpl.
/**
 * Scores the adapted frame row by row and returns the resulting "PC" columns as a new frame.
 *
 * One zero-filled column per component ("PC1" .. "PCk", k = _parms._k) is appended to a copy of
 * the adapted frame, filled from score0 inside an MRTask, then extracted and published in the DKV
 * under destination_key.
 *
 * @param orig            frame passed to makeModelMetrics
 * @param adaptedFr       adapted input frame; it is copied, not modified in place
 * @param destination_key name used to build the key of the returned frame
 * @param j               optional job; polled for stop requests and updated once per chunk when non-null
 * @param computeMetrics  not read by this implementation
 * @return the frame holding only the appended "PC" columns
 */
@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
  Frame scoringFrame = new Frame(adaptedFr);
  int component = 1;
  while (component <= _parms._k) {
    scoringFrame.add("PC" + component, scoringFrame.anyVec().makeZero());
    component++;
  }
  new MRTask() {
    @Override
    public void map(Chunk[] chks) {
      // Bail out early when the task was cancelled or the job was asked to stop
      if (isCancelled()) {
        return;
      }
      if (j != null && j.stop_requested()) {
        return;
      }
      final int firstOutputCol = _output._names.length;
      double[] tmp = new double[firstOutputCol];
      double[] preds = new double[_parms._k];
      final int rowCount = chks[0]._len;
      for (int row = 0; row < rowCount; row++) {
        double[] scored = score0(chks, row, tmp, preds);
        // The appended output columns start right after the first _output._names.length columns
        for (int c = 0; c < preds.length; c++) {
          chks[firstOutputCol + c].set(row, scored[c]);
        }
      }
      if (j != null) {
        j.update(1);
      }
    }
  }.doAll(scoringFrame);
  // Return the projection into principal component space
  final int firstPcCol = _output._names.length;
  final int endCol = scoringFrame.numCols();
  Frame projection = scoringFrame.extractFrame(firstPcCol, endCol);
  Frame result = new Frame(Key.<Frame>make(destination_key), projection.names(), projection.vecs());
  DKV.put(result);
  makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
  return result;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class StackedEnsembleModel method checkAndInheritModelProperties.
/**
 * Checks that the configured base models agree with each other and copies shared properties
 * from the first base model that can be loaded onto this ensemble.
 *
 * The first base model found in the DKV supplies the ensemble's supervised flag, model category,
 * distribution, domains, names, ignored columns, response column and nfolds. Every later base
 * model found is compared against those values. Base models whose key cannot be resolved are
 * logged and skipped.
 *
 * @throws H2OIllegalArgumentException if no base models are specified, if none of the specified
 *         base models can be found, or if a base model is inconsistent with the first one or
 *         with the ensemble's own parameters
 */
public void checkAndInheritModelProperties() {
  if (null == _parms._base_models || 0 == _parms._base_models.length)
    throw new H2OIllegalArgumentException("When creating a StackedEnsemble you must specify one or more models; found 0.");
  // Becomes true once the first resolvable base model has been inherited from
  boolean beenHere = false;
  trainingFrameChecksum = _parms.train().checksum();
  for (Key<Model> k : _parms._base_models) {
    Model aModel = DKV.getGet(k);
    if (null == aModel) {
      Log.warn("Failed to find base model; skipping: " + k);
      continue;
    }
    if (beenHere) {
      // check that the base models are all consistent
      if (_output._isSupervised ^ aModel.isSupervised())
        throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of supervised and unsupervised models: " + Arrays.toString(_parms._base_models));
      if (modelCategory != aModel._output.getModelCategory())
        throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of different categories of models: " + Arrays.toString(_parms._base_models));
      Frame aTrainingFrame = aModel._parms.train();
      if (trainingFrameChecksum != aTrainingFrame.checksum())
        throw new H2OIllegalArgumentException("Base models are inconsistent: they use different training frames. Found checksums: " + trainingFrameChecksum + " and: " + aTrainingFrame.checksum() + ".");
      NonBlockingHashSet<String> aNames = new NonBlockingHashSet<>();
      aNames.addAll(Arrays.asList(aModel._output._names));
      if (!aNames.equals(this.names))
        throw new H2OIllegalArgumentException("Base models are inconsistent: they use different column lists. Found: " + this.names + " and: " + aNames + ".");
      NonBlockingHashSet<String> anIgnoredColumns = new NonBlockingHashSet<>();
      if (null != aModel._parms._ignored_columns)
        anIgnoredColumns.addAll(Arrays.asList(aModel._parms._ignored_columns));
      // Report the set, not the raw String[]: concatenating an array prints its identity hash
      if (!anIgnoredColumns.equals(this.ignoredColumns))
        throw new H2OIllegalArgumentException("Base models are inconsistent: they use different ignored_column lists. Found: " + this.ignoredColumns + " and: " + anIgnoredColumns + ".");
      if (!responseColumn.equals(aModel._parms._response_column))
        throw new H2OIllegalArgumentException("Base models are inconsistent: they use different response columns. Found: " + responseColumn + " and: " + aModel._parms._response_column + ".");
      if (_output._domains.length != aModel._output._domains.length)
        throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of different numbers of domains (categorical levels): " + Arrays.toString(_parms._base_models));
      if (nfolds != aModel._parms._nfolds)
        throw new H2OIllegalArgumentException("Base models are inconsistent: they use different values for nfolds.");
      // TODO: loosen this iff _parms._valid or if we add a separate holdout dataset for the ensemble
      if (aModel._parms._nfolds < 2)
        throw new H2OIllegalArgumentException("Base model does not use cross-validation: " + aModel._parms._nfolds);
      // TODO: loosen this iff it's consistent, like if we have a _fold_column
      if (aModel._parms._fold_assignment != Modulo)
        throw new H2OIllegalArgumentException("Base model does not use Modulo for cross-validation: " + aModel._parms._fold_assignment);
      if (!aModel._parms._keep_cross_validation_predictions)
        throw new H2OIllegalArgumentException("Base model does not keep cross-validation predictions: " + k);
      // Hack alert: DRF only does Bernoulli and Gaussian, so only compare _domains.length above.
      if (!(aModel instanceof DRFModel) && distributionFamily(aModel) != distributionFamily(this))
        Log.warn("Base models are inconsistent; they use different distributions: " + distributionFamily(this) + " and: " + distributionFamily(aModel) + ". Is this intentional?");
      // TODO: If we're set to DistributionFamily.AUTO then GLM might auto-conform the response column
      // giving us inconsistencies.
    } else {
      // !beenHere: this is the first base_model
      _output._isSupervised = aModel.isSupervised();
      this.modelCategory = aModel._output.getModelCategory();
      this._dist = new Distribution(distributionFamily(aModel));
      _output._domains = Arrays.copyOf(aModel._output._domains, aModel._output._domains.length);
      // TODO: set _parms._train to aModel._parms.train()
      _output._names = aModel._output._names;
      this.names = new NonBlockingHashSet<>();
      this.names.addAll(Arrays.asList(aModel._output._names));
      this.ignoredColumns = new NonBlockingHashSet<>();
      if (null != aModel._parms._ignored_columns)
        this.ignoredColumns.addAll(Arrays.asList(aModel._parms._ignored_columns));
      // If the ensemble specifies its own ignored_columns, they must be
      // consistent with the base_models:
      if (null != this._parms._ignored_columns) {
        NonBlockingHashSet<String> ensembleIgnoredColumns = new NonBlockingHashSet<>();
        ensembleIgnoredColumns.addAll(Arrays.asList(this._parms._ignored_columns));
        if (!ensembleIgnoredColumns.equals(this.ignoredColumns))
          throw new H2OIllegalArgumentException("A StackedEnsemble takes its ignored_columns list from the base models. An inconsistent list of ignored_columns was specified for the ensemble model.");
      }
      responseColumn = aModel._parms._response_column;
      if (!responseColumn.equals(_parms._response_column))
        throw new H2OIllegalArgumentException("StackedModel response_column must match the response_column of each base model. Found: " + responseColumn + " and: " + _parms._response_column);
      nfolds = aModel._parms._nfolds;
      _parms._distribution = aModel._parms._distribution;
      beenHere = true;
    }
  }
  // Test the flag rather than the last lookup result: a missing final key must not
  // discard base models that were found and validated earlier in the loop.
  if (!beenHere)
    throw new H2OIllegalArgumentException("When creating a StackedEnsemble you must specify one or more models; " + _parms._base_models.length + " were specified but none of those were found: " + Arrays.toString(_parms._base_models));
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class AggregatorModel method scoreExemplarMembers.
/**
 * Builds a frame of the training rows whose exemplar assignment equals the gid of the
 * exemplar at the given index, and stores it in the DKV.
 *
 * @param destination_key key under which the resulting frame is created and stored
 * @param exemplarIdx     index into _exemplars (and _counts) of the exemplar of interest
 * @return the frame of selected training rows, with categorical domains shrunk to the observed levels
 */
@Override
public Frame scoreExemplarMembers(Key<Frame> destination_key, final int exemplarIdx) {
  // Temporary 0/1 column: 1 where the assignment value matches the exemplar's gid
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c, NewChunk nc) {
      for (int i = 0; i < c._len; ++i) nc.addNum(c.at8(i) == _exemplars[exemplarIdx].gid ? 1 : 0, 0);
    }
  }.doAll(Vec.T_NUM, new Frame(new Vec[] { _exemplar_assignment_vec_key.get() })).outputFrame().anyVec();
  try {
    Frame orig = _parms.train();
    // Training columns plus the predicate as the last column, as input for DeepSelect
    Frame ff = new Frame(orig.names(), orig.vecs());
    ff.add("predicate", booleanCol);
    Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
    FrameUtils.shrinkDomainsToObservedSubset(res);
    DKV.put(res);
    assert (res.numRows() == _counts[exemplarIdx]);
    return res;
  } finally {
    // Always drop the temporary predicate column, including when selection or the assertion fails
    booleanCol.remove();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class AggregatorModel method createFrameOfExemplars.
/**
 * Builds a frame containing one row of {@code orig} per exemplar plus a "counts" column
 * filled from _counts, and stores it in the DKV under the given key.
 *
 * @param orig            frame the exemplar rows are selected from
 * @param destination_key key under which the resulting frame is created and stored
 * @return the frame of exemplar rows with the appended "counts" column
 */
public Frame createFrameOfExemplars(Frame orig, Key destination_key) {
  final long[] keep = new long[_exemplars.length];
  for (int i = 0; i < keep.length; ++i) keep[i] = _exemplars[i].gid;
  Vec exAssignment = _exemplar_assignment_vec_key.get();
  // preserve the original row order
  // Temporary 0/1 column: set to 1 at every row index listed in keep
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c, Chunk c2) {
      for (int i = 0; i < keep.length; ++i) {
        // Skip gids that fall outside this chunk's row range
        if (keep[i] < c.start())
          continue;
        if (keep[i] >= c.start() + c._len)
          continue;
        c2.set((int) (keep[i] - c.start()), 1);
      }
    }
  }.doAll(new Frame(new Vec[] { exAssignment, exAssignment.makeZero() }))._fr.vec(1);
  Frame res;
  try {
    // Original columns plus the predicate as the last column, as input for DeepSelect
    Frame ff = new Frame(orig.names(), orig.vecs());
    ff.add("predicate", booleanCol);
    res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
    FrameUtils.shrinkDomainsToObservedSubset(res);
  } finally {
    // Always drop the temporary predicate column, including when selection fails
    booleanCol.remove();
  }
  assert (res.numRows() == _exemplars.length);
  Vec cnts = res.anyVec().makeZero();
  Vec.Writer vw = cnts.open();
  try {
    for (int i = 0; i < _counts.length; ++i) vw.set(i, _counts[i]);
  } finally {
    // Close the writer even if a set() call throws
    vw.close();
  }
  res.add("counts", cnts);
  DKV.put(destination_key, res);
  return res;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DeepLearningModel method scoreAutoEncoder.
/**
 * Computes the auto-encoder reconstruction error for every row on the fly, i.e. without
 * allocating the reconstruction as done in Frame score(Frame fr). A ModelMetricsAutoEncoder
 * built from the mean of the first output column is registered via addModelMetrics.
 * @param frame Original data (can contain response, will be ignored)
 * @param destination_key Frame Id for output
 * @param reconstruction_error_per_feature whether to return the squared error per feature
 * @return Frame with a single "Reconstruction.MSE" column, or one "reconstr_<coef>.SE" column per
 *         coefficient name when per-feature errors are requested; caller is responsible for deletion
 */
public Frame scoreAutoEncoder(Frame frame, Key destination_key, final boolean reconstruction_error_per_feature) {
  if (!get_params()._autoencoder) {
    throw new H2OIllegalArgumentException("Only for AutoEncoder Deep Learning model.", "");
  }
  final int len = _output._names.length;
  Frame adaptFrm = new Frame(frame);
  adaptTestForTrain(adaptFrm, true, false);
  final int outputcols;
  if (reconstruction_error_per_feature) {
    outputcols = model_info.data_info.fullN();
  } else {
    outputcols = 1;
  }
  Frame mse = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] errorCols) {
      double[] rowValues = new double[len];
      double[] rowErrors = new double[outputcols];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      final int rowCount = chks[0]._len;
      for (int row = 0; row < rowCount; row++) {
        // Read the first len columns of the current row
        for (int col = 0; col < len; col++) {
          rowValues[col] = chks[col].atd(row);
        }
        score_autoencoder(rowValues, rowErrors, neurons,
            false, // reconstruction
            reconstruction_error_per_feature);
        for (int out = 0; out < outputcols; ++out) {
          errorCols[out].addNum(rowErrors[out]);
        }
      }
    }
  }.doAll(outputcols, Vec.T_NUM, adaptFrm).outputFrame();
  final String[] names;
  if (!reconstruction_error_per_feature) {
    names = new String[] { "Reconstruction.MSE" };
  } else {
    String[] coefnames = model_info().data_info().coefNames();
    assert (outputcols == coefnames.length);
    names = new String[outputcols];
    for (int idx = 0; idx < names.length; ++idx) {
      names[idx] = "reconstr_" + coefnames[idx] + ".SE";
    }
  }
  Frame res = new Frame(destination_key, names, mse.vecs());
  DKV.put(res);
  addModelMetrics(new ModelMetricsAutoEncoder(this, frame, res.numRows(), res.vecs()[0].mean()));
  return res;
}
Aggregations