use of water.fvec.Chunk in project h2o-3 by h2oai.
the class SVDModel method predictScoreImpl.
/**
 * Scores a frame by appending one zero-initialised "PC&lt;k&gt;" column per requested
 * component ({@code _parms._nv} of them), filling those columns row by row from
 * {@code score0}, and returning just the appended columns as a new frame stored
 * in the DKV under {@code destination_key}.
 *
 * Model metrics are built from {@code orig} unconditionally; {@code computeMetrics}
 * is not consulted by this implementation.
 *
 * @param orig            frame handed to the metric builder
 * @param adaptedFr       adapted input frame; it is shallow-copied before columns are appended
 * @param destination_key key under which the returned frame is published
 * @param j               optional job; polled for a stop request and ticked once per chunk
 * @param computeMetrics  unused here
 * @return frame holding only the projection columns
 */
@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
  final Frame scoringFrame = new Frame(adaptedFr);
  int pcNumber = 1;
  while (pcNumber <= _parms._nv) {
    scoringFrame.add("PC" + pcNumber, scoringFrame.anyVec().makeZero());
    pcNumber++;
  }

  new MRTask() {
    @Override
    public void map(Chunk[] chks) {
      // Bail out early on cancellation; the job is only polled when the task itself is still live.
      if (isCancelled()) {
        return;
      }
      if (j != null && j.stop_requested()) {
        return;
      }

      final int inputCols = _output._names.length;
      final double[] rowBuffer = new double[inputCols];
      final double[] pcBuffer = new double[_parms._nv];
      final int rowCount = chks[0]._len;

      for (int r = 0; r < rowCount; r++) {
        final double[] projected = score0(chks, r, rowBuffer, pcBuffer);
        // The projection columns sit immediately after the model's input columns.
        for (int k = 0; k < pcBuffer.length; k++) {
          chks[inputCols + k].set(r, projected[k]);
        }
      }

      if (j != null) {
        j.update(1);
      }
    }
  }.doAll(scoringFrame);

  // Return the projection into right singular vector (V) space
  final int firstPcCol = _output._names.length;
  final int endPcCol = scoringFrame.numCols();
  final Frame extracted = scoringFrame.extractFrame(firstPcCol, endPcCol);
  final Frame result = new Frame(Key.<Frame>make(destination_key), extracted.names(), extracted.vecs());
  DKV.put(result);
  makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
  return result;
}
use of water.fvec.Chunk in project h2o-3 by h2oai.
the class ReconstructTreeState method map.
/**
 * Replays every tree in {@code _trees} over the rows of this chunk set and
 * accumulates the per-class tree predictions into the chunks returned by
 * {@code _st.chk_tree(chks, c)}. When {@code _OOBEnabled} is set, only rows
 * drawn as out-of-bag for a given tree are scored by that tree, and the chunk
 * returned by {@code _st.chk_oobt(chks)} is incremented by the row weight.
 *
 * Rows with zero weight or a NaN response are skipped before any random number
 * is drawn, so they do not advance the per-tree RNG.
 *
 * @param chks chunks of the frame being processed; the first {@code _ncols}
 *             entries are read as the feature values of each row
 */
@Override
public void map(Chunk[] chks) {
// Scratch buffers reused for every row: feature values and per-class predictions.
double[] data = new double[_ncols];
double[] preds = new double[_nclass + 1];
int ntrees = _trees.length;
// Fall back to a C0DChunk built with (1, len) when there is no weight column
// (presumably a constant weight of 1 for every row -- confirm against C0DChunk).
Chunk weight = _st.hasWeightCol() ? _st.chk_weight(chks) : new C0DChunk(1, chks[0]._len);
Chunk oobt = _st.chk_oobt(chks);
Chunk resp = _st.chk_resp(chks);
for (int tidx = 0; tidx < ntrees; tidx++) {
// One pass over all rows per tree.
// The RNG is derived from the tree and this chunk's index (oobt.cidx()).
Random rng = rngForTree(_trees[tidx], oobt.cidx());
for (int row = 0; row < oobt._len; row++) {
double w = weight.atd(row);
if (w == 0)
continue;
double y = resp.atd(row);
if (Double.isNaN(y))
continue;
// Short-circuit: rng.nextFloat() is only consumed when OOB is enabled.
boolean rowIsOOB = _OOBEnabled && rng.nextFloat() >= _rate;
if (!_OOBEnabled || rowIsOOB) {
// Make a prediction with this single tree
for (int i = 0; i < _ncols; i++) data[i] = chks[i].atd(row);
Arrays.fill(preds, 0);
score0(data, preds, _trees[tidx]);
// Single-output case (_nclass == 1): copy preds[0] into preds[1] so the
// loop below, which reads preds[1 + c], picks it up.
if (_nclass == 1)
preds[1] = preds[0];
// Write tree predictions
for (int c = 0; c < _nclass; c++) {
// One iteration per class; zero predictions are not written.
double prediction = preds[1 + c];
if (preds[1 + c] != 0) {
Chunk ctree = _st.chk_tree(chks, c);
double wcount = oobt.atd(row);
if (_OOBEnabled && _nclass >= 2)
// Store a running average of the prediction, using the value currently in oobt as the prior count.
// NOTE(review): the (float) cast binds only to the parenthesised numerator; the
// division is then carried out in double. Confirm that this is intended.
// NOTE(review): the numerator adds `prediction` unscaled while the denominator
// adds `w` -- verify this is the intended weighting.
ctree.set(row, (float) (ctree.atd(row) * wcount + prediction) / (wcount + w));
else
// Otherwise accumulate a plain sum (the cast here covers the whole sum).
ctree.set(row, (float) (ctree.atd(row) + prediction));
}
}
// Mark oob row: add this row's weight to the oobt value for the row
if (rowIsOOB)
oobt.set(row, oobt.atd(row) + w);
}
}
}
// Drop the reference once the chunk is processed (reason not visible here --
// presumably to avoid shipping it back with the task; confirm).
_st = null;
}
use of water.fvec.Chunk in project h2o-3 by h2oai.
the class VecUtils method categoricalToInt.
/**
 * Create a new {@link Vec} of numeric values from a categorical {@link Vec}.
 *
 * If the first value in the domain of the src Vec is a stringified int, then the
 * domain values themselves are used as the numeric values. In that case every domain
 * level must be parseable as an int; the whole domain is validated up front, before
 * any new Vec is allocated, and an unparseable level is reported as an
 * H2OIllegalArgumentException naming the offending value.
 * Otherwise (first level is not an int, or the domain is empty) the raw enumeration
 * level mapping is used: src is copied to a new numeric Vec whose domain is null.
 *
 * The magic of this method should be eliminated. It should just use enumeration level
 * maps. If the user wants domains to be used, call categoricalDomainsToNumeric().
 * PUBDEV-2209
 *
 * @param src a categorical {@link Vec}
 * @return a numeric {@link Vec}
 * @throws IllegalArgumentException if src is not a categorical column
 * @throws H2OIllegalArgumentException if the first domain level is an int but a later one is not
 */
public static Vec categoricalToInt(final Vec src) {
  if (src.isInt() && (src.domain() == null || src.domain().length == 0))
    return copyOver(src, Vec.T_NUM, null);
  if (!src.isCategorical())
    throw new IllegalArgumentException("categoricalToInt conversion only works on categorical columns.");

  final String[] domain = src.domain();
  // Nothing to parse: fall back to the raw level mapping instead of indexing an empty domain.
  if (domain == null || domain.length == 0)
    return copyOver(src, Vec.T_NUM, null);

  // Parse the domain once, on the caller's thread, rather than once per row inside the task.
  final int[] levelValues = new int[domain.length];
  try {
    levelValues[0] = Integer.parseInt(domain[0]);
  } catch (NumberFormatException ignored) {
    // First level is not an int: by contract the raw enumeration levels are used.
    return copyOver(src, Vec.T_NUM, null);
  }
  for (int lvl = 1; lvl < domain.length; lvl++) {
    try {
      levelValues[lvl] = Integer.parseInt(domain[lvl]);
    } catch (NumberFormatException e) {
      // Only the single-String constructor is known to exist here, so the offending
      // value is carried in the message.
      throw new H2OIllegalArgumentException(
          "categoricalToInt conversion failed: domain value '" + domain[lvl] + "' cannot be parsed as an int.");
    }
  }

  // Validation passed, so the copy cannot be left half-converted by a parse failure.
  final Vec newVec = copyOver(src, Vec.T_NUM, null);
  new MRTask() {
    @Override
    public void map(Chunk c) {
      for (int i = 0; i < c._len; ++i) {
        if (!c.isNA(i))
          c.set(i, levelValues[(int) c.at8(i)]);
      }
    }
  }.doAll(newVec);
  return newVec;
}
use of water.fvec.Chunk in project h2o-3 by h2oai.
the class VecUtils method UUIDToStringVec.
/**
 * Builds a string {@link Vec} from a UUID {@link Vec}.
 *
 * Every non-missing entry is rendered through {@code PrettyPrint.UUID}; missing
 * entries, and every row of a {@code C0DChunk}, become NA in the result.
 *
 * @param src a UUID {@link Vec}
 * @return a string {@link Vec}
 * @throws H2OIllegalArgumentException if {@code src} is not a UUID column
 */
public static Vec UUIDToStringVec(Vec src) {
  if (!src.isUUID()) {
    throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
  }
  final Vec converted = new MRTask() {
    @Override
    public void map(Chunk chk, NewChunk newChk) {
      // A C0DChunk is treated as all NAs without inspecting individual rows.
      final boolean allMissing = chk instanceof C0DChunk;
      final int rowCount = chk._len;
      for (int r = 0; r < rowCount; r++) {
        if (allMissing || chk.isNA(r)) {
          newChk.addNA();
        } else {
          newChk.addStr(PrettyPrint.UUID(chk.at16l(r), chk.at16h(r)));
        }
      }
    }
  }.doAll(Vec.T_STR, src).outputFrame().anyVec();
  assert converted != null;
  return converted;
}
use of water.fvec.Chunk in project h2o-2 by h2oai.
the class DeepLearningModel method scoreDeepFeatures.
/**
 * Compute the activations of one hidden layer ("deep features") for every row of a frame.
 *
 * For a supervised model, a response column found in {@code frame} is dropped before
 * scoring and prepended to the result afterwards.
 *
 * @param frame Original data (can contain response, will be ignored for scoring)
 * @param layer zero-based index of the hidden layer to extract; must satisfy
 *              {@code 0 <= layer < hidden.length} (checked by assertion only)
 * @return Frame with one column "DF.C1", "DF.C2", ... per unit of the chosen hidden layer,
 *         preceded by the response column when it was present in {@code frame};
 *         caller is responsible for deletion
 */
public Frame scoreDeepFeatures(Frame frame, final int layer) {
  assert (layer >= 0 && layer < model_info().get_params().hidden.length);
  final int len = nfeatures();
  Vec resp = null;
  if (isSupervised()) {
    int ridx = frame.find(responseName());
    if (ridx != -1) {
      // drop the response for scoring! Work on a shallow copy so the caller's frame is untouched.
      frame = new Frame(frame);
      resp = frame.vecs()[ridx];
      frame.remove(ridx);
    }
  }
  // Adapt the Frame layout - returns adapted frame and frame containing only
  // newly created vectors
  Frame[] adaptFrms = adapt(frame, false, false);
  // Adapted frame containing all columns - mix of original vectors from fr
  // and newly created vectors serving as adaptors
  Frame adaptFrm = adaptFrms[0];
  // Contains only newly created vectors. The frame eases deletion of these vectors.
  Frame onlyAdaptFrm = adaptFrms[1];
  // create new features, will be dense: one output column per unit of the requested layer
  final int features = model_info().get_params().hidden[layer];
  Vec[] vecs = adaptFrm.anyVec().makeZeros(features);
  for (int j = 0; j < features; ++j) {
    adaptFrm.add("DF.C" + (j + 1), vecs[j]);
  }
  new MRTask2() {
    @Override
    public void map(Chunk[] chks) {
      double[] tmp = new double[len];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      // Output columns were appended after the model's own columns.
      final int firstOut = _names.length;
      for (int row = 0; row < chks[0]._len; row++) {
        for (int i = 0; i < len; i++) tmp[i] = chks[i].at0(row);
        ((Neurons.Input) neurons[0]).setInput(-1, tmp);
        DeepLearningTask.step(-1, neurons, model_info, false, null);
        // extract the layer-th hidden feature (neurons[0] is the input layer, hence layer + 1)
        float[] out = neurons[layer + 1]._a.raw();
        for (int c = 0; c < features; c++) chks[firstOut + c].set0(row, out[c]);
      }
    }
  }.doAll(adaptFrm);
  // Return just the output columns
  int x = _names.length, y = adaptFrm.numCols();
  Frame ret = adaptFrm.extractFrame(x, y);
  onlyAdaptFrm.delete();
  if (resp != null)
    ret.prepend(responseName(), resp);
  return ret;
}
Aggregations