use of water.fvec.C0DChunk in project h2o-3 by h2oai.
the class ScoreBuildHistogram method map.
/**
 * Assigns every row of this chunk set to its node for the current tree level
 * and then accumulates all rows into the histograms.
 *
 * <p>Pass 1 fills a per-row array of new node ids: when a prior pass exists
 * ({@code _leaf > 0}) the rows are scored against the partially-built tree via
 * {@code score_decide}; otherwise rows with a non-zero weight whose current
 * node id satisfies {@code isDecidedRow} are marked {@code DECIDED_ROW}.
 * Pass 2 hands everything to {@code accum_all}.
 *
 * @param chks chunks of the frame being processed; the work, node-id and
 *             optional weight columns are picked by {@code _workIdx},
 *             {@code _nidIdx} and {@code _weightIdx}
 */
@Override
public void map(Chunk[] chks) {
  final Chunk wrks = chks[_workIdx];
  final Chunk nids = chks[_nidIdx];
  // No weight column configured: substitute a constant chunk holding 1 for every row.
  final Chunk weight;
  if (_weightIdx >= 0) {
    weight = chks[_weightIdx];
  } else {
    weight = new C0DChunk(1, chks[0].len());
  }

  // Pass 1: compute the new node assignment of every row.
  final int[] nnids = new int[nids._len];
  if (_leaf > 0) {
    // A prior pass exists: score rows against the partially-built tree.
    score_decide(chks, nids, nnids);
  } else {
    // First pass: only mark the rows that are already decided; zero-weight rows are left at 0.
    for (int row = 0; row < nids._len; row++) {
      if (weight.atd(row) != 0 && isDecidedRow((int) nids.atd(row))) {
        nnids[row] = DECIDED_ROW;
      }
    }
  }

  // Pass 2: accumulate all rows and columns into the histograms.
  // (The original source noted accum_subset(chks, wrks, weight, nnids) as a simpler
  // debugging alternative; accum_all is described there as generally faster.)
  accum_all(chks, wrks, weight, nnids);
}
use of water.fvec.C0DChunk in project h2o-3 by h2oai.
the class TreeMeasuresCollector method map.
/**
 * Collects, for each tree, measures over the sampled out-of-bag rows of this
 * chunk set.
 *
 * <p>For every tree the out-of-bag rows are sampled with that tree's RNG, each
 * usable row (response not NA, weight not zero) is scored by that single tree,
 * and the outcome is folded into {@code _votes} (classification: summed weight
 * of correct predictions) or {@code _sse} (regression: summed squared error).
 * {@code _nrows} receives the summed weight of the rows used. When
 * {@code _var >= 0} the value of column {@code _var} is taken from a shuffled
 * out-of-bag row instead of the row being scored.
 *
 * @param chks chunks of the frame being processed; the first {@code _ncols}
 *             chunks are read as the predictor columns
 */
@Override
public void map(Chunk[] chks) {
  final double[] data = new double[_ncols];
  final double[] preds = new double[_nclasses + 1];
  final Chunk cresp = _st.chk_resp(chks);
  // No weight column: substitute a constant chunk holding 1 for every row.
  final Chunk weights;
  if (_st.hasWeightCol()) {
    weights = _st.chk_weight(chks);
  } else {
    weights = new C0DChunk(1, chks[0]._len);
  }
  final int nrows = cresp._len;

  // Preallocated sampling buffer, reused for every tree.
  int[] oob = new int[2 + Math.round((1f - _rate) * nrows * 1.2f + 0.5f)];
  int[] soob = null;

  // Prepare the output arrays; only one of _votes / _sse is populated.
  _nrows = new double[_ntrees];
  if (_classification) {
    _votes = new double[_ntrees];
    _sse = null;
  } else {
    _votes = null;
    _sse = new float[_ntrees];
  }

  // Seed used for shuffling the out-of-bag samples.
  final long seedForOob = ShuffleTask.seed(cresp.cidx());

  for (int tidx = 0; tidx < _ntrees; tidx++) {
    // Out-of-bag RNG for this tree.
    final Random rng = rngForTree(_trees[tidx], cresp.cidx());
    // Sample the out-of-bag rows, reusing the same buffer; slot 0 holds the
    // number of sampled rows and slots 1..count hold the row indices.
    oob = ModelUtils.sampleOOBRows(nrows, _rate, rng, oob);
    final int oobcnt = oob[0];
    if (_var >= 0) {
      if (soob == null || soob.length < oobcnt) {
        soob = new int[oobcnt];
      }
      // Shuffle the sampled rows and copy the result into soob.
      ArrayUtils.shuffleArray(oob, oobcnt, soob, seedForOob, 1);
    }

    for (int k = 0; k < oobcnt; k++) {
      final int row = oob[k + 1];
      final double w = weights.atd(row);
      // Rows without a response or with zero weight cannot contribute.
      if (cresp.isNA(row) || w == 0) {
        continue;
      }
      for (int i = 0; i < _ncols; i++) {
        data[i] = chks[i].atd(row);
      }
      // Permute the selected variable, if any.
      if (_var >= 0) {
        data[_var] = chks[_var].atd(soob[k]);
      } else {
        assert soob == null;
      }
      // Score the row against this tree only.
      Arrays.fill(preds, 0);
      score0(data, preds, _trees[tidx]);

      if (_classification) {
        final int predicted = getPrediction(preds, null, /*FIXME: should use model's _priorClassDistribution*/
            data, _threshold);
        final int actual = (int) cresp.at8(row);
        // Only correct votes are collected.
        if (predicted == actual) {
          _votes[tidx] += w;
        }
      } else {
        // Regression: the prediction lives in slot 0 (marked "Important!" in the original).
        final double predicted = preds[0];
        final double actual = cresp.atd(row);
        final double error = actual - predicted;
        // NOTE(review): the squared error is not multiplied by w although _nrows is
        // weighted below - confirm this is intended.
        _sse[tidx] += error * error;
      }
      // Accumulate the weight of the rows that were used.
      _nrows[tidx] += w;
    }
  }

  // Clean-up
  _trees = null;
}
use of water.fvec.C0DChunk in project h2o-3 by h2oai.
the class VecUtils method numericToStringVec.
/**
 * Create a new {@link Vec} of string values from a numeric {@link Vec}.
 *
 * Currently only uses a default pretty printer. Would be better if
 * it accepted a format string PUBDEV-2211
 *
 * <p>Every non-NA value is rendered with {@code PrettyPrint.number(chk, value, 4)};
 * NA values stay NA in the result.
 *
 * @param src a numeric {@link Vec}
 * @return a string {@link Vec}
 * @throws H2OIllegalValueException if {@code src} is a categorical or UUID column
 */
public static Vec numericToStringVec(Vec src) {
  if (src.isCategorical() || src.isUUID()) {
    throw new H2OIllegalValueException("Cannot convert a non-numeric column" + " using numericToStringVec() ", src);
  }
  Vec res = new MRTask() {
    @Override
    public void map(Chunk chk, NewChunk newChk) {
      // Deliberately no shortcut for C0DChunk: such a chunk holds one constant double
      // for all of its rows, and that constant is not necessarily NA (for example
      // new C0DChunk(1, len) is a chunk of ones). Treating it as "all NAs" turned
      // constant numeric columns into NA strings. The per-row isNA check below
      // already yields NAs when the constant really is NA.
      for (int i = 0; i < chk._len; i++) {
        if (chk.isNA(i)) {
          newChk.addNA();
        } else {
          newChk.addStr(PrettyPrint.number(chk, chk.atd(i), 4));
        }
      }
    }
  }.doAll(Vec.T_STR, src).outputFrame().anyVec();
  assert res != null;
  return res;
}
use of water.fvec.C0DChunk in project h2o-3 by h2oai.
the class ReconstructTreeState method map.
/**
 * Replays every tree in {@code _trees} over the rows of this chunk set and
 * writes the resulting per-class predictions back into the tree columns.
 *
 * <p>Rows with zero weight or a NaN response are skipped. With
 * {@code _OOBEnabled} a row is only scored by a tree when that tree's RNG
 * draw is {@code >= _rate}; the out-of-bag counter column is then increased
 * by the row weight. Without it every remaining row is scored by every tree.
 *
 * @param chks chunks of the frame being processed; the first {@code _ncols}
 *             chunks are read as the predictor columns
 */
@Override
public void map(Chunk[] chks) {
  final double[] data = new double[_ncols];
  final double[] preds = new double[_nclass + 1];
  final int ntrees = _trees.length;
  // No weight column: substitute a constant chunk holding 1 for every row.
  final Chunk weight;
  if (_st.hasWeightCol()) {
    weight = _st.chk_weight(chks);
  } else {
    weight = new C0DChunk(1, chks[0]._len);
  }
  final Chunk oobt = _st.chk_oobt(chks);
  final Chunk resp = _st.chk_resp(chks);

  for (int tidx = 0; tidx < ntrees; tidx++) {
    // Out-of-bag RNG for this tree.
    final Random rng = rngForTree(_trees[tidx], oobt.cidx());
    for (int row = 0; row < oobt._len; row++) {
      final double w = weight.atd(row);
      if (w == 0) {
        continue;
      }
      final double y = resp.atd(row);
      if (Double.isNaN(y)) {
        continue;
      }
      // The RNG is consulted only when OOB is enabled and only for rows that
      // passed the two checks above.
      final boolean rowIsOOB = _OOBEnabled && rng.nextFloat() >= _rate;
      if (_OOBEnabled && !rowIsOOB) {
        continue;
      }

      // Make a prediction with this tree only.
      for (int i = 0; i < _ncols; i++) {
        data[i] = chks[i].atd(row);
      }
      Arrays.fill(preds, 0);
      score0(data, preds, _trees[tidx]);
      // Regression only: mirror the prediction into slot 1 so the loop below is uniform.
      if (_nclass == 1) {
        preds[1] = preds[0];
      }

      // Write the tree predictions for every class with a non-zero prediction.
      for (int c = 0; c < _nclass; c++) {
        final double prediction = preds[1 + c];
        if (prediction == 0) {
          continue;
        }
        final Chunk ctree = _st.chk_tree(chks, c);
        final double wcount = oobt.atd(row);
        if (_OOBEnabled && _nclass >= 2) {
          // Store the averaged prediction. The float cast binds to the numerator
          // only, so the quotient passed to set() is a double.
          ctree.set(row, (float) (ctree.atd(row) * wcount + prediction) / (wcount + w));
        } else {
          ctree.set(row, (float) (ctree.atd(row) + prediction));
        }
      }

      // Mark the row as out-of-bag by adding its weight to the counter column.
      if (rowIsOOB) {
        oobt.set(row, oobt.atd(row) + w);
      }
    }
  }
  _st = null;
}
use of water.fvec.C0DChunk in project h2o-3 by h2oai.
the class VecUtils method UUIDToStringVec.
/**
 * Build a string {@link Vec} from a UUID {@link Vec}.
 *
 * Each non-NA element is rendered through {@code PrettyPrint.UUID} from its
 * low and high 64-bit halves; NA elements stay NA. A chunk that is a
 * {@code C0DChunk} is emitted entirely as NAs without inspecting its rows.
 *
 * @param src a UUID {@link Vec}
 * @return a string {@link Vec}
 * @throws H2OIllegalArgumentException if {@code src} is not a UUID column
 */
public static Vec UUIDToStringVec(Vec src) {
  if (!src.isUUID()) {
    throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
  }
  final Vec converted = new MRTask() {
    @Override
    public void map(Chunk chk, NewChunk newChk) {
      // A C0DChunk is treated as all-NA; the flag short-circuits the per-row
      // check so such a chunk is never queried.
      final boolean treatAsAllNa = chk instanceof C0DChunk;
      for (int i = 0; i < chk._len; i++) {
        if (treatAsAllNa || chk.isNA(i)) {
          newChk.addNA();
        } else {
          newChk.addStr(PrettyPrint.UUID(chk.at16l(i), chk.at16h(i)));
        }
      }
    }
  }.doAll(Vec.T_STR, src).outputFrame().anyVec();
  assert converted != null;
  return converted;
}
Aggregations