use of water.fvec.Chunk in project h2o-2 by h2oai.
the class DeepLearningModel method scoreAutoEncoder.
/**
 * Computes the auto-encoder reconstruction error row by row, writing it straight into a single
 * output column instead of first allocating the reconstructed columns as {@code score(Frame)} does.
 *
 * @param frame Original data (may contain a response column, which is ignored)
 * @return Frame with exactly one Vec, "Reconstruction.MSE", holding the reconstruction error (MSE)
 *         of each row; the caller is responsible for deleting it
 */
public Frame scoreAutoEncoder(Frame frame) {
  final int inputCols = _names.length;
  // adapt() hands back two frames:
  //   [0] the complete adapted layout (original vectors mixed with newly created adaptor vectors)
  //   [1] only the newly created vectors, kept separately so they can be deleted afterwards
  final Frame[] adapted = adapt(frame, false, false);
  final Frame scoringFrame = adapted[0];
  final Frame adaptorsOnly = adapted[1];
  // Append the zero-filled output column; it ends up at index inputCols.
  scoringFrame.add("Reconstruction.MSE", scoringFrame.anyVec().makeZero());
  new MRTask2() {
    @Override
    public void map(Chunk[] chks) {
      final double[] rowValues = new double[inputCols];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      final Chunk errorChunk = chks[inputCols];
      final int rowCount = chks[0]._len;
      for (int r = 0; r < rowCount; r++) {
        // Gather the original (adapted) input values of this row
        for (int col = 0; col < inputCols; col++) {
          rowValues[col] = chks[col].at0(r);
        }
        // Per-row reconstruction error goes into the appended last column
        errorChunk.set0(r, score_autoencoder(rowValues, null, neurons));
      }
    }
  }.doAll(scoringFrame);
  // Hand back only the output column(s) appended after the model's input columns
  final int firstOutput = _names.length;
  final int pastLastOutput = scoringFrame.numCols();
  final Frame mseFrame = scoringFrame.extractFrame(firstOutput, pastLastOutput);
  adaptorsOnly.delete();
  return mseFrame;
}
use of water.fvec.Chunk in project h2o-2 by h2oai.
the class DeepLearningModel method score.
/**
 * Overrides Model.score(): produces a regular prediction unless the model is an auto-encoder,
 * in which case the reconstruction of the input is returned instead.
 *
 * @param frame Test dataset
 * @return A frame containing the prediction, or (for auto-encoders) one "reconstr_"-prefixed
 *         column per model coefficient name
 */
@Override
public Frame score(Frame frame) {
  // Anything that is not an auto-encoder is scored by the generic implementation.
  if (!get_params().autoencoder) {
    return super.score(frame);
  }

  // --- Reconstruction ---
  // adapt() hands back two frames:
  //   [0] the complete adapted layout (original vectors mixed with newly created adaptor vectors)
  //   [1] only the newly created vectors, kept separately so they can be deleted afterwards
  final Frame[] adapted = adapt(frame, false, false);
  final Frame scoringFrame = adapted[0];
  final Frame adaptorsOnly = adapted[1];

  final int len = model_info().data_info().fullN();
  assert (model_info().data_info()._responses == 0);
  final String[] coefnames = model_info().data_info().coefNames();
  assert (len == coefnames.length);

  // One zero-filled output column per coefficient, appended after the input columns
  for (int c = 0; c < len; c++) {
    scoringFrame.add("reconstr_" + coefnames[c], scoringFrame.anyVec().makeZero());
  }

  new MRTask2() {
    @Override
    public void map(Chunk[] chks) {
      final double[] tmp = new double[_names.length];
      final float[] preds = new float[len];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      final int firstOutput = _names.length;
      final int rowCount = chks[0]._len;
      for (int r = 0; r < rowCount; r++) {
        final float[] reconstructed = score_autoencoder(chks, r, tmp, preds, neurons);
        // Copy the reconstructed values into the appended output columns
        for (int c = 0; c < preds.length; c++) {
          chks[firstOutput + c].set0(r, reconstructed[c]);
        }
      }
    }
  }.doAll(scoringFrame);

  // Return only the reconstruction columns. The extraction has to happen before the adaptor
  // vectors are deleted (the original notes that extractFrame calls vec_impl()).
  final int firstOutput = _names.length;
  final int pastLastOutput = scoringFrame.numCols();
  final Frame reconstruction = scoringFrame.extractFrame(firstOutput, pastLastOutput);
  adaptorsOnly.delete();
  return reconstruction;
}
use of water.fvec.Chunk in project h2o-2 by h2oai.
the class OOBScorer method map.
/**
 * Accumulates per-tree predictions for the rows of this chunk that are treated as out-of-bag.
 *
 * For every tree a fresh RNG is obtained via {@code rngForTree} and one float is drawn per row.
 * A row is handled when the draw is {@code >= _rate} or when its response value is NaN. Such a
 * row is marked in the OOB chunk, scored with that single tree, and every non-zero prediction is
 * added to the matching per-class tree chunk.
 *
 * @param chks chunks of the frame being processed; the first {@code _ncols} are read as inputs
 */
@Override
public void map(Chunk[] chks) {
  final double[] rowData = new double[_ncols];
  final float[] preds = new float[_nclass + 1];
  final int treeCount = _trees.length;
  final Chunk oobChunk = chk_oobt(chks);
  final Chunk respChunk = chk_resp(chks);
  for (int t = 0; t < treeCount; t++) {
    // OOB RNG for this tree; exactly one draw is consumed per row below
    final Random rng = rngForTree(_trees[t], oobChunk.cidx());
    for (int row = 0; row < oobChunk._len; row++) {
      final boolean outOfBag = rng.nextFloat() >= _rate || Double.isNaN(respChunk.at0(row));
      if (!outOfBag) {
        continue;
      }
      // Mark the OOB row; for regression the cell instead counts the trees voting for the row
      oobChunk.set0(row, _nclass > 1 ? 1 : oobChunk.at0(row) + 1);
      // Assemble the input row and score it with this tree only
      for (int col = 0; col < _ncols; col++) {
        rowData[col] = chks[col].at0(row);
      }
      Arrays.fill(preds, 0);
      score0(rowData, preds, _trees[t]);
      // Regression: mirror the value into slot 1 so the per-class loop below applies unchanged
      if (_nclass == 1) {
        preds[1] = preds[0];
      }
      // Add every non-zero class prediction to its accumulator chunk
      for (int c = 0; c < _nclass; c++) {
        final float vote = preds[1 + c];
        if (vote != 0) {
          final Chunk treeChunk = chk_tree(chks, c);
          treeChunk.set0(row, (float) (treeChunk.at0(row) + vote));
        }
      }
    }
  }
}
use of water.fvec.Chunk in project h2o-2 by h2oai.
the class TreeMeasuresCollector method map.
/**
 * Collects per-tree measures over the sampled out-of-bag rows of this chunk.
 *
 * For each tree the OOB rows are sampled with the tree's RNG, rows with a missing response are
 * skipped, and each remaining row is scored with that single tree. When {@code _var >= 0} the
 * value of column {@code _var} is replaced by the value taken from a shuffled OOB row before
 * scoring. Results go to {@code _nrows} (rows scored per tree) and to either {@code _votes}
 * (correct classifications) or {@code _sse} (sum of squared errors for regression).
 *
 * @param chks chunks of the frame being processed; the first {@code _ncols} are read as inputs
 */
@Override
public void map(Chunk[] chks) {
  final double[] rowData = new double[_ncols];
  final float[] preds = new float[_nclasses + 1];
  final Chunk cresp = chk_resp(chks);
  final int nrows = cresp._len;
  // Preallocated sampling buffer; slot 0 carries the sample count, the rows follow from slot 1
  int[] oob = new int[2 + Math.round((1f - _rate) * nrows * 1.2f + 0.5f)];
  int[] soob = null;
  // Prepare output data: only the accumulator matching the problem type is allocated
  _nrows = new long[_ntrees];
  if (_classification) {
    _votes = new long[_ntrees];
    _sse = null;
  } else {
    _votes = null;
    _sse = new float[_ntrees];
  }
  // Seed for shuffling the OOB samples
  final long seedForOob = ShuffleTask.seed(cresp.cidx());
  final boolean permuteVariable = _var >= 0;

  for (int t = 0; t < _ntrees; t++) {
    // OOB RNG for this tree
    final Random rng = rngForTree(_trees[t], cresp.cidx());
    // Sample the OOB rows, reusing the same buffer where possible
    oob = ModelUtils.sampleOOBRows(nrows, _rate, rng, oob);
    final int oobCount = oob[0];
    if (permuteVariable) {
      if (soob == null || soob.length < oobCount) {
        soob = new int[oobCount];
      }
      // Shuffle the sampled rows; the shuffled copy lands in soob
      Utils.shuffleArray(oob, oobCount, soob, seedForOob, 1);
    }
    for (int k = 0; k < oobCount; k++) {
      final int row = oob[k + 1];
      // A row without a response cannot be evaluated
      if (cresp.isNA0(row)) {
        continue;
      }
      for (int col = 0; col < _ncols; col++) {
        rowData[col] = chks[col].at0(row);
      }
      // Permute the variable under test by borrowing its value from a shuffled OOB row
      if (permuteVariable) {
        rowData[_var] = chks[_var].at0(soob[k]);
      } else {
        assert soob == null;
      }
      // Score the row with this tree only
      Arrays.fill(preds, 0);
      score0(rowData, preds, _trees[t]);
      if (_classification) {
        final int predicted = ModelUtils.getPrediction(preds, rowData);
        final int actual = (int) cresp.at80(row);
        // Only correct votes are counted
        if (predicted == actual) {
          _votes[t]++;
        }
      } else {
        // Regression: the prediction lives in slot 0
        final float predicted = preds[0];
        final float actual = (float) cresp.at0(row);
        _sse[t] += (actual - predicted) * (actual - predicted);
      }
      // Count the rows that took part in the measure
      _nrows[t]++;
    }
  }
  // Clean-up
  _trees = null;
}
use of water.fvec.Chunk in project h2o-2 by h2oai.
the class VecChunkDemo method frame_001.
/**
 * Demo: parses the iris data set into the key "iris.hex", fetches the resulting Frame back from
 * the DKV and logs basic properties of each of its Vecs (length, group, NA count, domain,
 * cardinality, one sample value, chunk count and the chunk holding a given row).
 *
 * Changes relative to the previous version:
 * - the sampled row index now matches the logged message (the message said row 50 while row 51
 *   was read),
 * - the "cardinality" log label is spelled correctly and reuses the value already fetched,
 * - the parsed frame is deleted when the test finishes so "iris.hex" does not stay behind.
 */
@Test
public void frame_001() {
  // 0-based row indices used for the sample lookups below
  final long sampleRow = 50;
  final long chunkRow = 100;

  String fileName = "../smalldata/iris/iris.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
  try {
    Value v = DKV.get(okey);
    Frame f = v.get();
    Log.info("frame : " + f);
    int len = f.numCols();
    for (int i = 0; i < len; i++) {
      Log.info("vector :" + i);
      // looping through the vectors of a frame and printing specifics
      Vec vv = f.vec(i);
      Log.info("vector summary :" + vv);
      Log.info("vector length :" + vv.length());
      Log.info("vector group :" + vv.group());
      Log.info("vector na count :" + vv.naCnt());
      // null if not enum
      Log.info("vector domain null if not enum:" + vv.domain());
      int cardinality = vv.cardinality();
      Log.info("vector cardinality :" + cardinality);
      // -1 is treated as "no categorical levels"; otherwise list every label
      if (cardinality != -1) {
        for (int j = 0; j < cardinality; j++) {
          Log.info("labels :" + vv.domain(j));
        }
      }
      // gives the element at that row; count starts from 0.
      Log.info("vector value at row " + sampleRow + " :" + vv.at(sampleRow));
      int chunk_count = vv.nChunks();
      Log.info("chunk count :" + chunk_count);
      Chunk c = vv.chunkForRow(chunkRow);
      Log.info("chunk for row " + chunkRow + " :" + c);
    }
  } finally {
    // Remove the parsed frame so the demo leaves nothing behind in the store
    if (fr != null) {
      fr.delete();
    }
  }
}
Aggregations