Use of water.fvec.Vec in project h2o-3 by h2oai.
Class GLRM, method initLoss.
/**
 * Checks all loss-related parameters and populates the {@code _lossFunc} array
 * with one entry for each of the {@code _ncolA} columns.
 *
 * Checks that depend only on the parameters run first. If no training frame is
 * available the method returns right after them, leaving {@code _lossFunc} untouched.
 * Every problem found is reported through {@code error(...)}.
 */
private void initLoss() {
  final int lossCount = (_parms._loss_by_col == null) ? 0 : _parms._loss_by_col.length;
  final int lossIdxCount = (_parms._loss_by_col_idx == null) ? 0 : _parms._loss_by_col_idx.length;

  // Stage 1: parameter checks that need no access to the training frame.
  if (_parms._period <= 0) {
    error("_period", "_period must be a positive integer");
  }
  if (!_parms._loss.isForNumeric()) {
    error("_loss", _parms._loss + " is not a numeric loss function");
  }
  if (!_parms._multi_loss.isForCategorical()) {
    error("_multi_loss", _parms._multi_loss + " is not a multivariate loss function");
  }
  // An explicit index array, when given, must pair up one-to-one with the loss array.
  if (lossIdxCount > 0 && lossCount != lossIdxCount) {
    error("_loss_by_col", "Sizes of arrays _loss_by_col and _loss_by_col_idx must be the same");
  }
  if (_train == null) {
    return;
  }

  _binaryColumnIndices = new ArrayList<Integer>();

  // Stage 2: give every column its default loss.
  // Note: right now `.isCategorical()` returns true for binary columns, so such columns
  // receive the categorical loss by default and will get expanded into 2 columns.
  _lossFunc = new GlrmLoss[_ncolA];
  for (int col = 0; col < _ncolA; col++) {
    if (_train.vec(col).isCategorical()) {
      _lossFunc[col] = _parms._multi_loss;
    } else {
      _lossFunc[col] = _parms._loss;
    }
  }

  // Column names of the user-supplied frame, and the names of the frame actually used here.
  final String[] userFrameNames = _parms.train().names();
  final ArrayList<String> trainFrameNames = new ArrayList<String>(Arrays.asList(_train._names));

  // Stage 3: apply per-column overrides from _loss_by_col, if any were supplied.
  if (lossCount > 0) {
    if (lossIdxCount == 0) {
      // No explicit indices: the override list must cover every column of the input frame.
      if (lossCount != userFrameNames.length) {
        error("_loss_by_col", "Number of override loss functions should be the same as the " + "number of columns in the input frame; or otherwise an explicit _loss_by_col_idx should be " + "provided.");
      } else {
        assignLossByCol(lossCount, trainFrameNames, userFrameNames);
      }
    } else if (lossIdxCount == lossCount) {
      assignLossByCol(lossCount, trainFrameNames, userFrameNames);
    }
    // A size mismatch between the two arrays was already reported in stage 1.
  }

  // Stage 4: make sure each column's loss is compatible with the column's actual type.
  for (int col = 0; col < _ncolA; col++) {
    final Vec column = _train.vec(col);
    final GlrmLoss loss = _lossFunc[col];

    if (column.isNumeric()) {
      if (column.isBinary()) {
        // Binary numeric column: either a binary or a numeric loss is acceptable.
        if (!(loss.isForBinary() || loss.isForNumeric())) {
          error("_loss_by_col", "Loss function " + loss + " cannot be applied to binary column " + col);
        }
      } else if (!loss.isForNumeric()) {
        // Non-binary numeric column: only a numeric loss is acceptable.
        error("_loss_by_col", "Loss function " + loss + " cannot be applied to numeric column " + col);
      }
    } else if (column.isCategorical()) {
      if (!column.isBinary()) {
        // Categorical column with more than two levels: only a categorical loss is acceptable.
        if (!loss.isForCategorical()) {
          error("_loss_by_col", "Loss function " + loss + " cannot be applied to categorical column " + col);
        }
      } else if (loss.isForBinary()) {
        // Binary categorical column paired with a binary loss: remember its index.
        _binaryColumnIndices.add(col);
      } else if (!loss.isForCategorical()) {
        error("_loss_by_col", "Loss function " + loss + " cannot be applied to binary column " + col);
      }
    }

    // The Periodic loss needs its period supplied; the single _parms._period value is
    // passed for every column that uses it.
    if (loss == GlrmLoss.Periodic) {
      loss.setParameters(_parms._period);
    }
  }
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Class Storage, method toFrame.
/**
 * Builds a Frame from a Matrix and stores it in the DKV under the supplied key.
 * @param m Matrix to convert
 * @param key Key for output Frame
 * @return Reference to Frame (which is also in DKV)
 */
static Frame toFrame(Matrix m, Key key) {
  final int ncols = m.cols();

  // The per-chunk row-count exponent is the default chunk-size exponent reduced by
  // floor(log2(ncols)), and never less than 1.
  final int log2Cols = (int) Math.floor(Math.log(ncols) / Math.log(2.));
  final int logRowsPerChunk = Math.max(1, FileVec.DFLT_LOG2_CHUNK_SIZE - log2Cols);

  // One Vec per matrix column, created by makeCon with value 0 and m.rows() rows
  // (presumably a constant column that FrameFiller then overwrites; verify against makeCon).
  final Vec[] columns = new Vec[ncols];
  for (int col = 0; col < columns.length; col++) {
    columns[col] = makeCon(0, m.rows(), logRowsPerChunk);
  }

  final Frame target = new Frame(key, columns, true);
  final Frame filled = new FrameFiller(m).doAll(target)._fr;
  DKV.put(key, filled);
  return filled;
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Class AggregatorModel, method scoreExemplarMembers.
/**
 * Builds a Frame from the training-frame rows whose exemplar assignment equals the
 * gid of the exemplar at {@code exemplarIdx}, and stores it in the DKV.
 *
 * @param destination_key key given to the output Frame
 * @param exemplarIdx index into {@code _exemplars} (and {@code _counts}) of the exemplar of interest
 * @return the resulting Frame; its row count is asserted to equal {@code _counts[exemplarIdx]}
 */
@Override
public Frame scoreExemplarMembers(Key<Frame> destination_key, final int exemplarIdx) {
  // Temporary flag column: 1 where the assignment value equals the requested exemplar's gid, else 0.
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c, NewChunk nc) {
      for (int i = 0; i < c._len; ++i) nc.addNum(c.at8(i) == _exemplars[exemplarIdx].gid ? 1 : 0, 0);
    }
  }.doAll(Vec.T_NUM, new Frame(new Vec[] { _exemplar_assignment_vec_key.get() })).outputFrame().anyVec();
  Frame orig = _parms.train();
  // Training columns followed by the flag column; Frame.DeepSelect presumably keeps the
  // rows whose trailing "predicate" column is non-zero (confirm against Frame.DeepSelect).
  Frame ff = new Frame(orig.names(), orig.vecs());
  ff.add("predicate", booleanCol);
  Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
  FrameUtils.shrinkDomainsToObservedSubset(res);
  DKV.put(res);
  // Release the temporary column before asserting so that a failed assertion does not leak it.
  booleanCol.remove();
  assert (res.numRows() == _counts[exemplarIdx]);
  return res;
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Class AggregatorModel, method createFrameOfExemplars.
/**
 * Builds a Frame holding the exemplar rows of {@code orig}, adds a "counts" column
 * filled from {@code _counts}, and stores the result in the DKV.
 *
 * @param orig frame the exemplar rows are selected from
 * @param destination_key key under which the resulting Frame is stored
 * @return the resulting Frame; its row count is asserted to equal {@code _exemplars.length}
 */
public Frame createFrameOfExemplars(Frame orig, Key destination_key) {
  // Each exemplar's gid is used below as a global row number.
  final long[] keep = new long[_exemplars.length];
  for (int i = 0; i < keep.length; ++i) keep[i] = _exemplars[i].gid;
  Vec exAssignment = _exemplar_assignment_vec_key.get();
  // preserve the original row order
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c, Chunk c2) {
      // Row range [start, end) covered by this chunk.
      final long start = c.start();
      final long end = start + c._len;
      for (int i = 0; i < keep.length; ++i) {
        // Flag only the gids that fall inside this chunk, at their chunk-local offset.
        if (keep[i] >= start && keep[i] < end) {
          c2.set((int) (keep[i] - start), 1);
        }
      }
    }
  }.doAll(new Frame(new Vec[] { exAssignment, exAssignment.makeZero() }))._fr.vec(1);
  // Original columns followed by the flag column; Frame.DeepSelect presumably keeps the
  // rows whose trailing "predicate" column is non-zero (confirm against Frame.DeepSelect).
  Frame ff = new Frame(orig.names(), orig.vecs());
  ff.add("predicate", booleanCol);
  Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
  FrameUtils.shrinkDomainsToObservedSubset(res);
  booleanCol.remove();
  assert (res.numRows() == _exemplars.length);
  // Extra column with one value from _counts per output row.
  Vec cnts = res.anyVec().makeZero();
  // try-with-resources closes the writer even if a set() call throws.
  try (Vec.Writer vw = cnts.open()) {
    for (int i = 0; i < _counts.length; ++i) vw.set(i, _counts[i]);
  }
  res.add("counts", cnts);
  DKV.put(destination_key, res);
  return res;
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Class DHistogram, method initialHist.
/**
 * Fills {@code hs[0..ncols)} with initial histograms whose ranges come from each
 * column's min/max (or its domain size for categoricals).
 *
 * A slot is set to null when the column is entirely NA, when its min equals its max,
 * or when building the histogram raises StepOutOfRangeException (a warning is logged
 * in that last case).
 *
 * @return the same {@code hs} array that was passed in
 */
public static DHistogram[] initialHist(Frame fr, int ncols, int nbins, DHistogram[] hs, long seed, SharedTreeModel.SharedTreeParameters parms, Key[] globalQuantilesKey) {
  final Vec[] columns = fr.vecs();
  for (int c = 0; c < ncols; c++) {
    final Vec v = columns[c];
    final boolean categorical = v.isCategorical();

    final double minIn; // inclusive vector min
    final double maxIn; // inclusive vector max
    final double maxEx; // smallest exclusive max
    if (categorical) {
      minIn = 0;
      maxIn = v.domain().length - 1;
      maxEx = v.domain().length;
    } else {
      // Clamp to the finite double range.
      minIn = Math.max(v.min(), -Double.MAX_VALUE);
      maxIn = Math.min(v.max(), Double.MAX_VALUE);
      maxEx = find_maxEx(maxIn, v.isInt() ? 1 : 0);
    }

    final long vlen = v.length();
    try {
      final boolean nothingToSplit = v.naCnt() == vlen || v.min() == v.max();
      if (nothingToSplit) {
        hs[c] = null;
      } else {
        // Column type code passed to make(): 2 = categorical, 1 = integer, 0 = anything else.
        final byte typeCode;
        if (categorical) {
          typeCode = (byte) 2;
        } else if (v.isInt()) {
          typeCode = (byte) 1;
        } else {
          typeCode = (byte) 0;
        }
        hs[c] = make(fr._names[c], nbins, typeCode, minIn, maxEx, seed, parms, globalQuantilesKey[c]);
      }
    } catch (StepOutOfRangeException e) {
      hs[c] = null;
      Log.warn("Column " + fr._names[c] + " with min = " + v.min() + ", max = " + v.max() + " has step out of range (" + e.getMessage() + ") and is ignored.");
    }
    assert (hs[c] == null || vlen > 0);
  }
  return hs;
}
Aggregations