use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstRowSlice method apply.
/**
 * Row-slices the frame produced by {@code asts[1]} using the selector in {@code asts[2]}.
 *
 * <p>Supported selectors:
 * <ul>
 *   <li>{@code AstNumList}: a number list. Non-negative entries select rows (they must already be
 *       sorted); negative entries exclude rows, where {@code -k} refers to 0-based row
 *       {@code k - 1}. The selected rows are copied into a new frame by an {@code MRTask}.</li>
 *   <li>{@code AstNum}: a single row index, delegated to {@code Frame.deepSlice}.</li>
 *   <li>{@code AstExec} / {@code AstId}: evaluated to a single-column frame that is passed to
 *       {@code Frame.deepSlice} as the row predicate.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the sub-expressions
 * @param stk  stack helper; evaluated frames are registered with it via {@code track}
 * @param asts argument ASTs: {@code asts[1]} is the frame, {@code asts[2]} the row selector
 * @return the sliced frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the number list is an unsorted inclusion list, mixes negative
 *         and positive indices, has an inclusion index beyond the frame, if the predicate frame
 *         does not have exactly one column, or if the selector is of an unsupported AST type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  Frame returningFrame;
  long nrows = fr.numRows();
  if (asts[2] instanceof AstNumList) {
    final AstNumList nums = (AstNumList) asts[2];
    if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
      throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
    // Explicit lists and lists containing negative values are expanded to a sorted long[];
    // otherwise rows stays null and membership is answered by nums.has(...) inside map().
    long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
    if (rows != null) {
      if (rows.length == 0) {
        // Empty inclusion list: nothing to validate, map() will emit no rows.
      } else if (rows[0] >= 0) {
        // Positive (inclusion) list: rows is sorted, so the last entry is the largest.
        // NOTE(review): '>' accepts an index equal to nrows although the message states the
        // maximum is nrows - 1; left unchanged because callers may rely on it - confirm.
        if (rows[rows.length - 1] > nrows)
          throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
      } else {
        // Negative (exclusion) list: rows is sorted, so a non-negative last entry means mixing.
        if (rows[rows.length - 1] >= 0)
          throw new IllegalArgumentException("Cannot mix negative and positive row selection");
        // Invert the list to make a positive list, ignoring out-of-bounds values
        BitSet bs = new BitSet((int) nrows);
        for (long row : rows) {
          // The positive index. Kept as a long until after the range check so that a huge
          // out-of-bounds value cannot wrap around into a valid int row index.
          long idx = -row - 1;
          if (idx >= 0 && idx < nrows)
            // Mark this row as EXCLUDED
            bs.set((int) idx);
        }
        // Every clear bit is a row to keep.
        rows = new long[(int) nrows - bs.cardinality()];
        for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
          rows[j++] = i;
      }
    }
    final long[] ls = rows;
    returningFrame = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk[] ncs) {
        if (nums.cnt() == 0)
          return;
        if (ls != null && ls.length == 0)
          return;
        // Row range covered by this chunk: [start, end)
        long start = cs[0].start();
        long end = start + cs[0]._len;
        // Smallest and largest selected row. Original note: nums.max() is converted from an
        // exclusive max to an inclusive max when stride == 1.
        long min = ls == null ? (long) nums.min() : ls[0], max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
        // Skip chunks that lie entirely before or entirely after the selected range.
        if (!(max < start || min > end)) {
          // Begin scanning at the first row of this chunk that can possibly be selected.
          long startOffset = (min > start ? min : start);
          for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
            if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
              // Copy the selected row column by column, dispatching on the chunk type.
              for (int c = 0; c < cs.length; ++c) {
                if (cs[c] instanceof CStrChunk)
                  ncs[c].addStr(cs[c], i);
                else if (cs[c] instanceof C16Chunk)
                  ncs[c].addUUID(cs[c], i);
                else if (cs[c].isNA(i))
                  ncs[c].addNA();
                else
                  ncs[c].addNum(cs[c].atd(i));
              }
            }
          }
        }
      }
    }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
  } else if ((asts[2] instanceof AstNum)) {
    // Single row index.
    long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
    returningFrame = fr.deepSlice(rows, null);
  } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
    // Expression or identifier: evaluate it and use the single resulting column as the predicate.
    Frame predVec = stk.track(asts[2].exec(env)).getFrame();
    if (predVec.numCols() != 1)
      throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns. Must be a boolean Vec.");
    returningFrame = fr.deepSlice(predVec, null);
  } else
    throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
  return new ValFrame(returningFrame);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstScale method apply.
/**
 * Shifts and scales every column of the frame produced by {@code asts[1]}, overwriting the
 * frame's values in place with {@code (value - shift[col]) * factor[col]}.
 *
 * <p>{@code asts[2]} supplies the per-column shift: a number list with one entry per column, or a
 * flag where 0 means "no shift" and 1 means "use {@code fr.means()}".
 * {@code asts[3]} supplies the per-column factor: a number list with one entry per column, a
 * frame whose first vec is converted with {@code toArray}, or a flag where 0 means "factor 1.0"
 * and 1 means "use {@code fr.mults()}".
 *
 * @param env  execution environment used to evaluate the sub-expressions
 * @param stk  stack helper; the input frame is registered with it via {@code track}
 * @param asts argument ASTs: frame, shift specification, factor specification
 * @return the same (now modified) frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if a number list does not have one entry per column or a flag
 *         is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame frame = stk.track(asts[1].exec(env)).getFrame();
  final int width = frame.numCols();

  // Resolve the per-column value that gets subtracted.
  final double[] shifts;
  if (asts[2] instanceof AstNumList) {
    shifts = ((AstNumList) asts[2]).expand();
    if (shifts.length != width) {
      throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
    }
  } else {
    final double centerFlag = asts[2].exec(env).getNum();
    if (centerFlag != 0 && centerFlag != 1) {
      throw new IllegalArgumentException("Only true or false allowed");
    }
    // Flag 0: leave values un-shifted (all zeros). Flag 1: shift by the frame's means.
    shifts = (centerFlag == 0) ? new double[width] : frame.means();
  }

  // Resolve the per-column multiplier.
  final double[] factors;
  if (asts[3] instanceof AstNumList) {
    factors = ((AstNumList) asts[3]).expand();
    if (factors.length != width) {
      throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
    }
  } else {
    final Val scaleArg = asts[3].exec(env);
    if (scaleArg instanceof ValFrame) {
      factors = toArray(scaleArg.getFrame().anyVec());
    } else {
      final double scaleFlag = scaleArg.getNum();
      if (scaleFlag == 0) {
        // Flag 0: multiply by one, i.e. no scaling.
        factors = new double[width];
        Arrays.fill(factors, 1.0);
      } else if (scaleFlag == 1) {
        factors = frame.mults();
      } else {
        throw new IllegalArgumentException("Only true or false allowed");
      }
    }
  }

  // Rewrite every cell of every column in place.
  new MRTask() {
    @Override
    public void map(Chunk[] cs) {
      for (int col = 0; col < cs.length; col++) {
        final Chunk chunk = cs[col];
        for (int r = 0; r < chunk._len; r++) {
          chunk.set(r, (chunk.atd(r) - shifts[col]) * factors[col]);
        }
      }
    }
  }.doAll(frame);
  return new ValFrame(frame);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstSetDomain method apply.
/**
 * Replaces the domain (level names) of a single categorical column.
 *
 * <p>{@code asts[1]} must evaluate to a one-column categorical frame and {@code asts[2]} is the
 * string list holding the new level names. When the new list has a different length than the
 * current domain, the values actually present in the column are collected, their count must match
 * the number of new names, and every non-NA cell is rewritten to the position of its value within
 * that collected set before the new domain is installed.
 *
 * @param env  execution environment used to evaluate the frame expression
 * @param stk  stack helper; the input frame is registered with it via {@code track}
 * @param asts argument ASTs: frame, string list of new level names
 * @return the input frame (its vec now carries the new domain) wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the frame is not a single categorical column, if the number
 *         of new names does not match the number of collected levels, or if a cell value is
 *         missing from the collected levels
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame frame = stk.track(asts[1].exec(env)).getFrame();
  final String[] newLevels = ((AstStrList) asts[2])._strs;

  if (frame.numCols() != 1) {
    throw new IllegalArgumentException("Must be a single column. Got: " + frame.numCols() + " columns.");
  }
  final Vec column = frame.anyVec();
  if (!column.isCategorical()) {
    throw new IllegalArgumentException("Vector must be a factor column. Got: " + column.get_type_str());
  }

  final boolean sizeDiffers = newLevels != null && newLevels.length != column.domain().length;
  if (sizeDiffers) {
    // Re-collect the levels that are really used and verify their count matches the new names.
    VecUtils.CollectDomainFast collector = new VecUtils.CollectDomainFast((int) column.max());
    collector.doAll(column);
    final long[] usedCodes = collector.domain();
    if (usedCodes.length != newLevels.length) {
      throw new IllegalArgumentException("Number of replacement factors must equal current number of levels. Current number of levels: " + usedCodes.length + " != " + newLevels.length);
    }
    // Re-encode each cell as the index of its old code within the collected codes.
    new MRTask() {
      @Override
      public void map(Chunk c) {
        for (int row = 0; row < c._len; ++row) {
          if (c.isNA(row)) {
            continue;
          }
          long position = Arrays.binarySearch(usedCodes, c.at8(row));
          if (position < 0) {
            throw new IllegalArgumentException("Could not find the categorical value!");
          }
          c.set(row, position);
        }
      }
    }.doAll(column);
  }

  column.setDomain(newLevels);
  DKV.put(column);
  return new ValFrame(frame);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstBinOp method frame_op_frame.
/**
 * Applies the binary operator element-wise to two frames.
 *
 * <p>Dispatch rules, in order:
 * <ul>
 *   <li>Row counts differ: allowed only when one side has exactly one row and both sides have
 *       the same column count; handled by {@code frame_op_row}.</li>
 *   <li>Either frame has zero columns: that frame is returned unchanged.</li>
 *   <li>Exactly one side has a single column and the other has several: the single column is
 *       auto-widened via {@code vec_op_frame} / {@code frame_op_vec}.</li>
 *   <li>Otherwise the column counts must match and the operator is applied column by column,
 *       using {@code str_op} when the left column is a string column and {@code op} otherwise.</li>
 * </ul>
 *
 * @param lf left operand frame
 * @param rt right operand frame
 * @return the result of {@code cleanCategorical} applied to the left frame and the numeric
 *         output frame, or the result of one of the shortcuts above
 * @throws IllegalArgumentException if the row or column counts are incompatible
 */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
  final long leftRows = lf.numRows();
  final long rightRows = rt.numRows();
  final int leftCols = lf.numCols();
  final int rightCols = rt.numCols();

  if (leftRows != rightRows) {
    // The only tolerated mismatch is broadcasting a single row of data across a frame.
    if (leftRows != 1 && rightRows != 1) {
      throw new IllegalArgumentException("Frames must have same rows, found " + leftRows + " rows and " + rightRows + " rows.");
    }
    if (leftCols != rightCols) {
      throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
    }
    return frame_op_row(lf, rt);
  }

  // Zero-column shortcuts.
  if (leftCols == 0) {
    return new ValFrame(lf);
  }
  if (rightCols == 0) {
    return new ValFrame(rt);
  }
  // Single-column auto-widening.
  if (leftCols == 1 && rightCols > 1) {
    return vec_op_frame(lf.vecs()[0], rt);
  }
  if (rightCols == 1 && leftCols > 1) {
    return frame_op_vec(lf, rt.vecs()[0]);
  }
  if (leftCols != rightCols) {
    throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
  }

  // Input chunks are the left columns followed by the right columns; one output per column pair.
  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final int width = cress.length;
      assert width * 2 == chks.length;
      final BufferedString leftBuf = new BufferedString();
      final BufferedString rightBuf = new BufferedString();
      for (int col = 0; col < width; col++) {
        final Chunk left = chks[col];
        final Chunk right = chks[col + width];
        final NewChunk out = cress[col];
        if (left.vec().isString()) {
          for (int row = 0; row < left._len; row++) {
            out.addNum(str_op(left.atStr(leftBuf, row), right.atStr(rightBuf, row)));
          }
        } else {
          for (int row = 0; row < left._len; row++) {
            out.addNum(op(left.atd(row), right.atd(row)));
          }
        }
      }
    }
  }.doAll(leftCols, Vec.T_NUM, new Frame(lf).add(rt)).outputFrame(lf._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(lf, res);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstBinOp method frame_op_vec.
/**
 * Applies the binary operator between every column of {@code fr} (left operand) and the single
 * column {@code vec} (right operand), producing one numeric output column per input column.
 *
 * <p>The caller is expected to have already verified that the row counts match and that the
 * frame is non-empty.
 *
 * @param fr  frame whose columns are the left operands
 * @param vec column used as the right operand for every frame column
 * @return the result of {@code cleanCategorical} applied to {@code fr} and the numeric output
 */
private ValFrame frame_op_vec(Frame fr, Vec vec) {
  // Work on a copy of the frame with the vec appended as the last column.
  final Frame widened = new Frame(fr);
  widened.add("", vec);

  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final int width = cress.length;
      assert width == chks.length - 1;
      // The appended vec is the chunk right after the frame's own columns.
      final Chunk right = chks[width];
      for (int col = 0; col < width; col++) {
        final Chunk left = chks[col];
        final NewChunk out = cress[col];
        for (int row = 0; row < left._len; row++) {
          out.addNum(op(left.atd(row), right.atd(row)));
        }
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, widened).outputFrame(fr._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(fr, res);
}
Aggregations