use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.
the class AstRowSlice method apply.
/**
 * Row-slices a frame.
 *
 * <p>{@code asts[1]} is evaluated to the source frame; {@code asts[2]} is the row selector:
 * <ul>
 *   <li>{@code AstNumList}: an inclusion list/range of row indices, or (when negative) an
 *       exclusion list where value {@code -k} excludes row {@code k - 1};</li>
 *   <li>{@code AstNum}: a single row index, delegated to {@code Frame.deepSlice};</li>
 *   <li>{@code AstExec} / {@code AstId}: evaluated to a single-column frame that is passed to
 *       {@code Frame.deepSlice} as the row predicate.</li>
 * </ul>
 *
 * @return a {@code ValFrame} wrapping the sliced frame
 * @throws IllegalArgumentException if the selection is an unsorted non-negative list, contains a
 *         row index outside {@code 0 .. nrows-1}, mixes negative and non-negative indices, the
 *         predicate has more than one column, or the selector has an unsupported AST type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  Frame returningFrame;
  long nrows = fr.numRows();
  if (asts[2] instanceof AstNumList) {
    final AstNumList nums = (AstNumList) asts[2];
    if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
      throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
    // Explicit lists and negative selections are expanded up front; a plain non-negative range
    // stays unexpanded (rows == null) and is tested through nums.has() per row.
    // NOTE(review): the checks below rely on expand8Sort() returning ascending values - confirm.
    long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
    if (rows != null && rows.length > 0) {
      if (rows[0] >= 0) {
        // Positive (inclusion) list: valid row indices are 0 .. nrows-1, so the largest
        // requested index must be strictly below nrows.
        if (rows[rows.length - 1] >= nrows)
          throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
      } else {
        // Negative (exclusion) list
        if (rows[rows.length - 1] >= 0)
          throw new IllegalArgumentException("Cannot mix negative and postive row selection");
        // Invert the list to make a positive list, ignoring out-of-bounds values.
        // NOTE(review): nrows and the indices are narrowed to int here, so this path assumes
        // the frame has fewer than Integer.MAX_VALUE rows.
        BitSet bs = new BitSet((int) nrows);
        for (long row : rows) {
          // The positive index of the row to exclude
          int idx = (int) (-row - 1);
          if (idx >= 0 && idx < nrows)
            bs.set(idx);
        }
        // Every clear bit is a row that survives the exclusion.
        rows = new long[(int) nrows - bs.cardinality()];
        for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
          rows[j++] = i;
      }
    }
    final long[] ls = rows;
    returningFrame = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk[] ncs) {
        if (nums.cnt() == 0)
          return;
        if (ls != null && ls.length == 0)
          return;
        long start = cs[0].start();
        long end = start + cs[0]._len;
        // Smallest and largest selected row. For the unexpanded case nums.max() is treated as
        // an exclusive bound, hence the -1 to make it inclusive.
        long min = ls == null ? (long) nums.min() : ls[0];
        long max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
        // Skip the chunk entirely when the selection lies wholly before or after it.
        if (!(max < start || min > end)) {
          // Begin at the first selected row that can fall inside this chunk.
          long startOffset = (min > start ? min : start);
          for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
            if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
              // Copy the selected row column by column, using the add method for its chunk kind.
              for (int c = 0; c < cs.length; ++c) {
                if (cs[c] instanceof CStrChunk)
                  ncs[c].addStr(cs[c], i);
                else if (cs[c] instanceof C16Chunk)
                  ncs[c].addUUID(cs[c], i);
                else if (cs[c].isNA(i))
                  ncs[c].addNA();
                else
                  ncs[c].addNum(cs[c].atd(i));
              }
            }
          }
        }
      }
    }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
  } else if ((asts[2] instanceof AstNum)) {
    long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
    returningFrame = fr.deepSlice(rows, null);
  } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
    Frame predVec = stk.track(asts[2].exec(env)).getFrame();
    if (predVec.numCols() != 1)
      throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns. Must be a boolean Vec.");
    returningFrame = fr.deepSlice(predVec, null);
  } else
    throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
  return new ValFrame(returningFrame);
}
use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.
the class AstMatch method apply.
/**
 * Matches the values of a single categorical or string column against a lookup table.
 *
 * <p>{@code asts[1]} is the frame to match (exactly one categorical/string column),
 * {@code asts[2]} is the table (a number, number list, string or string list) and
 * {@code asts[3]} evaluates to the number used when there is no match.
 *
 * @return a {@code ValFrame} holding the single numeric output column of the match task
 * @throws IllegalArgumentException if the input is not a single categorical/string column or
 *         the table has an unsupported AST type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  if ((fr.numCols() != 1) || !(fr.anyVec().isCategorical() || fr.anyVec().isString()))
    throw new IllegalArgumentException("can only match on a single categorical/string column.");
  final MRTask<?> matchTask;
  double noMatch = asts[3].exec(env).getNum();
  if (asts[2] instanceof AstNumList) {
    matchTask = new NumMatchTask(((AstNumList) asts[2]).sort().expand(), noMatch);
  } else if (asts[2] instanceof AstNum) {
    matchTask = new NumMatchTask(new double[] { asts[2].exec(env).getNum() }, noMatch);
  } else if (asts[2] instanceof AstStrList) {
    // Sort a private copy: sorting _strs in place would permanently reorder the AST node's
    // own string list as a side effect of evaluating this expression.
    String[] values = ((AstStrList) asts[2])._strs.clone();
    Arrays.sort(values);
    matchTask = fr.anyVec().isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
  } else if (asts[2] instanceof AstStr) {
    String[] values = new String[] { asts[2].exec(env).getStr() };
    matchTask = fr.anyVec().isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
  } else
    throw new IllegalArgumentException("Expected numbers/strings. Got: " + asts[2].getClass());
  Frame result = matchTask.doAll(Vec.T_NUM, fr.anyVec()).outputFrame();
  return new ValFrame(result);
}
use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.
the class AstRectangleAssign method apply.
/**
 * Assigns a scalar or frame into a rectangular (columns x rows) region of a destination frame.
 *
 * <p>{@code asts[1]} is the destination frame, {@code asts[2]} the source value,
 * {@code asts[3]} the column selection and {@code asts[4]} the row selection (a number, a
 * number list, or an expression evaluated to a frame used as the row selector).
 *
 * @return a {@code ValFrame} wrapping the new destination frame
 * @throws IllegalArgumentException if the source is not a number, string or frame
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame dst = stk.track(asts[1].exec(env)).getFrame();
  Val vsrc = stk.track(asts[2].exec(env));
  AstParameter colSpec = (AstParameter) asts[3];

  // Resolve the column selection; for assignment an empty selection stands for every column.
  AstNumList colList = new AstNumList(colSpec.columns(dst.names()));
  if (colList.isEmpty()) {
    colList = new AstNumList(0, dst.numCols());
  }
  // R-like expansion: negative column numbers mean exclusion.
  int[] cols = AstColSlice.col_select(dst.names(), colList);

  // COW-optimized paths replace Vecs in dst._vecs and therefore need a defensive copy, while
  // update-in-place paths touch Chunks only and do not. Copy unconditionally to keep it simple.
  dst = new Frame(dst._names, dst.vecs().clone());

  final AstRoot rowSpec = asts[4];
  if (!(rowSpec instanceof AstNum || rowSpec instanceof AstNumList)) {
    // Not an explicit row list: evaluate the expression to a frame selecting the rows.
    Frame rowMask = stk.track(rowSpec.exec(env)).getFrame();
    final int maskedSrcType = vsrc.type();
    if (maskedSrcType == Val.NUM) {
      assign_frame_scalar(dst, cols, rowMask, nanToNull(vsrc.getNum()), env._ses);
    } else if (maskedSrcType == Val.STR) {
      assign_frame_scalar(dst, cols, rowMask, vsrc.getStr(), env._ses);
    } else if (maskedSrcType == Val.FRM) {
      throw H2O.unimpl();
    } else {
      throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + vsrc.getClass());
    }
    return new ValFrame(dst);
  }

  // Explicitly named rows; a lone number is wrapped into a one-element list.
  AstNumList rowList;
  if (rowSpec instanceof AstNum) {
    rowList = new AstNumList(((AstNum) rowSpec).getNum());
  } else {
    rowList = (AstNumList) rowSpec;
  }
  // An empty row list really means: all rows.
  if (rowList.isEmpty()) {
    rowList = new AstNumList(0, dst.numRows());
  }

  final int srcType = vsrc.type();
  if (srcType == Val.NUM) {
    assign_frame_scalar(dst, cols, rowList, nanToNull(vsrc.getNum()), env._ses);
  } else if (srcType == Val.STR) {
    assign_frame_scalar(dst, cols, rowList, vsrc.getStr(), env._ses);
  } else if (srcType == Val.FRM) {
    assign_frame_frame(dst, cols, rowList, vsrc.getFrame(), env._ses);
  } else {
    throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + vsrc.getClass());
  }
  return new ValFrame(dst);
}
use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.
the class AstHist method apply.
/**
 * Computes a histogram of a single, non-constant numeric column.
 *
 * <p>{@code asts[1]} is the frame (one numeric column). {@code asts[2]} selects the breaks:
 * <ul>
 *   <li>{@code AstStr}: name of a binning rule ("sturges", "rice", "sqrt", "doane", "scott",
 *       "fd"); any other name falls back to Sturges;</li>
 *   <li>{@code AstNumList}: explicit break points;</li>
 *   <li>{@code AstNum}: the number of breaks.</li>
 * </ul>
 *
 * @return a {@code ValFrame} with columns "breaks", "counts", "mids_true" and "mids"; the first
 *         row carries only a break value, its other three cells are NA
 * @throws IllegalArgumentException if the input is not a single numeric column or is constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // stack is [ ..., ary, breaks]
  Frame fr2;
  Frame f = stk.track(asts[1].exec(env)).getFrame();
  if (f.numCols() != 1)
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  Vec vec = f.anyVec();
  if (!vec.isNumeric())
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  //TODO Add case when vec is a constant numeric
  if (vec.isConst())
    throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");

  // Decode the breaks argument.
  // NOTE(review): any other AST type leaves numBreaks at -1 and still reaches the
  // "number of breaks" path below - confirm whether that should be rejected explicitly.
  AstRoot a = asts[2];
  String algo = null;
  int numBreaks = -1;
  double[] breaks = null;
  if (a instanceof AstStr)
    // Locale.ROOT keeps the rule-name matching independent of the JVM default locale
    // (e.g. a Turkish locale lower-cases 'I' to a dotless 'i', which would miss "rice").
    algo = a.str().toLowerCase(java.util.Locale.ROOT);
  else if (a instanceof AstNumList)
    breaks = ((AstNumList) a).expand();
  else if (a instanceof AstNum)
    numBreaks = (int) a.exec(env).getNum();

  AstHist.HistTask t;
  double h;
  double x1 = vec.max();
  double x0 = vec.min();
  if (breaks != null)
    t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
  else if (algo != null) {
    switch (algo) {
      case "scott":
        // special bin width computation
        h = scotts_h(vec);
        numBreaks = scott(vec, h);
        break;
      case "fd":
        // special bin width computation
        h = fds_h(vec);
        numBreaks = fd(vec, h);
        break;
      case "rice":
        numBreaks = rice(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "sqrt":
        numBreaks = sqrt(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "doane":
        numBreaks = doane(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "sturges":
      default:
        // just do sturges even if junk passed in
        numBreaks = sturges(vec);
        h = (x1 - x0) / numBreaks;
    }
    t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
  } else {
    h = (x1 - x0) / numBreaks;
    t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
  }

  // Build the result frame [breaks, counts, mids_true, mids].
  final double[] brks = t._breaks;
  final long[] cnts = t._counts;
  final double[] mids_true = t._mids;
  // "mids" are the arithmetic midpoints of consecutive breaks.
  final double[] mids = new double[t._breaks.length - 1];
  for (int i = 1; i < brks.length; ++i)
    mids[i - 1] = .5 * (t._breaks[i - 1] + t._breaks[i]);

  // Temporary all-zero Vec with one row per break; it only drives the MRTask layout.
  Vec layoutVec = Vec.makeZero(brks.length);
  try {
    fr2 = new MRTask() {
      @Override
      public void map(Chunk[] c, NewChunk[] nc) {
        int start = (int) c[0].start();
        for (int i = 0; i < c[0]._len; ++i) {
          nc[0].addNum(brks[i + start]);
          if (i == 0) {
            // The first row of a chunk gets a break value only.
            nc[1].addNA();
            nc[2].addNA();
            nc[3].addNA();
          } else {
            nc[1].addNum(cnts[(i - 1) + start]);
            nc[2].addNum(mids_true[(i - 1) + start]);
            nc[3].addNum(mids[(i - 1) + start]);
          }
        }
      }
    }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
  } finally {
    // Drop the temporary Vec even when the task fails, so it is not left behind.
    layoutVec.remove();
  }
  return new ValFrame(fr2);
}
use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.
the class AstImpute method apply.
// (h2o.impute data col method combine_method groupby groupByFrame values)
/**
 * Imputes missing values of a frame in place.
 *
 * <p>Arguments: {@code asts[1]} frame to impute; {@code asts[2]} column index, or -1 for all
 * columns; {@code asts[3]} method name (mean, median, mode, ffill, bfill); {@code asts[4]}
 * combine method used by median; {@code asts[5]} group-by columns (number, number list or
 * string list, may be empty); {@code asts[6]} a pre-computed group-by frame, or {@code _} for
 * none; {@code asts[7]} optional explicit fill values (number or number list).
 *
 * @return without grouping, a {@code ValNums} of the per-column fill values (NaN marks a column
 *         left untouched); with grouping, a {@code ValFrame} of the per-group imputation frame
 * @throws IllegalArgumentException if the column index is outside {@code -1 .. numCols-1}, the
 *         method name is unknown, the group-by argument has an unsupported type, or the supplied
 *         group-by frame is ambiguous
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // Argument parsing and sanity checking
  // Whole frame being imputed
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  // Column within frame being imputed; -1 selects every column
  final int col = (int) asts[2].exec(env).getNum();
  // Reject indices below -1 as well, so they fail here rather than inside fr.vec(col).
  if (col < -1 || col >= fr.numCols())
    throw new IllegalArgumentException("Column not -1 or in range 0 to " + fr.numCols());
  final boolean doAllVecs = col == -1;
  final Vec vec = doAllVecs ? null : fr.vec(col);

  // Technique used for imputation. Locale.ROOT keeps keyword matching independent of the JVM
  // default locale (a Turkish locale upper-cases 'i' to a dotted capital I).
  AstRoot method = null;
  boolean ffill0 = false, bfill0 = false;
  switch (asts[3].exec(env).getStr().toUpperCase(java.util.Locale.ROOT)) {
    case "MEAN":
      method = new AstMean();
      break;
    case "MEDIAN":
      method = new AstMedian();
      break;
    case "MODE":
      method = new AstMode();
      break;
    case "FFILL":
      ffill0 = true;
      break;
    case "BFILL":
      bfill0 = true;
      break;
    default:
      throw new IllegalArgumentException("Method must be one of mean, median or mode");
  }

  // Only for median, how is the median computed on even sample sizes?
  QuantileModel.CombineMethod combine =
      QuantileModel.CombineMethod.valueOf(asts[4].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));

  // Group-by columns. Empty is allowed, and perfectly normal.
  AstRoot ast = asts[5];
  AstNumList by2;
  if (ast instanceof AstNumList)
    by2 = (AstNumList) ast;
  else if (ast instanceof AstNum)
    by2 = new AstNumList(((AstNum) ast).getNum());
  else if (ast instanceof AstStrList) {
    // Translate column names to indices and sort them.
    String[] names = ((AstStrList) ast)._strs;
    double[] list = new double[names.length];
    int i = 0;
    for (String name : names)
      list[i++] = fr.find(name);
    Arrays.sort(list);
    by2 = new AstNumList(list);
  } else
    throw new IllegalArgumentException("Requires a number-list, but found a " + ast.getClass());

  Frame groupByFrame = asts[6].str().equals("_") ? null : stk.track(asts[6].exec(env)).getFrame();

  // Optional user-supplied fill values
  AstRoot vals = asts[7];
  AstNumList values;
  if (vals instanceof AstNumList)
    values = (AstNumList) vals;
  else if (vals instanceof AstNum)
    values = new AstNumList(((AstNum) vals).getNum());
  else
    values = null;

  boolean doGrpBy = !by2.isEmpty() || groupByFrame != null;

  if (!doGrpBy) {
    // Skip the grouping work
    if (ffill0 || bfill0) {
      // TODO: forward/backward fill requires chk.previousNonNA and chk.nextNonNA style methods
      // (which may go across chunk boundaries).
      throw H2O.unimpl("No ffill or bfill imputation supported");
    }
    final double[] res = values == null ? new double[fr.numCols()] : values.expand();
    if (values == null) {
      // fill up res if no values supplied by the user, common case
      if (doAllVecs) {
        // Numeric columns use the mean, categorical columns the index of the largest bin.
        // NOTE(review): columns that are neither keep the array default 0.0 (not NaN), so the
        // task below fills their NAs with 0 - confirm this is intended.
        for (int i = 0; i < res.length; ++i) {
          if (fr.vec(i).isNumeric() || fr.vec(i).isCategorical())
            res[i] = fr.vec(i).isNumeric() ? fr.vec(i).mean() : ArrayUtils.maxIndex(fr.vec(i).bins());
        }
      } else {
        // Only the chosen column gets a fill value; NaN marks every other column as "skip".
        Arrays.fill(res, Double.NaN);
        if (method instanceof AstMean)
          res[col] = vec.mean();
        if (method instanceof AstMedian)
          res[col] = AstMedian.median(new Frame(vec), combine);
        if (method instanceof AstMode)
          res[col] = AstMode.mode(vec);
      }
    }
    // Overwrite each NA with its column's fill value, in place.
    new MRTask() {
      @Override
      public void map(Chunk[] cs) {
        int len = cs[0]._len;
        // run down each chk
        for (int c = 0; c < cs.length; ++c) {
          if (!Double.isNaN(res[c])) {
            for (int row = 0; row < len; ++row) {
              if (cs[c].isNA(row))
                cs[c].set(row, res[c]);
            }
          }
        }
      }
    }.doAll(fr);
    return new ValNums(res);
  }

  // Grouped imputation: compute the imputed value per group. Empty groups are allowed and OK.
  Frame imputes = groupByFrame;
  if (imputes == null) {
    // Build and run a GroupBy command
    AstGroup ast_grp = new AstGroup();
    // simple case where user specified a column... col == -1 means do all columns
    if (doAllVecs) {
      // Three AST slots (aggregate, column, NA handling) per aggregated column, after the
      // three leading slots (group op, frame, by-columns).
      AstRoot[] aggs = new AstRoot[(int) (3 + 3 * (fr.numCols() - by2.cnt()))];
      aggs[0] = ast_grp;
      aggs[1] = new AstFrame(fr);
      aggs[2] = by2;
      int c = 3;
      for (int i = 0; i < fr.numCols(); ++i) {
        if (!by2.has(i) && (fr.vec(i).isCategorical() || fr.vec(i).isNumeric())) {
          aggs[c] = fr.vec(i).isNumeric() ? new AstMean() : new AstMode();
          aggs[c + 1] = new AstNumList(i, i + 1);
          aggs[c + 2] = new AstStr("rm");
          c += 3;
        }
      }
      imputes = ast_grp.apply(env, stk, aggs).getFrame();
    } else
      imputes = ast_grp.apply(env, stk, new AstRoot[] { ast_grp, new AstFrame(fr), by2,
          method, new AstNumList(col, col + 1), new AstStr("rm") }).getFrame();
  }
  // >2 makes it ambiguous which columns are groupby cols and which are aggs, throw IAE
  if (by2.isEmpty() && imputes.numCols() > 2)
    throw new IllegalArgumentException("Ambiguous group-by frame. Supply the `by` columns to proceed.");
  final int[] bycols0 = ArrayUtils.seq(0, Math.max((int) by2.cnt(), 1));
  final IcedHashMap<AstGroup.G, Freezable[]> final_group_impute_map =
      new Gather(by2.expand4(), bycols0, fr.numCols(), col).doAll(imputes)._group_impute_map;

  // Now walk over the data, replace NAs with the imputed results
  if (by2.isEmpty()) {
    // No by-columns were given: derive them from the supplied group-by frame by looking up
    // each of its columns except the last, by name, in the data frame.
    int[] byCols = new int[imputes.numCols() - 1];
    for (int i = 0; i < byCols.length; ++i)
      byCols[i] = fr.find(imputes.name(i));
    by2 = new AstNumList(byCols);
  }
  final int[] bycols = by2.expand4();
  new MRTask() {
    @Override
    public void map(Chunk[] cs) {
      Set<Integer> _bycolz = new HashSet<>();
      for (int b : bycols)
        _bycolz.add(b);
      AstGroup.G g = new AstGroup.G(bycols.length, null);
      for (int row = 0; row < cs[0]._len; row++) {
        for (int c = 0; c < cs.length; ++c) {
          // Group-by columns themselves are never imputed.
          if (!_bycolz.contains(c) && cs[c].isNA(row))
            cs[c].set(row, ((IcedDouble) final_group_impute_map.get(g.fill(row, cs, bycols))[c])._val);
        }
      }
    }
  }.doAll(fr);
  return new ValFrame(imputes);
}
Aggregations