use of water.rapids.ast.params.AstStr in project h2o-3 by h2oai.
the class AstMatch method apply.
/**
 * Runs a match task over a single categorical/string column.
 *
 * <p>Arguments: {@code asts[1]} evaluates to the frame to search, {@code asts[2]} is the lookup
 * table (a number, number list, string, or string list) and {@code asts[3]} evaluates to the
 * number passed to the match task as its no-match value.
 *
 * @return a frame holding the single numeric output column produced by the match task
 * @throws IllegalArgumentException if the frame is not exactly one categorical/string column, or
 *     if the lookup table is not made of numbers or strings
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  if ((fr.numCols() != 1) || !(fr.anyVec().isCategorical() || fr.anyVec().isString()))
    throw new IllegalArgumentException("can only match on a single categorical/string column.");
  final Vec vec = fr.anyVec();
  final boolean isString = vec.isString();
  final double noMatch = asts[3].exec(env).getNum();
  final AstRoot table = asts[2];
  final MRTask<?> matchTask;
  if (table instanceof AstNumList) {
    matchTask = new NumMatchTask(((AstNumList) table).sort().expand(), noMatch);
  } else if (table instanceof AstNum) {
    matchTask = new NumMatchTask(new double[] {table.exec(env).getNum()}, noMatch);
  } else if (table instanceof AstStrList) {
    // Sort a private copy: sorting _strs directly would reorder the strings held by the AST node.
    String[] values = ((AstStrList) table)._strs.clone();
    Arrays.sort(values);
    matchTask = isString ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
  } else if (table instanceof AstStr) {
    String[] values = new String[] {table.exec(env).getStr()};
    matchTask = isString ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
  } else {
    throw new IllegalArgumentException("Expected numbers/strings. Got: " + table.getClass());
  }
  Frame result = matchTask.doAll(Vec.T_NUM, vec).outputFrame();
  return new ValFrame(result);
}
use of water.rapids.ast.params.AstStr in project h2o-3 by h2oai.
the class RapidsTest method astStr_ok.
/**
 * Asserts that {@code expr} parses to an {@code AstStr} whose string value equals
 * {@code expected}.
 */
private static void astStr_ok(String expr, String expected) {
  final AstRoot parsed = Rapids.parse(expr);
  assertTrue(parsed instanceof AstStr);
  final AstStr asStr = (AstStr) parsed;
  assertEquals(expected, asStr.getStr());
}
use of water.rapids.ast.params.AstStr in project h2o-3 by h2oai.
the class AstHist method apply.
/**
 * Builds a histogram of a single, non-constant numeric column.
 *
 * <p>{@code asts[1]} evaluates to the frame holding the column. {@code asts[2]} selects the
 * breaks and may be:
 * <ul>
 *   <li>a string naming a rule ({@code sturges}, {@code rice}, {@code sqrt}, {@code doane},
 *       {@code scott}, {@code fd}); any other string falls back to {@code sturges},</li>
 *   <li>a number list giving the breaks explicitly, or</li>
 *   <li>a number giving the count of breaks.</li>
 * </ul>
 *
 * @return a frame with columns {@code breaks}, {@code counts}, {@code mids_true} and
 *     {@code mids}; the first row carries NA in every column except {@code breaks}
 * @throws IllegalArgumentException if the input is not a single numeric column, if the column is
 *     constant, or if the breaks argument is not a string, number list, or number
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // stack is [ ..., ary, breaks]
  Frame f = stk.track(asts[1].exec(env)).getFrame();
  if (f.numCols() != 1)
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  Vec vec = f.anyVec();
  if (!vec.isNumeric())
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  // TODO Add case when vec is a constant numeric
  if (vec.isConst())
    throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");

  // handle the breaks
  AstRoot a = asts[2];
  String algo = null;
  int numBreaks = -1;
  double[] breaks = null;
  if (a instanceof AstStr)
    // Locale.ROOT keeps the rule names matchable under locales with special case mappings
    // (e.g. Turkish, where "RICE" would otherwise lower-case to "rıce").
    algo = a.str().toLowerCase(java.util.Locale.ROOT);
  else if (a instanceof AstNumList)
    breaks = ((AstNumList) a).expand();
  else if (a instanceof AstNum)
    numBreaks = (int) a.exec(env).getNum();
  else
    // Fail with a clear message instead of continuing with numBreaks == -1.
    throw new IllegalArgumentException(
        "Hist breaks must be a string, a number list or a number. Got: " + a.getClass());

  final AstHist.HistTask t;
  double x1 = vec.max();
  double x0 = vec.min();
  if (breaks != null) {
    t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
  } else {
    double h; // bin width
    if (algo != null) {
      switch (algo) {
        case "scott":
          // special bin width computation
          h = scotts_h(vec);
          numBreaks = scott(vec, h);
          break;
        case "fd":
          // special bin width computation
          h = fds_h(vec);
          numBreaks = fd(vec, h);
          break;
        case "rice":
          numBreaks = rice(vec);
          h = (x1 - x0) / numBreaks;
          break;
        case "sqrt":
          numBreaks = sqrt(vec);
          h = (x1 - x0) / numBreaks;
          break;
        case "doane":
          numBreaks = doane(vec);
          h = (x1 - x0) / numBreaks;
          break;
        case "sturges":
        default:
          // just do sturges even if junk passed in
          numBreaks = sturges(vec);
          h = (x1 - x0) / numBreaks;
      }
    } else {
      h = (x1 - x0) / numBreaks;
    }
    t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
  }

  // wanna make a new frame here [breaks,counts,mids_true,mids]
  final double[] brks = t._breaks;
  final long[] cnts = t._counts;
  final double[] mids_true = t._mids;
  // midpoint of each pair of adjacent breaks
  final double[] mids = new double[brks.length - 1];
  for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (brks[i - 1] + brks[i]);

  // Temporary one-row-per-break vec that only drives the layout of the output frame.
  final Vec layoutVec = Vec.makeZero(brks.length);
  try {
    Frame fr2 = new MRTask() {
      @Override
      public void map(Chunk[] c, NewChunk[] nc) {
        int start = (int) c[0].start();
        for (int i = 0; i < c[0]._len; ++i) {
          nc[0].addNum(brks[i + start]);
          if (i == 0) {
            nc[1].addNA();
            nc[2].addNA();
            nc[3].addNA();
          } else {
            nc[1].addNum(cnts[(i - 1) + start]);
            nc[2].addNum(mids_true[(i - 1) + start]);
            nc[3].addNum(mids[(i - 1) + start]);
          }
        }
      }
    }.doAll(4, Vec.T_NUM, new Frame(layoutVec))
        .outputFrame(null, new String[] {"breaks", "counts", "mids_true", "mids"}, null);
    return new ValFrame(fr2);
  } finally {
    // Always drop the temporary vec, even when the task above fails.
    layoutVec.remove();
  }
}
use of water.rapids.ast.params.AstStr in project h2o-3 by h2oai.
the class AstImpute method apply.
/**
 * Replaces missing values in a frame in place.
 *
 * <p>Rapids form: {@code (h2o.impute data col method combine_method groupby groupByFrame values)}
 * <ul>
 *   <li>{@code asts[1]} - frame being imputed</li>
 *   <li>{@code asts[2]} - column index, or -1 for all columns</li>
 *   <li>{@code asts[3]} - method name: mean, median or mode (ffill/bfill are recognized but the
 *       ungrouped path reports them as unimplemented)</li>
 *   <li>{@code asts[4]} - name of a {@code QuantileModel.CombineMethod}, used by median</li>
 *   <li>{@code asts[5]} - group-by columns as a number, number list or list of column names</li>
 *   <li>{@code asts[6]} - frame of precomputed per-group values, or {@code _} for none</li>
 *   <li>{@code asts[7]} - optional number or number list of fill values</li>
 * </ul>
 *
 * @return without grouping, the per-column fill values (NaN for columns left untouched); with
 *     grouping, the frame of per-group imputation values
 * @throws IllegalArgumentException if the column index is out of range, the method is unknown,
 *     the group-by argument has an unsupported type, or the supplied group-by frame is ambiguous
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // Argument parsing and sanity checking
  // Whole frame being imputed
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  // Column within frame being imputed; -1 selects every column
  final int col = (int) asts[2].exec(env).getNum();
  if (col < -1 || col >= fr.numCols())
    throw new IllegalArgumentException("Column not -1 or in range 0 to " + fr.numCols());
  final boolean doAllVecs = col == -1;
  final Vec vec = doAllVecs ? null : fr.vec(col);

  // Technique used for imputation. Locale.ROOT keeps the comparison independent of the default
  // locale (e.g. Turkish upper-cases "median" to "MEDİAN").
  AstRoot method = null;
  boolean ffill0 = false, bfill0 = false;
  switch (asts[3].exec(env).getStr().toUpperCase(java.util.Locale.ROOT)) {
    case "MEAN":
      method = new AstMean();
      break;
    case "MEDIAN":
      method = new AstMedian();
      break;
    case "MODE":
      method = new AstMode();
      break;
    case "FFILL":
      ffill0 = true;
      break;
    case "BFILL":
      bfill0 = true;
      break;
    default:
      throw new IllegalArgumentException("Method must be one of mean, median or mode");
  }

  // Only for median, how is the median computed on even sample sizes?
  QuantileModel.CombineMethod combine = QuantileModel.CombineMethod.valueOf(
      asts[4].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));

  // Group-by columns. Empty is allowed, and perfectly normal.
  AstRoot ast = asts[5];
  AstNumList by2;
  if (ast instanceof AstNumList)
    by2 = (AstNumList) ast;
  else if (ast instanceof AstNum)
    by2 = new AstNumList(((AstNum) ast).getNum());
  else if (ast instanceof AstStrList) {
    // Translate column names into sorted column indices
    String[] names = ((AstStrList) ast)._strs;
    double[] list = new double[names.length];
    int i = 0;
    for (String name : names) list[i++] = fr.find(name);
    Arrays.sort(list);
    by2 = new AstNumList(list);
  } else
    throw new IllegalArgumentException("Requires a number-list, but found a " + ast.getClass());

  Frame groupByFrame = asts[6].str().equals("_") ? null : stk.track(asts[6].exec(env)).getFrame();

  // Optional user-supplied fill values
  AstRoot vals = asts[7];
  AstNumList values;
  if (vals instanceof AstNumList)
    values = (AstNumList) vals;
  else if (vals instanceof AstNum)
    values = new AstNumList(((AstNum) vals).getNum());
  else
    values = null;

  boolean doGrpBy = !by2.isEmpty() || groupByFrame != null;

  if (!doGrpBy) {
    // Skip the grouping work
    if (ffill0 || bfill0) {
      // TODO: a forward/backward fill on the NA requires chk.previousNonNA and chk.nextNonNA
      // style methods (which may go across chk boundaries)
      throw H2O.unimpl("No ffill or bfill imputation supported");
    }
    final double[] res = values == null ? new double[fr.numCols()] : values.expand();
    if (values == null) {
      // fill up res if no values supplied by the user, common case.
      // NaN marks "leave this column alone" for the fill task below, so start from NaN;
      // otherwise columns that are neither numeric nor categorical would have NAs set to 0.
      Arrays.fill(res, Double.NaN);
      if (doAllVecs) {
        for (int i = 0; i < res.length; ++i) {
          Vec v = fr.vec(i);
          if (v.isNumeric())
            res[i] = v.mean();
          else if (v.isCategorical())
            res[i] = ArrayUtils.maxIndex(v.bins());
        }
      } else {
        if (method instanceof AstMean)
          res[col] = vec.mean();
        if (method instanceof AstMedian)
          res[col] = AstMedian.median(new Frame(vec), combine);
        if (method instanceof AstMode)
          res[col] = AstMode.mode(vec);
      }
    }
    new MRTask() {
      @Override
      public void map(Chunk[] cs) {
        int len = cs[0]._len;
        // run down each chk
        for (int c = 0; c < cs.length; ++c)
          if (!Double.isNaN(res[c]))
            for (int row = 0; row < len; ++row)
              if (cs[c].isNA(row))
                cs[c].set(row, res[c]);
      }
    }.doAll(fr);
    return new ValNums(res);
  }

  // Compute the imputed value per-group. Empty groups are allowed and OK.
  Frame imputes = groupByFrame;
  if (imputes == null) {
    // Build and run a GroupBy command
    AstGroup ast_grp = new AstGroup();
    // simple case where user specified a column... col == -1 means do all columns
    if (doAllVecs) {
      AstRoot[] aggs = new AstRoot[(int) (3 + 3 * (fr.numCols() - by2.cnt()))];
      aggs[0] = ast_grp;
      aggs[1] = new AstFrame(fr);
      aggs[2] = by2;
      int c = 3;
      for (int i = 0; i < fr.numCols(); ++i) {
        if (!by2.has(i) && (fr.vec(i).isCategorical() || fr.vec(i).isNumeric())) {
          aggs[c] = fr.vec(i).isNumeric() ? new AstMean() : new AstMode();
          aggs[c + 1] = new AstNumList(i, i + 1);
          aggs[c + 2] = new AstStr("rm");
          c += 3;
        }
      }
      imputes = ast_grp.apply(env, stk, aggs).getFrame();
    } else {
      imputes = ast_grp.apply(env, stk, new AstRoot[] {
          ast_grp, new AstFrame(fr), by2,
          method, new AstNumList(col, col + 1), new AstStr("rm")}).getFrame();
    }
  }
  // >2 makes it ambiguous which columns are groupby cols and which are aggs, throw IAE
  if (by2.isEmpty() && imputes.numCols() > 2)
    throw new IllegalArgumentException("Ambiguous group-by frame. Supply the `by` columns to proceed.");

  final int[] bycols0 = ArrayUtils.seq(0, Math.max((int) by2.cnt(), 1));
  final IcedHashMap<AstGroup.G, Freezable[]> final_group_impute_map =
      new Gather(by2.expand4(), bycols0, fr.numCols(), col).doAll(imputes)._group_impute_map;

  // Now walk over the data, replace NAs with the imputed results
  if (by2.isEmpty()) {
    // Recover the group-by columns from the names of the leading columns of the imputes frame
    int[] byCols = new int[imputes.numCols() - 1];
    for (int i = 0; i < byCols.length; ++i) byCols[i] = fr.find(imputes.name(i));
    by2 = new AstNumList(byCols);
  }
  final int[] bycols = by2.expand4();
  new MRTask() {
    @Override
    public void map(Chunk[] cs) {
      Set<Integer> _bycolz = new HashSet<>();
      for (int b : bycols) _bycolz.add(b);
      AstGroup.G g = new AstGroup.G(bycols.length, null);
      for (int row = 0; row < cs[0]._len; row++)
        for (int c = 0; c < cs.length; ++c)
          if (!_bycolz.contains(c))
            if (cs[c].isNA(row))
              cs[c].set(row, ((IcedDouble) final_group_impute_map.get(g.fill(row, cs, bycols))[c])._val);
    }
  }.doAll(fr);
  return new ValFrame(imputes);
}
use of water.rapids.ast.params.AstStr in project h2o-3 by h2oai.
the class H2OBinaryOp method setupParamsImpl.
/**
 * Sets up the {@code i}-th parameter of this operation.
 *
 * <p>When the matching AST argument ({@code _ast._asts[i + 1]}) is an {@code AstExec}, this
 * records which side the argument is on (left when {@code args[i]} is {@code "leftArg"}, right
 * otherwise), stores the string form of the exec's element at index 2 in {@code _binCol}, and
 * registers that same string as an {@code AstStr} parameter under {@code args[i]}. Any other
 * argument type is delegated to the superclass.
 *
 * @param i index of the parameter being set up
 * @param args parameter names
 */
@Override
protected void setupParamsImpl(int i, String[] args) {
  if (!(_ast._asts[i + 1] instanceof AstExec)) {
    super.setupParamsImpl(i, args);
    return;
  }
  if (!isBinaryOp(_fun)) {
    throw H2O.unimpl("unimpl: " + lookup(_fun));
  }
  final AstExec operand = (AstExec) _ast._asts[i + 1];
  final String paramName = args[i];
  _leftIsCol = paramName.equals("leftArg");
  _riteIsCol = !_leftIsCol;
  final String column = operand._asts[2].str();
  _binCol = column;
  _params.put(paramName, new AstStr(column));
}
Aggregations