use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.
the class AstDdply method apply.
/**
 * Splits a frame into groups, runs a function on every group and collects the results.
 *
 * <p>Reads the input frame from {@code asts[1]}, the group-by column list from
 * {@code asts[2]} and the function to run from {@code asts[3]}. The work is done in three
 * passes: find the groups, build one row-filter Vec per group, then dispatch one
 * {@code RemoteRapids} task per group and wait for all of them.
 *
 * @param env  execution environment; its current scope is handed to every remote task
 * @param stk  stack helper used to track the input frame
 * @param asts the argument ASTs described above
 * @return the frame produced by {@code AstGroup.buildOutput}: the group key columns followed
 *         by result columns named {@code ddply_C1}, {@code ddply_C2}, ...
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  int ncols = fr.numCols();
  AstNumList groupby = AstGroup.check(ncols, asts[2]);
  int[] gbCols = groupby.expand4();
  AstRoot fun = asts[3].exec(env).getFun();
  // Current execution scope; needed to lookup variables
  AstFunction scope = env._scope;

  // Pass 1: Find all the groups (and count rows-per-group)
  IcedHashMap<AstGroup.G, String> gss = AstGroup.doGroups(fr, gbCols, AstGroup.aggNRows());
  final AstGroup.G[] grps = gss.keySet().toArray(new AstGroup.G[gss.size()]);

  // Order the groups by their key columns
  final int[] ordCols = new AstNumList(0, gbCols.length).expand4();
  Arrays.sort(grps, new java.util.Comparator<AstGroup.G>() {
    // Compare 2 groups column by column over ordCols and stop at the first
    // column where the keys differ. NaN is treated as least.
    @Override
    public int compare(AstGroup.G g1, AstGroup.G g2) {
      for (int i : ordCols) {
        final double k1 = g1._gs[i];
        final double k2 = g2._gs[i];
        final boolean na1 = Double.isNaN(k1);
        final boolean na2 = Double.isNaN(k2);
        if (na1 || na2) {
          // Two NaNs tie on this column. Without this case NaN != NaN would make
          // compare(a, b) and compare(b, a) both return 1, which breaks the
          // Comparator contract that Arrays.sort relies on.
          if (na1 && na2)
            continue;
          return na1 ? -1 : 1;
        }
        if (k1 != k2)
          return k1 < k2 ? -1 : 1;
      }
      return 0;
    }

    // I do not believe sort() calls equals() at this time, so no need to implement
    @Override
    public boolean equals(Object o) {
      throw H2O.unimpl();
    }
  });

  // Uniquely number the groups (in sorted order)
  for (int gnum = 0; gnum < grps.length; gnum++)
    grps[gnum]._dss[0][0] = gnum;

  // Pass 2: Build all the groups, building 1 Vec per-group, with exactly the
  // same Chunk layout, except each Chunk will be the filter rows numbers; a
  // list of the Chunk-relative row-numbers for that group in an original
  // data Chunk. Each Vec will have a *different* number of rows.
  Vec[] vgrps = new BuildGroup(gbCols, gss).doAll(gss.size(), Vec.T_NUM, fr).close();

  // Pass 3: For each group, build a full frame for the group, run the
  // function on it and tear the frame down.
  // Each task is sent to the home node of its group Vec key.
  final RemoteRapids[] remoteTasks = new RemoteRapids[gss.size()];
  Futures fs = new Futures();
  for (int i = 0; i < remoteTasks.length; i++)
    fs.add(RPC.call(vgrps[i]._key.home_node(), remoteTasks[i] = new RemoteRapids(fr, vgrps[i]._key, fun, scope)));
  fs.blockForPending();

  // Build the output!
  // NOTE(review): the first task's result fixes the number of result columns, and
  // remoteTasks[0] is read unconditionally, so this assumes at least one group.
  final double[] res0 = remoteTasks[0]._result;
  String[] fcnames = new String[res0.length];
  for (int i = 0; i < res0.length; i++)
    fcnames[i] = "ddply_C" + (i + 1);
  MRTask mrfill = new MRTask() {
    @Override
    public void map(Chunk[] c, NewChunk[] ncs) {
      int start = (int) c[0].start();
      for (int i = 0; i < c[0]._len; ++i) {
        // One Group per row
        AstGroup.G g = grps[i + start];
        int j;
        // The Group Key, as a row
        for (j = 0; j < g._gs.length; j++)
          ncs[j].addNum(g._gs[j]);
        // Followed by the function result for this group
        double[] res = remoteTasks[i + start]._result;
        for (int a = 0; a < res0.length; a++)
          ncs[j++].addNum(res[a]);
      }
    }
  };
  Frame f = AstGroup.buildOutput(gbCols, res0.length, fr, fcnames, gss.size(), mrfill);
  return new ValFrame(f);
}
use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.
the class AstGroup method apply.
/**
 * Groups a frame by a set of columns and computes one or more aggregates per group.
 *
 * <p>Reads the input frame from {@code asts[1]} and the group-by column list from
 * {@code asts[2]}. Every following triple of ASTs describes one aggregate: the function
 * (a function value or its name), the single column it applies to, and the NA-handling mode.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts the argument ASTs described above
 * @return the frame produced by {@code buildOutput}: the group key columns followed by one
 *         column per aggregate, named {@code <function>_<column name>}
 * @throws IllegalArgumentException if an aggregate names more than one column, or if
 *         {@code mode} is requested on a non-categorical column
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  int ncols = fr.numCols();
  AstNumList groupby = check(ncols, asts[2]);
  final int[] gbCols = groupby.expand4();

  // Count of aggregates: skip the three leading ASTs (indices 0-2; the frame is
  // asts[1] and the group-by list is asts[2]), then count by triples.
  int naggs = (asts.length - 3) / 3;
  final AGG[] aggs = new AGG[naggs];
  for (int idx = 3; idx < asts.length; idx += 3) {
    Val v = asts[idx].exec(env);
    String fn = v instanceof ValFun ? v.getFun().str() : v.getStr();
    FCN fcn = FCN.valueOf(fn);
    AstNumList col = check(ncols, asts[idx + 1]);
    if (col.cnt() != 1)
      throw new IllegalArgumentException("Group-By functions take only a single column");
    // Aggregate column
    int agg_col = (int) col.min();
    if (fcn == FCN.mode && !fr.vec(agg_col).isCategorical())
      throw new IllegalArgumentException("Mode only allowed on categorical columns");
    // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM's
    // default locale (e.g. a Turkish locale maps 'i' to a dotted capital I).
    NAHandling na = NAHandling.valueOf(asts[idx + 2].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));
    aggs[(idx - 3) / 3] = new AGG(fcn, agg_col, na, (int) fr.vec(agg_col).max() + 1);
  }

  // do the group by work now
  IcedHashMap<G, String> gss = doGroups(fr, gbCols, aggs);
  final G[] grps = gss.keySet().toArray(new G[gss.size()]);

  // Order the groups by their key columns
  if (gbCols.length > 0)
    Arrays.sort(grps, new java.util.Comparator<G>() {
      // Compare 2 groups column by column over the group-by columns and stop at
      // the first column where the keys differ. NaN is treated as least.
      @Override
      public int compare(G g1, G g2) {
        for (int i = 0; i < gbCols.length; i++) {
          final double k1 = g1._gs[i];
          final double k2 = g2._gs[i];
          final boolean na1 = Double.isNaN(k1);
          final boolean na2 = Double.isNaN(k2);
          if (na1 || na2) {
            // Two NaNs tie on this column. Without this case NaN != NaN would make
            // compare(a, b) and compare(b, a) both return 1, which breaks the
            // Comparator contract that Arrays.sort relies on.
            if (na1 && na2)
              continue;
            return na1 ? -1 : 1;
          }
          if (k1 != k2)
            return k1 < k2 ? -1 : 1;
        }
        return 0;
      }

      // I do not believe sort() calls equals() at this time, so no need to implement
      @Override
      public boolean equals(Object o) {
        throw H2O.unimpl();
      }
    });

  // Build the output!
  String[] fcnames = new String[aggs.length];
  for (int i = 0; i < aggs.length; i++)
    fcnames[i] = aggs[i]._fcn.toString() + "_" + fr.name(aggs[i]._col);
  MRTask mrfill = new MRTask() {
    @Override
    public void map(Chunk[] c, NewChunk[] ncs) {
      int start = (int) c[0].start();
      for (int i = 0; i < c[0]._len; ++i) {
        // One Group per row
        G g = grps[i + start];
        int j;
        // The Group Key, as a row
        for (j = 0; j < g._gs.length; j++)
          ncs[j].addNum(g._gs[j]);
        // Followed by the finished value of every aggregate for this group
        for (int a = 0; a < aggs.length; a++)
          ncs[j++].addNum(aggs[a]._fcn.postPass(g._dss[a], g._ns[a]));
      }
    }
  };
  Frame f = buildOutput(gbCols, naggs, fr, fcnames, grps.length, mrfill);
  return new ValFrame(f);
}
use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.
the class AstGroupedPermute method apply.
/**
 * Builds the grouped-permute output frame.
 *
 * <p>Arguments are read positionally: {@code asts[1]} is the input frame, {@code asts[2]}
 * the permute column, {@code asts[3]} the group-by column list, {@code asts[4]} the
 * permute-by column and {@code asts[5]} the keep column. The output columns are the
 * group-by columns followed by {@code In}, {@code Out}, {@code InAmnt} and {@code OutAmnt}.
 * The elapsed time of each of the two phases is written to the log.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts the argument ASTs described above
 * @return the frame assembled by {@code buildOutput} from the smashed groups
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame fr = stk.track(asts[1].exec(env)).getFrame();
  final int permCol = (int) asts[2].exec(env).getNum();
  final int[] gbCols = AstGroup.check(fr.numCols(), asts[3]).expand4();
  final int permuteBy = (int) asts[4].exec(env).getNum();
  final int keepCol = (int) asts[5].exec(env).getNum();
  final int nKeys = gbCols.length;

  // Output column names: the group-by columns, then the four fixed columns
  final String[] names = new String[nKeys + 4];
  for (int k = 0; k < nKeys; k++) {
    names[k] = fr.name(gbCols[k]);
  }
  names[nKeys] = "In";
  names[nKeys + 1] = "Out";
  names[nKeys + 2] = "InAmnt";
  names[nKeys + 3] = "OutAmnt";

  // Output domains, laid out like the names: In/Out take the permute column's
  // domain, InAmnt/OutAmnt take the keep column's domain
  final String[][] srcDomains = fr.domains();
  final String[][] domains = new String[names.length][];
  for (int k = 0; k < nKeys; k++) {
    domains[k] = srcDomains[gbCols[k]];
  }
  domains[nKeys] = srcDomains[permCol];
  domains[nKeys + 1] = srcDomains[permCol];
  domains[nKeys + 2] = srcDomains[keepCol];
  domains[nKeys + 3] = srcDomains[keepCol];

  // Phase 1: build the groups over the whole frame
  long phaseStart = System.currentTimeMillis();
  final BuildGroups grouped = new BuildGroups(gbCols, permuteBy, permCol, keepCol).doAll(fr);
  Log.info("Elapsed time: " + (System.currentTimeMillis() - phaseStart) / 1000.0 + "s");

  // Phase 2: smash the groups and wait for the task to finish
  phaseStart = System.currentTimeMillis();
  final SmashGroups smasher = new SmashGroups(grouped._grps);
  H2O.submitTask(smasher).join();
  Log.info("Elapsed time: " + (System.currentTimeMillis() - phaseStart) / 1000.0 + "s");

  final double[][][] smashed = smasher._res.values().toArray(new double[0][][]);
  return new ValFrame(buildOutput(smashed, names, domains));
}
use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.
the class AstRowSlice method apply.
/**
 * Selects rows from the frame in {@code asts[1]} according to the selector in {@code asts[2]}.
 *
 * <p>The selector may be:
 * <ul>
 *   <li>an {@code AstNumList}: either non-negative row indices (inclusion) or negative
 *       indices (exclusion, where {@code -1} excludes row 0); the two cannot be mixed;</li>
 *   <li>an {@code AstNum}: a single row, delegated to {@code Frame.deepSlice};</li>
 *   <li>an {@code AstExec} or {@code AstId}: evaluated to a single-column frame that is passed
 *       to {@code Frame.deepSlice} as the row predicate.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input and predicate frames
 * @param asts the argument ASTs described above
 * @return a new frame holding the selected rows
 * @throws IllegalArgumentException if the selection is unsorted, names a row outside
 *         {@code 0..nrows-1}, mixes negative and positive indices, evaluates to a predicate
 *         with more than one column, or is of an unsupported AST type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  Frame returningFrame;
  long nrows = fr.numRows();
  if (asts[2] instanceof AstNumList) {
    final AstNumList nums = (AstNumList) asts[2];
    if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
      throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
    // Expand to an explicit sorted list only for explicit lists and negative
    // selections; otherwise leave null and query nums directly.
    long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
    if (rows != null) {
      if (rows.length == 0) {
        // Empty inclusion list: nothing to validate
      } else if (rows[0] >= 0) {
        // Positive (inclusion) list. Valid indices are 0..nrows-1, so the
        // largest (last, since sorted) index must be strictly below nrows.
        if (rows[rows.length - 1] >= nrows)
          throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
      } else {
        // Negative (exclusion) list
        if (rows[rows.length - 1] >= 0)
          throw new IllegalArgumentException("Cannot mix negative and postive row selection");
        // Invert the list to make a positive list, ignoring out-of-bounds values
        BitSet bs = new BitSet((int) nrows);
        for (long row : rows) {
          // The positive index. Range-check as a long BEFORE narrowing to int so a
          // huge negative value cannot wrap around into a valid row number.
          long idx = -row - 1;
          if (idx >= 0 && idx < nrows)
            // Set row to EXCLUDE
            bs.set((int) idx);
        }
        // Every clear bit is a row that survives the exclusion
        rows = new long[(int) nrows - bs.cardinality()];
        for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
          rows[j++] = i;
      }
    }
    final long[] ls = rows;
    returningFrame = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk[] ncs) {
        if (nums.cnt() == 0)
          return;
        if (ls != null && ls.length == 0)
          return;
        long start = cs[0].start();
        long end = start + cs[0]._len;
        // Inclusive bounds of the selection. Without an explicit list, nums.max()
        // is converted from an exclusive to an inclusive max (stride == 1).
        long min = ls == null ? (long) nums.min() : ls[0], max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
        // Skip chunks that lie entirely before or after the selected range
        if (!(max < start || min > end)) {
          // Begin at the first selected row if it falls inside this chunk,
          // otherwise at the start of the chunk
          long startOffset = (min > start ? min : start);
          for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
            if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
              // Copy the selected row, column by column, keeping strings, UUIDs and NAs
              for (int c = 0; c < cs.length; ++c) {
                if (cs[c] instanceof CStrChunk)
                  ncs[c].addStr(cs[c], i);
                else if (cs[c] instanceof C16Chunk)
                  ncs[c].addUUID(cs[c], i);
                else if (cs[c].isNA(i))
                  ncs[c].addNA();
                else
                  ncs[c].addNum(cs[c].atd(i));
              }
            }
          }
        }
      }
    }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
  } else if ((asts[2] instanceof AstNum)) {
    // A single row number
    long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
    returningFrame = fr.deepSlice(rows, null);
  } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
    // A predicate expression: must evaluate to exactly one column
    Frame predVec = stk.track(asts[2].exec(env)).getFrame();
    if (predVec.numCols() != 1)
      throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns. Must be a boolean Vec.");
    returningFrame = fr.deepSlice(predVec, null);
  } else
    throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
  return new ValFrame(returningFrame);
}
use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.
the class AstScale method apply.
/**
 * Shifts and scales every column of the frame in {@code asts[1]}, in place.
 *
 * <p>Each value {@code v} in column {@code i} is overwritten with
 * {@code (v - means[i]) * mults[i]}.
 *
 * <p>{@code asts[2]} selects the shift: a number list with one entry per column, or a number
 * where 0 means "no shift" (all zeros) and 1 means "use {@code fr.means()}".
 * {@code asts[3]} selects the multiplier: a number list with one entry per column, a frame
 * (the Vec returned by {@code anyVec()} is converted with {@code toArray}), or a number where
 * 0 means "no scaling" (all ones) and 1 means "use {@code fr.mults()}".
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts the argument ASTs described above
 * @return the same frame that was passed in, after the in-place update
 * @throws IllegalArgumentException if a number list does not have one entry per column, or a
 *         numeric flag is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame fr = stk.track(asts[1].exec(env)).getFrame();
  final int ncols = fr.numCols();

  // Resolve the per-column shift (bias/mean)
  final double[] means;
  final AstRoot shiftArg = asts[2];
  if (shiftArg instanceof AstNumList) {
    means = ((AstNumList) shiftArg).expand();
    if (means.length != ncols) {
      throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
    }
  } else {
    final double shiftFlag = shiftArg.exec(env).getNum();
    if (shiftFlag == 1) {
      means = fr.means();
    } else if (shiftFlag == 0) {
      // A fresh array is zero-filled, i.e. no shift at all
      means = new double[ncols];
    } else {
      throw new IllegalArgumentException("Only true or false allowed");
    }
  }

  // Resolve the per-column multiplier (scale/stddev)
  final double[] mults;
  final AstRoot scaleArg = asts[3];
  if (scaleArg instanceof AstNumList) {
    mults = ((AstNumList) scaleArg).expand();
    if (mults.length != ncols) {
      throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
    }
  } else {
    final Val scaleVal = scaleArg.exec(env);
    if (scaleVal instanceof ValFrame) {
      mults = toArray(scaleVal.getFrame().anyVec());
    } else {
      final double scaleFlag = scaleVal.getNum();
      if (scaleFlag == 1) {
        mults = fr.mults();
      } else if (scaleFlag == 0) {
        // Multiplying by one leaves the values unscaled
        mults = new double[ncols];
        Arrays.fill(mults, 1.0);
      } else {
        throw new IllegalArgumentException("Only true or false allowed");
      }
    }
  }

  // Rewrite every cell of the frame in place
  new MRTask() {
    @Override
    public void map(Chunk[] cs) {
      for (int col = 0; col < cs.length; col++) {
        final Chunk chk = cs[col];
        for (int row = 0; row < chk._len; row++) {
          final double shifted = chk.atd(row) - means[col];
          chk.set(row, shifted * mults[col]);
        }
      }
    }
  }.doAll(fr);
  return new ValFrame(fr);
}
Aggregations