use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstReplaceAll method apply.
/**
 * Applies a replace-all operation to every column of a frame and returns the
 * transformed columns as a new frame.
 *
 * <p>Arguments are read positionally from {@code asts}: {@code asts[1]} is the
 * frame, {@code asts[2]} the pattern, {@code asts[3]} the replacement, and
 * {@code asts[4]} a numeric flag where the value {@code 1} turns on
 * {@code ignoreCase}. The actual matching is delegated to
 * {@code replaceAllCategoricalCol} / {@code replaceAllStringCol}, which are
 * defined outside this method.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the evaluated frame argument
 * @param asts the operator followed by its four arguments
 * @return a {@link ValFrame} wrapping one new vec per input column
 * @throws IllegalArgumentException if any column is neither string nor categorical
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final String pattern = asts[2].exec(env).getStr();
  final String replacement = asts[3].exec(env).getStr();
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  final boolean ignoreCase = asts[4].exec(env).getNum() == 1;

  // Type check every column up front so that no column is transformed when
  // any column has an unsupported type.
  for (Vec v : fr.vecs()) {
    if (!(v.isCategorical() || v.isString())) {
      // Report the type of the offending column itself; asking the frame for
      // an arbitrary vec could name a perfectly valid column's type instead.
      throw new IllegalArgumentException("replaceall() requires a string or categorical column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
    }
  }

  // Transform each vec, preserving column order.
  Vec[] nvs = new Vec[fr.numCols()];
  int i = 0;
  for (Vec v : fr.vecs()) {
    if (v.isCategorical()) {
      nvs[i] = replaceAllCategoricalCol(v, pattern, replacement, ignoreCase);
    } else {
      nvs[i] = replaceAllStringCol(v, pattern, replacement, ignoreCase);
    }
    i++;
  }
  return new ValFrame(new Frame(nvs));
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstReplaceFirst method apply.
/**
 * Applies a replace-first operation to every column of a frame and returns
 * the transformed columns as a new frame.
 *
 * <p>Arguments are read positionally from {@code asts}: {@code asts[1]} is the
 * frame, {@code asts[2]} the pattern, {@code asts[3]} the replacement, and
 * {@code asts[4]} a numeric flag where the value {@code 1} turns on
 * {@code ignoreCase}. The actual matching is delegated to
 * {@code replaceFirstCategoricalCol} / {@code replaceFirstStringCol}, which
 * are defined outside this method.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the evaluated frame argument
 * @param asts the operator followed by its four arguments
 * @return a {@link ValFrame} wrapping one new vec per input column
 * @throws IllegalArgumentException if any column is neither string nor categorical
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final String pattern = asts[2].exec(env).getStr();
  final String replacement = asts[3].exec(env).getStr();
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  final boolean ignoreCase = asts[4].exec(env).getNum() == 1;

  // Type check every column up front so that no column is transformed when
  // any column has an unsupported type.
  for (Vec v : fr.vecs()) {
    if (!(v.isCategorical() || v.isString())) {
      // Report the type of the offending column itself; asking the frame for
      // an arbitrary vec could name a perfectly valid column's type instead.
      throw new IllegalArgumentException("replacefirst() requires a string or categorical column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
    }
  }

  // Transform each vec, preserving column order.
  Vec[] nvs = new Vec[fr.numCols()];
  int i = 0;
  for (Vec v : fr.vecs()) {
    if (v.isCategorical()) {
      nvs[i] = replaceFirstCategoricalCol(v, pattern, replacement, ignoreCase);
    } else {
      nvs[i] = replaceFirstStringCol(v, pattern, replacement, ignoreCase);
    }
    i++;
  }
  return new ValFrame(new Frame(nvs));
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstRBind method apply.
/**
 * Row-binds the evaluated arguments {@code asts[1..]} into a single new frame.
 *
 * <p>Each argument is executed and tracked on {@code stk}. A frame argument
 * contributes its rows as-is; any non-frame argument is expanded into a
 * temporary one-row frame holding {@code val.getNum()} in every column. The
 * last frame argument seen becomes the canonical frame: every other input must
 * have the same column count, column names and column types. For categorical
 * columns the output domain is the union of the input domains, in order of
 * first appearance, and a per-input mapping from old level index to new level
 * index is handed to {@code ParallelRbinds}.
 *
 * <p>NOTE(review): the compatibility checks throw before {@code zz.remove()}
 * and before the {@code tmp_frs} cleanup loop, and there is no try/finally, so
 * temporaries created for scalar arguments appear not to be deleted on those
 * paths - confirm whether the caller's scope cleans them up.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track every evaluated argument
 * @param asts the operator followed by the values to bind
 * @return a {@link ValFrame} carrying the canonical frame's names and the newly built vecs
 * @throws IllegalArgumentException if the inputs differ in column count, names or types
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
// Execute all args. Find a canonical frame; all Frames must look like this one.
// Each argument turns into either a Frame (whose rows are entirely
// inlined) or a scalar (which is replicated across as a single row).
// Canonical Frame; all frames have the same column count, types and names
Frame fr = null;
// Total chunks
int nchks = 0;
// Computed AstRoot results (index 0, the operator itself, is left null)
Val[] vals = new Val[asts.length];
for (int i = 1; i < asts.length; i++) {
vals[i] = stk.track(asts[i].exec(env));
if (vals[i].isFrame()) {
// Reassigned on every frame argument, so the last frame wins as canonical
fr = vals[i].getFrame();
// Total chunks
nchks += fr.anyVec().nChunks();
} else
// One chunk per scalar
nchks++;
}
// No Frame, just a pile-o-scalars?
// The zero-length vec for the zero-frame frame
Vec zz = null;
if (fr == null) {
// Zero-length, 1-column, default name
fr = new Frame(new String[] { Frame.defaultColName(0) }, new Vec[] { zz = Vec.makeZero(0) });
// No arguments at all: the empty placeholder frame is the result
if (asts.length == 1)
return new ValFrame(fr);
}
// Verify all Frames are the same columns, names, and types. Domains can vary, and will be the union
// Input frame
final Frame[] frs = new Frame[asts.length];
// Column types
final byte[] types = fr.types();
// Compute a new layout!
final long[] espc = new long[nchks + 1];
// Index into espc of the first chunk of the input currently being processed
int coffset = 0;
// Temporary frames created for scalar arguments; deleted once the bind has run
Frame[] tmp_frs = new Frame[asts.length];
for (int i = 1; i < asts.length; i++) {
// Save values computed for pass 2
Val val = vals[i];
Frame fr0 = val.isFrame() ? val.getFrame() : // Scalar: auto-expand into a 1-row frame
(tmp_frs[i] = new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols())));
// Check that all frames are compatible
if (fr.numCols() != fr0.numCols())
throw new IllegalArgumentException("rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns.");
if (!Arrays.deepEquals(fr._names, fr0._names))
throw new IllegalArgumentException("rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names));
if (!Arrays.equals(types, fr0.types()))
throw new IllegalArgumentException("rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types()));
// Save frame
frs[i] = fr0;
// Roll up the ESPC row counts
long roffset = espc[coffset];
long[] espc2 = fr0.anyVec().espc();
for (// Roll up the row counts: each entry of espc2 is shifted by roffset
int j = 1;
j < espc2.length;
j++) espc[coffset + j] = (roffset + espc2[j]);
// Chunk offset
coffset += espc2.length - 1;
}
// The placeholder vec was only needed to supply names/types above
if (zz != null)
zz.remove();
// build up the new domains for each vec
HashMap<String, Integer>[] dmap = new HashMap[types.length];
String[][] domains = new String[types.length][];
// cmaps[column][input][old level index] -> index in the merged domain
int[][][] cmaps = new int[types.length][][];
for (int k = 0; k < types.length; ++k) {
dmap[k] = new HashMap<>();
// Number of distinct levels seen so far for column k
int c = 0;
byte t = types[k];
if (t == Vec.T_CAT) {
int[][] maps = new int[frs.length][];
for (int i = 1; i < frs.length; i++) {
maps[i] = new int[frs[i].vec(k).domain().length];
for (int j = 0; j < maps[i].length; j++) {
String s = frs[i].vec(k).domain()[j];
// First sighting of a level gets the next free index; repeats reuse it
if (!dmap[k].containsKey(s))
dmap[k].put(s, maps[i][j] = c++);
else
maps[i][j] = dmap[k].get(s);
}
}
cmaps[k] = maps;
} else {
// Non-categorical column: one (null) slot per input, no remapping
cmaps[k] = new int[frs.length][];
}
// No levels collected means no domain; dmap[k] is empty then, so the loop below is a no-op
domains[k] = c == 0 ? null : new String[c];
for (Map.Entry<String, Integer> e : dmap[k].entrySet()) domains[k][e.getValue()] = e.getKey();
}
// Now make Keys for the new Vecs
Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols());
Vec[] vecs = new Vec[fr.numCols()];
int rowLayout = Vec.ESPC.rowLayout(keys[0], espc);
for (int i = 0; i < vecs.length; i++) vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]);
// Do the row-binds column-by-column.
// Switch to F/J thread for continuations
AstRBind.ParallelRbinds t;
H2O.submitTask(t = new AstRBind.ParallelRbinds(frs, espc, vecs, cmaps)).join();
// Drop the temporary one-row frames that were built for scalar arguments
for (Frame tfr : tmp_frs) if (tfr != null)
tfr.delete();
return new ValFrame(new Frame(fr.names(), t._vecs));
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstBinOp method vec_op_frame.
/**
 * Applies {@code op} between a single vec (left operand) and every column of
 * a frame (right operand), producing one numeric output column per frame
 * column.
 *
 * <p>The caller is expected to have verified that the row counts match and
 * that the frame is non-empty. The result is passed through
 * {@code cleanCategorical} before being returned.
 *
 * @param vec the left-hand operand, paired with each frame column in turn
 * @param fr  the right-hand frame; its column names are used for the output
 * @return the value returned by {@code cleanCategorical(fr, res)}
 */
private ValFrame vec_op_frame(Vec vec, Frame fr) {
  // Build a working frame from fr with the vec appended as the last column,
  // so a single task sees both operands chunk-aligned.
  Frame operands = new Frame(fr);
  operands.add("", vec);

  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      assert cress.length == chks.length - 1;
      // The appended vec sits just past the last output column.
      final Chunk left = chks[cress.length];
      int col = 0;
      while (col < cress.length) {
        final Chunk right = chks[col];
        final NewChunk out = cress[col];
        for (int row = 0; row < left._len; row++) {
          double value = op(left.atd(row), right.atd(row));
          out.addNum(value);
        }
        col++;
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, operands).outputFrame(fr._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(fr, res);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstBinOp method frame_op_row.
/**
 * Applies {@code op} between every row of a frame (left operand) and a single
 * row taken from another frame (right operand), column by column.
 *
 * <p>Row 0 of each column of {@code row} supplies the right-hand value; a
 * column that is neither numeric nor time contributes {@code NaN}. String
 * columns of {@code lf} yield {@code NaN} for every row. The result is passed
 * through {@code cleanCategorical} before being returned.
 *
 * @param lf  the left-hand frame; its column names are used for the output
 * @param row the frame whose first row supplies the right-hand values
 * @return the value returned by {@code cleanCategorical(lf, res)}
 */
private ValFrame frame_op_row(Frame lf, Frame row) {
  // Pull the right-hand operands into a plain array once, up front.
  final double[] rawRow = new double[row.numCols()];
  for (int col = 0; col < rawRow.length; col++) {
    Vec source = row.vec(col);
    // is numberlike, if not then NaN
    if (source.isNumeric() || source.isTime()) {
      rawRow[col] = source.at(0);
    } else {
      rawRow[col] = Double.NaN;
    }
  }

  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      for (int col = 0; col < cress.length; col++) {
        final Chunk left = chks[col];
        final NewChunk out = cress[col];
        if (left.vec().isString()) {
          // TODO: improve
          for (int r = 0; r < left._len; r++) {
            out.addNum(Double.NaN);
          }
        } else {
          for (int r = 0; r < left._len; r++) {
            out.addNum(op(left.atd(r), rawRow[col]));
          }
        }
      }
    }
  }.doAll(lf.numCols(), Vec.T_NUM, lf).outputFrame(lf._names, null);

  return cleanCategorical(lf, res);
}
Aggregations