use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstTable method fast_table.
/**
 * Fast path for tabulating a single integer column.
 *
 * <p>Counts occurrences with a dense array indexed by {@code value - min}, then emits one
 * output row per value whose count is non-zero.
 *
 * @param v1      the column to tabulate
 * @param ncols   number of columns requested by the caller; only {@code 1} is supported here
 * @param colname name given to the first output column (the second is always {@code "Count"})
 * @return a two-column frame (value, count), or {@code null} when the fast path does not apply:
 *         more than one column, a non-integer column, or a value span above 1,000,000
 */
private ValFrame fast_table(Vec v1, int ncols, String colname) {
  if (ncols != 1 || !v1.isInt())
    return null;
  long spanl = (long) v1.max() - (long) v1.min() + 1;
  // Cap at decent array size, for performance
  if (spanl > 1000000)
    return null;
  // First fast-pass counting
  AstTable.FastCnt fastCnt = new AstTable.FastCnt((long) v1.min(), (int) spanl).doAll(v1);
  final long[] cnts = fastCnt._cnts;
  final long minVal = fastCnt._min;
  // Second pass to build the result frame, skipping zeros.
  // The constant vec only supplies a row layout with one row per slot of cnts; its cell
  // values are never read, only each chunk's length and starting row.
  Vec dataLayoutVec = Vec.makeCon(0, cnts.length);
  Frame fr;
  try {
    fr = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk nc0, NewChunk nc1) {
        final Chunk c = cs[0];
        for (int i = 0; i < c._len; ++i) {
          // Global row number doubles as the index into the count array
          int idx = (int) (i + c.start());
          if (cnts[idx] > 0) {
            nc0.addNum(idx + minVal); // shift the offset back to the original value
            nc1.addNum(cnts[idx]);
          }
        }
      }
    }.doAll(new byte[] { Vec.T_NUM, Vec.T_NUM }, dataLayoutVec).outputFrame(new String[] { colname, "Count" }, new String[][] { v1.domain(), null });
  } finally {
    // Always drop the temporary layout vec, even if the second pass fails
    dataLayoutVec.remove();
  }
  return new ValFrame(fr);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstIsNa method exec.
/**
 * Evaluates the is-NA test on the operand in {@code args[1]}.
 *
 * <ul>
 *   <li>number: wraps the result of {@code op} applied to the scalar;</li>
 *   <li>frame: returns a same-width numeric frame whose columns are named
 *       {@code isNA(<original name>)} and whose cells are 1 where the input is NA, else 0;</li>
 *   <li>string: 1 when the string is {@code null}, else 0.</li>
 * </ul>
 *
 * Any other value type is rejected with {@code H2O.unimpl}.
 */
@Override
public Val exec(Val... args) {
  final Val operand = args[1];
  switch (operand.type()) {
    case Val.NUM: {
      return new ValNum(op(operand.getNum()));
    }
    case Val.FRM: {
      final Frame input = operand.getFrame();
      final String[] outNames = new String[input.numCols()];
      for (int n = 0; n < outNames.length; n++) {
        outNames[n] = "isNA(" + input.name(n) + ")";
      }
      // One output column per input column; each cell flags NA-ness of the matching input cell
      final MRTask naFlags = new MRTask() {
        @Override
        public void map(Chunk[] cs, NewChunk[] ncs) {
          for (int col = 0; col < cs.length; col++) {
            final Chunk in = cs[col];
            final NewChunk out = ncs[col];
            for (int row = 0; row < in._len; row++) {
              final int flag = in.isNA(row) ? 1 : 0;
              out.addNum(flag);
            }
          }
        }
      };
      return new ValFrame(naFlags.doAll(input.numCols(), Vec.T_NUM, input).outputFrame(outNames, null));
    }
    case Val.STR: {
      final int flag = val_isNullStr(operand) ? 1 : 0;
      return new ValNum(flag);
    }
    default:
      throw H2O.unimpl("is.na unimpl: " + operand.getClass());
  }
}

// True when the string payload of the value is null.
private static boolean val_isNullStr(Val v) {
  return v.getStr() == null;
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstLevels method apply.
/**
 * Builds a frame listing the level indices of every column of the input frame.
 *
 * <p>Each output column holds {@code 0..domain.length-1} for its source column, followed by NAs
 * so that every column has as many rows as the largest categorical domain. A column with no
 * domain is entirely NA. Each output vec is given its source column's domain.
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame input = stk.track(asts[1].exec(env)).getFrame();
  final Futures pending = new Futures();
  final Key[] vecKeys = Vec.VectorGroup.VG_LEN1.addVecs(input.numCols());
  final Vec[] levelVecs = new Vec[vecKeys.length];

  // The output row count is the size of the largest categorical domain
  int maxLevels = 0;
  for (int col = 0; col < input.numCols(); ++col) {
    final Vec src = input.vec(col);
    if (src.isCategorical()) {
      maxLevels = Math.max(maxLevels, src.domain().length);
    }
  }
  final int rowLayout = Vec.ESPC.rowLayout(vecKeys[0], new long[] { 0, maxLevels });

  for (int col = 0; col < input.numCols(); ++col) {
    final AppendableVec target = new AppendableVec(vecKeys[col], Vec.T_NUM);
    final NewChunk chunk = new NewChunk(target, 0);
    final String[] levels = input.vec(col).domain();
    final int levelCount = (levels == null) ? 0 : levels.length;
    // Level indices first ...
    for (int lvl = 0; lvl < levelCount; ++lvl) {
      chunk.addNum(lvl);
    }
    // ... then NA padding up to the common length
    for (int pad = levelCount; pad < maxLevels; ++pad) {
      chunk.addNA();
    }
    chunk.close(0, pending);
    final Vec closed = target.close(rowLayout, pending);
    closed.setDomain(levels);
    levelVecs[col] = closed;
  }
  pending.blockForPending();
  return new ValFrame(new Frame(levelVecs));
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstBinOp method scalar_op_frame.
/**
 * Applies the operator between a scalar (left operand) and every cell of a frame (right
 * operand), producing a numeric frame with the same column names.
 *
 * @param d  the scalar, broadcast to every element
 * @param fr the frame supplying the right-hand operands
 * @return the element-wise result after {@code cleanCategorical} post-processing
 */
private ValFrame scalar_op_frame(final double d, Frame fr) {
  final MRTask broadcast = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      for (int col = 0; col < chks.length; col++) {
        final Chunk in = chks[col];
        final NewChunk out = cress[col];
        for (int row = 0; row < in._len; row++) {
          final double result = op(d, in.atd(row));
          out.addNum(result);
        }
      }
    }
  };
  final Frame computed = broadcast.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
  // Cleanup categorical misuse
  return cleanCategorical(fr, computed);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstIfElse method apply.
/**
 * Evaluates an if-else: asts[1] is the test, asts[2] the "true" branch, asts[3] the "false" branch.
 *
 * Scalar test: a NaN test yields NaN; otherwise exactly one branch is executed (asts[3] when the
 * test equals 0, asts[2] otherwise). A frame-valued branch result is reduced to the first cell of
 * its first column.
 *
 * Row test: both branches are executed and the work is delegated to row_ifelse.
 *
 * Frame test: a branch is only executed when at least one test cell can select it. The result has
 * one column per test column; each cell is NA where the test is NA, the false value where the test
 * is 0, and the true value otherwise. String branch values produce categorical result columns.
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
Val val = stk.track(asts[1].exec(env));
if (val.isNum()) {
// Scalar test, scalar result
double d = val.getNum();
if (Double.isNaN(d))
return new ValNum(Double.NaN);
// exec only 1 of false and true
Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
// A frame result is collapsed to its top-left cell
return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
}
// Frame test. Frame result.
if (val.type() == Val.ROW)
return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
Frame tst = val.getFrame();
// If all zero's, return false and never execute true.
// fr starts as a copy of the test frame; NOTE(review): exec_check presumably appends the
// columns of a frame-valued branch to fr (the column offsets used below rely on it) - confirm.
Frame fr = new Frame(tst);
Val tval = null;
for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
tval = exec_check(env, stk, tst, asts[2], fr);
break;
}
// Classify the true-branch value: frame, string or number (all "absent" if never executed)
final boolean has_tfr = tval != null && tval.isFrame();
final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
// Per-column index of the true string within the result domain
final int[] tsIntMap = new int[tst.numCols()];
// If all nonzero's (or NA's), then never execute false.
Val fval = null;
for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
fval = exec_check(env, stk, tst, asts[3], fr);
break;
}
// Classify the false-branch value the same way
final boolean has_ffr = fval != null && fval.isFrame();
final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
// Per-column index of the false string within the result domain
final int[] fsIntMap = new int[tst.numCols()];
String[][] domains = null;
// Per-column remapping passed to domainMap for frame-branch values; entries stay null unless
// a string has to be merged into a frame branch's domain
final int[][] maps = new int[tst.numCols()][];
if (fs != null || ts != null) {
// time to build domains...
domains = new String[tst.numCols()][];
if (fs != null && ts != null) {
// Both branches are strings: every column gets the two-level domain {fs, ts}
for (int i = 0; i < tst.numCols(); ++i) {
// false => 0; truth => 1
domains[i] = new String[] { fs, ts };
fsIntMap[i] = 0;
tsIntMap[i] = 1;
}
} else if (ts != null) {
// True branch is a string; the false branch must be a categorical frame
for (int i = 0; i < tst.numCols(); ++i) {
if (has_ffr) {
// Column of the false frame matching test column i (has_tfr is false in this branch,
// so the offset is one block of test columns)
Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// New domain = existing levels plus the true string, sorted
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = ts;
Arrays.sort(dom);
// NOTE(review): computeMap presumably maps old level indices to positions in dom - confirm
maps[i] = computeMap(v.domain(), dom);
tsIntMap[i] = ArrayUtils.find(dom, ts);
domains[i] = dom;
} else
throw H2O.unimpl();
}
} else {
// fs!=null
// False branch is a string; the true branch must be a categorical frame
for (int i = 0; i < tst.numCols(); ++i) {
if (has_tfr) {
// Column of the true frame matching test column i (has_ffr is false in this branch)
Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// New domain = existing levels plus the false string, sorted
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = fs;
Arrays.sort(dom);
maps[i] = computeMap(v.domain(), dom);
fsIntMap[i] = ArrayUtils.find(dom, fs);
domains[i] = dom;
} else
throw H2O.unimpl();
}
}
}
// Now pick from left-or-right in the new frame
Frame res = new MRTask() {
@Override
public void map(Chunk[] chks, NewChunk[] nchks) {
// Input layout: test columns, then true-frame columns (if any), then false-frame columns (if any)
assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
for (int i = 0; i < nchks.length; i++) {
Chunk ctst = chks[i];
NewChunk res = nchks[i];
for (int row = 0; row < ctst._len; row++) {
double d;
if (ctst.isNA(row))
// NA test propagates as NA
d = Double.NaN;
else if (ctst.atd(row) == 0)
// False: take the false-frame cell, else the false string's level index, else the false number
d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
else
// True: take the true-frame cell, else the true string's level index, else the true number
d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
res.addNum(d);
}
}
}
}.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
// flatten domains since they may be larger than needed
if (domains != null) {
for (int i = 0; i < res.numCols(); ++i) {
if (res.vec(i).domain() != null) {
// NOTE(review): CollectDomainFast presumably returns the distinct level indices actually
// present in the column - confirm
final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
// Keep only the labels for the collected indices
String[] newDomain = new String[dom.length];
for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
// Rewrite each non-NA cell in place to its position within the collected index array
new MRTask() {
@Override
public void map(Chunk c) {
for (int i = 0; i < c._len; ++i) {
if (!c.isNA(i))
c.set(i, ArrayUtils.find(dom, c.at8(i)));
}
}
}.doAll(res.vec(i));
// needs a DKVput?
res.vec(i).setDomain(newDomain);
}
}
}
return new ValFrame(res);
}
Aggregations