Use of water.fvec.Chunk in project h2o-3 by h2oai.
Class AstIsNa, method exec.
/**
 * Evaluates the NA test on the operand found in {@code args[1]}.
 *
 * <ul>
 *   <li>Number: the scalar {@code op} result wrapped in a {@link ValNum}.</li>
 *   <li>Frame: a new frame with one output column per input column, named
 *       {@code isNA(<original name>)}, holding 1 where the input is NA and 0 otherwise.</li>
 *   <li>String: 1 when the string is {@code null}, otherwise 0.</li>
 * </ul>
 *
 * @param args evaluated arguments; only {@code args[1]} is read
 * @return the scalar or frame result described above
 * @throws RuntimeException via {@code H2O.unimpl} for any other value type
 */
@Override
public Val exec(Val... args) {
  final Val operand = args[1];
  final int kind = operand.type();

  if (kind == Val.NUM) {
    return new ValNum(op(operand.getNum()));
  }
  if (kind == Val.STR) {
    return new ValNum(operand.getStr() == null ? 1 : 0);
  }
  if (kind != Val.FRM) {
    throw H2O.unimpl("is.na unimpl: " + operand.getClass());
  }

  // Frame operand: build the output column names first, then map every cell to 0/1.
  final Frame frame = operand.getFrame();
  final String[] outNames = new String[frame.numCols()];
  for (int colIdx = 0; colIdx < outNames.length; colIdx++) {
    outNames[colIdx] = "isNA(" + frame.name(colIdx) + ")";
  }

  return new ValFrame(new MRTask() {
    @Override
    public void map(Chunk[] inputs, NewChunk[] outputs) {
      for (int colIdx = 0; colIdx < inputs.length; colIdx++) {
        final Chunk in = inputs[colIdx];
        final NewChunk out = outputs[colIdx];
        for (int row = 0; row < in._len; row++) {
          out.addNum(in.isNA(row) ? 1 : 0);
        }
      }
    }
  }.doAll(frame.numCols(), Vec.T_NUM, frame).outputFrame(outNames, null));
}
Use of water.fvec.Chunk in project h2o-3 by h2oai.
Class AstBinOp, method scalar_op_frame.
/**
 * Applies the binary operator between a scalar and every element of a frame,
 * with the scalar as the left operand: {@code op(d, element)}.
 *
 * @param d  the scalar, used as the left-hand operand for every cell
 * @param fr the frame whose cells supply the right-hand operand
 * @return a frame with the same column names as {@code fr}, post-processed by
 *         {@code cleanCategorical}
 */
private ValFrame scalar_op_frame(final double d, Frame fr) {
  final int width = fr.numCols();
  Frame computed = new MRTask() {
    @Override
    public void map(Chunk[] inputs, NewChunk[] outputs) {
      for (int colIdx = 0; colIdx < inputs.length; colIdx++) {
        final Chunk in = inputs[colIdx];
        final NewChunk out = outputs[colIdx];
        final int rows = in._len;
        for (int row = 0; row < rows; row++) {
          out.addNum(op(d, in.atd(row)));
        }
      }
    }
  }.doAll(width, Vec.T_NUM, fr).outputFrame(fr._names, null);
  // Let cleanCategorical deal with columns of the input that were categorical.
  return cleanCategorical(fr, computed);
}
Use of water.fvec.Chunk in project h2o-3 by h2oai.
Class AstIfElse, method apply.
/**
 * Evaluates an if-else expression: {@code asts[1]} is the test, {@code asts[2]} the
 * "true" branch and {@code asts[3]} the "false" branch.
 *
 * <ul>
 *   <li>Numeric test: NaN yields NaN; otherwise exactly one branch is executed
 *       (false branch when the test equals 0). A frame-valued branch result is reduced
 *       to the first element of its first column.</li>
 *   <li>Row test: both branches are executed and handed to {@code row_ifelse}.</li>
 *   <li>Frame test: each branch is executed only if some test cell can select it, and the
 *       result is chosen cell by cell. An NA test cell produces NA. String branch values
 *       turn the output columns into categoricals.</li>
 * </ul>
 *
 * NOTE(review): {@code exec_check}, {@code computeMap}, {@code domainMap} and
 * {@code row_ifelse} are defined outside this snippet; remarks about them below are
 * inferred from how they are used here and should be confirmed.
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
// Evaluate the test expression once and track it on the stack helper.
Val val = stk.track(asts[1].exec(env));
if (val.isNum()) {
// Scalar test, scalar result
double d = val.getNum();
if (Double.isNaN(d))
return new ValNum(Double.NaN);
// exec only 1 of false and true
Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
// A frame-valued branch is collapsed to its first cell (column 0, row 0).
return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
}
// Frame test. Frame result.
// Row test: both branches are evaluated eagerly here (and are not passed through stk.track).
if (val.type() == Val.ROW)
return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
Frame tst = val.getFrame();
// If all zero's, return false and never execute true.
// fr starts as a copy of the test frame and is handed to exec_check for each branch.
// NOTE(review): the index arithmetic further down assumes exec_check appends a
// frame-valued branch's columns to fr (true branch first, then false) - confirm.
Frame fr = new Frame(tst);
Val tval = null;
// Execute the true branch only if some test column is not entirely zero.
for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
tval = exec_check(env, stk, tst, asts[2], fr);
break;
}
// Classify the true-branch result: frame, string constant, or numeric constant.
final boolean has_tfr = tval != null && tval.isFrame();
final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
// Per-column integer level that the true-branch string maps to (used only when ts != null).
final int[] tsIntMap = new int[tst.numCols()];
// If all nonzero's (or NA's), then never execute false.
Val fval = null;
// nzCnt + naCnt < length means at least one cell is exactly zero, so false can be selected.
for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
fval = exec_check(env, stk, tst, asts[3], fr);
break;
}
// Classify the false-branch result the same way.
final boolean has_ffr = fval != null && fval.isFrame();
final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
// Per-column integer level that the false-branch string maps to (used only when fs != null).
final int[] fsIntMap = new int[tst.numCols()];
// Output domains stay null unless at least one branch is a string.
String[][] domains = null;
// maps[i] stays null unless column i's categorical branch frame had its domain extended below.
final int[][] maps = new int[tst.numCols()][];
if (fs != null || ts != null) {
// time to build domains...
domains = new String[tst.numCols()][];
if (fs != null && ts != null) {
// Both branches are strings: every output column gets the two-level domain {fs, ts}.
for (int i = 0; i < tst.numCols(); ++i) {
// false => 0; truth => 1
domains[i] = new String[] { fs, ts };
fsIntMap[i] = 0;
tsIntMap[i] = 1;
}
} else if (ts != null) {
// Only the true branch is a string: the false branch must be a categorical frame.
for (int i = 0; i < tst.numCols(); ++i) {
if (has_ffr) {
// has_tfr is false in this branch (tval is a string), so this indexes the column at i + numCols.
Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// Extend the column's domain with the string constant and re-sort it.
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = ts;
Arrays.sort(dom);
// Map from the column's original domain to the extended, sorted domain.
maps[i] = computeMap(v.domain(), dom);
tsIntMap[i] = ArrayUtils.find(dom, ts);
domains[i] = dom;
} else
// Reached when the false branch is a number or was never executed (fval == null).
throw H2O.unimpl();
}
} else {
// fs!=null
// Only the false branch is a string: the true branch must be a categorical frame.
for (int i = 0; i < tst.numCols(); ++i) {
if (has_tfr) {
// has_ffr is false in this branch (fval is a string), so this indexes the column at i + numCols.
Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// Extend the column's domain with the string constant and re-sort it.
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = fs;
Arrays.sort(dom);
// Map from the column's original domain to the extended, sorted domain.
maps[i] = computeMap(v.domain(), dom);
fsIntMap[i] = ArrayUtils.find(dom, fs);
domains[i] = dom;
} else
// Reached when the true branch is a number or was never executed (tval == null).
throw H2O.unimpl();
}
}
}
// Now pick from left-or-right in the new frame
Frame res = new MRTask() {
@Override
public void map(Chunk[] chks, NewChunk[] nchks) {
// Expected chunk layout: [test cols][true cols, if a frame][false cols, if a frame].
assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
for (int i = 0; i < nchks.length; i++) {
Chunk ctst = chks[i];
NewChunk res = nchks[i];
for (int row = 0; row < ctst._len; row++) {
double d;
if (ctst.isNA(row))
// NA in the test propagates as NA in the output.
d = Double.NaN;
else if (ctst.atd(row) == 0)
// False: take the false-frame cell (passed through domainMap), else the string's level, else the numeric constant.
d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
else
// True: take the true-frame cell (passed through domainMap), else the string's level, else the numeric constant.
d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
res.addNum(d);
}
}
}
}.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
// flatten domains since they may be larger than needed
if (domains != null) {
for (int i = 0; i < res.numCols(); ++i) {
if (res.vec(i).domain() != null) {
// Collect the level indices present in the column (per the CollectDomainFast name; bounded by the column max).
final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
// Build the compact domain holding only the collected levels.
String[] newDomain = new String[dom.length];
for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
// Rewrite each non-NA cell in place to its position within the compact level list.
new MRTask() {
@Override
public void map(Chunk c) {
for (int i = 0; i < c._len; ++i) {
if (!c.isNA(i))
c.set(i, ArrayUtils.find(dom, c.at8(i)));
}
}
}.doAll(res.vec(i));
// needs a DKVput?
res.vec(i).setDomain(newDomain);
}
}
}
return new ValFrame(res);
}
Use of water.fvec.Chunk in project h2o-3 by h2oai.
Class AstMad, method mad.
/**
 * Computes the scaled median absolute deviation of a frame:
 * {@code constant * median(|x - median(x)|)}, with both medians obtained from
 * {@code AstMedian.median} using the given combine method.
 *
 * <p>Values are read as doubles so fractional data is not truncated before the
 * deviation is taken. A frame passed in with an existing key keeps its DKV mapping;
 * only temporary mappings created by this method are removed, and they are removed
 * even if a median computation throws.
 *
 * @param f        the frame to analyse; the deviation task emits a single output column
 * @param cm       combine method forwarded to {@code AstMedian.median}
 * @param constant scale factor applied to the resulting deviation
 * @return {@code constant} times the median of the absolute deviations
 */
public static double mad(Frame f, QuantileModel.CombineMethod cm, double constant) {
  // need Frames everywhere because of QuantileModel demanding a Frame...
  // A key-less input is wrapped in a temporary keyed Frame sharing the same vecs.
  Key inputTmpKey = null;
  if (f._key == null) {
    inputTmpKey = Key.make();
    f = new Frame(inputTmpKey, f.names(), f.vecs());
    DKV.put(inputTmpKey, f);
  }
  Frame abs_dev = null;
  try {
    final double median = AstMedian.median(f, cm);
    abs_dev = new MRTask() {
      @Override
      public void map(Chunk c, NewChunk nc) {
        // atd (double read) rather than at8 (integer read): keeps the fractional part of each value.
        for (int i = 0; i < c._len; ++i) nc.addNum(Math.abs(c.atd(i) - median));
      }
    }.doAll(1, Vec.T_NUM, f).outputFrame();
    if (abs_dev._key == null) {
      Key devTmpKey = Key.make();
      abs_dev = new Frame(devTmpKey, abs_dev.names(), abs_dev.vecs());
      DKV.put(devTmpKey, abs_dev);
    }
    return constant * AstMedian.median(abs_dev, cm);
  } finally {
    // Drop only the mapping this method created for the input; keep its vecs.
    // A caller-supplied keyed frame is left untouched in the DKV.
    if (inputTmpKey != null) {
      DKV.remove(inputTmpKey);
    }
    // abs_dev is local to this method, so its mapping is always dropped.
    // NOTE(review): as in the original, only the mapping is removed; the Vec backing
    // abs_dev is not deleted here - confirm whether it should be.
    if (abs_dev != null && abs_dev._key != null) {
      DKV.remove(abs_dev._key);
    }
  }
}
Use of water.fvec.Chunk in project h2o-3 by h2oai.
Class AstDiffLag1, method apply.
/**
 * Computes the lag-1 difference of a single numeric column: each output row is the
 * current value minus the previous one, and the very first row is NA.
 *
 * <p>The first row of every chunk after the first is differenced against the
 * preceding chunk's entry in the array returned by {@code GetLastElemPerChunkTask.get}.
 *
 * @param env  evaluation environment used to execute {@code asts[1]}
 * @param stk  stack helper that tracks the evaluated input frame
 * @param asts AST arguments; {@code asts[1]} must evaluate to a one-column numeric frame
 * @return a frame with the input's names, types and domains holding the differences
 * @throws IllegalArgumentException if the frame does not have exactly one column or
 *         that column is not numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame input = stk.track(asts[1].exec(env).getFrame());
  if (input.numCols() != 1) {
    throw new IllegalArgumentException("Expected a single column for diff. Got: " + input.numCols() + " columns.");
  }
  if (!input.anyVec().isNumeric()) {
    throw new IllegalArgumentException("Expected a numeric column for diff. Got: " + input.anyVec().get_type_str());
  }

  // Boundary values indexed by chunk; needed to difference across chunk boundaries.
  final double[] chunkTails = GetLastElemPerChunkTask.get(input.anyVec());

  return new ValFrame(new MRTask() {
    @Override
    public void map(Chunk in, NewChunk out) {
      final int chunkIdx = in.cidx();
      if (chunkIdx != 0) {
        // First row of a later chunk: subtract the preceding chunk's boundary value.
        out.addNum(in.atd(0) - chunkTails[chunkIdx - 1]);
      } else {
        // Very first row of the column has no predecessor.
        out.addNA();
      }
      for (int r = 1; r < in._len; ++r) {
        out.addNum(in.atd(r) - in.atd(r - 1));
      }
    }
  }.doAll(input.types(), input).outputFrame(input.names(), input.domains()));
}
Aggregations