use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstBinOp method frame_op_frame.
/**
 * Applies this binary operator to two frames, element by element.
 *
 * <p>Shape rules, checked in this order:
 * <ul>
 *   <li>Row counts differ: allowed only when one side has exactly one row and
 *       both sides have the same column count; the work is then delegated to
 *       {@code frame_op_row}.</li>
 *   <li>Either frame has zero columns: that frame is returned wrapped as-is.</li>
 *   <li>Exactly one side has a single column and the other has more: the single
 *       column is auto-widened via {@code vec_op_frame} / {@code frame_op_vec}.</li>
 *   <li>Otherwise the column counts must match and columns are paired by
 *       position.</li>
 * </ul>
 *
 * @param lf left-hand frame
 * @param rt right-hand frame
 * @return the result frame, named after {@code lf}'s columns on the pairwise path
 * @throws IllegalArgumentException if the row or column counts are incompatible
 */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
  final long leftRows = lf.numRows();
  final long rightRows = rt.numRows();
  if (leftRows != rightRows) {
    // Mismatched row counts are only tolerated for single-row broadcasting.
    if (leftRows != 1 && rightRows != 1) {
      throw new IllegalArgumentException("Frames must have same rows, found " + leftRows + " rows and " + rightRows + " rows.");
    }
    if (lf.numCols() != rt.numCols()) {
      throw new IllegalArgumentException("Frames must have same columns, found " + lf.numCols() + " columns and " + rt.numCols() + " columns.");
    }
    return frame_op_row(lf, rt);
  }

  final int leftCols = lf.numCols();
  final int rightCols = rt.numCols();
  if (leftCols == 0) {
    return new ValFrame(lf);
  }
  if (rightCols == 0) {
    return new ValFrame(rt);
  }
  if (leftCols == 1 && rightCols > 1) {
    return vec_op_frame(lf.vecs()[0], rt);
  }
  if (rightCols == 1 && leftCols > 1) {
    return frame_op_vec(lf, rt.vecs()[0]);
  }
  if (leftCols != rightCols) {
    throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
  }

  // Left columns first, right columns appended after them, so column k of the
  // left frame is paired with chunk index k + (number of outputs) below.
  final Frame combined = new Frame(lf).add(rt);
  final Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final BufferedString leftBuf = new BufferedString();
      final BufferedString rightBuf = new BufferedString();
      final int ncols = cress.length;
      assert (ncols << 1) == chks.length;
      for (int col = 0; col < ncols; col++) {
        final Chunk left = chks[col];
        final Chunk right = chks[col + ncols];
        final NewChunk out = cress[col];
        // The left column's type alone selects the string or numeric operator.
        if (left.vec().isString()) {
          for (int row = 0; row < left._len; row++) {
            out.addNum(str_op(left.atStr(leftBuf, row), right.atStr(rightBuf, row)));
          }
        } else {
          for (int row = 0; row < left._len; row++) {
            out.addNum(op(left.atd(row), right.atd(row)));
          }
        }
      }
    }
  }.doAll(leftCols, Vec.T_NUM, combined).outputFrame(lf._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(lf, res);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstBinOp method frame_op_vec.
/**
 * Applies this binary operator between every column of {@code fr} (left
 * operand) and the single column {@code vec} (right operand).
 *
 * <p>No shape validation happens here; per the original note, the caller has
 * already checked for equal row counts and a non-empty frame.
 *
 * @param fr  frame supplying the left-hand operands, one output column per input column
 * @param vec column supplying the right-hand operand for every column of {@code fr}
 * @return the result frame, carrying {@code fr}'s column names
 */
private ValFrame frame_op_vec(Frame fr, Vec vec) {
  // Shallow copy with the vector appended as the last column, so the task sees
  // it as the chunk immediately after the frame's own columns.
  final Frame withVec = new Frame(fr);
  withVec.add("", vec);

  final Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final int ncols = cress.length;
      assert ncols == chks.length - 1;
      final Chunk rhs = chks[ncols];
      for (int col = 0; col < ncols; col++) {
        final Chunk lhs = chks[col];
        final NewChunk out = cress[col];
        for (int row = 0; row < lhs._len; row++) {
          out.addNum(op(lhs.atd(row), rhs.atd(row)));
        }
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, withVec).outputFrame(fr._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(fr, res);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstBinOp method scalar_op_frame.
/**
 * Auto-widens a string scalar against every element of a frame, with the
 * scalar as the left operand.
 *
 * <p>Per-column handling depends on the column type:
 * <ul>
 *   <li>String columns: {@code str_op} is applied to the scalar and each element.</li>
 *   <li>Categorical columns: the scalar is looked up in the column's domain and
 *       the numeric {@code op} is applied to that index and each level code.</li>
 *   <li>All other columns: every row receives the constant {@code op(1, 2)}.</li>
 * </ul>
 *
 * @param str the string scalar (left operand)
 * @param fr  the frame whose elements form the right operand
 * @return a numeric frame with the same column names as {@code fr}
 */
private ValFrame scalar_op_frame(final String str, Frame fr) {
  final Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final BufferedString scratch = new BufferedString();
      for (int col = 0; col < chks.length; col++) {
        final Chunk in = chks[col];
        final NewChunk out = cress[col];
        final Vec column = in.vec();

        if (column.isString()) {
          // String column: compare as BufferedStrings, element by element.
          final BufferedString scalar = new BufferedString(str);
          for (int row = 0; row < in._len; row++) {
            out.addNum(str_op(scalar, in.atStr(scratch, row)));
          }
          continue;
        }

        if (column.isCategorical()) {
          // Categorical column: translate the scalar into its domain index and
          // use the numeric op rather than str_op. The original author noted
          // this only really makes sense for EQ/NE, and that comparing doubles
          // is much faster than comparing strings.
          final double level = (double) ArrayUtils.find(column.domain(), str);
          for (int row = 0; row < in._len; row++) {
            out.addNum(op(level, in.atd(row)));
          }
          continue;
        }

        // String scalar against a numeric column: the answer is a constant
        // (true or false only), computed once from two distinct numbers.
        final double constant = op(1, 2);
        for (int row = 0; row < in._len; row++) {
          out.addNum(constant);
        }
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
  return new ValFrame(res);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstAsDate method apply.
/**
 * Converts a single categorical or string column into a numeric column of
 * millisecond timestamps.
 *
 * <p>{@code asts[1]} is evaluated to obtain the input frame and {@code asts[2]}
 * to obtain the format string. Each non-NA value is parsed with a formatter
 * built from that format and the result of {@code ParseTime.getTimezone()};
 * NA values are passed through as NA.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper used to track the evaluated frame
 * @param asts argument ASTs; index 1 is the frame, index 2 is the format string
 * @return a one-column frame of parsed timestamps in milliseconds
 * @throws IllegalArgumentException if the frame does not have exactly one
 *         categorical or string column, or if the format string is empty
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  // Check the column count BEFORE indexing into the array: a zero-column frame
  // must be rejected with the IllegalArgumentException below rather than
  // failing with an ArrayIndexOutOfBoundsException on vecs()[0].
  final Vec[] vecs = fr.vecs();
  if (vecs.length != 1 || !(vecs[0].isCategorical() || vecs[0].isString()))
    throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
  final Vec vec = vecs[0];
  final String format = asts[2].exec(env).getStr();
  if (format.isEmpty())
    throw new IllegalArgumentException("as.Date requires a non-empty format string");
  // check the format string more?
  final String[] dom = vec.domain();
  final boolean isStr = dom == null && vec.isString();
  assert isStr || dom != null : "as.Date error: domain is null, but vec is not String";
  Frame fr2 = new MRTask() {
    // Transient: rebuilt in setupLocal() instead of being shipped with the task.
    private transient DateTimeFormatter _fmt;

    @Override
    public void setupLocal() {
      //done on each node in lieu of rewriting DateTimeFormatter as Iced
      _fmt = ParseTime.forStrptimePattern(format).withZone(ParseTime.getTimezone());
    }

    @Override
    public void map(Chunk c, NewChunk nc) {
      String date;
      BufferedString tmpStr = new BufferedString();
      for (int i = 0; i < c._len; ++i) {
        if (!c.isNA(i)) {
          if (isStr)
            date = c.atStr(tmpStr, i).toString();
          else
            // Categorical: the stored value is an index into the domain.
            date = dom[(int) c.at8(i)];
          nc.addNum(DateTime.parse(date, _fmt).getMillis(), 0);
        } else
          nc.addNA();
      }
    }
  }.doAll(1, Vec.T_NUM, fr).outputFrame(fr._names, null);
  return new ValFrame(fr2);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstBinOp method vec_op_frame.
/**
 * Applies this binary operator between the single column {@code vec} (left
 * operand) and every column of {@code fr} (right operand).
 *
 * <p>No shape validation happens here; per the original note, the caller has
 * already checked for equal row counts and a non-empty frame.
 *
 * @param vec column supplying the left-hand operand for every column of {@code fr}
 * @param fr  frame supplying the right-hand operands, one output column per input column
 * @return the result frame, carrying {@code fr}'s column names
 */
private ValFrame vec_op_frame(Vec vec, Frame fr) {
  // Shallow copy with the vector appended as the last column, so the task sees
  // it as the chunk immediately after the frame's own columns.
  final Frame withVec = new Frame(fr);
  withVec.add("", vec);

  final Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final int ncols = cress.length;
      assert ncols == chks.length - 1;
      final Chunk lhs = chks[ncols];
      for (int col = 0; col < ncols; col++) {
        final Chunk rhs = chks[col];
        final NewChunk out = cress[col];
        // Row count is taken from the broadcast vector's chunk.
        for (int row = 0; row < lhs._len; row++) {
          out.addNum(op(lhs.atd(row), rhs.atd(row)));
        }
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, withVec).outputFrame(fr._names, null);

  // Cleanup categorical misuse
  return cleanCategorical(fr, res);
}
Aggregations