Use of water.MRTask in project h2o-3 by h2oai.
Class AstReplaceFirst, method replaceFirstStringCol.
/**
 * Produces a new string Vec in which, for every non-NA row of {@code vec}, the first
 * match of {@code pat} is replaced by {@code rep}. NA rows stay NA.
 *
 * @param vec string column to transform
 * @param pat regular expression, with the same semantics as {@link String#replaceFirst}
 * @param rep replacement text (regex replacement syntax, as in {@link String#replaceFirst})
 * @param ic  when true, each value is lower-cased with {@code Locale.ENGLISH} before matching
 * @return the single {@code Vec.T_STR} column of the task's output frame
 */
private Vec replaceFirstStringCol(Vec vec, String pat, String rep, boolean ic) {
  // Final copies so the anonymous task below can capture them.
  final String pattern = pat;
  final String replacement = rep;
  final boolean ignoreCase = ic;
  return new MRTask() {
    @Override
    public void map(Chunk chk, NewChunk newChk) {
      if (chk instanceof C0DChunk) { // all NAs
        final int rows = chk.len();
        for (int i = 0; i < rows; i++) newChk.addNA();
        return;
      }
      // String.replaceFirst(regex, repl) is specified as
      // Pattern.compile(regex).matcher(str).replaceFirst(repl); compiling once per chunk
      // avoids recompiling the same regex for every row. The Pattern is a local of map()
      // and is deliberately not stored as a field of the task.
      final java.util.regex.Pattern compiled = java.util.regex.Pattern.compile(pattern);
      // Java String methods are used here so that non-ASCII (UTF) values are handled accurately.
      final BufferedString tmpStr = new BufferedString();
      for (int i = 0; i < chk._len; i++) {
        if (chk.isNA(i)) {
          newChk.addNA();
          continue;
        }
        String value = chk.atStr(tmpStr, i).toString();
        // NOTE(review): ignoreCase lower-cases the value itself, so the emitted string is
        // lower-cased as well; the pattern is used as given in this method.
        if (ignoreCase) value = value.toLowerCase(Locale.ENGLISH);
        newChk.addStr(compiled.matcher(value).replaceFirst(replacement));
      }
    }
  }.doAll(new byte[] { Vec.T_STR }, vec).outputFrame().anyVec();
}
Use of water.MRTask in project h2o-3 by h2oai.
Class AstStrSplit, method strSplitCategoricalCol.
/**
 * Splits each level of a categorical column on {@code splitRegEx} and emits one categorical
 * output column per token position.
 *
 * <p>For every row, the row's level string is split; token {@code k} is looked up in
 * {@code new_domains[k]} and its index is written to output column {@code k} (NA when the
 * token is not found). Output columns beyond the number of tokens, and all columns of NA
 * rows, receive NA.
 *
 * @param vec        categorical column whose domain strings are split
 * @param splitRegEx regular expression passed to {@link String#split(String)}
 * @return the Vecs of the output frame built with {@code new_domains}
 */
private Vec[] strSplitCategoricalCol(Vec vec, String splitRegEx) {
  final String[] old_domains = vec.domain();
  final String[][] new_domains = newDomains(old_domains, splitRegEx);
  final String regex = splitRegEx;
  return new MRTask() {
    @Override
    public void map(Chunk[] cs, NewChunk[] ncs) {
      Chunk c = cs[0];
      // Per-chunk cache: codesByLevel[level][k] is the index of the level's k-th token in
      // new_domains[k] (-1 when absent). Filled lazily so each level is split and looked up
      // at most once per chunk instead of once per row.
      final int[][] codesByLevel = new int[old_domains.length][];
      for (int i = 0; i < c._len; ++i) {
        int cnt = 0;
        if (!c.isNA(i)) {
          int idx = (int) c.at8(i);
          int[] codes = codesByLevel[idx];
          if (codes == null) {
            String[] tokens = old_domains[idx].split(regex);
            codes = new int[tokens.length];
            for (int k = 0; k < tokens.length; ++k)
              codes[k] = Arrays.asList(new_domains[k]).indexOf(tokens[k]);
            codesByLevel[idx] = codes;
          }
          for (int code : codes) {
            if (code == -1)
              ncs[cnt++].addNA();
            else
              ncs[cnt++].addNum(code);
          }
        }
        // Pad the remaining output columns for this row with NA.
        for (; cnt < ncs.length; ++cnt) ncs[cnt].addNA();
      }
    }
  }.doAll(new_domains.length, Vec.T_CAT, new Frame(vec)).outputFrame(null, null, new_domains).vecs();
}
Use of water.MRTask in project h2o-3 by h2oai.
Class AstSetTimeZone, method apply.
/**
 * Reads a timezone id from {@code asts[1]}, rejects it unless it is one of
 * {@code DateTimeZone.getAvailableIDs()}, and then launches a task via {@code doAllNodes()}
 * whose {@code setupLocal()} calls {@code ParseTime.setTimezone} with that id.
 *
 * @return a {@code ValNum} holding NaN
 * @throws IllegalArgumentException if the id is not among the available timezone ids
 */
@Override
public ValNum apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final String zoneId = asts[1].exec(env).getStr();
  final boolean recognised = DateTimeZone.getAvailableIDs().contains(zoneId);
  if (!recognised) {
    throw new IllegalArgumentException("Unacceptable timezone " + zoneId + " given. For a list of acceptable names, use listTimezone().");
  }
  new MRTask() {
    @Override
    public void setupLocal() {
      ParseTime.setTimezone(zoneId);
    }
  }.doAllNodes();
  return new ValNum(Double.NaN);
}
Use of water.MRTask in project h2o-3 by h2oai.
Class AstBinOp, method vec_op_frame.
/**
 * Applies {@code op(vecValue, frameValue)} row by row, pairing {@code vec} (left operand)
 * with each column of {@code fr} (right operand). The result has one numeric column per
 * column of {@code fr}, named after {@code fr}'s columns, and is passed through
 * {@code cleanCategorical}.
 */
private ValFrame vec_op_frame(Vec vec, Frame fr) {
  // Callers have already verified matching row counts and a non-empty frame.
  // Work on a copy of fr with vec appended as the last column.
  Frame combined = new Frame(fr);
  combined.add("", vec);
  Frame result = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      final int outCols = cress.length;
      assert outCols == chks.length - 1;
      // The appended vec is the last input chunk.
      final Chunk left = chks[outCols];
      int col = 0;
      while (col < outCols) {
        final Chunk right = chks[col];
        final NewChunk out = cress[col];
        for (int row = 0; row < left._len; row++) {
          final double value = op(left.atd(row), right.atd(row));
          out.addNum(value);
        }
        col++;
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, combined).outputFrame(fr._names, null);
  // Cleanup categorical misuse
  return cleanCategorical(fr, result);
}
Use of water.MRTask in project h2o-3 by h2oai.
Class AstBinOp, method frame_op_row.
/**
 * Applies {@code op(frameValue, rowValue)} to every cell of {@code lf}, where the right
 * operand for column {@code c} is the first element of column {@code c} of {@code row}.
 *
 * <p>Columns of {@code row} that are neither numeric nor time contribute NaN as the right
 * operand. String columns of {@code lf} produce NaN for every row. The result has one
 * numeric column per column of {@code lf}, named after {@code lf}'s columns, and is passed
 * through {@code cleanCategorical}.
 */
private ValFrame frame_op_row(Frame lf, Frame row) {
  final double[] rawRow = new double[row.numCols()];
  for (int i = 0; i < rawRow.length; ++i) {
    // is numberlike, if not then NaN
    final Vec cell = row.vec(i);
    rawRow[i] = cell.isNumeric() || cell.isTime() ? cell.at(0) : Double.NaN;
  }
  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      for (int c = 0; c < cress.length; c++) {
        Chunk clf = chks[c];
        NewChunk cres = cress[c];
        // The column type does not change from row to row, so test it once per column.
        final boolean stringCol = clf.vec().isString();
        for (int r = 0; r < clf._len; ++r) {
          if (stringCol)
            // TODO: improve
            cres.addNum(Double.NaN);
          else
            cres.addNum(op(clf.atd(r), rawRow[c]));
        }
      }
    }
  }.doAll(lf.numCols(), Vec.T_NUM, lf).outputFrame(lf._names, null);
  return cleanCategorical(lf, res);
}
Aggregations