use of water.parser.BufferedString in project h2o-3 by h2oai.
the class WordCountTask method map.
/**
 * Tallies how often each non-NA string of the chunk occurs.
 *
 * <p>The tally is written to a freshly created {@code _counts} map, keyed by the
 * {@link BufferedString} read from the row. Every row gets its own
 * {@code BufferedString} instance, so a key stored in the map is never shared
 * with a later lookup.
 *
 * @param cs chunk whose rows are read via {@code atStr}
 */
@Override
public void map(Chunk cs) {
  _counts = new IcedHashMap<>();
  for (int row = 0; row < cs._len; row++) {
    if (!cs.isNA(row)) {
      final BufferedString word = cs.atStr(new BufferedString(), row);
      final IcedLong seen = _counts.get(word);
      if (seen == null) {
        // First sighting of this string: start its counter at one.
        _counts.put(word, new IcedLong(1));
      } else {
        seen._val++;
      }
    }
  }
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class AstEntropy method entropyStringCol.
/**
 * Builds a numeric vector holding one entropy value per row of a string vector.
 *
 * <p>Each chunk is handled by one of three paths: a {@code C0DChunk} (all NAs)
 * yields NAs only, an all-ASCII {@code CStrChunk} delegates to its own
 * {@code asciiEntropy}, and any other chunk is converted row by row to a Java
 * {@code String} and passed to {@code calcEntropy}.
 *
 * @param vec the input vector; chunks that are not {@code C0DChunk} are cast to
 *     {@code CStrChunk}
 * @return the single {@code T_NUM} vector of the task's output frame
 */
private Vec entropyStringCol(Vec vec) {
  return new MRTask() {
    @Override
    public void map(Chunk chk, NewChunk newChk) {
      // All NAs: nothing to compute, just emit as many NAs as the chunk has rows.
      if (chk instanceof C0DChunk) {
        newChk.addNAs(chk.len());
        return;
      }

      // Fast path: the chunk can compute the entropy itself when it is pure ASCII.
      final CStrChunk strChk = (CStrChunk) chk;
      if (strChk._isAllASCII) {
        strChk.asciiEntropy(newChk);
        return;
      }

      // UTF requires Java string methods, so materialize each row as a String.
      final BufferedString scratch = new BufferedString();
      for (int row = 0; row < chk._len; row++) {
        if (!chk.isNA(row)) {
          final String text = chk.atStr(scratch, row).toString();
          newChk.addNum(calcEntropy(text));
        } else {
          newChk.addNA();
        }
      }
    }
  }.doAll(new byte[] { Vec.T_NUM }, vec).outputFrame().anyVec();
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class AstBinOp method frame_op_scalar.
/**
 * Auto-widens the scalar string to every element of the frame and applies the
 * operator column by column.
 *
 * <ul>
 *   <li>String columns: each element is compared with {@code str} through
 *       {@code str_op}, both sides as {@link BufferedString}.</li>
 *   <li>Categorical columns: {@code str} is looked up in the column domain and
 *       the numeric {@code op} is applied to the resulting level index.</li>
 *   <li>All other columns: every row receives the constant {@code op(1, 2)}.</li>
 * </ul>
 *
 * @param fr  frame whose columns are processed
 * @param str scalar string operand
 * @return a {@code ValFrame} wrapping an all-numeric frame with the column names of {@code fr}
 */
private ValFrame frame_op_scalar(Frame fr, final String str) {
  Frame res = new MRTask() {
    @Override
    public void map(Chunk[] chks, NewChunk[] cress) {
      // Scratch buffer reused for every string cell that is read.
      final BufferedString scratch = new BufferedString();
      for (int col = 0; col < chks.length; col++) {
        final Chunk in = chks[col];
        final NewChunk out = cress[col];
        final Vec column = in.vec();

        if (column.isString()) {
          // String Vectors: apply str_op as BufferedStrings to all elements.
          final BufferedString scalar = new BufferedString(str);
          for (int row = 0; row < in._len; row++) {
            out.addNum(str_op(in.atStr(scratch, row), scalar));
          }
          continue;
        }

        if (column.isCategorical()) {
          // Categorical Vectors: convert the string to its domain value and
          // apply op (not str_op). One could argue for applying str_op to the
          // categorical string domain value instead, but this operation only
          // makes sense for EQ/NE and comparing doubles is much faster than
          // comparing strings. If the string is not part of the categorical
          // domain, find returns -1, which never equals a categorical dense
          // integer (those are always 0+).
          final double level = (double) ArrayUtils.find(column.domain(), str);
          for (int row = 0; row < in._len; row++) {
            out.addNum(op(in.atd(row), level));
          }
          continue;
        }

        // Mixing string and numeric: the outcome is false or true only, so it
        // is computed once and written to every row.
        final double constant = op(1, 2);
        for (int row = 0; row < in._len; row++) {
          out.addNum(constant);
        }
      }
    }
  }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
  return new ValFrame(res);
}
Aggregations