use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstIfElse method apply.
/**
 * Evaluates an if-else expression: asts[1] is the test, asts[2] the "true" branch and asts[3] the "false" branch.
 *
 * Three shapes of test value are handled:
 * - numeric scalar: a NaN test yields NaN; otherwise exactly one branch is executed (asts[3] when the test
 *   equals 0, asts[2] otherwise) and, if that branch produced a frame, only the value at column 0, row 0 is returned;
 * - row: both branches are executed and the selection is delegated to row_ifelse;
 * - frame: a new frame with one output column per test column is built cell by cell; an NA test cell yields NA,
 *   a zero test cell takes the "false" value and any other test cell takes the "true" value.
 *
 * In the frame case a branch may evaluate to a frame, a number or a string. String results turn the output
 * columns into categoricals whose domains are assembled here.
 *
 * @param env  execution environment passed to every exec call
 * @param stk  stack helper used to track intermediate values
 * @param asts argument ASTs; indices 1, 2 and 3 are read
 * @return a ValNum / branch value for a scalar test, the row_ifelse result for a row test, or a ValFrame
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
Val val = stk.track(asts[1].exec(env));
if (val.isNum()) {
// Scalar test, scalar result
double d = val.getNum();
if (Double.isNaN(d))
return new ValNum(Double.NaN);
// exec only 1 of false and true
Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
// A frame result is collapsed to its first cell (column 0, row 0)
return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
}
// Frame test. Frame result.
// Row test: both branches are always executed here (their results are not passed through stk.track)
if (val.type() == Val.ROW)
return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
Frame tst = val.getFrame();
// If all zero's, return false and never execute true.
// fr starts out holding the test columns and is handed to exec_check below.
// NOTE(review): the column indexing further down implies exec_check appends a frame-valued branch result
// to fr - confirm against exec_check.
Frame fr = new Frame(tst);
Val tval = null;
// Execute the true branch (once) as soon as any test column has a min or max different from 0
for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
tval = exec_check(env, stk, tst, asts[2], fr);
break;
}
// Classify the true-branch result: frame, string constant (ts) or numeric constant (td)
final boolean has_tfr = tval != null && tval.isFrame();
final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
// Per-column categorical level assigned to the true string (only filled in when ts != null)
final int[] tsIntMap = new int[tst.numCols()];
// If all nonzero's (or NA's), then never execute false.
Val fval = null;
// Execute the false branch (once) as soon as any test column has fewer nonzero+NA entries than rows
for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
fval = exec_check(env, stk, tst, asts[3], fr);
break;
}
// Classify the false-branch result the same way: frame, string constant (fs) or numeric constant (fd)
final boolean has_ffr = fval != null && fval.isFrame();
final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
// Per-column categorical level assigned to the false string (only filled in when fs != null)
final int[] fsIntMap = new int[tst.numCols()];
// domains: output domain per column, stays null when neither branch is a string.
// maps: per-column level remapping passed to domainMap; entries stay null unless filled in below.
String[][] domains = null;
final int[][] maps = new int[tst.numCols()][];
if (fs != null || ts != null) {
// time to build domains...
domains = new String[tst.numCols()][];
if (fs != null && ts != null) {
// Both branches are string constants: every column gets the same two-level domain
for (int i = 0; i < tst.numCols(); ++i) {
// false => 0; truth => 1
domains[i] = new String[] { fs, ts };
fsIntMap[i] = 0;
tsIntMap[i] = 1;
}
} else if (ts != null) {
// Only the true branch is a string: the false branch must be a frame of categorical columns
for (int i = 0; i < tst.numCols(); ++i) {
if (has_ffr) {
// Column i of the false frame inside fr (offset past the test columns and any true-frame columns)
Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// New domain = existing levels plus the true string, sorted
// NOTE(review): ts is appended without checking whether it already is a level of v - confirm duplicates are acceptable
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = ts;
Arrays.sort(dom);
// Remapping from the column's old domain to the merged one, and the level of ts in the merged domain
maps[i] = computeMap(v.domain(), dom);
tsIntMap[i] = ArrayUtils.find(dom, ts);
domains[i] = dom;
} else
// String true branch combined with a false branch that is neither string nor frame is not implemented
throw H2O.unimpl();
}
} else {
// fs!=null
// Mirror image of the case above: the true branch must be a frame of categorical columns
for (int i = 0; i < tst.numCols(); ++i) {
if (has_tfr) {
// Column i of the true frame inside fr
// NOTE(review): the has_ffr offset is presumably always 0 here since fval is a string - confirm
Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
if (!v.isCategorical())
throw H2O.unimpl("Column is not categorical.");
// New domain = existing levels plus the false string, sorted
String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
dom[dom.length - 1] = fs;
Arrays.sort(dom);
maps[i] = computeMap(v.domain(), dom);
fsIntMap[i] = ArrayUtils.find(dom, fs);
domains[i] = dom;
} else
// String false branch combined with a true branch that is neither string nor frame is not implemented
throw H2O.unimpl();
}
}
}
// Now pick from left-or-right in the new frame
Frame res = new MRTask() {
@Override
public void map(Chunk[] chks, NewChunk[] nchks) {
// Input chunk layout: test columns first, then the true-frame columns (if any), then the false-frame columns (if any)
assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
for (int i = 0; i < nchks.length; i++) {
Chunk ctst = chks[i];
NewChunk res = nchks[i];
for (int row = 0; row < ctst._len; row++) {
double d;
if (ctst.isNA(row))
// NA test cell => NA result
d = Double.NaN;
else if (ctst.atd(row) == 0)
// Zero test cell => false value: frame cell (passed through domainMap), else string level, else numeric constant
d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
else
// Nonzero test cell => true value, chosen with the same precedence
d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
res.addNum(d);
}
}
}
}.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
// flatten domains since they may be larger than needed
if (domains != null) {
for (int i = 0; i < res.numCols(); ++i) {
if (res.vec(i).domain() != null) {
// dom: result of CollectDomainFast over this column - presumably the level codes actually in use; confirm
final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
// Compacted domain: the label of each collected code, in the order given by dom
String[] newDomain = new String[dom.length];
for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
// Rewrite every non-NA cell in place to the position of its old code within dom
new MRTask() {
@Override
public void map(Chunk c) {
for (int i = 0; i < c._len; ++i) {
if (!c.isNA(i))
c.set(i, ArrayUtils.find(dom, c.at8(i)));
}
}
}.doAll(res.vec(i));
// needs a DKVput?
res.vec(i).setDomain(newDomain);
}
}
}
return new ValFrame(res);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstMean method rowwiseMean.
/**
 * Row-wise mean: produces a frame with a single column named "mean" holding, for every row of {@code fr},
 * the average of the selected columns.
 *
 * Column selection: if the frame has at least one numeric column, only numeric columns take part and the
 * output is numeric; otherwise only time columns take part and the output is a time column.
 *
 * @param fr    input frame
 * @param na_rm when true, NA cells are skipped (a row made only of NAs still yields NA); when false,
 *              any NA cell - or any column that is neither counted as numeric nor as time - makes the result NA
 * @return the one-column result wrapped in a ValFrame (an empty frame if {@code fr} has no columns)
 */
private ValFrame rowwiseMean(Frame fr, final boolean na_rm) {
    final String[] outNames = { "mean" };
    final Key<Frame> outKey = Key.make();

    // Tally the column types present in the input
    int numericCount = 0;
    int timeCount = 0;
    for (Vec column : fr.vecs()) {
        if (column.isNumeric()) {
            numericCount++;
        }
        if (column.isTime()) {
            timeCount++;
        }
    }

    // Numeric columns win over time columns; the output type follows the same rule
    final boolean useNumeric = numericCount > 0;
    final byte outType = useNumeric ? Vec.T_NUM : Vec.T_TIME;

    // Gather the columns that actually participate in the mean
    final int totalCols = fr.numCols();
    Frame selected = new Frame();
    for (int c = 0; c < totalCols; c++) {
        Vec column = fr.vec(c);
        boolean participates = useNumeric ? column.isNumeric() : column.isTime();
        if (participates) {
            selected.add(fr.name(c), column);
        }
    }

    Vec template = selected.anyVec();
    if (template == null) {
        // Nothing to average: either the input is empty (return an empty frame)
        // or it has only unsupported columns (return a column of NAs)
        Frame fallback = new Frame(outKey);
        Vec anyOriginal = fr.anyVec();
        if (anyOriginal != null) {
            fallback.add("mean", anyOriginal.makeCon(Double.NaN));
        }
        return new ValFrame(fallback);
    }

    // Without NA removal, a frame mixing in other column types produces NAs everywhere
    final boolean hasOtherTypes = numericCount < totalCols && timeCount < totalCols;
    if (!na_rm && hasOtherTypes) {
        Vec[] naColumn = new Vec[] { template.makeCon(Double.NaN) };
        return new ValFrame(new Frame(outKey, outNames, naColumn));
    }

    // Regular case: one pass over the selected columns
    final int width = selected.numCols();
    Frame means = new MRTask() {
        @Override
        public void map(Chunk[] cs, NewChunk nc) {
            final int rows = cs[0]._len;
            for (int r = 0; r < rows; r++) {
                double sum = 0;
                int missing = 0;
                for (int c = 0; c < width; c++) {
                    double cell = cs[c].atd(r);
                    if (Double.isNaN(cell)) {
                        missing++;
                    } else {
                        sum += cell;
                    }
                }
                // With na_rm at least one value must be present; without it none may be missing
                boolean defined = na_rm ? missing < width : missing == 0;
                if (defined) {
                    nc.addNum(sum / (width - missing));
                } else {
                    nc.addNum(Double.NaN);
                }
            }
        }
    }.doAll(1, outType, selected).outputFrame(outKey, outNames, null);

    return new ValFrame(means);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstDiffLag1 method apply.
/**
 * Lag-1 difference of a single numeric column: every output row holds the current value minus the
 * previous one, and the very first row is NA.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the input frame
 * @param asts argument ASTs; asts[1] must evaluate to a one-column numeric frame
 * @return a ValFrame with the same names, types and domains as the input, holding the differences
 * @throws IllegalArgumentException if the frame does not have exactly one column or the column is not numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame input = stk.track(asts[1].exec(env).getFrame());

    final int columnCount = input.numCols();
    if (columnCount != 1) {
        throw new IllegalArgumentException("Expected a single column for diff. Got: " + columnCount + " columns.");
    }
    if (!input.anyVec().isNumeric()) {
        throw new IllegalArgumentException("Expected a numeric column for diff. Got: " + input.anyVec().get_type_str());
    }

    // Last value of every chunk, so each chunk can diff its first row against its predecessor's tail
    final double[] chunkTails = GetLastElemPerChunkTask.get(input.anyVec());

    Frame diffed = new MRTask() {
        @Override
        public void map(Chunk c, NewChunk nc) {
            final int chunkIdx = c.cidx();
            if (chunkIdx != 0) {
                // First row of a later chunk: subtract the previous chunk's last element
                nc.addNum(c.atd(0) - chunkTails[chunkIdx - 1]);
            } else {
                // Very first row of the column has no predecessor
                nc.addNA();
            }
            final int rows = c._len;
            int row = 1;
            while (row < rows) {
                nc.addNum(c.atd(row) - c.atd(row - 1));
                row++;
            }
        }
    }.doAll(input.types(), input).outputFrame(input.names(), input.domains());

    return new ValFrame(diffed);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstIsax method apply.
/**
 * Computes the iSAX representation of the time series stored in the rows of a numeric frame.
 *
 * Arguments: asts[1] is the input frame, asts[2] the number of words, asts[3] the maximum cardinality and
 * asts[4] a flag (1 = enabled) requesting that the per-word cardinality be reduced to the tokens actually seen.
 *
 * The result frame has the string column "iSax_index" first, followed by one numeric column per word
 * ("c0", "c1", ...).
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper that tracks the input frame
 * @param asts argument ASTs as described above
 * @return a ValFrame holding the iSAX index column and the word columns
 * @throws IllegalArgumentException if a column is not numeric, or numWords / maxCardinality is not positive
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    AstRoot n = asts[2];
    AstRoot mc = asts[3];
    boolean optm_card = asts[4].exec(env).getNum() == 1;
    // Check vecs are numeric
    for (Vec v : fr.vecs()) {
        if (!v.isNumeric()) {
            throw new IllegalArgumentException("iSax only applies to numeric columns!");
        }
    }
    int numWords = (int) n.exec(env).getNum();
    int maxCardinality = (int) mc.exec(env).getNum();
    // Both parameters must be strictly positive, as the error messages state;
    // zero words or zero cardinality cannot describe a series.
    if (numWords <= 0) {
        throw new IllegalArgumentException("numWords must be greater than 0!");
    }
    if (maxCardinality <= 0) {
        throw new IllegalArgumentException("maxCardinality must be greater than 0!");
    }
    ArrayList<String> columns = new ArrayList<>();
    for (int i = 0; i < numWords; i++) {
        columns.add("c" + i);
    }
    // One numeric column per word
    Frame fr2 = new AstIsax.IsaxTask(numWords, maxCardinality).doAll(numWords, Vec.T_NUM, fr).outputFrame(null, columns.toArray(new String[numWords]), null);
    int[] maxCards = new int[numWords];
    if (optm_card) {
        // NaN marks the unused tail of each word's token list
        _domain_hm = new double[numWords][maxCardinality];
        for (double[] r : _domain_hm) Arrays.fill(r, Double.NaN);
        // see if we can reduce the cardinality by checking all unique tokens in all series in a word
        for (int i = 0; i < fr2.numCols(); i++) {
            // The categorical copy is only needed for its domain; remove it right away so it does not leak
            Vec catVec = fr2.vec(i).toCategoricalVec();
            String[] domains = catVec.domain();
            catVec.remove();
            for (int j = 0; j < domains.length; j++) {
                _domain_hm[i][j] = Double.parseDouble(domains[j]);
            }
        }
        // get the cardinalities of each word
        for (int i = 0; i < numWords; i++) {
            int cnt = 0;
            for (double d : _domain_hm[i]) {
                if (Double.isNaN(d))
                    break;
                else
                    cnt++;
            }
            maxCards[i] = cnt;
        }
        Frame fr2_reduced = new AstIsax.IsaxReduceCard(_domain_hm, maxCardinality).doAll(numWords, Vec.T_NUM, fr2).outputFrame(null, columns.toArray(new String[numWords]), null);
        Frame fr3 = new AstIsax.IsaxStringTask(maxCards).doAll(1, Vec.T_STR, fr2_reduced).outputFrame(null, new String[] { "iSax_index" }, null);
        // Not needed anymore
        fr2.delete();
        fr3.add(fr2_reduced);
        return new ValFrame(fr3);
    }
    // No reduction requested: every word keeps the full cardinality
    for (int i = 0; i < numWords; ++i) {
        maxCards[i] = maxCardinality;
    }
    Frame fr3 = new AstIsax.IsaxStringTask(maxCards).doAll(1, Vec.T_STR, fr2).outputFrame(null, new String[] { "iSax_index" }, null);
    fr3.add(fr2);
    return new ValFrame(fr3);
}
use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.
the class AstToLower method apply.
/**
 * Lower-cases every column of a frame made of string and/or categorical columns.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the input frame
 * @param asts argument ASTs; asts[1] must evaluate to the frame to convert
 * @return a ValFrame with one converted column per input column, in the same order
 * @throws IllegalArgumentException if any column is neither string nor categorical
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // Type check: validate every column before converting anything
    for (Vec v : fr.vecs()) {
        if (!(v.isCategorical() || v.isString())) {
            // Report the type of the offending column itself, not of an arbitrary column of the frame
            throw new IllegalArgumentException("tolower() requires a string or categorical column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
        }
    }
    // Transform each vec
    Vec[] nvs = new Vec[fr.numCols()];
    int i = 0;
    for (Vec v : fr.vecs()) {
        if (v.isCategorical()) {
            nvs[i] = toLowerCategoricalCol(v);
        } else {
            nvs[i] = toLowerStringCol(v);
        }
        i++;
    }
    return new ValFrame(new Frame(nvs));
}
Aggregations