use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstVariance method scalar.
// Scalar covariance for 1 row.
//
// Treats the columns of two single-row frames as paired observations: for each
// column index, row 0 of frx and row 0 of fry form one (x, y) pair.
//
// A pair counts as missing when either value is NaN. If any pair is missing:
//   - Mode.AllObs     -> throws IllegalArgumentException
//   - Mode.Everything -> returns NaN
//   - any other mode  -> the missing pairs are skipped
// The result is sum((x - xmean) * (y - ymean)) / (nonMissingPairs - 1), with the
// means taken over the non-missing pairs only.
//
// Throws IllegalArgumentException when the two frames differ in column count.
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
  if (frx.numCols() != fry.numCols())
    throw new IllegalArgumentException("Single rows must have the same number of columns, found " + frx.numCols() + " and " + fry.numCols());
  final Vec[] vecxs = frx.vecs();
  final Vec[] vecys = fry.vecs();
  final int ncols = frx.numCols();

  // Read every cell exactly once and keep it locally; the second pass below
  // then works on the cached values instead of going back to the Vecs.
  final double[] xs = new double[ncols];
  final double[] ys = new double[ncols];
  int naCount = 0;
  double xsum = 0, ysum = 0;
  for (int c = 0; c < ncols; c++) {
    final double xval = vecxs[c].at(0);
    final double yval = vecys[c].at(0);
    xs[c] = xval;
    ys[c] = yval;
    if (Double.isNaN(xval) || Double.isNaN(yval))
      naCount++;
    else {
      xsum += xval;
      ysum += yval;
    }
  }

  if (naCount != 0) {
    if (mode.equals(Mode.AllObs))
      throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
    if (mode.equals(Mode.Everything))
      return new ValNum(Double.NaN);
  }

  // Floating-point division: zero usable pairs yields NaN means rather than an exception.
  final int nobs = ncols - naCount;
  final double xmean = xsum / nobs;
  final double ymean = ysum / nobs;

  double ss = 0;
  for (int c = 0; c < ncols; c++) {
    if (!(Double.isNaN(xs[c]) || Double.isNaN(ys[c])))
      ss += (xs[c] - xmean) * (ys[c] - ymean);
  }
  // Sample covariance: divide by (n - 1); ss is a double so this stays a floating-point division.
  return new ValNum(ss / (nobs - 1));
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstAppend method apply.
// Appends one column to a frame.
//   asts[1] - destination frame
//   asts[2] - source: a number (turned into a column with makeCon on one of the
//             destination's Vecs) or a frame with exactly one column
//   asts[3] - name for the appended column
// The destination Frame object itself is not modified: a new Frame is built from
// clones of its name and Vec arrays, and the column is added to that new Frame.
// String sources are unimplemented; any other source type is rejected.
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame target = stk.track(asts[1].exec(env)).getFrame();
  final Val source = stk.track(asts[2].exec(env));
  final String newColName = asts[3].exec(env).getStr();
  final Vec template = target.anyVec();

  final int sourceType = source.type();
  final Vec appended;
  if (sourceType == Val.NUM) {
    appended = template.makeCon(source.getNum());
  } else if (sourceType == Val.STR) {
    throw H2O.unimpl();
  } else if (sourceType == Val.FRM) {
    final Frame sourceFrame = source.getFrame();
    if (sourceFrame.numCols() != 1)
      throw new IllegalArgumentException("Can only append one column");
    appended = sourceFrame.anyVec();
  } else {
    throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + source.getClass());
  }

  final Frame result = new Frame(target._names.clone(), target.vecs().clone());
  result.add(newColName, appended);
  return new ValFrame(result);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstRectangleAssign method assign_frame_scalar.
// Boolean assignment with a scalar.
//
// Assigns the scalar src into the selected columns of dst, using column 0 of the
// rows frame as the per-row selector passed to ConditionalAssignTask.
//   dst  - frame being assigned into
//   cols - indices of the dst columns to update
//   rows - selector frame; must have the same number of rows as dst
//   src  - scalar value to assign
//   ses  - session providing copy-on-write Vecs for the updated columns
// Throws IllegalArgumentException when the row counts differ, or when src is not
// compatible with one of the target columns.
private void assign_frame_scalar(Frame dst, int[] cols, Frame rows, Object src, Session ses) {
  Vec bool = rows.vec(0);
  if (dst.numRows() != rows.numRows()) {
    throw new IllegalArgumentException("Frame " + dst._key + " has different number of rows than frame " + rows._key + " (" + dst.numRows() + " vs " + rows.numRows() + ").");
  }
  // Note: this skips "scalar to Vec" compatibility check because the whole Vec is overwritten
  if (bool.isConst() && ((int) bool.min() == 1) && (src instanceof Number)) {
    Vec anyVec = dst.anyVec();
    assert anyVec != null;
    // The guard only guarantees a Number; a plain (double) cast on an Object is a
    // cast to Double and would throw ClassCastException for Integer/Long/Float.
    Vec vsrc = anyVec.makeCon(((Number) src).doubleValue());
    for (int col : cols) dst.replace(col, vsrc);
    if (dst._key != null)
      DKV.put(dst);
    return;
  }
  // Make sure the scalar value is compatible with the target vector
  for (int col : cols) {
    if (!isScalarCompatible(src, dst.vec(col))) {
      throw new IllegalArgumentException("Cannot assign value " + src + " into a vector of type " + dst.vec(col).get_type_str() + ".");
    }
  }
  Vec[] vecs = ses.copyOnWrite(dst, cols);
  // Just the selected columns get updated
  Vec[] vecs2 = new Vec[cols.length];
  for (int i = 0; i < cols.length; i++) vecs2[i] = vecs[cols[i]];
  ConditionalAssignTask.doAssign(vecs2, src, bool);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstRectangleAssign method assign_frame_frame.
// Rectangular array copy from src into dst.
//
// Column i of src is written into column cols[i] of dst, at the row numbers
// listed in rows. Three paths:
//   1. rows is dense and covers every row of dst: the dst columns are replaced
//      outright by the src Vecs (no type check is performed on this path).
//   2. at most 1000 cell updates (or at most one row): cells are set one by one.
//   3. otherwise: the update is handed to an AssignFrameFrameTask.
// Paths 2 and 3 first require matching column types (and identical domains for
// categoricals) and obtain the writable Vecs through ses.copyOnWrite.
//
// Throws IllegalArgumentException on a column-count, row-count, type or
// categorical-domain mismatch.
private void assign_frame_frame(Frame dst, int[] cols, AstNumList rows, Frame src, Session ses) {
  // Shape checks
  if (cols.length != src.numCols())
    throw new IllegalArgumentException("Source and destination frames must have the same count of columns");
  final long nrows = rows.cnt();
  if (src.numRows() != nrows)
    throw new IllegalArgumentException("Requires same count of rows in the number-list (" + nrows + ") as in the source (" + src.numRows() + ")");

  // Whole-column assignment: swap the source Vecs straight into dst
  final boolean wholeColumns = dst.numRows() == nrows && rows.isDense();
  if (wholeColumns) {
    for (int k = 0; k < cols.length; k++) {
      dst.replace(cols[k], src.vecs()[k]);
    }
    if (dst._key != null)
      DKV.put(dst);
    return;
  }

  // Partial update: every target column must agree with its source column in
  // type, and in domain when categorical
  final Vec[] targetVecs = dst.vecs();
  final Vec[] svecs = src.vecs();
  for (int k = 0; k < cols.length; k++) {
    final Vec target = targetVecs[cols[k]];
    final Vec source = svecs[k];
    final int targetType = target.get_type();
    if (targetType != source.get_type())
      throw new IllegalArgumentException("Columns must be the same type; " + "column " + k + ", \'" + dst._names[cols[k]] + "\', is of type " + target.get_type_str() + " and the source is " + source.get_type_str());
    if (targetType == Vec.T_CAT && !Arrays.equals(target.domain(), source.domain()))
      throw new IllegalArgumentException("Cannot assign to a categorical column with a different domain; " + "source column " + src._names[k] + ", target column " + dst._names[cols[k]]);
  }

  // Both remaining paths write through the session's copy-on-write Vecs
  final Vec[] writable = ses.copyOnWrite(dst, cols);

  // Small case: set the cells directly
  final boolean small = nrows <= 1 || (cols.length * nrows) <= 1000;
  if (small) {
    final long[] rownums = rows.expand8();
    for (int k = 0; k < svecs.length; k++) {
      final Vec source = svecs[k];
      final Vec target = writable[cols[k]];
      if (source.get_type() == Vec.T_STR) {
        final BufferedString scratch = new BufferedString();
        for (int r = 0; r < rownums.length; r++) {
          final BufferedString cell = source.atStr(scratch, r);
          final String text = (cell == null) ? null : cell.toString();
          target.set(rownums[r], text);
        }
      } else {
        for (int r = 0; r < rownums.length; r++) {
          target.set(rownums[r], source.at(r));
        }
      }
    }
    return;
  }

  // Large case: pick out just the selected columns and run the task over them
  final Vec[] selected = new Vec[cols.length];
  for (int k = 0; k < cols.length; k++) {
    selected[k] = writable[cols[k]];
  }
  // Side-effect internal sort; needed for fast row lookup
  rows.sort();
  new AssignFrameFrameTask(rows, svecs).doAll(selected);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstHist method apply.
// Histogram of a single, non-constant numeric column.
//   asts[1] - frame holding exactly one numeric column
//   asts[2] - how to bin: an algorithm name (string), an explicit list of break
//             points (number list), or a number of breaks (number)
// Recognised algorithm names are "sturges", "rice", "sqrt", "doane", "scott" and
// "fd"; any other string falls back to sturges.
//
// Returns a frame with columns "breaks", "counts", "mids_true" and "mids" and one
// row per break point. The first row carries only the break value (the other
// three columns are NA); row k > 0 carries the values for the bin between
// breaks[k-1] and breaks[k].
//
// Throws IllegalArgumentException when the input is not one numeric column or
// when that column is constant.
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // stack is [ ..., ary, breaks]
  Frame f = stk.track(asts[1].exec(env)).getFrame();
  if (f.numCols() != 1)
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  Vec vec = f.anyVec();
  if (!vec.isNumeric())
    throw new IllegalArgumentException("Hist only applies to single numeric columns.");
  //TODO Add case when vec is a constant numeric
  if (vec.isConst())
    throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");

  // handle the breaks
  AstRoot a = asts[2];
  String algo = null;
  int numBreaks = -1;
  double[] breaks = null;
  if (a instanceof AstStr)
    // Locale.ROOT keeps the name match below independent of the JVM's default
    // locale (e.g. "RICE" must lower-case to "rice" everywhere).
    algo = a.str().toLowerCase(java.util.Locale.ROOT);
  else if (a instanceof AstNumList)
    breaks = ((AstNumList) a).expand();
  else if (a instanceof AstNum)
    numBreaks = (int) a.exec(env).getNum();

  AstHist.HistTask t;
  double h;
  double x1 = vec.max();
  double x0 = vec.min();
  if (breaks != null)
    t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
  else if (algo != null) {
    switch (algo) {
      case "scott":
        // special bin width computation
        h = scotts_h(vec);
        numBreaks = scott(vec, h);
        break;
      case "fd":
        // special bin width computation
        h = fds_h(vec);
        numBreaks = fd(vec, h);
        break;
      case "rice":
        numBreaks = rice(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "sqrt":
        numBreaks = sqrt(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "doane":
        numBreaks = doane(vec);
        h = (x1 - x0) / numBreaks;
        break;
      case "sturges":
      default:
        // just do sturges even if junk passed in
        numBreaks = sturges(vec);
        h = (x1 - x0) / numBreaks;
    }
    t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
  } else {
    h = (x1 - x0) / numBreaks;
    t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
  }

  // wanna make a new frame here [breaks,counts,mids]
  final double[] brks = t._breaks;
  final long[] cnts = t._counts;
  final double[] mids_true = t._mids;
  final double[] mids = new double[t._breaks.length - 1];
  for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (t._breaks[i - 1] + t._breaks[i]);

  // Temporary Vec that only supplies the row layout (one row per break point)
  Vec layoutVec = Vec.makeZero(brks.length);
  Frame fr2;
  try {
    fr2 = new MRTask() {
      @Override
      public void map(Chunk[] c, NewChunk[] nc) {
        int start = (int) c[0].start();
        for (int i = 0; i < c[0]._len; ++i) {
          int row = i + start;  // global row index across chunks
          nc[0].addNum(brks[row]);
          // Only the very first row of the frame has no preceding bin; testing the
          // chunk-local index here would blank a bin at every chunk boundary.
          if (row == 0) {
            nc[1].addNA();
            nc[2].addNA();
            nc[3].addNA();
          } else {
            nc[1].addNum(cnts[row - 1]);
            nc[2].addNum(mids_true[row - 1]);
            nc[3].addNum(mids[row - 1]);
          }
        }
      }
    }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
  } finally {
    // Always drop the temporary layout Vec, even when the task fails
    layoutVec.remove();
  }
  return new ValFrame(fr2);
}
Aggregations