use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstUniOp method exec.
/**
 * Applies this unary operator element-wise to its single operand.
 *
 * <p>The operand is read from {@code args[1]}; {@code args[0]} is not used by this method.
 * Dispatch is on {@code val.type()}:
 * <ul>
 *   <li>{@code Val.NUM}: returns a {@code ValNum} holding {@code op(x)}.</li>
 *   <li>{@code Val.FRM}: every column must be numeric; returns a new frame with the same number
 *       of columns, each value replaced by {@code op(value)} and each column renamed to
 *       {@code str() + "(" + originalName + ")"} (e.g. {@code log(income)}).</li>
 *   <li>{@code Val.ROW}: returns a {@code ValRow} with {@code op} applied to every element and a
 *       copy of the original names (or {@code null} when the row carries no names).</li>
 * </ul>
 *
 * @param args argument array; the operand is {@code args[1]}
 * @return the transformed value, of the same kind (number, frame or row) as the operand
 * @throws IllegalArgumentException if the operand is a frame containing a non-numeric column
 */
@Override
public Val exec(Val... args) {
    Val val = args[1];
    switch (val.type()) {
        case Val.NUM:
            return new ValNum(op(val.getNum()));
        case Val.FRM: {
            Frame fr = val.getFrame();
            // Reject the whole frame up front rather than failing part-way through the task.
            for (int i = 0; i < fr.numCols(); i++) {
                if (!fr.vec(i).isNumeric()) {
                    throw new IllegalArgumentException("Operator " + str() + "() cannot be applied to non-numeric column " + fr.name(i));
                }
            }
            // Derive the output column names as `op(colName)`. For example, a column named
            // "income" that had a log transformation is named `log(income)` in the result.
            String[] newNames = new String[fr.numCols()];
            for (int i = 0; i < newNames.length; i++) {
                newNames[i] = str() + "(" + fr.name(i) + ")";
            }
            return new ValFrame(new MRTask() {
                @Override
                public void map(Chunk[] cs, NewChunk[] ncs) {
                    for (int col = 0; col < cs.length; col++) {
                        Chunk c = cs[col];
                        NewChunk nc = ncs[col];
                        for (int i = 0; i < c._len; i++) {
                            nc.addNum(op(c.atd(i)));
                        }
                    }
                }
            }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(newNames, null));
        }
        case Val.ROW: {
            // Fetch the row once instead of on every loop iteration.
            double[] row = val.getRow();
            double[] ds = new double[row.length];
            for (int i = 0; i < ds.length; ++i) {
                ds[i] = op(row[i]);
            }
            // A row may carry no names; copy defensively only when they are present so that an
            // unnamed row does not trigger a NullPointerException.
            String[] rowNames = ((ValRow) val).getNames();
            String[] names = rowNames == null ? null : rowNames.clone();
            return new ValRow(ds, names);
        }
        default:
            throw H2O.unimpl("unop unimpl: " + val.getClass());
    }
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstHist method apply.
/**
 * Computes a histogram of a single, non-constant numeric column.
 *
 * <p>{@code asts[1]} is evaluated to the input frame. {@code asts[2]} selects the breaks:
 * <ul>
 *   <li>an {@code AstStr}: the name of a binning rule ({@code "sturges"}, {@code "rice"},
 *       {@code "sqrt"}, {@code "doane"}, {@code "scott"}, {@code "fd"}); matching is
 *       case-insensitive and any other string falls back to Sturges;</li>
 *   <li>an {@code AstNumList}: the explicit break points;</li>
 *   <li>an {@code AstNum}: the number of breaks.</li>
 * </ul>
 *
 * <p>The result is a frame with columns {@code breaks}, {@code counts}, {@code mids_true} and
 * {@code mids}, with one row per break. The first row holds only the first break; its other
 * three columns are NA. Row {@code i > 0} holds break {@code i} together with the count,
 * {@code _mids} value and arithmetic midpoint of bin {@code i - 1}.
 *
 * @param env  evaluation environment
 * @param stk  stack helper used to track the evaluated input frame
 * @param asts AST arguments: {@code asts[1]} is the data, {@code asts[2]} the breaks specification
 * @return the histogram frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the input is not exactly one numeric column, or if that
 *         column is constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // stack is [ ..., ary, breaks]
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    if (f.numCols() != 1)
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    Vec vec = f.anyVec();
    if (!vec.isNumeric())
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    //TODO Add case when vec is a constant numeric
    if (vec.isConst())
        throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");

    // handle the breaks
    AstRoot a = asts[2];
    String algo = null;
    int numBreaks = -1;
    double[] breaks = null;
    if (a instanceof AstStr)
        // Locale.ROOT keeps the rule-name matching independent of the JVM default locale
        // (e.g. under a Turkish locale "RICE" would otherwise not lower-case to "rice").
        algo = a.str().toLowerCase(java.util.Locale.ROOT);
    else if (a instanceof AstNumList)
        breaks = ((AstNumList) a).expand();
    else if (a instanceof AstNum)
        numBreaks = (int) a.exec(env).getNum();

    double x1 = vec.max();
    double x0 = vec.min();
    AstHist.HistTask t;
    if (breaks != null) {
        // Explicit break points: no bin width / origin is supplied to the task.
        t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
    } else {
        double h; // bin width
        if (algo != null) {
            switch (algo) {
                case "scott":
                    // special bin width computation
                    h = scotts_h(vec);
                    numBreaks = scott(vec, h);
                    break;
                case "fd":
                    // special bin width computation
                    h = fds_h(vec);
                    numBreaks = fd(vec, h);
                    break;
                case "rice":
                    numBreaks = rice(vec);
                    h = (x1 - x0) / numBreaks;
                    break;
                case "sqrt":
                    numBreaks = sqrt(vec);
                    h = (x1 - x0) / numBreaks;
                    break;
                case "doane":
                    numBreaks = doane(vec);
                    h = (x1 - x0) / numBreaks;
                    break;
                case "sturges":
                default:
                    // just do sturges even if junk passed in
                    numBreaks = sturges(vec);
                    h = (x1 - x0) / numBreaks;
            }
        } else {
            // Numeric break count (or the initial -1 when asts[2] matched none of the cases above).
            h = (x1 - x0) / numBreaks;
        }
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    }

    // wanna make a new frame here [breaks,counts,mids]
    final double[] brks = t._breaks;
    final long[] cnts = t._counts;
    final double[] mids_true = t._mids;
    final double[] mids = new double[brks.length - 1];
    for (int i = 1; i < brks.length; ++i)
        mids[i - 1] = .5 * (brks[i - 1] + brks[i]);

    // Temporary vec used only to give the output frame one row per break.
    Vec layoutVec = Vec.makeZero(brks.length);
    Frame fr2;
    try {
        fr2 = new MRTask() {
            @Override
            public void map(Chunk[] c, NewChunk[] nc) {
                int start = (int) c[0].start();
                for (int i = 0; i < c[0]._len; ++i) {
                    nc[0].addNum(brks[i + start]);
                    if (i == 0) {
                        nc[1].addNA();
                        nc[2].addNA();
                        nc[3].addNA();
                    } else {
                        nc[1].addNum(cnts[(i - 1) + start]);
                        nc[2].addNum(mids_true[(i - 1) + start]);
                        nc[3].addNum(mids[(i - 1) + start]);
                    }
                }
            }
        }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
    } finally {
        // Always drop the temporary layout vec, even if building the output frame fails.
        layoutVec.remove();
    }
    return new ValFrame(fr2);
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class BinaryMerge method chunksCompressAndStore.
/**
 * Compresses every per-column, per-batch value array into a chunk and puts it into the DKV.
 *
 * <p>For each {@code (column, batch)} pair the array {@code frameLikeChunks[column][batch]} is
 * wrapped in a {@code NewChunk}, compressed, and stored under the key returned by
 * {@code getKeyForMSBComboPerCol} for this task's left/right MSB pair. The array slot is then
 * set to {@code null}. The method returns only after all pending puts have completed.
 *
 * @param nbatch          number of batches per column
 * @param numColsInResult number of columns to store
 * @param frameLikeChunks values indexed as {@code [column][batch][row]}; its
 *                        {@code [column][batch]} entries are nulled out as they are stored
 */
private void chunksCompressAndStore(final int nbatch, final int numColsInResult, final double[][][] frameLikeChunks) {
    final Futures pending = new Futures();
    for (int colIdx = 0; colIdx < numColsInResult; ++colIdx) {
        for (int batchIdx = 0; batchIdx < nbatch; ++batchIdx) {
            final NewChunk raw = new NewChunk(frameLikeChunks[colIdx][batchIdx]);
            final Chunk compressed = raw.compress();
            DKV.put(getKeyForMSBComboPerCol(_leftSB._msb, _riteSB._msb, colIdx, batchIdx), compressed, pending, true);
            // Release the uncompressed data right away; the compressed copy is now in the store.
            frameLikeChunks[colIdx][batchIdx] = null;
        }
    }
    pending.blockForPending();
}
use of water.fvec.NewChunk in project h2o-3 by h2oai.
the class AstLs method apply.
/**
 * Lists all keys in the global key snapshot as a one-column categorical frame.
 *
 * <p>Each key becomes one row of a column named {@code "key"}: row {@code i} holds categorical
 * level {@code i}, and the column's domain is the keys' {@code toString()} values in snapshot
 * iteration order. The resulting frame is created under the key {@code "h2o_ls"}.
 *
 * @param env  evaluation environment (not used)
 * @param stk  stack helper (not used)
 * @param asts AST arguments (not used)
 * @return a {@code ValFrame} wrapping the single-column key listing
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Futures pending = new Futures();
    final AppendableVec keyVec = new AppendableVec(Vec.VectorGroup.VG_LEN1.addVec(), Vec.T_CAT);
    final NewChunk chunk = new NewChunk(keyVec, 0);

    // One categorical level per key; the level index is simply the key's position in the listing.
    final ArrayList<String> levels = new ArrayList<>();
    for (Key key : KeySnapshot.globalSnapshot().keys()) {
        chunk.addCategorical(levels.size());
        levels.add(key.toString());
    }

    keyVec.setDomain(levels.toArray(new String[levels.size()]));
    chunk.close(pending);
    // Finalize the appendable vec into the categorical vec holding the key names.
    final Vec keyColumn = keyVec.layout_and_close(pending);
    pending.blockForPending();

    final Frame listing = new Frame(Key.<Frame>make("h2o_ls"), new String[] { "key" }, new Vec[] { keyColumn });
    return new ValFrame(listing);
}
use of water.fvec.NewChunk in project h2o-2 by h2oai.
the class FrameTask method map.
/**
 * Extracts the values of each row of the given chunks, applies standardization/normalization to numerics,
 * adds appropriate offsets to categoricals, and hands the expanded row to {@code processRow}.
 * <p>
 * Per chunk, the method calls {@code chunkInit()} first and {@code chunkDone(num_processed_rows)} last.
 * In between it iterates over the rows {@code ceil(_useFraction)} times, optionally in shuffled order,
 * skipping rows that belong to the held-out fold, rows dropped by random sub-sampling, rows containing
 * NAs (when {@code skipMissing()} is true) and rows whose (normalized) response is NaN.
 * <p>
 * NOTE(review): the previous version of this comment also said the response is adapted according to
 * CaseMode/CaseValue; no such logic is visible in this method - confirm whether it lives elsewhere.
 *
 * @param chunks  input chunks; the first {@code _dinfo._cats} are read as categoricals, the last
 *                {@code _dinfo._responses} as responses, and the ones in between as numerics
 * @param outputs output chunks; when non-null and non-empty they are passed through to {@code processRow}
 * @throws JobCancelledException if {@code _jobKey} is set and the job is no longer running
 */
@Override
public final void map(Chunk[] chunks, NewChunk[] outputs) {
if (_jobKey != null && !Job.isRunning(_jobKey))
throw new JobCancelledException();
final int nrows = chunks[0]._len;
final long offset = chunks[0]._start;
chunkInit();
// Scratch buffers reused for every row: numeric values, active categorical indices, responses.
double[] nums = MemoryManager.malloc8d(_dinfo._nums);
int[] cats = MemoryManager.malloc4(_dinfo._cats);
double[] response = _dinfo._responses == 0 ? null : MemoryManager.malloc8d(_dinfo._responses);
int start = 0;
int end = nrows;
//random generator for skipping rows
Random skip_rng = null;
//Example:
// _useFraction = 0.8 -> 1 repeat with fraction = 0.8
// _useFraction = 1.0 -> 1 repeat with fraction = 1.0
// _useFraction = 1.1 -> 2 repeats with fraction = 0.55
// _useFraction = 2.1 -> 3 repeats with fraction = 0.7
// _useFraction = 3.0 -> 3 repeats with fraction = 1.0
final int repeats = (int) Math.ceil(_useFraction);
final float fraction = _useFraction / repeats;
// Only sub-sample (and therefore only create the RNG) when less than the full pass is requested.
if (fraction < 1.0)
skip_rng = water.util.Utils.getDeterRNG(new Random().nextLong());
// Optional permutation of the chunk-local row indices [start, end).
long[] shuf_map = null;
if (_shuffle) {
shuf_map = new long[end - start];
for (int i = 0; i < shuf_map.length; ++i) shuf_map[i] = start + i;
Utils.shuffleArray(shuf_map, new Random().nextLong());
}
long num_processed_rows = 0;
for (int rrr = 0; rrr < repeats; ++rrr) {
OUTER: for (int rr = start; rr < end; ++rr) {
// r is the chunk-local row index (shuffled if requested); lr adds the chunk's _start to it.
final int r = shuf_map != null ? (int) shuf_map[rr - start] : rr;
final long lr = r + chunks[0]._start;
// Skip rows of the held-out fold (lr % _nfolds == _foldId) and rows rejected by the sub-sampling RNG.
if ((_dinfo._nfolds > 0 && (lr % _dinfo._nfolds) == _dinfo._foldId) || (skip_rng != null && skip_rng.nextFloat() > fraction))
continue;
//count rows with missing values even if they are skipped
// (the counter is incremented before the NA check and the NaN-response check below)
++num_processed_rows;
// skip rows with NAs!
// (only when skipMissing() is true; every chunk passed in is checked)
for (Chunk c : chunks) if (skipMissing() && c.isNA0(r))
continue OUTER;
// i walks over the input chunks; ncats counts the categorical indices actually written to cats.
int i = 0, ncats = 0;
for (; i < _dinfo._cats; ++i) {
int c;
if (chunks[i].isNA0(r)) {
//missing value turns into extra (last) factor
cats[ncats++] = (_dinfo._catOffsets[i + 1] - 1);
} else {
c = (int) chunks[i].at80(r);
if (_dinfo._catLvls != null) {
// some levels are ignored?
// binarySearch returns a negative value when the level is not in _catLvls[i];
// in that case nothing is written for this column.
c = Arrays.binarySearch(_dinfo._catLvls[i], c);
if (c >= 0)
cats[ncats++] = c + _dinfo._catOffsets[i];
} else if (_dinfo._useAllFactorLevels)
cats[ncats++] = c + _dinfo._catOffsets[i];
// Otherwise level 0 is not emitted and the remaining levels are shifted down by one.
else if (c != 0)
cats[ncats++] = c + _dinfo._catOffsets[i] - 1;
}
}
// Chunks from index _dinfo._cats up to (but excluding) the response chunks are read as numerics.
final int n = chunks.length - _dinfo._responses;
for (; i < n; ++i) {
//can be NA if skipMissing() == false
double d = chunks[i].at0(r);
// Optional normalization: subtract _normSub, then multiply by _normMul.
if (_dinfo._normSub != null)
d -= _dinfo._normSub[i - _dinfo._cats];
if (_dinfo._normMul != null)
d *= _dinfo._normMul[i - _dinfo._cats];
nums[i - _dinfo._cats] = d;
}
// The last _dinfo._responses chunks are the responses, with their own optional normalization.
for (i = 0; i < _dinfo._responses; ++i) {
response[i] = chunks[chunks.length - _dinfo._responses + i].at0(r);
if (_dinfo._normRespSub != null)
response[i] -= _dinfo._normRespSub[i];
if (_dinfo._normRespMul != null)
response[i] *= _dinfo._normRespMul[i];
// skip rows without a valid response (no supervised training possible)
if (Double.isNaN(response[i]))
continue OUTER;
}
// Per-row seed built from the chunk start offset, the repeat index and the chunk-local row index.
long seed = offset + rrr * (end - start) + r;
// Use the processRow overload that receives the output chunks only when outputs were provided.
if (outputs != null && outputs.length > 0)
processRow(seed, nums, ncats, cats, response, outputs);
else
processRow(seed, nums, ncats, cats, response);
}
}
chunkDone(num_processed_rows);
}
Aggregations