Search in sources :

Example 16 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstUniOp method exec.

@Override
public Val exec(Val... args) {
    Val val = args[1];
    switch(val.type()) {
        case Val.NUM:
            return new ValNum(op(val.getNum()));
        case Val.FRM:
            Frame fr = val.getFrame();
            for (int i = 0; i < fr.numCols(); i++) if (!fr.vec(i).isNumeric())
                throw new IllegalArgumentException("Operator " + str() + "() cannot be applied to non-numeric column " + fr.name(i));
            // Get length of columns in fr and append `op(colName)`. For example, a column named "income" that had
            // a log transformation would now be changed to `log(income)`.
            String[] newNames = new String[fr.numCols()];
            for (int i = 0; i < newNames.length; i++) {
                newNames[i] = str() + "(" + fr.name(i) + ")";
            }
            return new ValFrame(new MRTask() {

                @Override
                public void map(Chunk[] cs, NewChunk[] ncs) {
                    for (int col = 0; col < cs.length; col++) {
                        Chunk c = cs[col];
                        NewChunk nc = ncs[col];
                        for (int i = 0; i < c._len; i++) nc.addNum(op(c.atd(i)));
                    }
                }
            }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(newNames, null));
        case Val.ROW:
            double[] ds = new double[val.getRow().length];
            for (int i = 0; i < ds.length; ++i) ds[i] = op(val.getRow()[i]);
            String[] names = ((ValRow) val).getNames().clone();
            return new ValRow(ds, names);
        default:
            throw H2O.unimpl("unop unimpl: " + val.getClass());
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) MRTask(water.MRTask)

Example 17 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstHist method apply.

@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // stack is [ ..., ary, breaks]
    // handle the breaks
    Frame fr2;
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    if (f.numCols() != 1)
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    Vec vec = f.anyVec();
    if (!vec.isNumeric())
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    //TODO Add case when vec is a constant numeric
    if (vec.isConst())
        throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");
    AstRoot a = asts[2];
    String algo = null;
    int numBreaks = -1;
    double[] breaks = null;
    if (a instanceof AstStr)
        algo = a.str().toLowerCase();
    else if (a instanceof AstNumList)
        breaks = ((AstNumList) a).expand();
    else if (a instanceof AstNum)
        numBreaks = (int) a.exec(env).getNum();
    AstHist.HistTask t;
    double h;
    double x1 = vec.max();
    double x0 = vec.min();
    if (breaks != null)
        t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
    else if (algo != null) {
        switch(algo) {
            case "sturges":
                numBreaks = sturges(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "rice":
                numBreaks = rice(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "sqrt":
                numBreaks = sqrt(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "doane":
                numBreaks = doane(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "scott":
                h = scotts_h(vec);
                numBreaks = scott(vec, h);
                // special bin width computation
                break;
            case "fd":
                h = fds_h(vec);
                numBreaks = fd(vec, h);
                // special bin width computation
                break;
            default:
                numBreaks = sturges(vec);
                // just do sturges even if junk passed in
                h = (x1 - x0) / numBreaks;
        }
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    } else {
        h = (x1 - x0) / numBreaks;
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    }
    // wanna make a new frame here [breaks,counts,mids]
    final double[] brks = t._breaks;
    final long[] cnts = t._counts;
    final double[] mids_true = t._mids;
    final double[] mids = new double[t._breaks.length - 1];
    for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (t._breaks[i - 1] + t._breaks[i]);
    Vec layoutVec = Vec.makeZero(brks.length);
    fr2 = new MRTask() {

        @Override
        public void map(Chunk[] c, NewChunk[] nc) {
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                nc[0].addNum(brks[i + start]);
                if (i == 0) {
                    nc[1].addNA();
                    nc[2].addNA();
                    nc[3].addNA();
                } else {
                    nc[1].addNum(cnts[(i - 1) + start]);
                    nc[2].addNum(mids_true[(i - 1) + start]);
                    nc[3].addNum(mids[(i - 1) + start]);
                }
            }
        }
    }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
    layoutVec.remove();
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) Vec(water.fvec.Vec) AstStr(water.rapids.ast.params.AstStr) MRTask(water.MRTask) AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList)

Example 18 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class BinaryMerge method chunksCompressAndStore.

// compress all chunks and store them
private void chunksCompressAndStore(final int nbatch, final int numColsInResult, final double[][][] frameLikeChunks) {
    // compress all chunks and store them
    Futures fs = new Futures();
    for (int col = 0; col < numColsInResult; col++) {
        for (int b = 0; b < nbatch; b++) {
            Chunk ck = new NewChunk(frameLikeChunks[col][b]).compress();
            DKV.put(getKeyForMSBComboPerCol(_leftSB._msb, _riteSB._msb, col, b), ck, fs, true);
            //free mem as early as possible (it's now in the store)
            frameLikeChunks[col][b] = null;
        }
    }
    fs.blockForPending();
}
Also used : Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 19 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstLs method apply.

@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    ArrayList<String> domain = new ArrayList<>();
    Futures fs = new Futures();
    AppendableVec av = new AppendableVec(Vec.VectorGroup.VG_LEN1.addVec(), Vec.T_CAT);
    NewChunk keys = new NewChunk(av, 0);
    int r = 0;
    for (Key key : KeySnapshot.globalSnapshot().keys()) {
        keys.addCategorical(r++);
        domain.add(key.toString());
    }
    String[] key_domain = domain.toArray(new String[domain.size()]);
    av.setDomain(key_domain);
    keys.close(fs);
    // c0 is the row index vec
    Vec c0 = av.layout_and_close(fs);
    fs.blockForPending();
    return new ValFrame(new Frame(Key.<Frame>make("h2o_ls"), new String[] { "key" }, new Vec[] { c0 }));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Futures(water.Futures) Vec(water.fvec.Vec) AppendableVec(water.fvec.AppendableVec) ArrayList(java.util.ArrayList) AppendableVec(water.fvec.AppendableVec) Key(water.Key) NewChunk(water.fvec.NewChunk)

Example 20 with NewChunk

use of water.fvec.NewChunk in project h2o-2 by h2oai.

the class FrameTask method map.

/**
   * Extracts the values, applies standardization/normalization to numerics, adds appropriate offsets to categoricals,
   * and adapts response according to the CaseMode/CaseValue if set.
   */
@Override
public final void map(Chunk[] chunks, NewChunk[] outputs) {
    if (_jobKey != null && !Job.isRunning(_jobKey))
        throw new JobCancelledException();
    final int nrows = chunks[0]._len;
    final long offset = chunks[0]._start;
    chunkInit();
    double[] nums = MemoryManager.malloc8d(_dinfo._nums);
    int[] cats = MemoryManager.malloc4(_dinfo._cats);
    double[] response = _dinfo._responses == 0 ? null : MemoryManager.malloc8d(_dinfo._responses);
    int start = 0;
    int end = nrows;
    //random generator for skipping rows
    Random skip_rng = null;
    //Example:
    // _useFraction = 0.8 -> 1 repeat with fraction = 0.8
    // _useFraction = 1.0 -> 1 repeat with fraction = 1.0
    // _useFraction = 1.1 -> 2 repeats with fraction = 0.55
    // _useFraction = 2.1 -> 3 repeats with fraction = 0.7
    // _useFraction = 3.0 -> 3 repeats with fraction = 1.0
    final int repeats = (int) Math.ceil(_useFraction);
    final float fraction = _useFraction / repeats;
    if (fraction < 1.0)
        skip_rng = water.util.Utils.getDeterRNG(new Random().nextLong());
    long[] shuf_map = null;
    if (_shuffle) {
        shuf_map = new long[end - start];
        for (int i = 0; i < shuf_map.length; ++i) shuf_map[i] = start + i;
        Utils.shuffleArray(shuf_map, new Random().nextLong());
    }
    long num_processed_rows = 0;
    for (int rrr = 0; rrr < repeats; ++rrr) {
        OUTER: for (int rr = start; rr < end; ++rr) {
            final int r = shuf_map != null ? (int) shuf_map[rr - start] : rr;
            final long lr = r + chunks[0]._start;
            if ((_dinfo._nfolds > 0 && (lr % _dinfo._nfolds) == _dinfo._foldId) || (skip_rng != null && skip_rng.nextFloat() > fraction))
                continue;
            //count rows with missing values even if they are skipped
            ++num_processed_rows;
            // skip rows with NAs!
            for (Chunk c : chunks) if (skipMissing() && c.isNA0(r))
                continue OUTER;
            int i = 0, ncats = 0;
            for (; i < _dinfo._cats; ++i) {
                int c;
                if (chunks[i].isNA0(r)) {
                    //missing value turns into extra (last) factor
                    cats[ncats++] = (_dinfo._catOffsets[i + 1] - 1);
                } else {
                    c = (int) chunks[i].at80(r);
                    if (_dinfo._catLvls != null) {
                        // some levels are ignored?
                        c = Arrays.binarySearch(_dinfo._catLvls[i], c);
                        if (c >= 0)
                            cats[ncats++] = c + _dinfo._catOffsets[i];
                    } else if (_dinfo._useAllFactorLevels)
                        cats[ncats++] = c + _dinfo._catOffsets[i];
                    else if (c != 0)
                        cats[ncats++] = c + _dinfo._catOffsets[i] - 1;
                }
            }
            final int n = chunks.length - _dinfo._responses;
            for (; i < n; ++i) {
                //can be NA if skipMissing() == false
                double d = chunks[i].at0(r);
                if (_dinfo._normSub != null)
                    d -= _dinfo._normSub[i - _dinfo._cats];
                if (_dinfo._normMul != null)
                    d *= _dinfo._normMul[i - _dinfo._cats];
                nums[i - _dinfo._cats] = d;
            }
            for (i = 0; i < _dinfo._responses; ++i) {
                response[i] = chunks[chunks.length - _dinfo._responses + i].at0(r);
                if (_dinfo._normRespSub != null)
                    response[i] -= _dinfo._normRespSub[i];
                if (_dinfo._normRespMul != null)
                    response[i] *= _dinfo._normRespMul[i];
                // skip rows without a valid response (no supervised training possible)
                if (Double.isNaN(response[i]))
                    continue OUTER;
            }
            long seed = offset + rrr * (end - start) + r;
            if (outputs != null && outputs.length > 0)
                processRow(seed, nums, ncats, cats, response, outputs);
            else
                processRow(seed, nums, ncats, cats, response);
        }
    }
    chunkDone(num_processed_rows);
}
Also used : Random(java.util.Random) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) JobCancelledException(water.Job.JobCancelledException)

Aggregations

NewChunk (water.fvec.NewChunk)42 Frame (water.fvec.Frame)34 Chunk (water.fvec.Chunk)32 ValFrame (water.rapids.vals.ValFrame)23 MRTask (water.MRTask)22 Vec (water.fvec.Vec)19 AppendableVec (water.fvec.AppendableVec)8 BufferedString (water.parser.BufferedString)6 Key (water.Key)5 Futures (water.Futures)4 MRTask2 (water.MRTask2)4 ValNum (water.rapids.vals.ValNum)4 C0DChunk (water.fvec.C0DChunk)3 Val (water.rapids.Val)2 ValRow (water.rapids.vals.ValRow)2 ConfusionMatrix (hex.ConfusionMatrix)1 MersenneTwisterRNG (hex.rng.MersenneTwisterRNG)1 DataInputStream (java.io.DataInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1