Search in sources :

Example 6 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstBinOp method frame_op_frame.

/**
   * Auto-widen: If one frame has only 1 column, auto-widen that 1 column to
   * the rest.  Otherwise the frames must have the same column count, and
   * auto-widen element-by-element.  Short-cut if one frame has zero
   * columns.
   */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
    if (lf.numRows() != rt.numRows()) {
        // special case for broadcasting a single row of data across a frame
        if (lf.numRows() == 1 || rt.numRows() == 1) {
            if (lf.numCols() != rt.numCols())
                throw new IllegalArgumentException("Frames must have same columns, found " + lf.numCols() + " columns and " + rt.numCols() + " columns.");
            return frame_op_row(lf, rt);
        } else
            throw new IllegalArgumentException("Frames must have same rows, found " + lf.numRows() + " rows and " + rt.numRows() + " rows.");
    }
    if (lf.numCols() == 0)
        return new ValFrame(lf);
    if (rt.numCols() == 0)
        return new ValFrame(rt);
    if (lf.numCols() == 1 && rt.numCols() > 1)
        return vec_op_frame(lf.vecs()[0], rt);
    if (rt.numCols() == 1 && lf.numCols() > 1)
        return frame_op_vec(lf, rt.vecs()[0]);
    if (lf.numCols() != rt.numCols())
        throw new IllegalArgumentException("Frames must have same columns, found " + lf.numCols() + " columns and " + rt.numCols() + " columns.");
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            BufferedString lfstr = new BufferedString();
            BufferedString rtstr = new BufferedString();
            assert (cress.length << 1) == chks.length;
            for (int c = 0; c < cress.length; c++) {
                Chunk clf = chks[c];
                Chunk crt = chks[c + cress.length];
                NewChunk cres = cress[c];
                if (clf.vec().isString())
                    for (int i = 0; i < clf._len; i++) cres.addNum(str_op(clf.atStr(lfstr, i), crt.atStr(rtstr, i)));
                else
                    for (int i = 0; i < clf._len; i++) cres.addNum(op(clf.atd(i), crt.atd(i)));
            }
        }
    }.doAll(lf.numCols(), Vec.T_NUM, new Frame(lf).add(rt)).outputFrame(lf._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(lf, res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 7 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstBinOp method frame_op_vec.

private ValFrame frame_op_vec(Frame fr, Vec vec) {
    // Already checked for same rows, non-zero frame
    Frame lf = new Frame(fr);
    lf.add("", vec);
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            assert cress.length == chks.length - 1;
            Chunk crt = chks[cress.length];
            for (int c = 0; c < cress.length; c++) {
                Chunk clf = chks[c];
                NewChunk cres = cress[c];
                for (int i = 0; i < clf._len; i++) cres.addNum(op(clf.atd(i), crt.atd(i)));
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, lf).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 8 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
   * Auto-widen the scalar to every element of the frame
   */
private ValFrame scalar_op_frame(final String str, Frame fr) {
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            BufferedString vstr = new BufferedString();
            for (int c = 0; c < chks.length; c++) {
                Chunk chk = chks[c];
                NewChunk cres = cress[c];
                Vec vec = chk.vec();
                // String Vectors: apply str_op as BufferedStrings to all elements
                if (vec.isString()) {
                    final BufferedString conStr = new BufferedString(str);
                    for (int i = 0; i < chk._len; i++) cres.addNum(str_op(conStr, chk.atStr(vstr, i)));
                } else if (vec.isCategorical()) {
                    // categorical Vectors: convert string to domain value; apply op (not
                    // str_op).  Not sure what the "right" behavior here is, can
                    // easily argue that should instead apply str_op to the categorical
                    // string domain value - except that this whole operation only
                    // makes sense for EQ/NE, and is much faster when just comparing
                    // doubles vs comparing strings.
                    final double d = (double) ArrayUtils.find(vec.domain(), str);
                    for (int i = 0; i < chk._len; i++) cres.addNum(op(d, chk.atd(i)));
                } else {
                    // mixing string and numeric
                    // false or true only
                    final double d = op(1, 2);
                    for (int i = 0; i < chk._len; i++) cres.addNum(d);
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 9 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstAsDate method apply.

@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Vec vec = fr.vecs()[0];
    if (fr.vecs().length != 1 || !(vec.isCategorical() || vec.isString()))
        throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
    final String format = asts[2].exec(env).getStr();
    if (format.isEmpty())
        throw new IllegalArgumentException("as.Date requires a non-empty format string");
    // check the format string more?
    final String[] dom = vec.domain();
    final boolean isStr = dom == null && vec.isString();
    assert isStr || dom != null : "as.Date error: domain is null, but vec is not String";
    Frame fr2 = new MRTask() {

        private transient DateTimeFormatter _fmt;

        @Override
        public void setupLocal() {
            _fmt = ParseTime.forStrptimePattern(format).withZone(ParseTime.getTimezone());
        }

        @Override
        public void map(Chunk c, NewChunk nc) {
            //done on each node in lieu of rewriting DateTimeFormatter as Iced
            String date;
            BufferedString tmpStr = new BufferedString();
            for (int i = 0; i < c._len; ++i) {
                if (!c.isNA(i)) {
                    if (isStr)
                        date = c.atStr(tmpStr, i).toString();
                    else
                        date = dom[(int) c.at8(i)];
                    nc.addNum(DateTime.parse(date, _fmt).getMillis(), 0);
                } else
                    nc.addNA();
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 10 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstBinOp method vec_op_frame.

private ValFrame vec_op_frame(Vec vec, Frame fr) {
    // Already checked for same rows, non-zero frame
    Frame rt = new Frame(fr);
    rt.add("", vec);
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            assert cress.length == chks.length - 1;
            Chunk clf = chks[cress.length];
            for (int c = 0; c < cress.length; c++) {
                Chunk crt = chks[c];
                NewChunk cres = cress[c];
                for (int i = 0; i < clf._len; i++) cres.addNum(op(clf.atd(i), crt.atd(i)));
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, rt).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

NewChunk (water.fvec.NewChunk)42 Frame (water.fvec.Frame)34 Chunk (water.fvec.Chunk)32 ValFrame (water.rapids.vals.ValFrame)23 MRTask (water.MRTask)22 Vec (water.fvec.Vec)19 AppendableVec (water.fvec.AppendableVec)8 BufferedString (water.parser.BufferedString)6 Key (water.Key)5 Futures (water.Futures)4 MRTask2 (water.MRTask2)4 ValNum (water.rapids.vals.ValNum)4 C0DChunk (water.fvec.C0DChunk)3 Val (water.rapids.Val)2 ValRow (water.rapids.vals.ValRow)2 ConfusionMatrix (hex.ConfusionMatrix)1 MersenneTwisterRNG (hex.rng.MersenneTwisterRNG)1 DataInputStream (java.io.DataInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1