Search in sources :

Example 6 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstBinOp method frame_op_frame.

/**
   * Applies this binary operator to two frames, element by element, widening
   * where the shapes allow it:
   * <ul>
   *   <li>different row counts are accepted only when one side has exactly one
   *       row and both sides have the same column count; that case is handed
   *       to {@code frame_op_row};</li>
   *   <li>a frame with zero columns is returned as-is;</li>
   *   <li>a single-column frame is widened across the multi-column one;</li>
   *   <li>otherwise the column counts must match and columns are paired up by
   *       position.</li>
   * </ul>
   *
   * @param lf left operand
   * @param rt right operand
   * @return the result frame wrapped in a {@code ValFrame}
   * @throws IllegalArgumentException if the row or column counts are incompatible
   */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
    if (lf.numRows() != rt.numRows()) {
        // The only tolerated row mismatch is a single row broadcast across a frame.
        final boolean singleRow = lf.numRows() == 1 || rt.numRows() == 1;
        if (!singleRow) {
            throw new IllegalArgumentException("Frames must have same rows, found " + lf.numRows() + " rows and " + rt.numRows() + " rows.");
        }
        if (lf.numCols() != rt.numCols()) {
            throw new IllegalArgumentException("Frames must have same columns, found " + lf.numCols() + " columns and " + rt.numCols() + " columns.");
        }
        return frame_op_row(lf, rt);
    }

    // Short-cuts: an empty frame on either side is the answer.
    if (lf.numCols() == 0) {
        return new ValFrame(lf);
    }
    if (rt.numCols() == 0) {
        return new ValFrame(rt);
    }

    // One column against many: widen the lone column.
    if (lf.numCols() == 1 && rt.numCols() > 1) {
        return vec_op_frame(lf.vecs()[0], rt);
    }
    if (rt.numCols() == 1 && lf.numCols() > 1) {
        return frame_op_vec(lf, rt.vecs()[0]);
    }

    if (lf.numCols() != rt.numCols()) {
        throw new IllegalArgumentException("Frames must have same columns, found " + lf.numCols() + " columns and " + rt.numCols() + " columns.");
    }

    MRTask task = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            // Scratch buffers reused for every string cell read in this call.
            BufferedString leftBuf = new BufferedString();
            BufferedString rightBuf = new BufferedString();
            final int ncols = cress.length;
            // The input holds the left columns followed by the right columns.
            assert chks.length == (ncols << 1);
            for (int col = 0; col < ncols; col++) {
                Chunk left = chks[col];
                Chunk right = chks[col + ncols];
                NewChunk out = cress[col];
                // The left column alone decides between str_op and op.
                if (left.vec().isString()) {
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(str_op(left.atStr(leftBuf, row), right.atStr(rightBuf, row)));
                    }
                } else {
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(op(left.atd(row), right.atd(row)));
                    }
                }
            }
        }
    };
    Frame res = task.doAll(lf.numCols(), Vec.T_NUM, new Frame(lf).add(rt)).outputFrame(lf._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(lf, res);
}
Also used: ValFrame(water.rapids.vals.ValFrame), Frame(water.fvec.Frame), MRTask(water.MRTask), BufferedString(water.parser.BufferedString), Chunk(water.fvec.Chunk), NewChunk(water.fvec.NewChunk)

Example 7 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
   * Auto-widens a string scalar to every element of the frame, producing one
   * numeric output column per input column.
   *
   * @param str the string scalar, used as the left operand
   * @param fr  the frame whose elements are the right operands
   * @return the result frame wrapped in a {@code ValFrame}
   */
private ValFrame scalar_op_frame(final String str, Frame fr) {
    MRTask task = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            // Scratch buffer reused for every string cell read in this call.
            BufferedString cellBuf = new BufferedString();
            for (int col = 0; col < chks.length; col++) {
                Chunk in = chks[col];
                NewChunk out = cress[col];
                Vec colVec = in.vec();

                if (colVec.isString()) {
                    // String columns: compare as BufferedStrings via str_op.
                    final BufferedString scalar = new BufferedString(str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(str_op(scalar, in.atStr(cellBuf, row)));
                    }
                    continue;
                }

                if (colVec.isCategorical()) {
                    // Categorical columns: translate the string into its domain
                    // index and apply op (not str_op). One could argue str_op on
                    // the domain strings is "more right", but this operation
                    // only makes sense for EQ/NE and comparing doubles is much
                    // faster than comparing strings.
                    final double level = (double) ArrayUtils.find(colVec.domain(), str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(op(level, in.atd(row)));
                    }
                    continue;
                }

                // Mixing string and numeric: the answer is a constant (false or
                // true only), computed once from two values known to differ.
                final double constant = op(1, 2);
                for (int row = 0; row < in._len; row++) {
                    out.addNum(constant);
                }
            }
        }
    };
    Frame res = task.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(res);
}
Also used: ValFrame(water.rapids.vals.ValFrame), Frame(water.fvec.Frame), Vec(water.fvec.Vec), MRTask(water.MRTask), BufferedString(water.parser.BufferedString), Chunk(water.fvec.Chunk), NewChunk(water.fvec.NewChunk)

Example 8 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstAsDate method apply.

/**
 * Converts a single categorical or string column into a numeric column of
 * millisecond timestamps, parsing each value with the supplied format.
 *
 * <p>{@code asts[1]} is evaluated to the input frame and {@code asts[2]} to
 * the format string. NA cells stay NA in the output.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper that tracks the evaluated input frame
 * @param asts argument ASTs; index 1 is the frame, index 2 the format string
 * @return a one-column numeric frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the frame does not have exactly one
 *         categorical or string column, or if the format string is empty
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // Validate the column count BEFORE indexing: a zero-column frame must be
    // reported as a bad argument, not as an ArrayIndexOutOfBoundsException.
    final Vec[] vecs = fr.vecs();
    if (vecs.length != 1)
        throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
    Vec vec = vecs[0];
    if (!(vec.isCategorical() || vec.isString()))
        throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
    final String format = asts[2].exec(env).getStr();
    if (format.isEmpty())
        throw new IllegalArgumentException("as.Date requires a non-empty format string");
    // check the format string more?
    final String[] dom = vec.domain();
    final boolean isStr = dom == null && vec.isString();
    assert isStr || dom != null : "as.Date error: domain is null, but vec is not String";
    Frame fr2 = new MRTask() {

        // Not shipped with the task (transient); rebuilt in setupLocal().
        private transient DateTimeFormatter _fmt;

        @Override
        public void setupLocal() {
            // done on each node in lieu of rewriting DateTimeFormatter as Iced
            _fmt = ParseTime.forStrptimePattern(format).withZone(ParseTime.getTimezone());
        }

        @Override
        public void map(Chunk c, NewChunk nc) {
            String date;
            // Scratch buffer reused for every string cell read in this chunk.
            BufferedString tmpStr = new BufferedString();
            for (int i = 0; i < c._len; ++i) {
                if (!c.isNA(i)) {
                    if (isStr)
                        date = c.atStr(tmpStr, i).toString();
                    else
                        // Categorical: the cell value is an index into the domain.
                        date = dom[(int) c.at8(i)];
                    nc.addNum(DateTime.parse(date, _fmt).getMillis(), 0);
                } else
                    nc.addNA();
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(fr2);
}
Also used: ValFrame(water.rapids.vals.ValFrame), Frame(water.fvec.Frame), BufferedString(water.parser.BufferedString), Chunk(water.fvec.Chunk), NewChunk(water.fvec.NewChunk), Vec(water.fvec.Vec), MRTask(water.MRTask), DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 9 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstReplaceAll method replaceAllStringCol.

/**
 * Builds a new string column in which every match of a regular expression is
 * replaced in each non-NA value; NA cells stay NA.
 *
 * @param vec the string column to transform
 * @param pat regular expression handed to {@link String#replaceAll}
 * @param rep replacement text handed to {@link String#replaceAll}
 * @param ic  when true, each value is lower-cased (English locale) before the
 *            replacement is applied
 * @return the newly built string column
 */
private Vec replaceAllStringCol(Vec vec, String pat, String rep, boolean ic) {
    // Final copies for capture by the anonymous task below.
    final String regex = pat;
    final String substitute = rep;
    final boolean lowerFirst = ic;
    MRTask task = new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            if (chk instanceof C0DChunk) {
                // all NAs
                for (int row = 0; row < chk.len(); row++) {
                    newChk.addNA();
                }
                return;
            }
            // An ASCII-only fast path (CStrChunk.asciiReplaceAll) was sketched
            // here but is disabled; UTF requires Java string methods for accuracy.
            BufferedString scratch = new BufferedString();
            for (int row = 0; row < chk._len; row++) {
                if (chk.isNA(row)) {
                    newChk.addNA();
                    continue;
                }
                String value = chk.atStr(scratch, row).toString();
                if (lowerFirst) {
                    value = value.toLowerCase(Locale.ENGLISH);
                }
                newChk.addStr(value.replaceAll(regex, substitute));
            }
        }
    };
    return task.doAll(new byte[] { Vec.T_STR }, vec).outputFrame().anyVec();
}
Also used: MRTask(water.MRTask), BufferedString(water.parser.BufferedString)

Example 10 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstReplaceFirst method replaceFirstStringCol.

/**
 * Builds a new string column in which the first match of a regular expression
 * is replaced in each non-NA value; NA cells stay NA.
 *
 * @param vec the string column to transform
 * @param pat regular expression handed to {@link String#replaceFirst}
 * @param rep replacement text handed to {@link String#replaceFirst}
 * @param ic  when true, each value is lower-cased (English locale) before the
 *            replacement is applied
 * @return the newly built string column
 */
private Vec replaceFirstStringCol(Vec vec, String pat, String rep, boolean ic) {
    // Final copies for capture by the anonymous task below.
    final String regex = pat;
    final String substitute = rep;
    final boolean lowerFirst = ic;
    MRTask task = new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            if (chk instanceof C0DChunk) {
                // all NAs
                for (int row = 0; row < chk.len(); row++) {
                    newChk.addNA();
                }
                return;
            }
            // An ASCII-only fast path (CStrChunk.asciiReplaceFirst) was sketched
            // here but is disabled; UTF requires Java string methods for accuracy.
            BufferedString scratch = new BufferedString();
            for (int row = 0; row < chk._len; row++) {
                if (chk.isNA(row)) {
                    newChk.addNA();
                    continue;
                }
                String value = chk.atStr(scratch, row).toString();
                if (lowerFirst) {
                    value = value.toLowerCase(Locale.ENGLISH);
                }
                newChk.addStr(value.replaceFirst(regex, substitute));
            }
        }
    };
    return task.doAll(new byte[] { Vec.T_STR }, vec).outputFrame().anyVec();
}
Also used: MRTask(water.MRTask), BufferedString(water.parser.BufferedString)

Aggregations

BufferedString (water.parser.BufferedString): 43, Frame (water.fvec.Frame): 12, Test (org.junit.Test): 9, MRTask (water.MRTask): 8, Vec (water.fvec.Vec): 8, Chunk (water.fvec.Chunk): 7, NewChunk (water.fvec.NewChunk): 6, ValFrame (water.rapids.vals.ValFrame): 5, IcedLong (water.util.IcedLong): 5, IOException (java.io.IOException): 2, ByteBuffer (java.nio.ByteBuffer): 2, Random (java.util.Random): 2, DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 2, TestFrameBuilder (water.fvec.TestFrameBuilder): 2, BackendModel (deepwater.backends.BackendModel): 1, BackendParams (deepwater.backends.BackendParams): 1, RuntimeOptions (deepwater.backends.RuntimeOptions): 1, ImageDataSet (deepwater.datasets.ImageDataSet): 1, GenModel (hex.genmodel.GenModel): 1, EasyPredictModelWrapper (hex.genmodel.easy.EasyPredictModelWrapper): 1