Search in sources :

Example 1 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class KMeansModel method predictScoreImpl.

/**
 * Scores a frame with this model. When {@code _parms._pred_indicator} is off the call is
 * delegated unchanged to the superclass; otherwise one zero-initialised output column per
 * cluster is appended to a copy of {@code adaptedFr}, filled row by row from
 * {@code score_indicator}, and the appended columns are returned as a new frame.
 *
 * @param orig            frame handed to the metric builder after scoring
 * @param adaptedFr       frame whose columns are scored; it is wrapped in a new Frame, not used directly
 * @param destination_key name of the key under which the result frame is stored in the DKV
 * @param j               optional job; polled for a stop request and updated by 1 per processed chunk set
 * @param computeMetrics  forwarded to the superclass on the non-indicator path only
 * @return the frame holding the appended output columns, stored under {@code destination_key}
 */
@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
    // Guard clause: without the indicator option the superclass implementation applies.
    if (!_parms._pred_indicator) {
        return super.predictScoreImpl(orig, adaptedFr, destination_key, j, computeMetrics);
    }

    // Number of output columns is taken from the last entry of _output._k.
    final int len = _output._k[_output._k.length - 1];
    final Frame scratch = new Frame(adaptedFr);
    // NOTE(review): Double.toString on an int yields names such as "cluster_1.0" - confirm this is intended.
    for (int cluster = 1; cluster <= len; cluster++) {
        scratch.add("cluster_" + Double.toString(cluster), scratch.anyVec().makeZero());
    }

    new MRTask() {

        @Override
        public void map(Chunk[] chks) {
            // Skip the whole chunk set once the task is cancelled or the job asked to stop.
            if (isCancelled()) {
                return;
            }
            if (j != null && j.stop_requested()) {
                return;
            }
            // Output columns sit directly after the first _output._names.length columns.
            final int firstOut = _output._names.length;
            final double[] rowBuf = new double[firstOut];
            final double[] preds = new double[len];
            final int nrows = chks[0]._len;
            for (int r = 0; r < nrows; r++) {
                // Reset the reused prediction buffer before every row.
                Arrays.fill(preds, 0);
                final double[] scored = score_indicator(chks, r, rowBuf, preds);
                for (int col = 0; col < preds.length; col++) {
                    chks[firstOut + col].set(r, scored[col]);
                }
            }
            if (j != null) {
                j.update(1);
            }
        }
    }.doAll(scratch);

    // Pull out only the appended columns and re-home them under the destination key.
    final int from = _output._names.length;
    final int to = scratch.numCols();
    final Frame extracted = scratch.extractFrame(from, to);
    final Frame result = new Frame(Key.<Frame>make(destination_key), extracted.names(), extracted.vecs());
    DKV.put(result);
    // Metrics are built against the original frame; computeMetrics is not consulted on this path.
    makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
    return result;
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk)

Example 2 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class PCAModel method predictScoreImpl.

/**
 * Scores a frame by appending {@code _parms._k} zero-initialised columns named
 * {@code PC1..PCk} to a copy of {@code adaptedFr}, filling them row by row from
 * {@code score0}, and returning those appended columns as a new frame.
 *
 * @param orig            frame handed to the metric builder after scoring
 * @param adaptedFr       frame whose columns are scored; it is wrapped in a new Frame, not used directly
 * @param destination_key name of the key under which the result frame is stored in the DKV
 * @param j               optional job; polled for a stop request and updated by 1 per processed chunk set
 * @param computeMetrics  not read by this implementation
 * @return the frame holding the appended output columns, stored under {@code destination_key}
 */
@Override
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
    final Frame scratch = new Frame(adaptedFr);
    for (int pc = 1; pc <= _parms._k; pc++) {
        scratch.add("PC" + String.valueOf(pc), scratch.anyVec().makeZero());
    }

    new MRTask() {

        @Override
        public void map(Chunk[] chks) {
            // Skip the whole chunk set once the task is cancelled or the job asked to stop.
            if (isCancelled()) {
                return;
            }
            if (j != null && j.stop_requested()) {
                return;
            }
            // Output columns sit directly after the first _output._names.length columns.
            final int firstOut = _output._names.length;
            final double[] rowBuf = new double[firstOut];
            final double[] preds = new double[_parms._k];
            final int nrows = chks[0]._len;
            for (int r = 0; r < nrows; r++) {
                final double[] scored = score0(chks, r, rowBuf, preds);
                for (int col = 0; col < preds.length; col++) {
                    chks[firstOut + col].set(r, scored[col]);
                }
            }
            if (j != null) {
                j.update(1);
            }
        }
    }.doAll(scratch);

    // Pull out only the appended columns and re-home them under the destination key.
    final int from = _output._names.length;
    final int to = scratch.numCols();
    final Frame extracted = scratch.extractFrame(from, to);
    final Frame result = new Frame(Key.<Frame>make(destination_key), extracted.names(), extracted.vecs());
    DKV.put(result);
    // NOTE(review): metrics are built regardless of computeMetrics - confirm that is intended.
    makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
    return result;
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk)

Example 3 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class MakeGLMModelHandler method oneHot.

/**
 * Builds a numeric frame from the dense rows that a {@link DataInfo} over {@code fr} produces.
 * <p>
 * With {@code interactionsOnly} set, only the expanded columns of the interaction vecs are
 * emitted, named after the matching entries of {@code dinfo.coefNames()}. Otherwise every
 * one of the {@code dinfo.fullN()} dense-row entries becomes an output column.
 *
 * @param fr               input frame
 * @param interactions     interaction specification passed through to {@code DataInfo}
 * @param useAll           passed to {@code DataInfo}; when false a categorical column is counted as
 *                         one coefficient fewer than its domain size while locating interaction offsets
 * @param standardize      selects {@code TransformType.STANDARDIZE} instead of {@code TransformType.NONE}
 *                         for the first transform argument of {@code DataInfo}
 * @param interactionsOnly emit only the interaction columns
 * @param skipMissing      passed to {@code DataInfo}; rows reported as bad are also left out of the output
 * @return the newly created frame
 * @throws IllegalArgumentException if {@code interactionsOnly} is set but there are no interaction vecs
 */
public static Frame oneHot(Frame fr, String[] interactions, boolean useAll, boolean standardize, final boolean interactionsOnly, final boolean skipMissing) {
    final DataInfo dinfo = new DataInfo(fr, null, 1, useAll, standardize ? TransformType.STANDARDIZE : TransformType.NONE, TransformType.NONE, skipMissing, false, false, false, false, false, interactions);
    Frame res;
    if (interactionsOnly) {
        if (null == dinfo._interactionVecs)
            throw new IllegalArgumentException("no interactions");
        int noutputs = 0;
        // colIds[k]: index into the dense row / coefNames where the k-th interaction starts.
        final int[] colIds = new int[dinfo._interactionVecs.length];
        // offsetIds[k]: expanded length (number of output columns) of the k-th interaction.
        final int[] offsetIds = new int[dinfo._interactionVecs.length];
        int idx = 0;
        String[] coefNames = dinfo.coefNames();
        for (int i : dinfo._interactionVecs) {
            int expanded = ((InteractionWrappedVec) dinfo._adaptedFrame.vec(i)).expandedLength();
            offsetIds[idx++] = expanded;
            noutputs += expanded;
        }
        String[] names = new String[noutputs];
        idx = 0;
        int offset = 0;
        int namesIdx = 0;
        for (int i = 0; i < dinfo._adaptedFrame.numCols(); ++i) {
            Vec v = dinfo._adaptedFrame.vec(i);
            if (v instanceof InteractionWrappedVec) {
                // Record where this interaction starts and copy its coefficient names.
                colIds[idx] = offset;
                for (int nid = 0; nid < offsetIds[idx]; ++nid) names[namesIdx++] = coefNames[offset++];
                idx++;
                // No more interaction vecs left. idx was already advanced, so the comparison
                // must be >=; with > the exit could never fire and a further wrapped vec
                // would index colIds out of bounds.
                if (idx >= dinfo._interactionVecs.length)
                    break;
            } else {
                // Skip over the coefficients contributed by a non-interaction column.
                if (v.isCategorical())
                    offset += v.domain().length - (useAll ? 0 : 1);
                else
                    offset++;
            }
        }
        res = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                DataInfo.Row r = dinfo.newDenseRow();
                for (int i = 0; i < cs[0]._len; ++i) {
                    r = dinfo.extractDenseRow(cs, i, r);
                    if (skipMissing && r.isBad())
                        continue;
                    // Copy each interaction's slice of the dense row into consecutive output chunks.
                    int newChkIdx = 0;
                    for (int idx = 0; idx < colIds.length; ++idx) {
                        int startOffset = colIds[idx];
                        for (int start = startOffset; start < (startOffset + offsetIds[idx]); ++start) ncs[newChkIdx++].addNum(r.get(start));
                    }
                }
            }
        }.doAll(noutputs, Vec.T_NUM, dinfo._adaptedFrame).outputFrame(Key.make(), names, null);
    } else {
        // Every dense-row entry becomes one numeric output column.
        byte[] types = new byte[dinfo.fullN()];
        Arrays.fill(types, Vec.T_NUM);
        res = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                DataInfo.Row r = dinfo.newDenseRow();
                for (int i = 0; i < cs[0]._len; ++i) {
                    r = dinfo.extractDenseRow(cs, i, r);
                    if (skipMissing && r.isBad())
                        continue;
                    for (int n = 0; n < ncs.length; ++n) ncs[n].addNum(r.get(n));
                }
            }
        }.doAll(types, dinfo._adaptedFrame.vecs()).outputFrame(Key.make("OneHot" + Key.make().toString()), dinfo.coefNames(), null);
    }
    // The DataInfo is only needed to build the result; release it before returning.
    dinfo.dropInteractions();
    dinfo.remove();
    return res;
}
Also used : DataInfo(hex.DataInfo) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 4 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class RemoveAllHandler method remove.

/**
 * Removes all objects: drops every job, ends and clears all Rapids sessions, clears the
 * key store on every node and finally cleans up the backing store.
 * Called through reflection by RequestServer.
 *
 * @param version API version (not read here)
 * @param u       request schema, returned unchanged
 * @return the same schema instance that was passed in
 */
@SuppressWarnings("unused")
public RemoveAllV3 remove(int version, RemoveAllV3 u) {
    Log.info("Removing all objects");
    final Futures pending = new Futures();

    // Remove leftover jobs.
    // NOTE(review): the result of stop_requested() is discarded; elsewhere it is used as a
    // boolean query, so this may not actually cancel the job - confirm against water.Job.
    for (Job job : Job.jobs()) {
        job.stop_requested();
        job.remove(pending);
    }

    // Wipe out any and all session info.
    if (RapidsHandler.SESSIONS != null) {
        for (String sessionKey : RapidsHandler.SESSIONS.keySet()) {
            RapidsHandler.SESSIONS.get(sessionKey).endQuietly(null);
        }
        RapidsHandler.SESSIONS.clear();
    }

    // Wait for the job removals queued above before clearing everything else.
    pending.blockForPending();

    // Bulk key removal on every node, without regard to what the keys are.
    new MRTask(H2O.MIN_HI_PRIORITY) {

        @Override
        public void setupLocal() {
            H2O.raw_clear();
            water.fvec.Vec.ESPC.clear();
        }
    }.doAllNodes();

    // Wipe the backing store as well.
    H2O.getPM().getIce().cleanUp();
    Log.info("Finished removing objects");
    return u;
}
Also used : Futures(water.Futures) MRTask(water.MRTask) Job(water.Job)

Example 5 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstCut method apply.

/**
 * Bins a single numeric column into intervals and returns the bin index per row as a
 * one-column frame whose domain holds the interval labels.
 * <p>
 * Arguments are read from {@code asts}: [1] the frame, [2] the cut points (or a single
 * number giving how many equal-width bins to create), [3] optional labels, [4] whether the
 * lowest boundary is inclusive, [5] whether intervals are closed on the right, [6] the
 * number of decimals the cut points are rounded to (capped at 12).
 * Rows that are NaN or fall outside the outermost boundaries produce NaN.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper that tracks the evaluated frame argument
 * @param asts the argument expressions described above
 * @return a {@code ValFrame} wrapping the binned column
 * @throws IllegalArgumentException if the frame is not a single non-categorical column, if a
 *         single cut value below 2 is given, or if {@code labels} does not have one entry per bin
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    double[] cuts = check(asts[2]);
    Arrays.sort(cuts);
    String[] labels = check2(asts[3]);
    final boolean lowest = asts[4].exec(env).getNum() == 1;
    final boolean rite = asts[5].exec(env).getNum() == 1;
    // cap at 12
    final int digits = Math.min((int) asts[6].exec(env).getNum(), 12);
    if (fr.vecs().length != 1 || fr.vecs()[0].isCategorical())
        throw new IllegalArgumentException("First argument must be a numeric column vector");
    double fmin = fr.anyVec().min();
    double fmax = fr.anyVec().max();
    // c(0,10,100) -> 2 bins (0,10] U (10, 100]
    int nbins = cuts.length - 1;
    if (nbins == 0) {
        if (cuts[0] < 2)
            throw new IllegalArgumentException("The number of cuts must be >= 2. Got: " + cuts[0]);
        // A single number means: cut the vec into that many pieces of equal length.
        nbins = (int) Math.floor(cuts[0]);
        double width = (fmax - fmin) / nbins;
        // nbins intervals need nbins + 1 boundaries. Allocating only nbins (as before) produced
        // one interval too few, a double-width last interval and a null trailing domain entry.
        cuts = new double[nbins + 1];
        // Widen the outermost boundaries slightly so that min and max fall inside a bin.
        cuts[0] = fmin - 0.001 * (fmax - fmin);
        for (int i = 1; i < cuts.length; ++i) cuts[i] = (i == cuts.length - 1) ? (fmax + 0.001 * (fmax - fmin)) : (fmin + i * width);
    }
    if (labels != null && labels.length != nbins)
        throw new IllegalArgumentException("`labels` vector does not match the number of cuts.");
    // cutz aliases cuts, so the rounding below is also what the binning task sees.
    final double[] cutz = cuts;
    // Round the cut points to `digits` decimals: example floor(2.676*100 + 0.5) / 100
    final double scale = Math.pow(10, digits);
    for (int i = 0; i < cuts.length; ++i) cuts[i] = Math.floor(cuts[i] * scale + 0.5) / scale;
    // Construct domain names from labels, or from the bin intervals if labels is null.
    String[][] domains = new String[1][nbins];
    if (labels == null) {
        domains[0][0] = (lowest ? "[" : left(rite)) + cuts[0] + "," + cuts[1] + rite(rite);
        for (int i = 1; i < (cuts.length - 1); ++i) domains[0][i] = left(rite) + cuts[i] + "," + cuts[i + 1] + rite(rite);
    } else
        domains[0] = labels;
    Frame fr2 = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            int rows = c._len;
            for (int r = 0; r < rows; ++r) {
                double x = c.atd(r);
                // Missing values and values outside (or excluded at) the outer boundaries get NaN.
                if (Double.isNaN(x)
                        || (lowest && x < cutz[0])
                        || (!lowest && (x < cutz[0] || MathUtils.equalsWithinOneSmallUlp(x, cutz[0])))
                        || (rite && x > cutz[cutz.length - 1])
                        || (!rite && (x > cutz[cutz.length - 1] || MathUtils.equalsWithinOneSmallUlp(x, cutz[cutz.length - 1]))))
                    nc.addNum(Double.NaN);
                else {
                    // Emit the index of the first interval whose upper boundary admits x.
                    for (int i = 1; i < cutz.length; ++i) {
                        if (rite) {
                            if (x <= cutz[i]) {
                                nc.addNum(i - 1);
                                break;
                            }
                        } else if (x < cutz[i]) {
                            nc.addNum(i - 1);
                            break;
                        }
                    }
                }
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr.names(), domains);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Aggregations

MRTask (water.MRTask)55 ValFrame (water.rapids.vals.ValFrame)37 Chunk (water.fvec.Chunk)33 Frame (water.fvec.Frame)33 NewChunk (water.fvec.NewChunk)23 Vec (water.fvec.Vec)17 BufferedString (water.parser.BufferedString)9 ValNum (water.rapids.vals.ValNum)6 Val (water.rapids.Val)5 AstRoot (water.rapids.ast.AstRoot)4 AstNumList (water.rapids.ast.params.AstNumList)4 Key (water.Key)3 Test (org.junit.Test)2 Futures (water.Futures)2 AstNum (water.rapids.ast.params.AstNum)2 AstStr (water.rapids.ast.params.AstStr)2 AstStrList (water.rapids.ast.params.AstStrList)2 AstGroup (water.rapids.ast.prims.mungers.AstGroup)2 ValRow (water.rapids.vals.ValRow)2 DataInfo (hex.DataInfo)1