Search in sources :

Example 1 with CountedCompleter

use of jsr166y.CountedCompleter in project h2o-3 by h2oai.

the class ScoreBuildHistogram2 method setupLocal.

/**
 * Node-local setup: prepares per-chunk working arrays and forks the two local phases.
 *
 * <p>Phase 1 (first {@code LocalMR}) loads every local chunk, assigns rows to tree nodes and
 * groups row indices by node id. The completer passed to phase 1 then launches phase 2: one
 * {@code LocalMR} per (active) column, each running {@code ComputeHistoThread} workers.
 *
 * <p>Nothing is returned; results are published through the fields {@code _cids}, {@code _chks},
 * {@code _ys}, {@code _ws}, {@code _nhs} and {@code _rss}, all indexed by local chunk position.
 */
@Override
public void setupLocal() {
    // Reserve one pending completion on this task before any work is forked.
    addToPendingCount(1);
    // Init all the internal tree fields after shipping over the wire
    _tree.init_tree();
    Vec v = _fr2.anyVec();
    assert (v != null);
    _cids = VecUtils.getLocalChunkIds(v);
    _chks = new Chunk[_cids.length][_fr2.numCols()];
    _ys = new double[_cids.length][];
    _ws = new double[_cids.length][];
    _nhs = new int[_cids.length][];
    _rss = new int[_cids.length][];
    // Largest chunk size, taken as the maximum difference of consecutive espc entries.
    long[] espc = v.espc();
    int largestChunkSz = 0;
    for (int i = 1; i < espc.length; ++i) {
        int sz = (int) (espc[i] - espc[i - 1]);
        if (sz > largestChunkSz)
            largestChunkSz = sz;
    }
    final int fLargestChunkSz = largestChunkSz;
    if (_weightIdx == -1) {
        // No weight column: every entry of _ws points at the SAME array of ones,
        // sized to the largest chunk.
        double[] ws = new double[largestChunkSz];
        Arrays.fill(ws, 1);
        Arrays.fill(_ws, ws);
    }
    // Shared work counter: each phase-1 worker claims the next unprocessed local chunk position.
    final AtomicInteger cidx = new AtomicInteger(0);
    // First do the phase 1 on all local data
    new LocalMR(new MrFun() {

        // more or less copied from ScoreBuildHistogram
        private void map(int id, Chunk[] chks) {
            final C4VolatileChunk nids = (C4VolatileChunk) chks[_nidIdx];
            // Pass 1: Score a prior partially-built tree model, and make new Node
            // assignments to every row.  This involves pulling out the current
            // assigned DecidedNode, "scoring" the row against that Node's decision
            // criteria, and assigning the row to a new child UndecidedNode (and
            // giving it an improved prediction).
            int[] nnids;
            if (// Prior pass exists?
            _leaf > 0)
                nnids = score_decide(chks, nids.getValues());
            else {
                // Just flag all the NA rows
                nnids = new int[nids._len];
                int[] is = nids.getValues();
                for (int row = 0; row < nids._len; row++) {
                    if (isDecidedRow(is[row]))
                        nnids[row] = DECIDED_ROW;
                }
            }
            // Pass 2: accumulate all rows, cols into histograms
            // Sort the rows by NID, so we visit all the same NIDs in a row
            // Find the count of unique NIDs in this chunk
            // (nh[i + 1] counts the rows whose node id is i; negative ids are skipped.)
            int[] nh = (_nhs[id] = new int[_numLeafs + 1]);
            for (int i : nnids) if (i >= 0)
                nh[i + 1]++;
            // Rollup the histogram of rows-per-NID in this chunk
            // (prefix sum: nh[i] becomes the start offset of node i's group in the rows array)
            for (int i = 0; i < _numLeafs; i++) nh[i + 1] += nh[i];
            // Splat the rows into NID-groups
            // (each write advances nh[nid], so afterwards nh[nid] is the END offset of that group)
            int[] rows = (_rss[id] = new int[nnids.length]);
            for (int row = 0; row < nnids.length; row++) if (nnids[row] >= 0)
                rows[nh[nnids[row]]++] = row;
        }

        @Override
        protected void map(int id) {
            Vec[] vecs = _fr2.vecs();
            // The id passed in is ignored: it is overwritten immediately with the next
            // position claimed from the shared counter, until all local chunks are taken.
            for (id = cidx.getAndIncrement(); id < _cids.length; id = cidx.getAndIncrement()) {
                // NOTE: this local int shadows the AtomicInteger 'cidx' inside the loop body only;
                // the loop header above still refers to the counter.
                int cidx = _cids[id];
                Chunk[] chks = _chks[id];
                for (int i = 0; i < chks.length; ++i) chks[i] = vecs[i].chunkForChunkIdx(cidx);
                map(id, chks);
                chks[_nidIdx].close(cidx, _fs);
                Chunk resChk = chks[_workIdx];
                int len = resChk.len();
                // Use the chunk's own value array when it is a C8DVolatileChunk; otherwise
                // extract the values into a freshly allocated array.
                if (resChk instanceof C8DVolatileChunk) {
                    _ys[id] = ((C8DVolatileChunk) resChk).getValues();
                } else
                    _ys[id] = resChk.getDoubles(MemoryManager.malloc8d(len), 0, len);
                if (_weightIdx != -1) {
                    _ws[id] = chks[_weightIdx].getDoubles(MemoryManager.malloc8d(len), 0, len);
                }
            }
        }
    }, new H2O.H2OCountedCompleter(this) {

        // Launches phase 2 (histogram building per column); presumably invoked once phase 1
        // has completed - confirm against LocalMR's completion contract.
        public void onCompletion(CountedCompleter cc) {
            final int ncols = _ncols;
            // When a column filter is present, active_cols holds the indices of the active columns
            // (sized to at least 1 even if the filter is empty).
            final int[] active_cols = _activeCols == null ? null : new int[Math.max(1, _activeCols.cardinality())];
            int nactive_cols = active_cols == null ? ncols : active_cols.length;
            // Workers per column: all CPUs for small problems, otherwise scaled down by the number of active columns.
            final int numWrks = _hcs.length * nactive_cols < 16 * 1024 ? H2O.NUMCPUS : Math.min(H2O.NUMCPUS, Math.max(4 * H2O.NUMCPUS / nactive_cols, 1));
            // NOTE(review): 'rem' is derived from ncols while the tasks below are sized by nactive_cols,
            // and it is compared against the mapped column index - confirm this is intended.
            final int rem = H2O.NUMCPUS - numWrks * ncols;
            ScoreBuildHistogram2.this.addToPendingCount(1 + nactive_cols);
            if (active_cols != null) {
                int j = 0;
                for (int i = 0; i < ncols; ++i) if (_activeCols.contains(i))
                    active_cols[j++] = i;
            }
            // MRTask (over columns) launching MrTasks (over number of workers) for each column.
            // We want FJ to start processing all the columns before parallelizing within column to reduce memory overhead.
            // (running single column in n threads means n-copies of the histogram)
            // This is how it works:
            //    1) Outer MRTask walks down its tree, forking tasks with exponentially decreasing number of columns until reaching its left most leaf for columns 0.
            //       At this point, the local fjq for this thread has a task for processing half of columns at the bottom, followed by task for 1/4 of columns and so on.
            //       Other threads start stealing work from the bottom.
            //    2) forks the leaf task and (because its polling from the top) executes the LocalMr for the column 0.
            // This way we should have columns as equally distributed as possible without resorting to shared priority queue
            new LocalMR(new MrFun() {

                @Override
                protected void map(int c) {
                    // Translate the task index into the real column index when a column filter is active.
                    c = active_cols == null ? c : active_cols[c];
                    new LocalMR(new ComputeHistoThread(_hcs.length == 0 ? new DHistogram[0] : _hcs[c], c, fLargestChunkSz, new AtomicInteger()), numWrks + (c < rem ? 1 : 0), ScoreBuildHistogram2.this).fork();
                }
            }, nactive_cols, ScoreBuildHistogram2.this).fork();
        }
    }).fork();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CountedCompleter(jsr166y.CountedCompleter)

Example 2 with CountedCompleter

use of jsr166y.CountedCompleter in project h2o-2 by h2oai.

the class FrameSplitter method compute2.

/**
 * Splits {@code dataset} into {@code ratios.length + 1} frames.
 *
 * <p>The source frame is read-locked, one destination frame per segment is created and
 * write-locked, and a launcher task is submitted that starts one {@code FrameSplitTask} per
 * segment. Exceptions reported by the per-segment completers are appended to
 * {@code workersExceptions} rather than propagated, so the remaining segments still complete.
 */
@Override
public void compute2() {
    // Hold a read lock on the source data for this job.
    dataset.read_lock(jobKey);
    // One template Vec[] for every output segment.
    final Vec[][] segmentTemplates = makeTemplates(dataset, ratios);
    final int splitCount = segmentTemplates.length;
    assert splitCount == ratios.length + 1 : "Unexpected number of split templates!";
    final Vec[] sourceVecs = dataset.vecs();
    // Create and lock each destination frame; it is recorded only after the lock is taken.
    splits = new Frame[splitCount];
    int idx = 0;
    while (idx < splitCount) {
        final Frame segment = new Frame(destKeys[idx], dataset.names(), segmentTemplates[idx]);
        segment.delete_and_lock(jobKey);
        splits[idx] = segment;
        idx++;
    }
    // This task waits for exactly one child: the launcher below.
    setPendingCount(1);
    final H2OCountedCompleter launcher = new H2OCountedCompleter(FrameSplitter.this) {

        @Override
        public void compute2() {
            // The launcher waits for one completion per segment.
            setPendingCount(splitCount);
            for (int k = 0; k < splitCount; k++) {
                // Per-segment completer: records a failure and then completes normally.
                final H2OCountedCompleter segmentCompleter = new H2OCountedCompleter(this) {

                    @Override
                    public void compute2() {
                    }

                    @Override
                    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
                        // Guarded by the FrameSplitter monitor: several workers may report failures concurrently.
                        synchronized (FrameSplitter.this) {
                            if (workersExceptions == null) {
                                workersExceptions = new Throwable[1];
                            } else {
                                workersExceptions = Arrays.copyOf(workersExceptions, workersExceptions.length + 1);
                            }
                            workersExceptions[workersExceptions.length - 1] = ex;
                        }
                        // The exception has been handled here, so perform a normal completion
                        // and do not propagate it further.
                        tryComplete();
                        return false;
                    }
                };
                new FrameSplitTask(segmentCompleter, sourceVecs, ratios, k).asyncExec(splits[k]);
            }
            // Complete the launcher's own share once all segment tasks are started.
            tryComplete();
        }
    };
    H2O.submitTask(launcher);
    // Complete this task's own share; final completion arrives via the launcher.
    tryComplete();
}
Also used : H2OCountedCompleter(water.H2O.H2OCountedCompleter) H2OCountedCompleter(water.H2O.H2OCountedCompleter) CountedCompleter(jsr166y.CountedCompleter)

Example 3 with CountedCompleter

use of jsr166y.CountedCompleter in project h2o-3 by h2oai.

the class RPC method call.

// Make an initial RPC, or re-send a packet.  Always called on 1st send; also
// called on a timeout.
//
// Synchronized on this RPC.  Returns this RPC in every non-exceptional path.
// Any Throwable raised while sending is printed and rethrown via Log.throwErr.
public synchronized RPC<V> call() {
    // Any Completer will not be carried over to remote; add it to the RPC call
    // so completion is signaled after the remote comes back.
    CountedCompleter cc = _dt.getCompleter();
    if (cc != null)
        handleCompleter(cc);
    // If running on self, just submit to queues & do locally
    if (_target == H2O.SELF)
        return handleLocal();
    // Keep a global record, for awhile
    if (_target != null)
        _target.taskPut(_tasknum, this);
    try {
        // Racing Nack rechecked under lock; no need to send retry
        if (_nack)
            return this;
        // We could be racing timeouts-vs-replies.  Blow off timeout if we have an answer.
        if (isDone()) {
            if (_target != null)
                _target.taskRemove(_tasknum);
            return this;
        }
        // send the basic UDP control packet.
        if (!_sentTcp) {
            while (true) {
                // Retry loop for broken TCP sends
                // A fresh buffer is built on every attempt.
                AutoBuffer ab = new AutoBuffer(_target, _dt.priority());
                try {
                    final boolean t;
                    ab.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND);
                    ab.put(_dt);
                    // Capture whether the buffer went over TCP before closing it.
                    t = ab.hasTCP();
                    assert sz_check(ab) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + ab.size() + " for task#" + _tasknum;
                    // Then close; send final byte
                    ab.close();
                    // Set after close (and any other possible fail)
                    _sentTcp = t;
                    // Break out of retry loop
                    break;
                } catch (AutoBuffer.AutoBufferException e) {
                    Log.info("IOException during RPC call: " + e._ioe.getMessage() + ",  AB=" + ab + ", for task#" + _tasknum + ", waiting and retrying...");
                    ab.drainClose();
                    // Back off for half a second before the next attempt.  An interrupt only
                    // shortens the wait; it is ignored and the send is retried regardless.
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException ignore) {
                    }
                }
            }
        // end of while(true)
        } else {
            // Else it was sent via TCP in a prior attempt, and we've timed out.
            // This means the caller's ACK/answer probably got dropped and we need
            // him to resend it (or else the caller is still processing our
            // request).  Send a UDP reminder - but with the CLIENT_TCP_SEND flag
            // instead of the UDP send, and no DTask (since it previously went via
            // TCP, no need to resend it).
            AutoBuffer ab = new AutoBuffer(_target, _dt.priority()).putTask(UDP.udp.exec, _tasknum);
            ab.put1(CLIENT_TCP_SEND).close();
        }
        // Double retry until we exceed existing age.  This is the time to delay
        // until we try again.  Note that we come here immediately on creation,
        // so the first doubling happens before anybody does any waiting.  Also
        // note the generous 5sec cap: ping at least every 5 sec.
        // NOTE(review): the cap actually applied is MAX_TIMEOUT, whose value is not visible here -
        // confirm it still matches the 5 sec mentioned above.
        _retry += (_retry < MAX_TIMEOUT) ? _retry : MAX_TIMEOUT;
        //      UDPTimeOutThread.PENDING.put(_tasknum, this);
        return this;
    } catch (Throwable t) {
        t.printStackTrace();
        throw Log.throwErr(t);
    }
}
Also used : H2OCountedCompleter(water.H2O.H2OCountedCompleter) CountedCompleter(jsr166y.CountedCompleter)

Example 4 with CountedCompleter

use of jsr166y.CountedCompleter in project h2o-2 by h2oai.

the class RPC method call.

// Make an initial RPC, or re-send a packet.  Always called on 1st send; also
// called on a timeout.
//
// Synchronized on this RPC.  Returns this RPC in every non-exceptional path.
// An Error raised while sending is logged and rethrown via Log.err.
public synchronized RPC<V> call() {
    // Count every invocation (first send and each re-send).
    ++_callCnt;
    // If the task carries a completer, move it onto this RPC: register it with
    // addCompleter (unless it is already in _fjtasks) and detach it from the task.
    if (_dt.getCompleter() != null) {
        CountedCompleter cc = _dt.getCompleter();
        assert cc instanceof H2OCountedCompleter;
        boolean alreadyIn = false;
        if (_fjtasks != null)
            for (H2OCountedCompleter hcc : _fjtasks) if (hcc == cc)
                alreadyIn = true;
        if (!alreadyIn)
            addCompleter((H2OCountedCompleter) cc);
        _dt.setCompleter(null);
    }
    // If running on self, just submit to queues & do locally
    if (_target == H2O.SELF) {
        assert _dt.getCompleter() == null;
        // Install a callback as the task's completer so that local completion marks this
        // RPC done, wakes any waiters and runs the registered completions.
        _dt.setCompleter(new H2O.H2OCallback<DTask>() {

            @Override
            public void callback(DTask dt) {
                assert dt == _dt;
                synchronized (RPC.this) {
                    // F/J guarantees called once
                    assert !_done;
                    _done = true;
                    RPC.this.notifyAll();
                }
                doAllCompletions();
            }

            @Override
            public boolean onExceptionalCompletion(Throwable ex, CountedCompleter dt) {
                assert dt == _dt;
                synchronized (RPC.this) {
                    // Filter down to 1st exceptional completion
                    if (_done)
                        return true;
                    _dt.setException(ex);
                    // must be set as the last thing before notify, the waiting thread can wake up at any time!
                    _done = true;
                    RPC.this.notifyAll();
                }
                doAllCompletions();
                return true;
            }
        });
        H2O.submitTask(_dt);
        return this;
    }
    // Keep a global record, for awhile
    if (_target != null)
        _target.taskPut(_tasknum, this);
    try {
        // We could be racing timeouts-vs-replies.  Blow off timeout if we have an answer.
        if (isDone()) {
            if (_target != null)
                _target.taskRemove(_tasknum);
            return this;
        }
        // send the basic UDP control packet.
        if (!_sentTcp) {
            // Ship the UDP packet!
            while (true) {
                // Retry loop for broken TCP sends
                // A fresh buffer is built on every attempt.
                AutoBuffer ab = new AutoBuffer(_target);
                try {
                    ab.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND).put(_dt);
                    // Capture whether the buffer went over TCP before closing it.
                    boolean t = ab.hasTCP();
                    assert sz_check(ab) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + ab.size() + " for task#" + _tasknum;
                    // Then close; send final byte
                    ab.close();
                    // Set after close (and any other possible fail)
                    _sentTcp = t;
                    // Break out of retry loop
                    break;
                } catch (AutoBuffer.AutoBufferException e) {
                    Log.info_no_DKV(Log.Tag.Sys.WATER, "IOException during RPC call: " + e._ioe.getMessage() + ",  AB=" + ab + ", for task#" + _tasknum + ", waiting and retrying...");
                    ab.drainClose();
                    // Back off for half a second before the next attempt.  An interrupt only
                    // shortens the wait; it is ignored and the send is retried regardless.
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException ignore) {
                    }
                }
            }
        // end of while(true)
        } else {
            // Else it was sent via TCP in a prior attempt, and we've timed out.
            // This means the caller's ACK/answer probably got dropped and we need
            // him to resend it (or else the caller is still processing our
            // request).  Send a UDP reminder - but with the CLIENT_TCP_SEND flag
            // instead of the UDP send, and no DTask (since it previously went via
            // TCP, no need to resend it).
            AutoBuffer ab = new AutoBuffer(_target).putTask(UDP.udp.exec, _tasknum);
            ab.put1(CLIENT_TCP_SEND).close();
        }
        // Double retry until we exceed existing age.  This is the time to delay
        // until we try again.  Note that we come here immediately on creation,
        // so the first doubling happens before anybody does any waiting.  Also
        // note the generous 5sec cap: ping at least every 5 sec.
        _retry += (_retry < 5000) ? _retry : 5000;
        // Put self on the "TBD" list of tasks awaiting Timeout.
        // So: dont really 'forget' but remember me in a little bit.
        UDPTimeOutThread.PENDING.add(this);
        return this;
    } catch (Error t) {
        throw Log.err(t);
    }
}
Also used : H2OCountedCompleter(water.H2O.H2OCountedCompleter) H2OCountedCompleter(water.H2O.H2OCountedCompleter) CountedCompleter(jsr166y.CountedCompleter)

Example 5 with CountedCompleter

use of jsr166y.CountedCompleter in project h2o-3 by h2oai.

the class MRThrow method testContinuationThrow.

/**
 * Verifies that an exception thrown by a distributed task reaches a completer attached to it.
 *
 * <p>For every cloud member a {@code ByteHistoThrow} targeting that member is forked over a
 * zero vector with a custom {@link CountedCompleter} attached, and the completer is joined.
 * Any {@link RuntimeException} surfacing from the join must mention "test" either in its own
 * message or in its cause's message.
 *
 * <p>All message checks are null-safe: {@link Throwable#getMessage()} and
 * {@link Throwable#getCause()} may return {@code null}, and an unexpected exception should
 * produce an assertion failure rather than a NullPointerException.
 */
@Test
public void testContinuationThrow() throws InterruptedException, ExecutionException {
    int sz = H2O.CLOUD.size();
    Vec vec = Vec.makeZero((sz + 1) * FileVec.DFLT_CHUNK_SIZE + 1);
    try {
        for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
            final ByteHistoThrow bh = new ByteHistoThrow(H2O.CLOUD._memary[i]);
            // Records whether the completer saw the expected exception. Not asserted here because,
            // as noted below, some nodes may have no work and so never throw.
            final boolean[] ok = new boolean[] { false };
            try {
                CountedCompleter cc = new CountedCompleter() {

                    @Override
                    public void compute() {
                        tryComplete();
                    }

                    @Override
                    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter cc) {
                        // getMessage() may be null; treat that as "not the expected exception".
                        final String msg = ex.getMessage();
                        ok[0] = msg != null && msg.contains("test");
                        return super.onExceptionalCompletion(ex, cc);
                    }
                };
                bh.setCompleter(cc);
                bh.dfork(vec);
                // If the chosen file is too small for the cluster, some nodes will have *no* work
                // and so no exception is thrown.
                cc.join();
            } catch (RuntimeException re) {
                // Either the exception itself or its cause must carry the "test" marker.
                final String msg = re.getMessage();
                final Throwable cause = re.getCause();
                final String causeMsg = cause == null ? null : cause.getMessage();
                assertTrue((msg != null && msg.contains("test")) || (causeMsg != null && causeMsg.contains("test")));
            //        } catch( ExecutionException e ) { // caught on self
            //          assertTrue(e.getMessage().contains("test"));
            } catch (java.lang.AssertionError ae) {
                // Standard junit failure reporting assertion
                throw ae;
            } catch (Throwable ex) {
                ex.printStackTrace();
                fail("Unexpected exception" + ex.toString());
            }
        }
    } finally {
        // remove from DKV
        if (vec != null)
            vec.remove();
    }
}
Also used : Vec(water.fvec.Vec) FileVec(water.fvec.FileVec) CountedCompleter(jsr166y.CountedCompleter)

Aggregations

CountedCompleter (jsr166y.CountedCompleter): 8, H2OCountedCompleter (water.H2O.H2OCountedCompleter): 5, File (java.io.File): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1, Test (org.junit.Test): 1, FileVec (water.fvec.FileVec): 1, NFSFileVec (water.fvec.NFSFileVec): 1, Vec (water.fvec.Vec): 1