use of jsr166y.CountedCompleter in project h2o-3 by h2oai.
the class ScoreBuildHistogram2 method setupLocal.
/**
 * Node-local setup for this task. Initializes the tree, looks up the chunk ids
 * returned by VecUtils.getLocalChunkIds for the frame's vec, allocates the
 * per-chunk work arrays (_chks, _ys, _ws, _nhs, _rss) and then forks a two-stage
 * local computation:
 * stage 1 (the first LocalMR) assigns rows to nodes and groups the row indices
 * of each chunk by node id; stage 2 (started from the completion callback of
 * stage 1) forks one LocalMR of ComputeHistoThread workers per (active) column.
 */
@Override
public void setupLocal() {
// One extra pending completion; presumably matched by the stage-1 completer
// created below with `this` as its parent - TODO confirm.
addToPendingCount(1);
// Init all the internal tree fields after shipping over the wire
_tree.init_tree();
Vec v = _fr2.anyVec();
assert (v != null);
_cids = VecUtils.getLocalChunkIds(v);
// All per-chunk arrays below are indexed by position in _cids, not by chunk id.
_chks = new Chunk[_cids.length][_fr2.numCols()];
_ys = new double[_cids.length][];
_ws = new double[_cids.length][];
_nhs = new int[_cids.length][];
_rss = new int[_cids.length][];
// Find the largest chunk size from the differences of consecutive espc entries.
// NOTE(review): this scans every espc entry, presumably including chunks that are
// not local to this node - confirm whether that over-sizing is intended.
long[] espc = v.espc();
int largestChunkSz = 0;
for (int i = 1; i < espc.length; ++i) {
int sz = (int) (espc[i] - espc[i - 1]);
if (sz > largestChunkSz)
largestChunkSz = sz;
}
final int fLargestChunkSz = largestChunkSz;
// No weights column: every slot of _ws points at the SAME all-ones array
// (sized for the largest chunk), so it has to be treated as read-only.
if (_weightIdx == -1) {
double[] ws = new double[largestChunkSz];
Arrays.fill(ws, 1);
Arrays.fill(_ws, ws);
}
// Shared counter from which the stage-1 workers claim positions in _cids.
final AtomicInteger cidx = new AtomicInteger(0);
// First do the phase 1 on all local data
new LocalMR(new MrFun() {
// more or less copied from ScoreBuildHistogram
// Processes one chunk: computes the new node id of every row and fills
// _nhs[id] / _rss[id] with the row indices grouped by node id.
private void map(int id, Chunk[] chks) {
final C4VolatileChunk nids = (C4VolatileChunk) chks[_nidIdx];
// Pass 1: Score a prior partially-built tree model, and make new Node
// assignments to every row. This involves pulling out the current
// assigned DecidedNode, "scoring" the row against that Node's decision
// criteria, and assigning the row to a new child UndecidedNode (and
// giving it an improved prediction).
int[] nnids;
if (// Prior pass exists?
_leaf > 0)
nnids = score_decide(chks, nids.getValues());
else {
// Just flag all the NA rows
// Rows for which isDecidedRow is false keep the default node id 0.
nnids = new int[nids._len];
int[] is = nids.getValues();
for (int row = 0; row < nids._len; row++) {
if (isDecidedRow(is[row]))
nnids[row] = DECIDED_ROW;
}
}
// Pass 2: accumulate all rows, cols into histograms
// Sort the rows by NID, so we visit all the same NIDs in a row
// Find the count of unique NIDs in this chunk
// Negative node ids are skipped here and in the splat loop below.
int[] nh = (_nhs[id] = new int[_numLeafs + 1]);
for (int i : nnids) if (i >= 0)
nh[i + 1]++;
// Rollup the histogram of rows-per-NID in this chunk
for (int i = 0; i < _numLeafs; i++) nh[i + 1] += nh[i];
// Splat the rows into NID-groups
// nh[n] is used as the write cursor of group n and is advanced on every
// insert, so after this loop nh[n] holds the END offset of group n in rows.
int[] rows = (_rss[id] = new int[nnids.length]);
for (int row = 0; row < nnids.length; row++) if (nnids[row] >= 0)
rows[nh[nnids[row]]++] = row;
}
// Worker entry point. The incoming id is ignored: it is overwritten right away
// and each worker keeps claiming positions from the shared counter until all
// entries of _cids are taken.
@Override
protected void map(int id) {
Vec[] vecs = _fr2.vecs();
for (id = cidx.getAndIncrement(); id < _cids.length; id = cidx.getAndIncrement()) {
// This int shadows the AtomicInteger of the same name for the rest of the loop
// body; the loop header above still refers to the counter.
int cidx = _cids[id];
Chunk[] chks = _chks[id];
for (int i = 0; i < chks.length; ++i) chks[i] = vecs[i].chunkForChunkIdx(cidx);
map(id, chks);
// Close the node-id chunk; presumably this publishes the updated node
// assignments, tracked through _fs - TODO confirm.
chks[_nidIdx].close(cidx, _fs);
Chunk resChk = chks[_workIdx];
int len = resChk.len();
// Take the values array directly from a C8DVolatileChunk, otherwise
// extract the work column into a newly allocated array.
if (resChk instanceof C8DVolatileChunk) {
_ys[id] = ((C8DVolatileChunk) resChk).getValues();
} else
_ys[id] = resChk.getDoubles(MemoryManager.malloc8d(len), 0, len);
// With a weights column, replace the shared all-ones default by the real weights.
if (_weightIdx != -1) {
_ws[id] = chks[_weightIdx].getDoubles(MemoryManager.malloc8d(len), 0, len);
}
}
}
}, new H2O.H2OCountedCompleter(this) {
// Runs once stage 1 has completed: launches the per-column histogram building.
public void onCompletion(CountedCompleter cc) {
final int ncols = _ncols;
// Dense list of the active column indices; null means "all columns are active".
final int[] active_cols = _activeCols == null ? null : new int[Math.max(1, _activeCols.cardinality())];
int nactive_cols = active_cols == null ? ncols : active_cols.length;
// Workers per column: all CPUs while _hcs.length * nactive_cols is below 16K,
// otherwise roughly 4 * NUMCPUS / nactive_cols, clamped to [1, NUMCPUS].
final int numWrks = _hcs.length * nactive_cols < 16 * 1024 ? H2O.NUMCPUS : Math.min(H2O.NUMCPUS, Math.max(4 * H2O.NUMCPUS / nactive_cols, 1));
// NOTE(review): rem is computed from ncols rather than nactive_cols and is later
// compared against the remapped column index - confirm this is intended.
final int rem = H2O.NUMCPUS - numWrks * ncols;
// One pending completion for the outer LocalMR over columns plus one for each
// per-column LocalMR; all of them are given ScoreBuildHistogram2.this as completer.
ScoreBuildHistogram2.this.addToPendingCount(1 + nactive_cols);
if (active_cols != null) {
int j = 0;
for (int i = 0; i < ncols; ++i) if (_activeCols.contains(i))
active_cols[j++] = i;
}
// MRTask (over columns) launching MrTasks (over number of workers) for each column.
// We want FJ to start processing all the columns before parallelizing within column to reduce memory overhead.
// (running single column in n threads means n-copies of the histogram)
// This is how it works:
// 1) Outer MRTask walks down its tree, forking tasks with exponentially decreasing number of columns until reaching its left most leaf for columns 0.
// At this point, the local fjq for this thread has a task for processing half of columns at the bottom, followed by task for 1/4 of columns and so on.
// Other threads start stealing work from the bottom.
// 2) forks the leaf task and (because its polling from the top) executes the LocalMr for the column 0.
// This way we should have columns as equally distributed as possible without resorting to shared priority queue
new LocalMR(new MrFun() {
@Override
protected void map(int c) {
// Translate the dense position into the real column index when a column
// subset is active.
c = active_cols == null ? c : active_cols[c];
// Fork the workers for this column; columns with index below rem get one
// extra worker. An empty _hcs yields an empty histogram array.
new LocalMR(new ComputeHistoThread(_hcs.length == 0 ? new DHistogram[0] : _hcs[c], c, fLargestChunkSz, new AtomicInteger()), numWrks + (c < rem ? 1 : 0), ScoreBuildHistogram2.this).fork();
}
}, nactive_cols, ScoreBuildHistogram2.this).fork();
}
}).fork();
}
use of jsr166y.CountedCompleter in project h2o-2 by h2oai.
the class FrameSplitter method compute2.
/**
 * Splits {@code dataset} into {@code ratios.length + 1} frames. The input is
 * read-locked, one output frame per split is created and locked, and a single
 * helper task is submitted which launches one FrameSplitTask per split.
 * Exceptions raised by the split tasks are collected in
 * {@code workersExceptions} instead of being propagated.
 */
@Override
public void compute2() {
    // Take the read lock on the input for this job
    dataset.read_lock(jobKey);
    // One array of template vectors per output split
    final Vec[][] templates = makeTemplates(dataset, ratios);
    final int nsplits = templates.length;
    assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
    final Vec[] datasetVecs = dataset.vecs();
    // Create every output frame and lock it before any work is started
    splits = new Frame[nsplits];
    for (int idx = 0; idx < nsplits; idx++) {
        Frame out = new Frame(destKeys[idx], dataset.names(), templates[idx]);
        out.delete_and_lock(jobKey);
        splits[idx] = out;
    }
    // This task waits for exactly one child: the launcher submitted below
    setPendingCount(1);
    H2O.submitTask(new H2OCountedCompleter(FrameSplitter.this) {
        @Override
        public void compute2() {
            // The launcher in turn waits for one completion per split
            setPendingCount(nsplits);
            for (int idx = 0; idx < nsplits; idx++) {
                // Per-split completer, parented to the launcher
                H2OCountedCompleter splitCompleter = new H2OCountedCompleter(this) {
                    @Override
                    public void compute2() {
                    }

                    @Override
                    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
                        // Several workers may report failures; guard the shared array
                        synchronized (FrameSplitter.this) {
                            if (workersExceptions == null) {
                                workersExceptions = new Throwable[1];
                            } else {
                                workersExceptions = Arrays.copyOf(workersExceptions, workersExceptions.length + 1);
                            }
                            workersExceptions[workersExceptions.length - 1] = ex;
                        }
                        // The failure is recorded, so finish as a normal completion
                        // and do not propagate the exception further
                        tryComplete();
                        return false;
                    }
                };
                new FrameSplitTask(splitCompleter, datasetVecs, ratios, idx).asyncExec(splits[idx]);
            }
            // Signal the launcher's own completion
            tryComplete();
        }
    });
    // Signal this task's own completion
    tryComplete();
}
use of jsr166y.CountedCompleter in project h2o-3 by h2oai.
the class RPC method call.
// Sends this RPC for the first time, or re-sends it after a timeout.
// Returns this RPC in every non-exceptional case.
public synchronized RPC<V> call() {
    // A completer attached to the task is not shipped to the remote node; hand it
    // to handleCompleter so it is dealt with on this side.
    final CountedCompleter completer = _dt.getCompleter();
    if (completer != null) {
        handleCompleter(completer);
    }
    // Target is this very node: no network involved
    if (_target == H2O.SELF) {
        return handleLocal();
    }
    // Register this call with the target under its task number
    if (_target != null) {
        _target.taskPut(_tasknum, this);
    }
    try {
        // A Nack raced with us (checked under the lock): nothing to resend
        if (_nack) {
            return this;
        }
        // A reply may have raced with the timeout: drop the record and stop
        if (isDone()) {
            if (_target != null) {
                _target.taskRemove(_tasknum);
            }
            return this;
        }
        if (_sentTcp) {
            // The task already went out over TCP in an earlier attempt and we timed
            // out since. Either the answer got lost or the remote is still working;
            // send only a small reminder flagged CLIENT_TCP_SEND, without the task.
            AutoBuffer reminder = new AutoBuffer(_target, _dt.priority()).putTask(UDP.udp.exec, _tasknum);
            reminder.put1(CLIENT_TCP_SEND).close();
        } else {
            // Send the control packet with the task, retrying on broken sends
            boolean sent = false;
            while (!sent) {
                AutoBuffer buf = new AutoBuffer(_target, _dt.priority());
                try {
                    buf.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND);
                    buf.put(_dt);
                    final boolean viaTcp = buf.hasTCP();
                    assert sz_check(buf) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + buf.size() + " for task#" + _tasknum;
                    // Closing sends the final byte
                    buf.close();
                    // Record the transport only after close (or anything else) could have failed
                    _sentTcp = viaTcp;
                    sent = true;
                } catch (AutoBuffer.AutoBufferException e) {
                    Log.info("IOException during RPC call: " + e._ioe.getMessage() + ", AB=" + buf + ", for task#" + _tasknum + ", waiting and retrying...");
                    buf.drainClose();
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException ignore) {
                    }
                }
            }
        }
        // Back-off: double the retry delay until it reaches MAX_TIMEOUT, after that
        // grow it by MAX_TIMEOUT each time. This runs right on creation as well, so
        // the first doubling happens before anyone has waited.
        if (_retry < MAX_TIMEOUT) {
            _retry += _retry;
        } else {
            _retry += MAX_TIMEOUT;
        }
        // UDPTimeOutThread.PENDING.put(_tasknum, this);
        return this;
    } catch (Throwable t) {
        t.printStackTrace();
        throw Log.throwErr(t);
    }
}
use of jsr166y.CountedCompleter in project h2o-2 by h2oai.
the class RPC method call.
// Sends this RPC for the first time, or re-sends it after a timeout.
// Returns this RPC in every non-exceptional case.
public synchronized RPC<V> call() {
    ++_callCnt;
    // A completer attached to the task is passed to addCompleter (unless it is
    // already listed in _fjtasks) and then detached from the task.
    final CountedCompleter attached = _dt.getCompleter();
    if (attached != null) {
        assert attached instanceof H2OCountedCompleter;
        boolean known = false;
        if (_fjtasks != null) {
            for (H2OCountedCompleter registered : _fjtasks) {
                if (registered == attached) {
                    known = true;
                }
            }
        }
        if (!known) {
            addCompleter((H2OCountedCompleter) attached);
        }
        _dt.setCompleter(null);
    }
    // Target is this very node: submit the task to the local queues
    if (_target == H2O.SELF) {
        assert _dt.getCompleter() == null;
        _dt.setCompleter(new H2O.H2OCallback<DTask>() {
            @Override
            public void callback(DTask dt) {
                assert dt == _dt;
                synchronized (RPC.this) {
                    // F/J guarantees a single call
                    assert !_done;
                    _done = true;
                    RPC.this.notifyAll();
                }
                doAllCompletions();
            }

            @Override
            public boolean onExceptionalCompletion(Throwable ex, CountedCompleter dt) {
                assert dt == _dt;
                synchronized (RPC.this) {
                    // Only the first exceptional completion is processed
                    if (_done) {
                        return true;
                    }
                    _dt.setException(ex);
                    // _done has to be the last write before notifyAll: a waiter
                    // may wake up at any moment
                    _done = true;
                    RPC.this.notifyAll();
                }
                doAllCompletions();
                return true;
            }
        });
        H2O.submitTask(_dt);
        return this;
    }
    // Register this call with the target under its task number
    if (_target != null) {
        _target.taskPut(_tasknum, this);
    }
    try {
        // A reply may have raced with the timeout: drop the record and stop
        if (isDone()) {
            if (_target != null) {
                _target.taskRemove(_tasknum);
            }
            return this;
        }
        if (_sentTcp) {
            // The task already went out over TCP in an earlier attempt and we timed
            // out since. Either the answer got lost or the remote is still working;
            // send only a small reminder flagged CLIENT_TCP_SEND, without the task.
            AutoBuffer reminder = new AutoBuffer(_target).putTask(UDP.udp.exec, _tasknum);
            reminder.put1(CLIENT_TCP_SEND).close();
        } else {
            // Send the control packet with the task, retrying on broken sends
            boolean sent = false;
            while (!sent) {
                AutoBuffer buf = new AutoBuffer(_target);
                try {
                    buf.putTask(UDP.udp.exec, _tasknum).put1(CLIENT_UDP_SEND).put(_dt);
                    boolean viaTcp = buf.hasTCP();
                    assert sz_check(buf) : "Resend of " + _dt.getClass() + " changes size from " + _size + " to " + buf.size() + " for task#" + _tasknum;
                    // Closing sends the final byte
                    buf.close();
                    // Record the transport only after close (or anything else) could have failed
                    _sentTcp = viaTcp;
                    sent = true;
                } catch (AutoBuffer.AutoBufferException e) {
                    Log.info_no_DKV(Log.Tag.Sys.WATER, "IOException during RPC call: " + e._ioe.getMessage() + ", AB=" + buf + ", for task#" + _tasknum + ", waiting and retrying...");
                    buf.drainClose();
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException ignore) {
                    }
                }
            }
        }
        // Back-off: double the retry delay until it reaches 5000, after that grow
        // it by 5000 each time. This runs right on creation as well, so the first
        // doubling happens before anyone has waited.
        if (_retry < 5000) {
            _retry += _retry;
        } else {
            _retry += 5000;
        }
        // Queue this call on the pending list of the timeout thread so it is
        // revisited later.
        UDPTimeOutThread.PENDING.add(this);
        return this;
    } catch (Error t) {
        throw Log.err(t);
    }
}
use of jsr166y.CountedCompleter in project h2o-3 by h2oai.
the class MRThrow method testContinuationThrow.
/**
 * For every node in the cloud, runs a ByteHistoThrow task with a user-supplied
 * completer attached and checks that any RuntimeException surfacing from
 * {@code join()} carries the text "test", either in its own message or in the
 * message of its direct cause.
 *
 * Message and cause lookups are null-safe: an exception without a message or
 * cause now fails the assertion with a readable description instead of
 * raising a NullPointerException that hides the original failure.
 */
@Test
public void testContinuationThrow() throws InterruptedException, ExecutionException {
    int sz = H2O.CLOUD.size();
    Vec vec = Vec.makeZero((sz + 1) * FileVec.DFLT_CHUNK_SIZE + 1);
    try {
        for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
            final ByteHistoThrow bh = new ByteHistoThrow(H2O.CLOUD._memary[i]);
            // Set by the completer when it sees the expected exception.
            // NOTE(review): the flag is recorded but never asserted in this method.
            final boolean[] ok = new boolean[] { false };
            try {
                CountedCompleter cc = new CountedCompleter() {
                    @Override
                    public void compute() {
                        tryComplete();
                    }

                    @Override
                    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter cc) {
                        // getMessage() may legitimately return null
                        String msg = ex.getMessage();
                        ok[0] = msg != null && msg.contains("test");
                        return super.onExceptionalCompletion(ex, cc);
                    }
                };
                bh.setCompleter(cc);
                bh.dfork(vec);
                // If the chosen file is too small for the cluster, some nodes will have *no* work
                // and so no exception is thrown.
                cc.join();
            } catch (RuntimeException re) {
                // Both the message and the cause may be null; guard each access so the
                // check fails cleanly instead of throwing a NullPointerException.
                String msg = re.getMessage();
                Throwable cause = re.getCause();
                String causeMsg = cause == null ? null : cause.getMessage();
                boolean expected = (msg != null && msg.contains("test"))
                        || (causeMsg != null && causeMsg.contains("test"));
                assertTrue("Unexpected exception: " + re, expected);
            } catch (java.lang.AssertionError ae) {
                // Standard junit failure reporting assertion
                throw ae;
            } catch (Throwable ex) {
                ex.printStackTrace();
                fail("Unexpected exception" + ex.toString());
            }
        }
    } finally {
        // remove from DKV
        if (vec != null)
            vec.remove();
    }
}
Aggregations