Use of org.apache.drill.common.concurrent.ExtendedLatch in project drill by apache.
Class TimedRunnable, method run.
/**
 * Execute the list of runnables with the given parallelization. At end, return values and report completion time
 * stats to provided logger. Each runnable is allowed a certain timeout. If the overall timeout is exceeded,
 * running/pending tasks will be cancelled and a {@link UserException} is thrown.
 *
 * @param activity Name of activity for reporting in logger.
 * @param logger The logger to use to report results.
 * @param runnables List of runnables that should be executed and timed. If this list has one item, task will be
 *     completed in-thread. Runnable must handle {@link InterruptedException}s.
 * @param parallelism The number of threads that should be run to complete this task.
 * @return The list of outcome objects.
 * @throws IOException All exceptions are coerced to IOException since this was built for storage system tasks
 *     initially. If several runnables failed, the first failure is thrown and the rest are attached as suppressed.
 */
public static <V> List<V> run(final String activity, final Logger logger, final List<TimedRunnable<V>> runnables, int parallelism) throws IOException {
  Stopwatch watch = Stopwatch.createStarted();
  long timedRunnableStart = System.nanoTime();
  if (runnables.size() == 1) {
    // A single task is not worth a thread pool; run it on the calling thread.
    parallelism = 1;
    runnables.get(0).run();
  } else {
    parallelism = Math.min(parallelism, runnables.size());
    final ExtendedLatch latch = new ExtendedLatch(runnables.size());
    final ExecutorService threadPool = Executors.newFixedThreadPool(parallelism);
    try {
      for (TimedRunnable<V> runnable : runnables) {
        threadPool.submit(new LatchedRunnable(latch, runnable));
      }
      // Divide in floating point: with integer division the quotient is already truncated and Math.ceil
      // would have nothing left to round up.
      final long timeout = (long) Math.ceil(((double) TIMEOUT_PER_RUNNABLE_IN_MSECS * runnables.size()) / parallelism);
      if (!latch.awaitUninterruptibly(timeout)) {
        // Issue a shutdown request. This will cause existing threads to interrupt and pending threads to cancel.
        // It is highly important that the task Runnables are handling interrupts correctly.
        threadPool.shutdownNow();
        try {
          // Wait for 5s for currently running threads to terminate. Above call (threadPool.shutdownNow()) interrupts
          // any running threads. If the runnables are handling the interrupts properly they should be able to
          // wrap up and terminate. If not waiting for 5s here gives a chance to identify and log any potential
          // thread leaks.
          threadPool.awaitTermination(5, TimeUnit.SECONDS);
        } catch (final InterruptedException e) {
          logger.warn("Interrupted while waiting for pending threads in activity '{}' to terminate.", activity);
        }
        final String errMsg = String.format("Waited for %dms, but tasks for '%s' are not complete. " + "Total runnable size %d, parallelism %d.", timeout, activity, runnables.size(), parallelism);
        logger.error(errMsg);
        throw UserException.resourceError().message(errMsg).build(logger);
      }
    } finally {
      if (!threadPool.isShutdown()) {
        threadPool.shutdown();
      }
    }
  }

  List<V> values = Lists.newArrayList();
  long sum = 0;
  long max = 0;
  long count = 0;
  // measure thread creation times (offsets in nanoseconds relative to the start of this method)
  long earliestStart = Long.MAX_VALUE;
  long latestStart = 0;
  long totalStart = 0;
  IOException excep = null;
  for (final TimedRunnable<V> reader : runnables) {
    try {
      // getValue() is called first so that a failed runnable contributes nothing to the statistics.
      values.add(reader.getValue());
      final long timeSpent = reader.getTimeSpentNanos();
      final long startOffset = reader.getThreadStart() - timedRunnableStart;
      sum += timeSpent;
      count++;
      max = Math.max(max, timeSpent);
      earliestStart = Math.min(earliestStart, startOffset);
      latestStart = Math.max(latestStart, startOffset);
      // Accumulate this runnable's own offset (not the running maximum) so the average start is meaningful.
      totalStart += startOffset;
    } catch (IOException e) {
      // Keep the first failure as the primary exception and attach later ones to it.
      if (excep == null) {
        excep = e;
      } else {
        excep.addSuppressed(e);
      }
    }
  }

  if (logger.isInfoEnabled()) {
    double avg = (sum / 1000.0 / 1000.0) / (count * 1.0d);
    double avgStart = (totalStart / 1000.0) / (count * 1.0d);
    logger.info(String.format("%s: Executed %d out of %d using %d threads. " + "Time: %dms total, %fms avg, %dms max.", activity, count, runnables.size(), parallelism, watch.elapsed(TimeUnit.MILLISECONDS), avg, max / 1000 / 1000));
    logger.info(String.format("%s: Executed %d out of %d using %d threads. " + "Earliest start: %f μs, Latest start: %f μs, Average start: %f μs .", activity, count, runnables.size(), parallelism, earliestStart / 1000.0, latestStart / 1000.0, avgStart));
  }

  if (excep != null) {
    throw excep;
  }
  return values;
}
Use of org.apache.drill.common.concurrent.ExtendedLatch in project drill by apache.
Class Foreman, method setupNonRootFragments.
/**
 * Dispatches all non-root fragments to the drillbits they are assigned to. Intermediate fragments are sent
 * first and their acknowledgements are awaited; leaf fragments are sent afterwards without waiting.
 * Because messages go out immediately, data may start flowing back before this method returns.
 *
 * @param fragments the non-root plan fragments to dispatch
 * @throws ForemanException declared for the "send-fragments" injection site
 */
private void setupNonRootFragments(final Collection<PlanFragment> fragments) throws ForemanException {
  if (fragments.isEmpty()) {
    // nothing to dispatch
    return;
  }

  /*
   * One message goes to each endpoint no matter how many fragments run there, and intermediate
   * fragments must be up before leaf fragments start producing data. So the fragments are first
   * bucketed by kind (leaf vs. intermediate) and by target drillbit.
   */
  final Multimap<DrillbitEndpoint, PlanFragment> leavesByEndpoint = ArrayListMultimap.create();
  final Multimap<DrillbitEndpoint, PlanFragment> intermediatesByEndpoint = ArrayListMultimap.create();

  // every fragment is registered for status tracking before it is bucketed
  for (final PlanFragment fragment : fragments) {
    logger.trace("Tracking intermediate remote node {} with data {}", fragment.getAssignment(), fragment.getFragmentJson());
    queryManager.addFragmentStatusTracker(fragment, false);
    final Multimap<DrillbitEndpoint, PlanFragment> bucket =
        fragment.getLeafFragment() ? leavesByEndpoint : intermediatesByEndpoint;
    bucket.put(fragment.getAssignment(), fragment);
  }

  /*
   * The latch has one count per endpoint that receives intermediate fragments and is used to wait
   * for their responses. So that a failed RPC cannot hang this thread, the latch is always counted
   * down (see FragmentSubmitFailures); failures are collected separately and inspected afterwards.
   */
  final Set<DrillbitEndpoint> intermediateEndpoints = intermediatesByEndpoint.keySet();
  final int endpointCount = intermediateEndpoints.size();
  final ExtendedLatch responseLatch = new ExtendedLatch(endpointCount);
  final FragmentSubmitFailures failures = new FragmentSubmitFailures();

  // send remote intermediate fragments
  for (final DrillbitEndpoint target : intermediateEndpoints) {
    sendRemoteFragments(target, intermediatesByEndpoint.get(target), responseLatch, failures);
  }

  final long timeout = RPC_WAIT_IN_MSECS_PER_FRAGMENT * endpointCount;
  if (endpointCount > 0) {
    final boolean allResponded = responseLatch.awaitUninterruptibly(timeout);
    if (!allResponded) {
      final long stillPending = responseLatch.getCount();
      throw UserException.connectionError().message("Exceeded timeout (%d) while waiting send intermediate work fragments to remote nodes. " + "Sent %d and only heard response back from %d nodes.", timeout, endpointCount, endpointCount - stillPending).build(logger);
    }
  }

  // any failed intermediate submission fails the whole query
  final List<FragmentSubmitFailures.SubmissionException> submissionErrors = failures.submissionExceptions;
  if (!submissionErrors.isEmpty()) {
    // build a comma-separated list of the distinct failing endpoints, in order of first appearance
    final Set<DrillbitEndpoint> alreadyListed = Sets.newHashSet();
    final StringBuilder failedNodes = new StringBuilder();
    String separator = "";
    for (final FragmentSubmitFailures.SubmissionException failure : submissionErrors) {
      final DrillbitEndpoint failedEndpoint = failure.drillbitEndpoint;
      if (alreadyListed.add(failedEndpoint)) {
        failedNodes.append(separator).append(failedEndpoint.getAddress());
        separator = ", ";
      }
    }
    throw UserException.connectionError(submissionErrors.get(0).rpcException).message("Error setting up remote intermediate fragment execution").addContext("Nodes with failures", failedNodes.toString()).build(logger);
  }

  injector.injectChecked(queryContext.getExecutionControls(), "send-fragments", ForemanException.class);

  /*
   * Leaf fragments go out last and are not awaited (no latch, no failure collector); problems
   * with them are expected to surface through the regular sendListener event delivery.
   */
  for (final DrillbitEndpoint target : leavesByEndpoint.keySet()) {
    sendRemoteFragments(target, leavesByEndpoint.get(target), null, null);
  }
}
Use of org.apache.drill.common.concurrent.ExtendedLatch in project drill by apache.
Class CountDownLatchInjectionImpl, method initialize.
/**
 * Creates the underlying latch with the given count. The latch may be created only once, and the
 * count must be positive; both conditions are enforced with {@code Preconditions.checkArgument}.
 *
 * @param count number of count-downs the new latch starts with; must be greater than zero
 */
@Override
public void initialize(final int count) {
  // resolved once; used only to describe the injection site in the failure messages
  final String siteName = siteClass.getSimpleName();
  Preconditions.checkArgument(latch == null, "Latch can be initialized only once at %s in %s.", desc, siteName);
  Preconditions.checkArgument(count > 0, "Count has to be a positive integer at %s in %s.", desc, siteName);
  latch = new ExtendedLatch(count);
}
Use of org.apache.drill.common.concurrent.ExtendedLatch in project drill by apache.
Class TestCountDownLatchInjection, method latchInjected.
// The test would hang if the correct init, wait and countdowns did not happen; the test timeout
// mechanism will catch that case.
@Test
public void latchInjected() {
  final int threadCount = 10;
  final ExtendedLatch startSignal = new ExtendedLatch(1);
  final Pointer<Long> countdownDuration = new Pointer<>();

  // register a latch injection for DummyClass on the session
  final String latchControls = Controls.newBuilder().addLatch(DummyClass.class, DummyClass.LATCH_NAME).build();
  ControlsInjectionUtil.setControls(session, latchControls);

  final QueryContext context = new QueryContext(session, bits[0].getContext(), QueryId.getDefaultInstance());
  final DummyClass latchSite = new DummyClass(context, startSignal, threadCount);

  final ThreadCreator creator = new ThreadCreator(latchSite, startSignal, threadCount, countdownDuration);
  creator.start();

  final long waitedFor;
  try {
    waitedFor = latchSite.initAndWait();
  } catch (final InterruptedException e) {
    fail("Thread should not be interrupted; there is no deliberate attempt.");
    return;
  }

  // the waiting side must have waited at least as long as the count-down side reported
  assertTrue(waitedFor >= countdownDuration.value);

  try {
    context.close();
  } catch (final Exception e) {
    fail("Failed to close query context: " + e);
  }
}
Use of org.apache.drill.common.concurrent.ExtendedLatch in project drill by apache.
Class TestDrillbitResilience, method passThrough.
// Exercises pause and resume. The test hangs and times out if the resume never happens.
@Test
public void passThrough() {
  final long memoryBefore = countAllocatedMemory();

  final WaitUntilCompleteListener resumingListener = new WaitUntilCompleteListener() {
    @Override
    public void queryIdArrived(final QueryId queryId) {
      super.queryIdArrived(queryId);
      // start the resuming thread, then release it through the latch
      final ExtendedLatch go = new ExtendedLatch(1);
      final ResumingThread resumer = new ResumingThread(queryId, ex, go);
      resumer.start();
      go.countDown();
    }
  };

  // install a pause at the "read-next" site of PojoRecordReader
  final String pauseControls = Controls.newBuilder().addPause(PojoRecordReader.class, "read-next").build();
  setControls(pauseControls);

  QueryTestUtil.testWithListener(drillClient, QueryType.SQL, TEST_QUERY, resumingListener);
  final Pair<QueryState, Exception> outcome = resumingListener.waitForCompletion();
  assertStateCompleted(outcome, QueryState.COMPLETED);

  // allocated memory must be back to where it started
  final long memoryAfter = countAllocatedMemory();
  assertEquals(String.format("We are leaking %d bytes", memoryAfter - memoryBefore), memoryBefore, memoryAfter);
}
Aggregations