use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class IndexShardTests method testRelocatedShardCanNotBeRevivedConcurrently.
public void testRelocatedShardCanNotBeRevivedConcurrently() throws IOException, InterruptedException, BrokenBarrierException {
    final IndexShard shard = newStartedShard(true);
    final ShardRouting originalRouting = shard.routingEntry();
    shard.updateRoutingEntry(ShardRoutingHelper.relocate(originalRouting, "other_node"));
    CyclicBarrier cyclicBarrier = new CyclicBarrier(3);
    AtomicReference<Exception> relocationException = new AtomicReference<>();
    Thread relocationThread = new Thread(new AbstractRunnable() {
        @Override
        public void onFailure(Exception e) {
            relocationException.set(e);
        }

        @Override
        protected void doRun() throws Exception {
            cyclicBarrier.await();
            shard.relocated("test");
        }
    });
    relocationThread.start();
    AtomicReference<Exception> cancellingException = new AtomicReference<>();
    Thread cancellingThread = new Thread(new AbstractRunnable() {
        @Override
        public void onFailure(Exception e) {
            cancellingException.set(e);
        }

        @Override
        protected void doRun() throws Exception {
            cyclicBarrier.await();
            shard.updateRoutingEntry(originalRouting);
        }
    });
    cancellingThread.start();
    cyclicBarrier.await();
    relocationThread.join();
    cancellingThread.join();
    if (shard.state() == IndexShardState.RELOCATED) {
        logger.debug("shard was relocated successfully");
        assertThat(cancellingException.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(true));
        assertThat(relocationException.get(), nullValue());
    } else {
        logger.debug("shard relocation was cancelled");
        assertThat(relocationException.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(false));
        assertThat(cancellingException.get(), nullValue());
    }
    closeShards(shard);
}
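Every example on this page leans on the same contract: AbstractRunnable's run() calls doRun() and routes any exception it throws to onFailure(Exception), so a failure on a background thread can be captured and asserted on instead of vanishing into the thread's uncaught-exception handler. Below is a minimal, self-contained sketch of that pattern; SafeRunnable is a simplified stand-in for the Elasticsearch class, not its real implementation (which also has onRejection and onAfter hooks).

import java.util.concurrent.atomic.AtomicReference;

public class AbstractRunnableSketch {
    // simplified stand-in for org.elasticsearch.common.util.concurrent.AbstractRunnable
    abstract static class SafeRunnable implements Runnable {
        @Override
        public final void run() {
            try {
                doRun();
            } catch (Exception e) {
                onFailure(e); // exceptions never escape the thread silently
            }
        }

        protected abstract void doRun() throws Exception;

        public abstract void onFailure(Exception e);
    }

    public static void main(String[] args) throws InterruptedException {
        AtomicReference<Exception> error = new AtomicReference<>();
        Thread thread = new Thread(new SafeRunnable() {
            @Override
            protected void doRun() throws Exception {
                throw new IllegalStateException("boom");
            }

            @Override
            public void onFailure(Exception e) {
                error.set(e); // captured for assertion on the main thread
            }
        });
        thread.start();
        thread.join();
        System.out.println("captured: " + error.get());
    }
}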
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class TranslogTests method testConcurrentWriteViewsAndSnapshot.
/**
 * Tests that concurrent readers and writers maintain view and snapshot semantics
 */
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
    final Thread[] writers = new Thread[randomIntBetween(1, 10)];
    final Thread[] readers = new Thread[randomIntBetween(1, 10)];
    final int flushEveryOps = randomIntBetween(5, 100);
    // used to notify the main thread that enough operations have been written so it can simulate a flush
    final AtomicReference<CountDownLatch> writtenOpsLatch = new AtomicReference<>(new CountDownLatch(0));
    final AtomicLong idGenerator = new AtomicLong();
    final CyclicBarrier barrier = new CyclicBarrier(writers.length + readers.length + 1);
    // a map of all written ops and their returned location.
    final Map<Translog.Operation, Translog.Location> writtenOps = ConcurrentCollections.newConcurrentMap();
    // a signal for all threads to stop
    final AtomicBoolean run = new AtomicBoolean(true);
    // any errors on threads
    final List<Exception> errors = new CopyOnWriteArrayList<>();
    logger.debug("using [{}] readers. [{}] writers. flushing every ~[{}] ops.", readers.length, writers.length, flushEveryOps);
    for (int i = 0; i < writers.length; i++) {
        final String threadName = "writer_" + i;
        final int threadId = i;
        writers[i] = new Thread(new AbstractRunnable() {
            @Override
            public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
                barrier.await();
                int counter = 0;
                while (run.get()) {
                    long id = idGenerator.incrementAndGet();
                    final Translog.Operation op;
                    final Translog.Operation.Type type = Translog.Operation.Type.values()[((int) (id % Translog.Operation.Type.values().length))];
                    switch (type) {
                        case CREATE:
                        case INDEX:
                            op = new Translog.Index("type", "" + id, new byte[] { (byte) id });
                            break;
                        case DELETE:
                            op = new Translog.Delete(newUid("" + id));
                            break;
                        case NO_OP:
                            op = new Translog.NoOp(id, id, Long.toString(id));
                            break;
                        default:
                            throw new AssertionError("unsupported operation type [" + type + "]");
                    }
                    Translog.Location location = translog.add(op);
                    Translog.Location existing = writtenOps.put(op, location);
                    if (existing != null) {
                        fail("duplicate op [" + op + "], old entry at " + existing);
                    }
                    if (id % writers.length == threadId) {
                        translog.ensureSynced(location);
                    }
                    writtenOpsLatch.get().countDown();
                    counter++;
                }
                logger.debug("--> [{}] done. wrote [{}] ops.", threadName, counter);
            }

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> writer [{}] had an error", threadName), e);
                errors.add(e);
            }
        }, threadName);
        writers[i].start();
    }
    for (int i = 0; i < readers.length; i++) {
        final String threadId = "reader_" + i;
        readers[i] = new Thread(new AbstractRunnable() {
            Translog.View view = null;

            Set<Translog.Operation> writtenOpsAtView;

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> reader [{}] had an error", threadId), e);
                errors.add(e);
                try {
                    closeView();
                } catch (IOException inner) {
                    inner.addSuppressed(e);
                    logger.error("unexpected error while closing view, after failure", inner);
                }
            }

            void closeView() throws IOException {
                if (view != null) {
                    view.close();
                }
            }

            void newView() throws IOException {
                closeView();
                view = translog.newView();
                // captures the currently written ops so we know what to expect from the view
                writtenOpsAtView = new HashSet<>(writtenOps.keySet());
                logger.debug("--> [{}] opened view from [{}]", threadId, view.minTranslogGeneration());
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                int iter = 0;
                while (run.get()) {
                    if (iter++ % 10 == 0) {
                        newView();
                    }
                    // captures all ops that were written since the view was created (with a small caveat, see below)
                    // these are what we expect the snapshot to return (and potentially some more).
                    Set<Translog.Operation> expectedOps = new HashSet<>(writtenOps.keySet());
                    expectedOps.removeAll(writtenOpsAtView);
                    Translog.Snapshot snapshot = view.snapshot();
                    Translog.Operation op;
                    while ((op = snapshot.next()) != null) {
                        expectedOps.remove(op);
                    }
                    if (expectedOps.isEmpty() == false) {
                        StringBuilder missed = new StringBuilder("missed ").append(expectedOps.size()).append(" operations");
                        boolean failed = false;
                        for (Translog.Operation expectedOp : expectedOps) {
                            final Translog.Location loc = writtenOps.get(expectedOp);
                            if (loc.generation < view.minTranslogGeneration()) {
                                // writtenOps is only updated after the op was added to the translog; an op written
                                // before the view was taken (and thus missing from the view) may not yet have been
                                // in writtenOpsAtView, meaning we would erroneously expect it
                                continue;
                            }
                            failed = true;
                            missed.append("\n --> [").append(expectedOp).append("] written at ").append(loc);
                        }
                        if (failed) {
                            fail(missed.toString());
                        }
                    }
                    // slow things down a bit and spread out the testing..
                    writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS);
                }
                closeView();
                logger.debug("--> [{}] done. tested [{}] snapshots", threadId, iter);
            }
        }, threadId);
        readers[i].start();
    }
    barrier.await();
    try {
        for (int iterations = scaledRandomIntBetween(10, 200); iterations > 0 && errors.isEmpty(); iterations--) {
            writtenOpsLatch.set(new CountDownLatch(flushEveryOps));
            while (writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS) == false) {
                if (errors.size() > 0) {
                    break;
                }
            }
            translog.commit();
        }
    } finally {
        run.set(false);
        logger.debug("--> waiting for threads to stop");
        for (Thread thread : writers) {
            thread.join();
        }
        for (Thread thread : readers) {
            thread.join();
        }
        if (errors.size() > 0) {
            Throwable e = errors.get(0);
            for (Throwable suppress : errors.subList(1, errors.size())) {
                e.addSuppressed(suppress);
            }
            throw e;
        }
        logger.info("--> test done. total ops written [{}]", writtenOps.size());
    }
}
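The invariant this test enforces: any operation written after a view is opened must show up in a snapshot of that view (the snapshot may also return older ops, which is why ops are removed from the expected set rather than compared exactly). A toy illustration of that set-difference check, with plain strings standing in for Translog.Operation; none of this uses the real Translog API.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class SnapshotInvariantSketch {
    public static void main(String[] args) {
        // ops already written when the "view" was opened
        Set<String> writtenAtView = new HashSet<>(List.of("op1", "op2"));
        // all ops written so far
        Set<String> writtenNow = new HashSet<>(List.of("op1", "op2", "op3", "op4"));
        // what a snapshot of the view actually returned (may contain extras)
        Set<String> snapshot = new HashSet<>(List.of("op2", "op3", "op4"));

        // everything written after the view was opened must appear in the snapshot
        Set<String> expected = new HashSet<>(writtenNow);
        expected.removeAll(writtenAtView);
        expected.removeAll(snapshot);
        if (!expected.isEmpty()) {
            throw new AssertionError("snapshot missed operations: " + expected);
        }
        System.out.println("invariant holds");
    }
}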
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class LongGCDisruption method startDisrupting.
@Override
public synchronized void startDisrupting() {
    if (suspendedThreads == null) {
        boolean success = false;
        try {
            suspendedThreads = ConcurrentHashMap.newKeySet();
            final String currentThreadName = Thread.currentThread().getName();
            assert isDisruptedNodeThread(currentThreadName) == false : "current thread matches the disrupted node's thread pattern. thread name: " + currentThreadName + ", node: " + disruptedNode;
            // we spawn a background thread to protect against deadlock which can happen
            // if there are shared resources between the caller thread and suspended threads
            // see unsafeClasses for how to avoid that
            final AtomicReference<Exception> stoppingError = new AtomicReference<>();
            final Thread stoppingThread = new Thread(new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    stoppingError.set(e);
                }

                @Override
                protected void doRun() throws Exception {
                    // keep trying to stop threads, until no new threads are discovered.
                    while (stopNodeThreads(suspendedThreads)) {
                        if (Thread.interrupted()) {
                            return;
                        }
                    }
                }
            });
            stoppingThread.setName(currentThreadName + "[LongGCDisruption][threadStopper]");
            stoppingThread.start();
            try {
                stoppingThread.join(getStoppingTimeoutInMillis());
            } catch (InterruptedException e) {
                // best effort to signal stopping
                stoppingThread.interrupt();
                throw new RuntimeException(e);
            }
            if (stoppingError.get() != null) {
                throw new RuntimeException("unknown error while stopping threads", stoppingError.get());
            }
            if (stoppingThread.isAlive()) {
                logger.warn("failed to stop node [{}]'s threads within [{}] millis. Stopping thread stack trace:\n {}", disruptedNode, getStoppingTimeoutInMillis(), stackTrace(stoppingThread.getStackTrace()));
                // best effort;
                stoppingThread.interrupt();
                throw new RuntimeException("stopping node threads took too long");
            }
            // block detection checks whether other threads are blocked waiting on an object that is held by one
            // of the threads that was suspended
            if (isBlockDetectionSupported()) {
                blockDetectionThread = new Thread(new AbstractRunnable() {
                    @Override
                    public void onFailure(Exception e) {
                        if (e instanceof InterruptedException == false) {
                            throw new AssertionError("unexpected exception in blockDetectionThread", e);
                        }
                    }

                    @Override
                    protected void doRun() throws Exception {
                        while (Thread.currentThread().isInterrupted() == false) {
                            ThreadInfo[] threadInfos = threadBean.dumpAllThreads(true, true);
                            for (ThreadInfo threadInfo : threadInfos) {
                                if (isDisruptedNodeThread(threadInfo.getThreadName()) == false && threadInfo.getLockOwnerName() != null && isDisruptedNodeThread(threadInfo.getLockOwnerName())) {
                                    // find ThreadInfo object of the blocking thread (if available)
                                    ThreadInfo blockingThreadInfo = null;
                                    for (ThreadInfo otherThreadInfo : threadInfos) {
                                        if (otherThreadInfo.getThreadId() == threadInfo.getLockOwnerId()) {
                                            blockingThreadInfo = otherThreadInfo;
                                            break;
                                        }
                                    }
                                    onBlockDetected(threadInfo, blockingThreadInfo);
                                }
                            }
                            Thread.sleep(getBlockDetectionIntervalInMillis());
                        }
                    }
                });
                blockDetectionThread.setName(currentThreadName + "[LongGCDisruption][blockDetection]");
                blockDetectionThread.start();
            }
            success = true;
        } finally {
            if (success == false) {
                stopBlockDetection();
                // resume threads if failed
                resumeThreads(suspendedThreads);
                suspendedThreads = null;
            }
        }
    } else {
        throw new IllegalStateException("can't disrupt twice, call stopDisrupting() first");
    }
}
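The key defensive move here is that the suspending work runs on a separate stopper thread which the caller joins with a timeout and interrupts on overrun, so a deadlock between the caller and a freshly suspended thread cannot hang the test suite. A generic sketch of that watchdog pattern, independent of the disruption code (runWithTimeout and "watchdog-helper" are illustrative names, not from the source):

import java.util.concurrent.atomic.AtomicReference;

public class TimedWatchdogSketch {
    // run a potentially deadlocking task on a helper thread, bounded by a timeout
    static void runWithTimeout(Runnable task, long timeoutMillis) {
        AtomicReference<Throwable> error = new AtomicReference<>();
        Thread helper = new Thread(() -> {
            try {
                task.run();
            } catch (Throwable t) {
                error.set(t);
            }
        }, "watchdog-helper");
        helper.start();
        try {
            helper.join(timeoutMillis); // wait for completion, but never forever
        } catch (InterruptedException e) {
            helper.interrupt();
            throw new RuntimeException(e);
        }
        if (error.get() != null) {
            throw new RuntimeException("task failed", error.get());
        }
        if (helper.isAlive()) {
            helper.interrupt(); // best effort: ask the stuck task to stop
            throw new RuntimeException("task took longer than " + timeoutMillis + " ms");
        }
    }

    public static void main(String[] args) {
        runWithTimeout(() -> System.out.println("quick task"), 1_000);
    }
}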
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class LocalCheckpointTrackerTests method testConcurrentReplica.
public void testConcurrentReplica() throws InterruptedException {
    Thread[] threads = new Thread[randomIntBetween(2, 5)];
    final int opsPerThread = randomIntBetween(10, 20);
    final int maxOps = opsPerThread * threads.length;
    // make sure we always index the last seqNo to simplify maxSeq checks
    final long unFinishedSeq = randomIntBetween(0, maxOps - 2);
    Set<Integer> seqNos = IntStream.range(0, maxOps).boxed().collect(Collectors.toSet());
    final Integer[][] seqNoPerThread = new Integer[threads.length][];
    for (int t = 0; t < threads.length - 1; t++) {
        int size = Math.min(seqNos.size(), randomIntBetween(opsPerThread - 4, opsPerThread + 4));
        seqNoPerThread[t] = randomSubsetOf(size, seqNos).toArray(new Integer[size]);
        seqNos.removeAll(Arrays.asList(seqNoPerThread[t]));
    }
    seqNoPerThread[threads.length - 1] = seqNos.toArray(new Integer[seqNos.size()]);
    logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinishedSeq);
    final CyclicBarrier barrier = new CyclicBarrier(threads.length);
    for (int t = 0; t < threads.length; t++) {
        final int threadId = t;
        threads[t] = new Thread(new AbstractRunnable() {
            @Override
            public void onFailure(Exception e) {
                throw new ElasticsearchException("failure in background thread", e);
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                Integer[] ops = seqNoPerThread[threadId];
                for (int seqNo : ops) {
                    if (seqNo != unFinishedSeq) {
                        tracker.markSeqNoAsCompleted(seqNo);
                        logger.info("[t{}] completed [{}]", threadId, seqNo);
                    }
                }
            }
        }, "testConcurrentReplica_" + threadId);
        threads[t].start();
    }
    for (Thread thread : threads) {
        thread.join();
    }
    assertThat(tracker.getMaxSeqNo(), equalTo(maxOps - 1L));
    assertThat(tracker.getCheckpoint(), equalTo(unFinishedSeq - 1L));
    tracker.markSeqNoAsCompleted(unFinishedSeq);
    assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L));
    assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / SMALL_CHUNK_SIZE) * SMALL_CHUNK_SIZE));
}
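The assertions at the end encode the local-checkpoint rule: the checkpoint is the highest sequence number below which there are no gaps, so it sticks at unFinishedSeq - 1 until the hole is filled, then jumps to maxOps - 1. A toy sketch of that advance rule using a single java.util.BitSet; the real LocalCheckpointTracker uses chunked bitsets (hence the SMALL_CHUNK_SIZE assertion), so this stand-in is not its API.

import java.util.BitSet;

public class LocalCheckpointSketch {
    private final BitSet completed = new BitSet();

    private long checkpoint = -1; // highest seqNo with no gaps below it

    public synchronized void markSeqNoAsCompleted(int seqNo) {
        completed.set(seqNo);
        // advance the checkpoint over any contiguous run of completed seqNos
        while (completed.get((int) (checkpoint + 1))) {
            checkpoint++;
        }
    }

    public synchronized long getCheckpoint() {
        return checkpoint;
    }

    public static void main(String[] args) {
        LocalCheckpointSketch tracker = new LocalCheckpointSketch();
        tracker.markSeqNoAsCompleted(0);
        tracker.markSeqNoAsCompleted(2); // gap at 1 holds the checkpoint back
        System.out.println(tracker.getCheckpoint()); // 0
        tracker.markSeqNoAsCompleted(1); // gap filled, checkpoint jumps to 2
        System.out.println(tracker.getCheckpoint()); // 2
    }
}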
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class MockTransportService method addUnresponsiveRule.
/**
 * Adds a rule that ignores each send request, simulating an unresponsive node,
 * and fails to connect once the rule has been added.
 *
 * @param duration the amount of time to delay sending and connecting.
 */
public void addUnresponsiveRule(TransportAddress transportAddress, final TimeValue duration) {
    final long startTime = System.currentTimeMillis();
    addDelegate(transportAddress, new ClearableTransport(original) {
        private final Queue<Runnable> requestsToSendWhenCleared = new LinkedBlockingDeque<Runnable>();

        private boolean cleared = false;

        TimeValue getDelay() {
            return new TimeValue(duration.millis() - (System.currentTimeMillis() - startTime));
        }

        @Override
        public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile, CheckedBiConsumer<Connection, ConnectionProfile, IOException> connectionValidator) throws ConnectTransportException {
            if (original.nodeConnected(node)) {
                // connecting to an already connected node is a no-op
                return;
            }
            TimeValue delay = getDelay();
            if (delay.millis() <= 0) {
                original.connectToNode(node, connectionProfile, connectionValidator);
                return;
            }
            // TODO: Replace with proper setting
            TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
            try {
                if (delay.millis() < connectingTimeout.millis()) {
                    Thread.sleep(delay.millis());
                    original.connectToNode(node, connectionProfile, connectionValidator);
                } else {
                    Thread.sleep(connectingTimeout.millis());
                    throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
                }
            } catch (InterruptedException e) {
                throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
            }
        }

        @Override
        protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
            // delayed sending - even if larger than the request timeout, to simulate a potential late response from the target node
            TimeValue delay = getDelay();
            if (delay.millis() <= 0) {
                connection.sendRequest(requestId, action, request, options);
                return;
            }
            // poor man's request cloning...
            RequestHandlerRegistry reg = MockTransportService.this.getRequestHandler(action);
            BytesStreamOutput bStream = new BytesStreamOutput();
            request.writeTo(bStream);
            final TransportRequest clonedRequest = reg.newRequest();
            clonedRequest.readFrom(bStream.bytes().streamInput());
            Runnable runnable = new AbstractRunnable() {
                AtomicBoolean requestSent = new AtomicBoolean();

                @Override
                public void onFailure(Exception e) {
                    logger.debug("failed to send delayed request", e);
                }

                @Override
                protected void doRun() throws IOException {
                    if (requestSent.compareAndSet(false, true)) {
                        connection.sendRequest(requestId, action, clonedRequest, options);
                    }
                }
            };
            // store the request to send it once the rule is cleared.
            synchronized (this) {
                if (cleared) {
                    runnable.run();
                } else {
                    requestsToSendWhenCleared.add(runnable);
                    threadPool.schedule(delay, ThreadPool.Names.GENERIC, runnable);
                }
            }
        }

        @Override
        public void clearRule() {
            synchronized (this) {
                assert cleared == false;
                cleared = true;
                requestsToSendWhenCleared.forEach(Runnable::run);
            }
        }
    });
}
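Note how the delayed request can be fired from two places: the scheduled task after the delay expires, or clearRule() flushing requestsToSendWhenCleared early; the AtomicBoolean.compareAndSet guard makes the send idempotent so the request goes out exactly once. A small self-contained sketch of that run-once guard, using a plain ScheduledExecutorService in place of the Elasticsearch ThreadPool:

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

public class RunOnceSketch {
    public static void main(String[] args) throws Exception {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        AtomicBoolean sent = new AtomicBoolean();
        Runnable sendOnce = () -> {
            // both the delayed schedule and an early clearRule() may invoke this;
            // compareAndSet guarantees the request goes out exactly once
            if (sent.compareAndSet(false, true)) {
                System.out.println("request sent");
            }
        };
        scheduler.schedule(sendOnce, 500, TimeUnit.MILLISECONDS); // delayed path
        sendOnce.run(); // "rule cleared" path fires immediately
        scheduler.shutdown();
        scheduler.awaitTermination(1, TimeUnit.SECONDS);
    }
}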