use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project crate by crate.
the class BlobRecoverySourceHandler method phase1.
/**
* Perform phase1 of the recovery operations. Once this {@link SnapshotIndexCommit}
* snapshot has been performed no commit operations (files being fsync'd)
* are effectively allowed on this index until all recovery phases are done
* <p/>
* Phase1 examines the segment files on the target node and copies over the
* segments that are missing. Only segments that have the same size and
* checksum can be reused
*/
public void phase1(final SnapshotIndexCommit snapshot, final Translog.View translogView) {
cancellableThreads.checkForCancel();
// Total size of segment files that are recovered
long totalSize = 0;
// Total size of segment files that were able to be re-used
long existingTotalSize = 0;
final Store store = shard.store();
store.incRef();
try {
// CRATE CHANGE
if (blobRecoveryHandler != null) {
blobRecoveryHandler.phase1();
}
StopWatch stopWatch = new StopWatch().start();
final Store.MetadataSnapshot recoverySourceMetadata;
try {
recoverySourceMetadata = store.getMetadata(snapshot);
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
shard.engine().failEngine("recovery", ex);
throw ex;
}
for (String name : snapshot.getFiles()) {
final StoreFileMetaData md = recoverySourceMetadata.get(name);
if (md == null) {
logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
}
}
// Generate a "diff" of all the identical, different, and missing
// segment files on the target node, using the existing files on
// the source node
String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
if (recoverWithSyncId) {
final long numDocsTarget = request.metadataSnapshot().getNumDocs();
final long numDocsSource = recoverySourceMetadata.getNumDocs();
if (numDocsTarget != numDocsSource) {
throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsSource + "(" + request.targetNode().getName() + ")");
}
// we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
// so we don't return here
logger.trace("[{}][{}] skipping [phase1] to {} - identical sync id [{}] found on both source and target", indexName, shardId, request.targetNode(), recoverySourceSyncId);
} else {
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
for (StoreFileMetaData md : diff.identical) {
response.phase1ExistingFileNames.add(md.name());
response.phase1ExistingFileSizes.add(md.length());
existingTotalSize += md.length();
if (logger.isTraceEnabled()) {
logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]", indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length());
}
totalSize += md.length();
}
for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
if (request.metadataSnapshot().asMap().containsKey(md.name())) {
logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", indexName, shardId, request.targetNode(), md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
} else {
logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote", indexName, shardId, request.targetNode(), md.name());
}
response.phase1FileNames.add(md.name());
response.phase1FileSizes.add(md.length());
totalSize += md.length();
}
response.phase1TotalSize = totalSize;
response.phase1ExistingTotalSize = existingTotalSize;
logger.trace("[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", indexName, shardId, request.targetNode(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
cancellableThreads.execute(new Interruptable() {
@Override
public void run() throws InterruptedException {
RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(request.recoveryId(), request.shardId(), response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations());
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO, recoveryInfoFilesRequest, TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
}
});
// This latch will be used to wait until all files have been transferred to the target node
final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
final CopyOnWriteArrayList<Throwable> exceptions = new CopyOnWriteArrayList<>();
final AtomicReference<Throwable> corruptedEngine = new AtomicReference<>();
int fileIndex = 0;
ThreadPoolExecutor pool;
// How many bytes we've copied since we last called RateLimiter.pause
final AtomicLong bytesSinceLastPause = new AtomicLong();
for (final String name : response.phase1FileNames) {
long fileSize = response.phase1FileSizes.get(fileIndex);
// separately.
if (fileSize > RecoverySettings.SMALL_FILE_CUTOFF_BYTES) {
pool = recoverySettings.concurrentStreamPool();
} else {
pool = recoverySettings.concurrentSmallFileStreamPool();
}
pool.execute(new AbstractRunnable() {
@Override
public void onFailure(Throwable t) {
// we either got rejected or the store can't be incremented / we are canceled
logger.debug("Failed to transfer file [" + name + "] on recovery");
}
@Override
public void onAfter() {
// Signify this file has completed by decrementing the latch
latch.countDown();
}
@Override
protected void doRun() {
cancellableThreads.checkForCancel();
store.incRef();
final StoreFileMetaData md = recoverySourceMetadata.get(name);
try (final IndexInput indexInput = store.directory().openInput(name, IOContext.READONCE)) {
// at least one!
final int BUFFER_SIZE = (int) Math.max(1, recoverySettings.fileChunkSize().getBytes());
final byte[] buf = new byte[BUFFER_SIZE];
boolean shouldCompressRequest = recoverySettings.compress();
if (CompressorFactory.isCompressed(indexInput)) {
shouldCompressRequest = false;
}
final long len = indexInput.length();
long readCount = 0;
final TransportRequestOptions requestOptions = TransportRequestOptions.builder().withCompress(shouldCompressRequest).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(recoverySettings.internalActionTimeout()).build();
while (readCount < len) {
if (shard.state() == IndexShardState.CLOSED) {
// check if the shard got closed on us
throw new IndexShardClosedException(shard.shardId());
}
int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount) : BUFFER_SIZE;
final long position = indexInput.getFilePointer();
// Pause using the rate limiter, if desired, to throttle the recovery
RateLimiter rl = recoverySettings.rateLimiter();
long throttleTimeInNanos = 0;
if (rl != null) {
long bytes = bytesSinceLastPause.addAndGet(toRead);
if (bytes > rl.getMinPauseCheckBytes()) {
// Time to pause
bytesSinceLastPause.addAndGet(-bytes);
throttleTimeInNanos = rl.pause(bytes);
shard.recoveryStats().addThrottleTime(throttleTimeInNanos);
}
}
indexInput.readBytes(buf, 0, toRead, false);
final BytesArray content = new BytesArray(buf, 0, toRead);
readCount += toRead;
final boolean lastChunk = readCount == len;
final RecoveryFileChunkRequest fileChunkRequest = new RecoveryFileChunkRequest(request.recoveryId(), request.shardId(), md, position, content, lastChunk, translogView.totalOperations(), throttleTimeInNanos);
cancellableThreads.execute(new Interruptable() {
@Override
public void run() throws InterruptedException {
// Actually send the file chunk to the target node, waiting for it to complete
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILE_CHUNK, fileChunkRequest, requestOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
}
});
}
} catch (Throwable e) {
final Throwable corruptIndexException;
if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(e)) != null) {
if (store.checkIntegrityNoException(md) == false) {
// we are corrupted on the primary -- fail!
logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
if (corruptedEngine.compareAndSet(null, corruptIndexException) == false) {
// if we are not the first exception, add ourselves as suppressed to the main one:
corruptedEngine.get().addSuppressed(e);
}
} else {
// corruption has happened on the way to replica
RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
exception.addSuppressed(e);
// last exception first
exceptions.add(0, exception);
logger.warn("{} Remote file corruption on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode(), md);
}
} else {
// last exceptions first
exceptions.add(0, e);
}
} finally {
store.decRef();
}
}
});
fileIndex++;
}
cancellableThreads.execute(new Interruptable() {
@Override
public void run() throws InterruptedException {
// Wait for all files that need to be transferred to finish transferring
latch.await();
}
});
if (corruptedEngine.get() != null) {
shard.engine().failEngine("recovery", corruptedEngine.get());
throw corruptedEngine.get();
} else {
ExceptionsHelper.rethrowAndSuppress(exceptions);
}
cancellableThreads.execute(new Interruptable() {
@Override
public void run() throws InterruptedException {
// are deleted
try {
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()), TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
} catch (RemoteTransportException remoteException) {
final IOException corruptIndexException;
// - maybe due to old segments without checksums or length only checks
if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(remoteException)) != null) {
try {
final Store.MetadataSnapshot recoverySourceMetadata = store.getMetadata(snapshot);
StoreFileMetaData[] metadata = Iterables.toArray(recoverySourceMetadata, StoreFileMetaData.class);
ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() {
@Override
public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
// check small files first
return Long.compare(o1.length(), o2.length());
}
});
for (StoreFileMetaData md : metadata) {
logger.debug("{} checking integrity for file {} after remove corruption exception", shard.shardId(), md);
if (store.checkIntegrityNoException(md) == false) {
// we are corrupted on the primary -- fail!
shard.engine().failEngine("recovery", corruptIndexException);
logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
throw corruptIndexException;
}
}
} catch (IOException ex) {
remoteException.addSuppressed(ex);
throw remoteException;
}
// corruption has happened on the way to replica
RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
exception.addSuppressed(remoteException);
logger.warn("{} Remote file corruption during finalization on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode());
throw exception;
} else {
throw remoteException;
}
}
}
});
}
prepareTargetForTranslog(translogView);
logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", indexName, shardId, request.targetNode(), stopWatch.totalTime());
response.phase1Time = stopWatch.totalTime().millis();
} catch (Throwable e) {
throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
} finally {
store.decRef();
}
}
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class RemoteScrollableHitSource method execute.
private <T> void execute(String method, String uri, Map<String, String> params, HttpEntity entity, BiFunction<XContentParser, XContentType, T> parser, Consumer<? super T> listener) {
// Preserve the thread context so headers survive after the call
java.util.function.Supplier<ThreadContext.StoredContext> contextSupplier = threadPool.getThreadContext().newRestorableContext(true);
class RetryHelper extends AbstractRunnable {
private final Iterator<TimeValue> retries = backoffPolicy.iterator();
@Override
protected void doRun() throws Exception {
client.performRequestAsync(method, uri, params, entity, new ResponseListener() {
@Override
public void onSuccess(org.elasticsearch.client.Response response) {
// Restore the thread context to get the precious headers
try (ThreadContext.StoredContext ctx = contextSupplier.get()) {
// eliminates compiler warning
assert ctx != null;
T parsedResponse;
try {
HttpEntity responseEntity = response.getEntity();
InputStream content = responseEntity.getContent();
XContentType xContentType = null;
if (responseEntity.getContentType() != null) {
final String mimeType = ContentType.parse(responseEntity.getContentType().getValue()).getMimeType();
xContentType = XContentType.fromMediaType(mimeType);
}
if (xContentType == null) {
try {
throw new ElasticsearchException("Response didn't include Content-Type: " + bodyMessage(response.getEntity()));
} catch (IOException e) {
ElasticsearchException ee = new ElasticsearchException("Error extracting body from response");
ee.addSuppressed(e);
throw ee;
}
}
// EMPTY is safe here because we don't call namedObject
try (XContentParser xContentParser = xContentType.xContent().createParser(NamedXContentRegistry.EMPTY, content)) {
parsedResponse = parser.apply(xContentParser, xContentType);
} catch (ParsingException e) {
/* Because we're streaming the response we can't get a copy of it here. The best we can do is hint that it
* is totally wrong and we're probably not talking to Elasticsearch. */
throw new ElasticsearchException("Error parsing the response, remote is likely not an Elasticsearch instance", e);
}
} catch (IOException e) {
throw new ElasticsearchException("Error deserializing response, remote is likely not an Elasticsearch instance", e);
}
listener.accept(parsedResponse);
}
}
@Override
public void onFailure(Exception e) {
try (ThreadContext.StoredContext ctx = contextSupplier.get()) {
// eliminates compiler warning
assert ctx != null;
if (e instanceof ResponseException) {
ResponseException re = (ResponseException) e;
if (RestStatus.TOO_MANY_REQUESTS.getStatus() == re.getResponse().getStatusLine().getStatusCode()) {
if (retries.hasNext()) {
TimeValue delay = retries.next();
logger.trace((Supplier<?>) () -> new ParameterizedMessage("retrying rejected search after [{}]", delay), e);
countSearchRetry.run();
threadPool.schedule(delay, ThreadPool.Names.SAME, RetryHelper.this);
return;
}
}
e = wrapExceptionToPreserveStatus(re.getResponse().getStatusLine().getStatusCode(), re.getResponse().getEntity(), re);
} else if (e instanceof ContentTooLongException) {
e = new IllegalArgumentException("Remote responded with a chunk that was too large. Use a smaller batch size.", e);
}
fail.accept(e);
}
}
});
}
@Override
public void onFailure(Exception t) {
fail.accept(t);
}
}
new RetryHelper().run();
}
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class AbstractSimpleTransportTestCase method testConcurrentSendRespondAndDisconnect.
public void testConcurrentSendRespondAndDisconnect() throws BrokenBarrierException, InterruptedException {
Set<Exception> sendingErrors = ConcurrentCollections.newConcurrentSet();
Set<Exception> responseErrors = ConcurrentCollections.newConcurrentSet();
serviceA.registerRequestHandler("test", TestRequest::new, randomBoolean() ? ThreadPool.Names.SAME : ThreadPool.Names.GENERIC, (request, channel) -> {
try {
channel.sendResponse(new TestResponse());
} catch (Exception e) {
logger.info("caught exception while responding", e);
responseErrors.add(e);
}
});
final TransportRequestHandler<TestRequest> ignoringRequestHandler = (request, channel) -> {
try {
channel.sendResponse(new TestResponse());
} catch (Exception e) {
// we don't really care what's going on B, we're testing through A
logger.trace("caught exception while responding from node B", e);
}
};
serviceB.registerRequestHandler("test", TestRequest::new, ThreadPool.Names.SAME, ignoringRequestHandler);
int halfSenders = scaledRandomIntBetween(3, 10);
final CyclicBarrier go = new CyclicBarrier(halfSenders * 2 + 1);
final CountDownLatch done = new CountDownLatch(halfSenders * 2);
for (int i = 0; i < halfSenders; i++) {
// B senders just generated activity so serciveA can respond, we don't test what's going on there
final int sender = i;
threadPool.executor(ThreadPool.Names.GENERIC).execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.trace("caught exception while sending from B", e);
}
@Override
protected void doRun() throws Exception {
go.await();
for (int iter = 0; iter < 10; iter++) {
PlainActionFuture<TestResponse> listener = new PlainActionFuture<>();
final String info = sender + "_B_" + iter;
serviceB.sendRequest(nodeA, "test", new TestRequest(info), new ActionListenerResponseHandler<>(listener, TestResponse::new));
try {
listener.actionGet();
} catch (Exception e) {
logger.trace((Supplier<?>) () -> new ParameterizedMessage("caught exception while sending to node {}", nodeA), e);
}
}
}
@Override
public void onAfter() {
done.countDown();
}
});
}
for (int i = 0; i < halfSenders; i++) {
final int sender = i;
threadPool.executor(ThreadPool.Names.GENERIC).execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.error("unexpected error", e);
sendingErrors.add(e);
}
@Override
protected void doRun() throws Exception {
go.await();
for (int iter = 0; iter < 10; iter++) {
PlainActionFuture<TestResponse> listener = new PlainActionFuture<>();
final String info = sender + "_" + iter;
// capture now
final DiscoveryNode node = nodeB;
try {
serviceA.sendRequest(node, "test", new TestRequest(info), new ActionListenerResponseHandler<>(listener, TestResponse::new));
try {
listener.actionGet();
} catch (ConnectTransportException e) {
// ok!
} catch (Exception e) {
logger.error((Supplier<?>) () -> new ParameterizedMessage("caught exception while sending to node {}", node), e);
sendingErrors.add(e);
}
} catch (NodeNotConnectedException ex) {
// ok
}
}
}
@Override
public void onAfter() {
done.countDown();
}
});
}
go.await();
for (int i = 0; i <= 10; i++) {
if (i % 3 == 0) {
// simulate restart of nodeB
serviceB.close();
MockTransportService newService = buildService("TS_B_" + i, version1, null);
newService.registerRequestHandler("test", TestRequest::new, ThreadPool.Names.SAME, ignoringRequestHandler);
serviceB = newService;
nodeB = newService.getLocalDiscoNode();
serviceB.connectToNode(nodeA);
serviceA.connectToNode(nodeB);
} else if (serviceA.nodeConnected(nodeB)) {
serviceA.disconnectFromNode(nodeB);
} else {
serviceA.connectToNode(nodeB);
}
}
done.await();
assertThat("found non connection errors while sending", sendingErrors, empty());
assertThat("found non connection errors while responding", responseErrors, empty());
}
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class NodeJoinControllerTests method testSimpleMasterElection.
public void testSimpleMasterElection() throws InterruptedException, ExecutionException {
DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(clusterService.state().nodes()).masterNodeId(null);
setState(clusterService, ClusterState.builder(clusterService.state()).nodes(nodes));
int nodeId = 0;
final int requiredJoins = 1 + randomInt(5);
logger.debug("--> using requiredJoins [{}]", requiredJoins);
// initial (failing) joins shouldn't count
for (int i = randomInt(5); i > 0; i--) {
try {
joinNode(newNode(nodeId++));
fail("failed to fail node join when not a master");
} catch (ExecutionException e) {
assertThat(e.getCause(), instanceOf(NotMasterException.class));
}
}
nodeJoinController.startElectionContext();
final SimpleFuture electionFuture = new SimpleFuture("master election");
final Thread masterElection = new Thread(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.error("unexpected error from waitToBeElectedAsMaster", e);
electionFuture.markAsFailed(e);
}
@Override
protected void doRun() throws Exception {
nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30), new NodeJoinController.ElectionCallback() {
@Override
public void onElectedAsMaster(ClusterState state) {
assertThat("callback called with elected as master, but state disagrees", state.nodes().isLocalNodeElectedMaster(), equalTo(true));
electionFuture.markAsDone();
}
@Override
public void onFailure(Throwable t) {
logger.error("unexpected error while waiting to be elected as master", t);
electionFuture.markAsFailed(t);
}
});
}
});
masterElection.start();
assertThat("election finished immediately but required joins is [" + requiredJoins + "]", electionFuture.isDone(), equalTo(false));
final int initialJoins = randomIntBetween(0, requiredJoins - 1);
final ArrayList<SimpleFuture> pendingJoins = new ArrayList<>();
ArrayList<DiscoveryNode> nodesToJoin = new ArrayList<>();
for (int i = 0; i < initialJoins; i++) {
DiscoveryNode node = newNode(nodeId++, true);
for (int j = 1 + randomInt(3); j > 0; j--) {
nodesToJoin.add(node);
}
}
// data nodes shouldn't count
for (int i = 0; i < requiredJoins; i++) {
DiscoveryNode node = newNode(nodeId++, false);
for (int j = 1 + randomInt(3); j > 0; j--) {
nodesToJoin.add(node);
}
}
// add
shuffle(nodesToJoin, random());
logger.debug("--> joining [{}] unique master nodes. Total of [{}] join requests", initialJoins, nodesToJoin.size());
for (DiscoveryNode node : nodesToJoin) {
pendingJoins.add(joinNodeAsync(node));
}
logger.debug("--> asserting master election didn't finish yet");
assertThat("election finished after [" + initialJoins + "] master nodes but required joins is [" + requiredJoins + "]", electionFuture.isDone(), equalTo(false));
final int finalJoins = requiredJoins - initialJoins + randomInt(5);
nodesToJoin.clear();
for (int i = 0; i < finalJoins; i++) {
DiscoveryNode node = newNode(nodeId++, true);
for (int j = 1 + randomInt(3); j > 0; j--) {
nodesToJoin.add(node);
}
}
for (int i = 0; i < requiredJoins; i++) {
DiscoveryNode node = newNode(nodeId++, false);
for (int j = 1 + randomInt(3); j > 0; j--) {
nodesToJoin.add(node);
}
}
shuffle(nodesToJoin, random());
logger.debug("--> joining [{}] nodes, with repetition a total of [{}]", finalJoins, nodesToJoin.size());
for (DiscoveryNode node : nodesToJoin) {
pendingJoins.add(joinNodeAsync(node));
}
logger.debug("--> waiting for master election to with no exception");
electionFuture.get();
logger.debug("--> waiting on all joins to be processed");
for (SimpleFuture future : pendingJoins) {
logger.debug("waiting on {}", future);
// throw any exception
future.get();
}
logger.debug("--> testing accumulation stopped");
nodeJoinController.startElectionContext();
nodeJoinController.stopElectionContext("test");
}
use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.
the class NodeJoinControllerTests method testNormalConcurrentJoins.
public void testNormalConcurrentJoins() throws InterruptedException {
Thread[] threads = new Thread[3 + randomInt(5)];
ArrayList<DiscoveryNode> nodes = new ArrayList<>();
nodes.add(clusterService.localNode());
final CyclicBarrier barrier = new CyclicBarrier(threads.length);
final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
for (int i = 0; i < threads.length; i++) {
final DiscoveryNode node = newNode(i);
final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
nodes.add(node);
threads[i] = new Thread(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.error("unexpected error in join thread", e);
backgroundExceptions.add(e);
}
@Override
protected void doRun() throws Exception {
barrier.await();
for (int i = 0; i < iterations; i++) {
logger.debug("{} joining", node);
joinNode(node);
}
}
}, "t_" + i);
threads[i].start();
}
logger.info("--> waiting for joins to complete");
for (Thread thread : threads) {
thread.join();
}
assertNodesInCurrentState(nodes);
}
Aggregations