Example 1 with AbstractRunnable

Use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project crate by crate.

From the class BlobRecoverySourceHandler, method phase1.

/**
     * Perform phase1 of the recovery operations. Once this {@link SnapshotIndexCommit}
     * snapshot has been performed, no commit operations (files being fsync'd)
     * are effectively allowed on this index until all recovery phases are done.
     * <p/>
     * Phase1 examines the segment files on the target node and copies over the
     * segments that are missing. Only segments that have the same size and
     * checksum can be reused.
     */
public void phase1(final SnapshotIndexCommit snapshot, final Translog.View translogView) {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    store.incRef();
    try {
        // CRATE CHANGE
        if (blobRecoveryHandler != null) {
            blobRecoveryHandler.phase1();
        }
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            shard.engine().failEngine("recovery", ex);
            throw ex;
        }
        for (String name : snapshot.getFiles()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed; metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
            }
        }
        // Generate a "diff" of all the identical, different, and missing
        // segment files on the target node, using the existing files on
        // the source node
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsSource + " (" + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace("[{}][{}] skipping [phase1] to {} - identical sync id [{}] found on both source and target", indexName, shardId, request.targetNode(), recoverySourceSyncId);
        } else {
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]", indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", indexName, shardId, request.targetNode(), md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote", indexName, shardId, request.targetNode(), md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }
            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;
            logger.trace("[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", indexName, shardId, request.targetNode(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(request.recoveryId(), request.shardId(), response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations());
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO, recoveryInfoFilesRequest, TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                }
            });
            // This latch will be used to wait until all files have been transferred to the target node
            final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
            final CopyOnWriteArrayList<Throwable> exceptions = new CopyOnWriteArrayList<>();
            final AtomicReference<Throwable> corruptedEngine = new AtomicReference<>();
            int fileIndex = 0;
            ThreadPoolExecutor pool;
            // How many bytes we've copied since we last called RateLimiter.pause
            final AtomicLong bytesSinceLastPause = new AtomicLong();
            for (final String name : response.phase1FileNames) {
                long fileSize = response.phase1FileSizes.get(fileIndex);
                // Files are split into two categories and transferred on
                // separate thread pools: small files (at or under the cutoff
                // below) and everything else.
                if (fileSize > RecoverySettings.SMALL_FILE_CUTOFF_BYTES) {
                    pool = recoverySettings.concurrentStreamPool();
                } else {
                    pool = recoverySettings.concurrentSmallFileStreamPool();
                }
                pool.execute(new AbstractRunnable() {

                    @Override
                    public void onFailure(Throwable t) {
                        // we either got rejected or the store can't be incremented / we are canceled
                        logger.debug("Failed to transfer file [" + name + "] on recovery");
                    }

                    @Override
                    public void onAfter() {
                        // Signify this file has completed by decrementing the latch
                        latch.countDown();
                    }

                    @Override
                    protected void doRun() {
                        cancellableThreads.checkForCancel();
                        store.incRef();
                        final StoreFileMetaData md = recoverySourceMetadata.get(name);
                        try (final IndexInput indexInput = store.directory().openInput(name, IOContext.READONCE)) {
                            // buffer size is the configured file chunk size, but at least one byte
                            final int BUFFER_SIZE = (int) Math.max(1, recoverySettings.fileChunkSize().getBytes());
                            final byte[] buf = new byte[BUFFER_SIZE];
                            boolean shouldCompressRequest = recoverySettings.compress();
                            if (CompressorFactory.isCompressed(indexInput)) {
                                shouldCompressRequest = false;
                            }
                            final long len = indexInput.length();
                            long readCount = 0;
                            final TransportRequestOptions requestOptions = TransportRequestOptions.builder().withCompress(shouldCompressRequest).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(recoverySettings.internalActionTimeout()).build();
                            while (readCount < len) {
                                if (shard.state() == IndexShardState.CLOSED) {
                                    // check if the shard got closed on us
                                    throw new IndexShardClosedException(shard.shardId());
                                }
                                int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount) : BUFFER_SIZE;
                                final long position = indexInput.getFilePointer();
                                // Pause using the rate limiter, if desired, to throttle the recovery
                                RateLimiter rl = recoverySettings.rateLimiter();
                                long throttleTimeInNanos = 0;
                                if (rl != null) {
                                    long bytes = bytesSinceLastPause.addAndGet(toRead);
                                    if (bytes > rl.getMinPauseCheckBytes()) {
                                        // Time to pause
                                        bytesSinceLastPause.addAndGet(-bytes);
                                        throttleTimeInNanos = rl.pause(bytes);
                                        shard.recoveryStats().addThrottleTime(throttleTimeInNanos);
                                    }
                                }
                                indexInput.readBytes(buf, 0, toRead, false);
                                final BytesArray content = new BytesArray(buf, 0, toRead);
                                readCount += toRead;
                                final boolean lastChunk = readCount == len;
                                final RecoveryFileChunkRequest fileChunkRequest = new RecoveryFileChunkRequest(request.recoveryId(), request.shardId(), md, position, content, lastChunk, translogView.totalOperations(), throttleTimeInNanos);
                                cancellableThreads.execute(new Interruptable() {

                                    @Override
                                    public void run() throws InterruptedException {
                                        // Actually send the file chunk to the target node, waiting for it to complete
                                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILE_CHUNK, fileChunkRequest, requestOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                                    }
                                });
                            }
                        } catch (Throwable e) {
                            final Throwable corruptIndexException;
                            if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(e)) != null) {
                                if (store.checkIntegrityNoException(md) == false) {
                                    // we are corrupted on the primary -- fail!
                                    logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
                                    if (corruptedEngine.compareAndSet(null, corruptIndexException) == false) {
                                        // if we are not the first exception, add ourselves as suppressed to the main one:
                                        corruptedEngine.get().addSuppressed(e);
                                    }
                                } else {
                                    // corruption has happened on the way to replica
                                    RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
                                    exception.addSuppressed(e);
                                    // last exception first
                                    exceptions.add(0, exception);
                                    logger.warn("{} Remote file corruption on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode(), md);
                                }
                            } else {
                                // last exceptions first
                                exceptions.add(0, e);
                            }
                        } finally {
                            store.decRef();
                        }
                    }
                });
                fileIndex++;
            }
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    // Wait for all files that need to be transferred to finish transferring
                    latch.await();
                }
            });
            if (corruptedEngine.get() != null) {
                shard.engine().failEngine("recovery", corruptedEngine.get());
                throw corruptedEngine.get();
            } else {
                ExceptionsHelper.rethrowAndSuppress(exceptions);
            }
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    // Send the CLEAN_FILES request, which renames the transferred
                    // temporary files to their final names; any target files that
                    // are not part of this recovery (e.g. stale segments) are deleted
                    try {
                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()), TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                    } catch (RemoteTransportException remoteException) {
                        final IOException corruptIndexException;
                        // the target reported corruption while finalizing the recovery
                        //   - maybe due to old segments without checksums or length-only checks
                        if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(remoteException)) != null) {
                            try {
                                final Store.MetadataSnapshot recoverySourceMetadata = store.getMetadata(snapshot);
                                StoreFileMetaData[] metadata = Iterables.toArray(recoverySourceMetadata, StoreFileMetaData.class);
                                ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() {

                                    @Override
                                    public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
                                        // check small files first
                                        return Long.compare(o1.length(), o2.length());
                                    }
                                });
                                for (StoreFileMetaData md : metadata) {
                                    logger.debug("{} checking integrity for file {} after remove corruption exception", shard.shardId(), md);
                                    if (store.checkIntegrityNoException(md) == false) {
                                        // we are corrupted on the primary -- fail!
                                        shard.engine().failEngine("recovery", corruptIndexException);
                                        logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
                                        throw corruptIndexException;
                                    }
                                }
                            } catch (IOException ex) {
                                remoteException.addSuppressed(ex);
                                throw remoteException;
                            }
                            // corruption has happened on the way to replica
                            RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
                            exception.addSuppressed(remoteException);
                            logger.warn("{} Remote file corruption during finalization on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode());
                            throw exception;
                        } else {
                            throw remoteException;
                        }
                    }
                }
            });
        }
        prepareTargetForTranslog(translogView);
        logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", indexName, shardId, request.targetNode(), stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Throwable e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) Store(org.elasticsearch.index.store.Store) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IndexInput(org.apache.lucene.store.IndexInput) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) BytesArray(org.elasticsearch.common.bytes.BytesArray) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) Interruptable(org.elasticsearch.common.util.CancellableThreads.Interruptable) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) RateLimiter(org.apache.lucene.store.RateLimiter) StopWatch(org.elasticsearch.common.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)
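
The detail worth noting in Example 1 is that the per-file latch is counted down in onAfter() rather than in doRun(): AbstractRunnable guarantees that onAfter() runs whether doRun() completed or threw, so every file, transferred or failed, releases the latch. Below is a minimal sketch of that contract using a plain ExecutorService in place of the recovery thread pools; note that the onFailure signature takes Throwable in the Elasticsearch 2.x code used by crate above, but Exception in the 5.x examples that follow, which is what this sketch assumes.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;

public class AbstractRunnableContractSketch {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        CountDownLatch latch = new CountDownLatch(1);
        executor.execute(new AbstractRunnable() {

            @Override
            protected void doRun() throws Exception {
                // the actual work; any exception thrown here is routed to onFailure
            }

            @Override
            public void onFailure(Exception e) {
                // reached on failure (including rejected execution), not on success
                e.printStackTrace();
            }

            @Override
            public void onAfter() {
                // runs after doRun in both the success and failure cases,
                // which is why Example 1 counts its per-file latch down here
                latch.countDown();
            }
        });
        latch.await();
        executor.shutdown();
    }
}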

Example 2 with AbstractRunnable

Use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

From the class RemoteScrollableHitSource, method execute.

private <T> void execute(String method, String uri, Map<String, String> params, HttpEntity entity, BiFunction<XContentParser, XContentType, T> parser, Consumer<? super T> listener) {
    // Preserve the thread context so headers survive after the call
    java.util.function.Supplier<ThreadContext.StoredContext> contextSupplier = threadPool.getThreadContext().newRestorableContext(true);
    class RetryHelper extends AbstractRunnable {

        private final Iterator<TimeValue> retries = backoffPolicy.iterator();

        @Override
        protected void doRun() throws Exception {
            client.performRequestAsync(method, uri, params, entity, new ResponseListener() {

                @Override
                public void onSuccess(org.elasticsearch.client.Response response) {
                    // Restore the thread context to get the precious headers
                    try (ThreadContext.StoredContext ctx = contextSupplier.get()) {
                        // eliminates compiler warning
                        assert ctx != null;
                        T parsedResponse;
                        try {
                            HttpEntity responseEntity = response.getEntity();
                            InputStream content = responseEntity.getContent();
                            XContentType xContentType = null;
                            if (responseEntity.getContentType() != null) {
                                final String mimeType = ContentType.parse(responseEntity.getContentType().getValue()).getMimeType();
                                xContentType = XContentType.fromMediaType(mimeType);
                            }
                            if (xContentType == null) {
                                try {
                                    throw new ElasticsearchException("Response didn't include Content-Type: " + bodyMessage(response.getEntity()));
                                } catch (IOException e) {
                                    ElasticsearchException ee = new ElasticsearchException("Error extracting body from response");
                                    ee.addSuppressed(e);
                                    throw ee;
                                }
                            }
                            // EMPTY is safe here because we don't call namedObject
                            try (XContentParser xContentParser = xContentType.xContent().createParser(NamedXContentRegistry.EMPTY, content)) {
                                parsedResponse = parser.apply(xContentParser, xContentType);
                            } catch (ParsingException e) {
                                /* Because we're streaming the response we can't get a copy of it here. The best we can do is hint that it
                                 * is totally wrong and we're probably not talking to Elasticsearch. */
                                throw new ElasticsearchException("Error parsing the response, remote is likely not an Elasticsearch instance", e);
                            }
                        } catch (IOException e) {
                            throw new ElasticsearchException("Error deserializing response, remote is likely not an Elasticsearch instance", e);
                        }
                        listener.accept(parsedResponse);
                    }
                }

                @Override
                public void onFailure(Exception e) {
                    try (ThreadContext.StoredContext ctx = contextSupplier.get()) {
                        // eliminates compiler warning
                        assert ctx != null;
                        if (e instanceof ResponseException) {
                            ResponseException re = (ResponseException) e;
                            if (RestStatus.TOO_MANY_REQUESTS.getStatus() == re.getResponse().getStatusLine().getStatusCode()) {
                                if (retries.hasNext()) {
                                    TimeValue delay = retries.next();
                                    logger.trace((Supplier<?>) () -> new ParameterizedMessage("retrying rejected search after [{}]", delay), e);
                                    countSearchRetry.run();
                                    threadPool.schedule(delay, ThreadPool.Names.SAME, RetryHelper.this);
                                    return;
                                }
                            }
                            e = wrapExceptionToPreserveStatus(re.getResponse().getStatusLine().getStatusCode(), re.getResponse().getEntity(), re);
                        } else if (e instanceof ContentTooLongException) {
                            e = new IllegalArgumentException("Remote responded with a chunk that was too large. Use a smaller batch size.", e);
                        }
                        fail.accept(e);
                    }
                }
            });
        }

        @Override
        public void onFailure(Exception t) {
            fail.accept(t);
        }
    }
    new RetryHelper().run();
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) HttpEntity(org.apache.http.HttpEntity) ResponseException(org.elasticsearch.client.ResponseException) ElasticsearchException(org.elasticsearch.ElasticsearchException) XContentType(org.elasticsearch.common.xcontent.XContentType) ParsingException(org.elasticsearch.common.ParsingException) Iterator(java.util.Iterator) Supplier(org.apache.logging.log4j.util.Supplier) TimeValue(org.elasticsearch.common.unit.TimeValue) InputStream(java.io.InputStream) ContentTooLongException(org.apache.http.ContentTooLongException) ResponseListener(org.elasticsearch.client.ResponseListener) IOException(java.io.IOException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) XContentParser(org.elasticsearch.common.xcontent.XContentParser)
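
The interesting move in Example 2 is that RetryHelper reschedules itself: on a 429 response, onFailure hands RetryHelper.this back to threadPool.schedule with the next delay from the backoff iterator, so the whole request is retried with growing pauses until the iterator runs dry. A minimal sketch of that self-rescheduling shape, with plain java.util.concurrent standing in for the Elasticsearch ThreadPool; the delays and shouldRetry below are hypothetical stand-ins.

import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class SelfReschedulingRetrySketch {
    public static void main(String[] args) {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        // stand-in for backoffPolicy.iterator(): a finite sequence of growing delays (ms)
        Iterator<Long> retries = Arrays.asList(100L, 500L, 2000L).iterator();
        Runnable attempt = new Runnable() {

            @Override
            public void run() {
                if (shouldRetry() && retries.hasNext()) {
                    // mirrors threadPool.schedule(delay, SAME, RetryHelper.this):
                    // the same task object is handed back to the scheduler
                    scheduler.schedule(this, retries.next(), TimeUnit.MILLISECONDS);
                } else {
                    scheduler.shutdown();
                }
            }
        };
        attempt.run(); // like new RetryHelper().run() above
    }

    private static boolean shouldRetry() {
        // hypothetical stand-in for "the remote answered 429 TOO_MANY_REQUESTS"
        return Math.random() < 0.5;
    }
}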

Example 3 with AbstractRunnable

Use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

From the class AbstractSimpleTransportTestCase, method testConcurrentSendRespondAndDisconnect.

public void testConcurrentSendRespondAndDisconnect() throws BrokenBarrierException, InterruptedException {
    Set<Exception> sendingErrors = ConcurrentCollections.newConcurrentSet();
    Set<Exception> responseErrors = ConcurrentCollections.newConcurrentSet();
    serviceA.registerRequestHandler("test", TestRequest::new, randomBoolean() ? ThreadPool.Names.SAME : ThreadPool.Names.GENERIC, (request, channel) -> {
        try {
            channel.sendResponse(new TestResponse());
        } catch (Exception e) {
            logger.info("caught exception while responding", e);
            responseErrors.add(e);
        }
    });
    final TransportRequestHandler<TestRequest> ignoringRequestHandler = (request, channel) -> {
        try {
            channel.sendResponse(new TestResponse());
        } catch (Exception e) {
            // we don't really care what's going on on B, we're testing through A
            logger.trace("caught exception while responding from node B", e);
        }
    };
    serviceB.registerRequestHandler("test", TestRequest::new, ThreadPool.Names.SAME, ignoringRequestHandler);
    int halfSenders = scaledRandomIntBetween(3, 10);
    final CyclicBarrier go = new CyclicBarrier(halfSenders * 2 + 1);
    final CountDownLatch done = new CountDownLatch(halfSenders * 2);
    for (int i = 0; i < halfSenders; i++) {
        // B senders just generate activity so serviceA can respond; we don't test what's going on there
        final int sender = i;
        threadPool.executor(ThreadPool.Names.GENERIC).execute(new AbstractRunnable() {

            @Override
            public void onFailure(Exception e) {
                logger.trace("caught exception while sending from B", e);
            }

            @Override
            protected void doRun() throws Exception {
                go.await();
                for (int iter = 0; iter < 10; iter++) {
                    PlainActionFuture<TestResponse> listener = new PlainActionFuture<>();
                    final String info = sender + "_B_" + iter;
                    serviceB.sendRequest(nodeA, "test", new TestRequest(info), new ActionListenerResponseHandler<>(listener, TestResponse::new));
                    try {
                        listener.actionGet();
                    } catch (Exception e) {
                        logger.trace((Supplier<?>) () -> new ParameterizedMessage("caught exception while sending to node {}", nodeA), e);
                    }
                }
            }

            @Override
            public void onAfter() {
                done.countDown();
            }
        });
    }
    for (int i = 0; i < halfSenders; i++) {
        final int sender = i;
        threadPool.executor(ThreadPool.Names.GENERIC).execute(new AbstractRunnable() {

            @Override
            public void onFailure(Exception e) {
                logger.error("unexpected error", e);
                sendingErrors.add(e);
            }

            @Override
            protected void doRun() throws Exception {
                go.await();
                for (int iter = 0; iter < 10; iter++) {
                    PlainActionFuture<TestResponse> listener = new PlainActionFuture<>();
                    final String info = sender + "_" + iter;
                    // capture now
                    final DiscoveryNode node = nodeB;
                    try {
                        serviceA.sendRequest(node, "test", new TestRequest(info), new ActionListenerResponseHandler<>(listener, TestResponse::new));
                        try {
                            listener.actionGet();
                        } catch (ConnectTransportException e) {
                        // ok!
                        } catch (Exception e) {
                            logger.error((Supplier<?>) () -> new ParameterizedMessage("caught exception while sending to node {}", node), e);
                            sendingErrors.add(e);
                        }
                    } catch (NodeNotConnectedException ex) {
                    // ok
                    }
                }
            }

            @Override
            public void onAfter() {
                done.countDown();
            }
        });
    }
    go.await();
    for (int i = 0; i <= 10; i++) {
        if (i % 3 == 0) {
            // simulate restart of nodeB
            serviceB.close();
            MockTransportService newService = buildService("TS_B_" + i, version1, null);
            newService.registerRequestHandler("test", TestRequest::new, ThreadPool.Names.SAME, ignoringRequestHandler);
            serviceB = newService;
            nodeB = newService.getLocalDiscoNode();
            serviceB.connectToNode(nodeA);
            serviceA.connectToNode(nodeB);
        } else if (serviceA.nodeConnected(nodeB)) {
            serviceA.disconnectFromNode(nodeB);
        } else {
            serviceA.connectToNode(nodeB);
        }
    }
    done.await();
    assertThat("found non connection errors while sending", sendingErrors, empty());
    assertThat("found non connection errors while responding", responseErrors, empty());
}
Also used : ElasticsearchException(org.elasticsearch.ElasticsearchException) StreamOutput(org.elasticsearch.common.io.stream.StreamOutput) Arrays(java.util.Arrays) BigArrays(org.elasticsearch.common.util.BigArrays) ConcurrentCollections(org.elasticsearch.common.util.concurrent.ConcurrentCollections) InetAddress(java.net.InetAddress) ServerSocket(java.net.ServerSocket) Settings(org.elasticsearch.common.settings.Settings) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Map(java.util.Map) ThreadPool(org.elasticsearch.threadpool.ThreadPool) CyclicBarrier(java.util.concurrent.CyclicBarrier) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) PlainActionFuture(org.elasticsearch.action.support.PlainActionFuture) Set(java.util.Set) InetSocketAddress(java.net.InetSocketAddress) Matchers.startsWith(org.hamcrest.Matchers.startsWith) UncheckedIOException(java.io.UncheckedIOException) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) CountDownLatch(java.util.concurrent.CountDownLatch) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) List(java.util.List) Version(org.elasticsearch.Version) TransportAddress(org.elasticsearch.common.transport.TransportAddress) Supplier(org.apache.logging.log4j.util.Supplier) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Socket(java.net.Socket) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) NetworkService(org.elasticsearch.common.network.NetworkService) ActionListenerResponseHandler(org.elasticsearch.action.ActionListenerResponseHandler) NoneCircuitBreakerService(org.elasticsearch.indices.breaker.NoneCircuitBreakerService) NamedWriteableRegistry(org.elasticsearch.common.io.stream.NamedWriteableRegistry) TimeValue(org.elasticsearch.common.unit.TimeValue) Node(org.elasticsearch.node.Node) ESTestCase(org.elasticsearch.test.ESTestCase) MockTransportService(org.elasticsearch.test.transport.MockTransportService) Before(org.junit.Before) Collections.emptyMap(java.util.Collections.emptyMap) TestThreadPool(org.elasticsearch.threadpool.TestThreadPool) Matchers.empty(org.hamcrest.Matchers.empty) Collections.emptySet(java.util.Collections.emptySet) Semaphore(java.util.concurrent.Semaphore) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) MockServerSocket(org.elasticsearch.mocksocket.MockServerSocket) VersionUtils(org.elasticsearch.test.VersionUtils) CollectionUtil(org.apache.lucene.util.CollectionUtil) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) Constants(org.apache.lucene.util.Constants) StreamInput(org.elasticsearch.common.io.stream.StreamInput) Collections(java.util.Collections)
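
Example 3 coordinates its senders with a two-part handshake: a CyclicBarrier sized halfSenders * 2 + 1, so the main thread is the final party whose go.await() releases every sender at once, and a CountDownLatch that each AbstractRunnable counts down in onAfter so main can wait for all senders to finish. A minimal sketch of that go-barrier / done-latch pattern, with plain threads standing in for the generic thread pool:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;

public class GoBarrierDoneLatchSketch {
    public static void main(String[] args) throws Exception {
        int workers = 4;
        // one extra party so main's go.await() is what releases all workers together
        CyclicBarrier go = new CyclicBarrier(workers + 1);
        CountDownLatch done = new CountDownLatch(workers);
        for (int i = 0; i < workers; i++) {
            new Thread(() -> {
                try {
                    go.await(); // wait for the starting gun
                    // ... the concurrent work under test goes here ...
                } catch (Exception e) {
                    Thread.currentThread().interrupt();
                } finally {
                    done.countDown(); // plays the role of onAfter() in the test
                }
            }).start();
        }
        go.await();   // release every worker at the same instant
        done.await(); // block until every worker has finished
    }
}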

Example 4 with AbstractRunnable

Use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

From the class NodeJoinControllerTests, method testSimpleMasterElection.

public void testSimpleMasterElection() throws InterruptedException, ExecutionException {
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(clusterService.state().nodes()).masterNodeId(null);
    setState(clusterService, ClusterState.builder(clusterService.state()).nodes(nodes));
    int nodeId = 0;
    final int requiredJoins = 1 + randomInt(5);
    logger.debug("--> using requiredJoins [{}]", requiredJoins);
    // initial (failing) joins shouldn't count
    for (int i = randomInt(5); i > 0; i--) {
        try {
            joinNode(newNode(nodeId++));
            fail("failed to fail node join when not a master");
        } catch (ExecutionException e) {
            assertThat(e.getCause(), instanceOf(NotMasterException.class));
        }
    }
    nodeJoinController.startElectionContext();
    final SimpleFuture electionFuture = new SimpleFuture("master election");
    final Thread masterElection = new Thread(new AbstractRunnable() {

        @Override
        public void onFailure(Exception e) {
            logger.error("unexpected error from waitToBeElectedAsMaster", e);
            electionFuture.markAsFailed(e);
        }

        @Override
        protected void doRun() throws Exception {
            nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30), new NodeJoinController.ElectionCallback() {

                @Override
                public void onElectedAsMaster(ClusterState state) {
                    assertThat("callback called with elected as master, but state disagrees", state.nodes().isLocalNodeElectedMaster(), equalTo(true));
                    electionFuture.markAsDone();
                }

                @Override
                public void onFailure(Throwable t) {
                    logger.error("unexpected error while waiting to be elected as master", t);
                    electionFuture.markAsFailed(t);
                }
            });
        }
    });
    masterElection.start();
    assertThat("election finished immediately but required joins is [" + requiredJoins + "]", electionFuture.isDone(), equalTo(false));
    final int initialJoins = randomIntBetween(0, requiredJoins - 1);
    final ArrayList<SimpleFuture> pendingJoins = new ArrayList<>();
    ArrayList<DiscoveryNode> nodesToJoin = new ArrayList<>();
    for (int i = 0; i < initialJoins; i++) {
        DiscoveryNode node = newNode(nodeId++, true);
        for (int j = 1 + randomInt(3); j > 0; j--) {
            nodesToJoin.add(node);
        }
    }
    // data nodes shouldn't count
    for (int i = 0; i < requiredJoins; i++) {
        DiscoveryNode node = newNode(nodeId++, false);
        for (int j = 1 + randomInt(3); j > 0; j--) {
            nodesToJoin.add(node);
        }
    }
    // shuffle so the join requests arrive in random order
    shuffle(nodesToJoin, random());
    logger.debug("--> joining [{}] unique master nodes. Total of [{}] join requests", initialJoins, nodesToJoin.size());
    for (DiscoveryNode node : nodesToJoin) {
        pendingJoins.add(joinNodeAsync(node));
    }
    logger.debug("--> asserting master election didn't finish yet");
    assertThat("election finished after [" + initialJoins + "] master nodes but required joins is [" + requiredJoins + "]", electionFuture.isDone(), equalTo(false));
    final int finalJoins = requiredJoins - initialJoins + randomInt(5);
    nodesToJoin.clear();
    for (int i = 0; i < finalJoins; i++) {
        DiscoveryNode node = newNode(nodeId++, true);
        for (int j = 1 + randomInt(3); j > 0; j--) {
            nodesToJoin.add(node);
        }
    }
    for (int i = 0; i < requiredJoins; i++) {
        DiscoveryNode node = newNode(nodeId++, false);
        for (int j = 1 + randomInt(3); j > 0; j--) {
            nodesToJoin.add(node);
        }
    }
    shuffle(nodesToJoin, random());
    logger.debug("--> joining [{}] nodes, with repetition a total of [{}]", finalJoins, nodesToJoin.size());
    for (DiscoveryNode node : nodesToJoin) {
        pendingJoins.add(joinNodeAsync(node));
    }
    logger.debug("--> waiting for master election to with no exception");
    electionFuture.get();
    logger.debug("--> waiting on all joins to be processed");
    for (SimpleFuture future : pendingJoins) {
        logger.debug("waiting on {}", future);
        // throw any exception
        future.get();
    }
    logger.debug("--> testing accumulation stopped");
    nodeJoinController.startElectionContext();
    nodeJoinController.stopElectionContext("test");
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) NotMasterException(org.elasticsearch.cluster.NotMasterException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ExecutionException(java.util.concurrent.ExecutionException) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)
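
Example 4 runs the blocking waitToBeElectedAsMaster call on a dedicated thread and reports the outcome through a SimpleFuture, so the test thread stays free to issue joins and assert that the future is not done too early; failures from doRun are funneled into the same future via onFailure. A minimal sketch of that shape, with CompletableFuture standing in for the test's SimpleFuture and a comment marking where the blocking call would go:

import java.util.concurrent.CompletableFuture;

public class ElectionFutureSketch {
    public static void main(String[] args) throws Exception {
        // stand-in for SimpleFuture("master election")
        CompletableFuture<Void> electionFuture = new CompletableFuture<>();
        Thread masterElection = new Thread(() -> {
            try {
                // ... blocking call such as waitToBeElectedAsMaster(...) here ...
                electionFuture.complete(null);           // like markAsDone()
            } catch (Exception e) {
                electionFuture.completeExceptionally(e); // like markAsFailed(e)
            }
        });
        masterElection.start();
        // the main thread can now drive joins and assert !electionFuture.isDone()
        electionFuture.get(); // rethrows any failure, like electionFuture.get() in the test
    }
}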

Example 5 with AbstractRunnable

Use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

From the class NodeJoinControllerTests, method testNormalConcurrentJoins.

public void testNormalConcurrentJoins() throws InterruptedException {
    Thread[] threads = new Thread[3 + randomInt(5)];
    ArrayList<DiscoveryNode> nodes = new ArrayList<>();
    nodes.add(clusterService.localNode());
    final CyclicBarrier barrier = new CyclicBarrier(threads.length);
    final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
    for (int i = 0; i < threads.length; i++) {
        final DiscoveryNode node = newNode(i);
        final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
        nodes.add(node);
        threads[i] = new Thread(new AbstractRunnable() {

            @Override
            public void onFailure(Exception e) {
                logger.error("unexpected error in join thread", e);
                backgroundExceptions.add(e);
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                for (int i = 0; i < iterations; i++) {
                    logger.debug("{} joining", node);
                    joinNode(node);
                }
            }
        }, "t_" + i);
        threads[i].start();
    }
    logger.info("--> waiting for joins to complete");
    for (Thread thread : threads) {
        thread.join();
    }
    assertNodesInCurrentState(nodes);
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) NotMasterException(org.elasticsearch.cluster.NotMasterException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ExecutionException(java.util.concurrent.ExecutionException) CyclicBarrier(java.util.concurrent.CyclicBarrier)

Aggregations

AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable): 33
IOException (java.io.IOException): 19
ExecutionException (java.util.concurrent.ExecutionException): 11
ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage): 10
BrokenBarrierException (java.util.concurrent.BrokenBarrierException): 9
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 9
CountDownLatch (java.util.concurrent.CountDownLatch): 8
CyclicBarrier (java.util.concurrent.CyclicBarrier): 8
AtomicReference (java.util.concurrent.atomic.AtomicReference): 8
TimeValue (org.elasticsearch.common.unit.TimeValue): 8
ElasticsearchException (org.elasticsearch.ElasticsearchException): 7
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 6
Supplier (org.apache.logging.log4j.util.Supplier): 6
EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException): 6
TestThreadPool (org.elasticsearch.threadpool.TestThreadPool): 5
UnknownHostException (java.net.UnknownHostException): 4
ArrayList (java.util.ArrayList): 4
AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException): 4
ClusterState (org.elasticsearch.cluster.ClusterState): 4
NotMasterException (org.elasticsearch.cluster.NotMasterException): 4