Search in sources :

Example 21 with ConnectTransportException

Use of org.elasticsearch.transport.ConnectTransportException in project crate by crate.

In the class IndexRecoveryIT, method testDisconnectsDuringRecovery.

/**
 * Tests the scenario where the recovery target successfully sends the recovery request to the source, but the channel is
 * then closed while the source is still working on the recovery process.
 * <p>
 * The send behavior registered on the red node's transport service (for requests addressed to the blue node) forwards the
 * first {@code START_RECOVERY} request and afterwards fails it with a simulated {@link ConnectTransportException}. The
 * test then checks that the cluster still becomes green and that the document count is unchanged, either for a primary
 * relocation or for a replica recovery (picked at random).
 *
 * @throws Exception if any of the cluster operations or assertions fail
 */
@Test
public void testDisconnectsDuringRecovery() throws Exception {
    boolean primaryRelocation = randomBoolean();
    final String indexName = IndexParts.toIndexName(sqlExecutor.getCurrentSchema(), "test", null);
    final Settings nodeSettings = Settings.builder()
        .put(
            RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(),
            TimeValue.timeValueMillis(randomIntBetween(0, 100)))
        .build();
    TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));

    // start a master node
    String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
    final String blueNodeName = internalCluster().startNode(
        Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
    final String redNodeName = internalCluster().startNode(
        Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());

    // the table is created with a single shard and no replicas, restricted to the blue node
    execute("CREATE TABLE test (id int) CLUSTERED INTO 1 SHARDS " + "WITH (" + " number_of_replicas=0," + " \"routing.allocation.include.color\" = 'blue'" + ")");

    int numDocs = scaledRandomIntBetween(25, 250);
    var args = new Object[numDocs][];
    for (int i = 0; i < numDocs; i++) {
        args[i] = new Object[] { i };
    }
    execute("INSERT INTO test (id) VALUES (?)", args);
    ensureGreen();
    refresh();
    var searchResponse = execute("SELECT COUNT(*) FROM test");
    assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));

    MockTransportService masterTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
    MockTransportService blueMockTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
    MockTransportService redMockTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);

    redMockTransportService.addSendBehavior(blueMockTransportService, new StubbableTransport.SendRequestBehavior() {

        // counts START_RECOVERY requests so that only the very first one is disrupted
        private final AtomicInteger count = new AtomicInteger();

        @Override
        public void sendRequest(Transport.Connection connection,
                                long requestId,
                                String action,
                                TransportRequest request,
                                TransportRequestOptions options) throws IOException {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
                // ensures that it's considered as valid recovery attempt by source
                try {
                    assertBusy(() -> assertThat(
                        "Expected there to be some initializing shards",
                        client(blueNodeName).admin().cluster().prepareState().setLocal(true).get()
                            .getState().getRoutingTable().index(indexName).shard(0).getAllInitializingShards(),
                        not(empty())));
                } catch (InterruptedException e) {
                    // keep the interrupt status visible to the caller instead of losing it in the wrapper
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
                // the request is really sent; only afterwards the disconnect is simulated
                connection.sendRequest(requestId, action, request, options);
                try {
                    Thread.sleep(disconnectAfterDelay.millis());
                } catch (InterruptedException e) {
                    // restore the interrupt flag before propagating
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                throw new ConnectTransportException(connection.getNode(), "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
            } else {
                connection.sendRequest(requestId, action, request, options);
            }
        }
    });

    // records whether the blue node has sent a FINALIZE request to the red node
    final AtomicBoolean finalized = new AtomicBoolean();
    blueMockTransportService.addSendBehavior(redMockTransportService, (connection, requestId, action, request, options) -> {
        logger.info("--> sending request {} on {}", action, connection.getNode());
        if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
            finalized.set(true);
        }
        connection.sendRequest(requestId, action, request, options);
    });

    for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
        mockTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            // a shard-failed request to the master is only tolerated for a primary relocation that was already finalized
            if ((primaryRelocation && finalized.get()) == false) {
                assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
            }
            connection.sendRequest(requestId, action, request, options);
        });
    }

    if (primaryRelocation) {
        logger.info("--> starting primary relocation recovery from blue to red");
        execute("ALTER TABLE test SET (" + " \"routing.allocation.include.color\" = 'red'" + ")");
        // also waits for relocation / recovery to complete
        ensureGreen();
        // if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
        // source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
        // shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
        // will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
        // searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
        refresh();
    } else {
        logger.info("--> starting replica recovery from blue to red");
        execute("ALTER TABLE test SET (" + " number_of_replicas=1," + " \"routing.allocation.include.color\" = 'red,blue'" + ")");
        ensureGreen();
    }

    // the document count must stay stable across repeated queries
    for (int i = 0; i < 10; i++) {
        searchResponse = execute("SELECT COUNT(*) FROM test");
        assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));
    }
}
Also used : TransportRequest(org.elasticsearch.transport.TransportRequest) MockTransportService(org.elasticsearch.test.transport.MockTransportService) IOException(java.io.IOException) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) IOException(java.io.IOException) EsRejectedExecutionException(org.elasticsearch.common.util.concurrent.EsRejectedExecutionException) MapperParsingException(org.elasticsearch.index.mapper.MapperParsingException) CircuitBreakingException(org.elasticsearch.common.breaker.CircuitBreakingException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransportService(org.elasticsearch.transport.TransportService) MockTransportService(org.elasticsearch.test.transport.MockTransportService) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) Transport(org.elasticsearch.transport.Transport) StubbableTransport(org.elasticsearch.test.transport.StubbableTransport) StubbableTransport(org.elasticsearch.test.transport.StubbableTransport) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) TimeValue(io.crate.common.unit.TimeValue) Test(org.junit.Test)

Example 22 with ConnectTransportException

Use of org.elasticsearch.transport.ConnectTransportException in project crate by crate.

In the class MockTransportService, method addUnresponsiveRule.

/**
 * Adds a rule that holds back every send request to the given address and fails connection attempts to it, simulating
 * an unresponsive node. The rule is active for {@code duration}, measured from the moment it is added.
 * <p>
 * While the rule is active:
 * <ul>
 *   <li>send requests are cloned and only sent once the remaining time has elapsed or the rule is cleared,
 *       whichever happens first;</li>
 *   <li>connection attempts block for the remaining time (then connect normally) if that is shorter than the default
 *       connect timeout, otherwise they block for the default connect timeout and fail with a
 *       {@link ConnectTransportException}.</li>
 * </ul>
 * Once the duration has elapsed, requests and connection attempts are passed through unchanged.
 *
 * @param transportAddress the address whose traffic should be treated as unresponsive
 * @param duration the amount of time to delay sending and connecting.
 */
public void addUnresponsiveRule(TransportAddress transportAddress, final TimeValue duration) {
    final long startTime = System.currentTimeMillis();
    // Remaining time of the rule. Clamped at zero so that an expired rule never builds a negative TimeValue;
    // all consumers below only distinguish "<= 0" (expired) from "> 0" (still active).
    Supplier<TimeValue> delaySupplier =
        () -> new TimeValue(Math.max(0L, duration.millis() - (System.currentTimeMillis() - startTime)));

    transport().addConnectBehavior(transportAddress, new StubbableTransport.OpenConnectionBehavior() {

        // released in clearCallback() so that blocked connection attempts stop waiting early
        private final CountDownLatch stopLatch = new CountDownLatch(1);

        @Override
        public void openConnection(Transport transport,
                                   DiscoveryNode discoveryNode,
                                   ConnectionProfile profile,
                                   ActionListener<Transport.Connection> listener) {
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                original.openConnection(discoveryNode, profile, listener);
                return;
            }
            // TODO: Replace with proper setting
            TimeValue connectingTimeout = TransportSettings.CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
            try {
                if (delay.millis() < connectingTimeout.millis()) {
                    // the rule expires before the connect timeout: wait it out, then connect for real
                    stopLatch.await(delay.millis(), TimeUnit.MILLISECONDS);
                    original.openConnection(discoveryNode, profile, listener);
                } else {
                    // the rule outlasts the connect timeout: wait for the timeout and report a failure
                    stopLatch.await(connectingTimeout.millis(), TimeUnit.MILLISECONDS);
                    listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
                }
            } catch (InterruptedException e) {
                // restore the interrupt status so callers further up the stack can still observe it
                Thread.currentThread().interrupt();
                listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
            }
        }

        @Override
        public void clearCallback() {
            stopLatch.countDown();
        }
    });

    transport().addSendBehavior(transportAddress, new StubbableTransport.SendRequestBehavior() {

        private final Queue<Runnable> requestsToSendWhenCleared = new LinkedBlockingDeque<>();

        // guarded by synchronized (this)
        private boolean cleared = false;

        @Override
        public void sendRequest(Transport.Connection connection,
                                long requestId,
                                String action,
                                TransportRequest request,
                                TransportRequestOptions options) throws IOException {
            // delayed sending - even if larger than the request timeout, to simulate a potential late response from the target node
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                connection.sendRequest(requestId, action, request, options);
                return;
            }

            // poor man's request cloning: serialize the request and read it back through the registered handler
            RequestHandlerRegistry reg = MockTransportService.this.getRequestHandler(action);
            BytesStreamOutput bStream = new BytesStreamOutput();
            request.writeTo(bStream);
            final TransportRequest clonedRequest = reg.newRequest(bStream.bytes().streamInput());

            Runnable runnable = new AbstractRunnable() {

                // the runnable may be triggered both by the scheduler and by clearCallback(); send only once
                private final AtomicBoolean requestSent = new AtomicBoolean();

                @Override
                public void onFailure(Exception e) {
                    LOGGER.debug("failed to send delayed request", e);
                }

                @Override
                protected void doRun() throws IOException {
                    if (requestSent.compareAndSet(false, true)) {
                        connection.sendRequest(requestId, action, clonedRequest, options);
                    }
                }
            };

            // store the request to send it once the rule is cleared.
            synchronized (this) {
                if (cleared) {
                    runnable.run();
                } else {
                    requestsToSendWhenCleared.add(runnable);
                    threadPool.schedule(runnable, delay, ThreadPool.Names.GENERIC);
                }
            }
        }

        @Override
        public void clearCallback() {
            synchronized (this) {
                assert cleared == false;
                cleared = true;
                requestsToSendWhenCleared.forEach(Runnable::run);
            }
        }
    });
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) TimeValue(io.crate.common.unit.TimeValue) TransportRequest(org.elasticsearch.transport.TransportRequest) ConnectionProfile(org.elasticsearch.transport.ConnectionProfile) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) IOException(java.io.IOException) RequestHandlerRegistry(org.elasticsearch.transport.RequestHandlerRegistry) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) Transport(org.elasticsearch.transport.Transport) TcpTransport(org.elasticsearch.transport.TcpTransport) Netty4Transport(org.elasticsearch.transport.netty4.Netty4Transport)

Aggregations

ConnectTransportException (org.elasticsearch.transport.ConnectTransportException)22 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)12 IOException (java.io.IOException)11 TransportRequest (org.elasticsearch.transport.TransportRequest)10 TransportRequestOptions (org.elasticsearch.transport.TransportRequestOptions)9 TransportService (org.elasticsearch.transport.TransportService)8 ClusterState (org.elasticsearch.cluster.ClusterState)7 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)6 Settings (org.elasticsearch.common.settings.Settings)6 TimeValue (org.elasticsearch.common.unit.TimeValue)6 MockTransportService (org.elasticsearch.test.transport.MockTransportService)5 TransportException (org.elasticsearch.transport.TransportException)5 TimeValue (io.crate.common.unit.TimeValue)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 ElasticsearchException (org.elasticsearch.ElasticsearchException)4 AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable)4 TransportResponseHandler (org.elasticsearch.transport.TransportResponseHandler)4 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 CountDownLatch (java.util.concurrent.CountDownLatch)3