Use of org.elasticsearch.transport.ConnectTransportException in project crate by crate.
In the class IndexRecoveryIT, method testDisconnectsDuringRecovery.
/**
 * Tests the scenario where the recovery target successfully sends the recovery request to the source, but the
 * channel is then closed while the source is still working on the recovery process.
 * <p>
 * A table with a single shard and no replicas is created on the "blue" node and filled with documents. The first
 * {@code START_RECOVERY} request sent from the "red" node to the "blue" node is forwarded as usual and then answered
 * with a simulated {@link ConnectTransportException}. Depending on a random choice, the recovery is triggered either
 * by relocating the primary to the "red" node or by adding a replica on it.
 * <p>
 * While the recovery runs, neither data node may send a shard-failed action to the master, unless this is a primary
 * relocation for which the finalize request has already been sent. After the cluster is green again, the row count
 * must still match the number of inserted documents.
 */
@Test
public void testDisconnectsDuringRecovery() throws Exception {
    boolean primaryRelocation = randomBoolean();
    final String indexName = IndexParts.toIndexName(sqlExecutor.getCurrentSchema(), "test", null);
    final Settings nodeSettings = Settings.builder()
        .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(randomIntBetween(0, 100)))
        .build();
    TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));

    // start a master node
    String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
    final String blueNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
    final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());

    // the table initially lives on the blue node only
    execute("CREATE TABLE test (id int) CLUSTERED INTO 1 SHARDS " + "WITH (" + " number_of_replicas=0," + " \"routing.allocation.include.color\" = 'blue'" + ")");
    int numDocs = scaledRandomIntBetween(25, 250);
    var args = new Object[numDocs][];
    for (int i = 0; i < numDocs; i++) {
        args[i] = new Object[] { i };
    }
    execute("INSERT INTO test (id) VALUES (?)", args);
    ensureGreen();
    refresh();
    var searchResponse = execute("SELECT COUNT(*) FROM test");
    assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));

    MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
    MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
    MockTransportService redMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);

    // red -> blue: let the first START_RECOVERY request through, then pretend the connection broke
    redMockTransportService.addSendBehavior(blueMockTransportService, new StubbableTransport.SendRequestBehavior() {
        private final AtomicInteger count = new AtomicInteger();

        @Override
        public void sendRequest(Transport.Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
                // ensures that it's considered as valid recovery attempt by source
                try {
                    assertBusy(() -> assertThat(
                        "Expected there to be some initializing shards",
                        client(blueNodeName).admin().cluster().prepareState().setLocal(true).get().getState().getRoutingTable().index(indexName).shard(0).getAllInitializingShards(),
                        not(empty())));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
                connection.sendRequest(requestId, action, request, options);
                try {
                    Thread.sleep(disconnectAfterDelay.millis());
                } catch (InterruptedException e) {
                    // restore the interrupt flag so code further up the stack can still observe the interruption
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                throw new ConnectTransportException(connection.getNode(), "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
            } else {
                connection.sendRequest(requestId, action, request, options);
            }
        }
    });

    // blue -> red: remember once the finalize request of the recovery has been sent
    final AtomicBoolean finalized = new AtomicBoolean();
    blueMockTransportService.addSendBehavior(redMockTransportService, (connection, requestId, action, request, options) -> {
        logger.info("--> sending request {} on {}", action, connection.getNode());
        if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
            finalized.set(true);
        }
        connection.sendRequest(requestId, action, request, options);
    });

    // data nodes -> master: a shard failure is only tolerated for a primary relocation that was already finalized
    for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
        mockTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            if ((primaryRelocation && finalized.get()) == false) {
                assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
            }
            connection.sendRequest(requestId, action, request, options);
        });
    }

    if (primaryRelocation) {
        logger.info("--> starting primary relocation recovery from blue to red");
        execute("ALTER TABLE test SET (" + " \"routing.allocation.include.color\" = 'red'" + ")");
        // also waits for relocation / recovery to complete
        ensureGreen();
        // if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
        // source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
        // shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
        // will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
        // searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
        refresh();
    } else {
        logger.info("--> starting replica recovery from blue to red");
        execute("ALTER TABLE test SET (" + " number_of_replicas=1," + " \"routing.allocation.include.color\" = 'red,blue'" + ")");
        ensureGreen();
    }

    // repeat the count query several times; every run must see all inserted documents
    for (int i = 0; i < 10; i++) {
        searchResponse = execute("SELECT COUNT(*) FROM test");
        assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));
    }
}
Use of org.elasticsearch.transport.ConnectTransportException in project crate by crate.
In the class MockTransportService, method addUnresponsiveRule.
/**
 * Adds a rule that holds back each send request to the given address, simulating an unresponsive node,
 * and that delays or fails attempts to connect to it while the rule is in effect.
 * <p>
 * Connecting: if the remaining delay is shorter than the default connect timeout, the connection is opened after
 * waiting for the remaining delay (or until the rule is cleared). Otherwise the call waits for the connect timeout
 * (or until the rule is cleared) and then fails with a {@link ConnectTransportException}.
 * <p>
 * Sending: each request is copied by serializing it and reading it back, then queued and scheduled to be sent once
 * the remaining delay has elapsed. When the rule is cleared, all queued requests are run; each request is sent at
 * most once. After {@code duration} has elapsed, connects and sends are passed through unchanged.
 *
 * @param transportAddress the address the connect and send behaviors are registered for
 * @param duration the amount of time to delay sending and connecting.
 */
public void addUnresponsiveRule(TransportAddress transportAddress, final TimeValue duration) {
    // System.nanoTime() is monotonic, so the remaining delay is not affected by wall-clock adjustments
    final long startNanos = System.nanoTime();
    // Remaining delay, clamped at zero: the consumers below only test "<= 0", and clamping avoids constructing a
    // negative TimeValue once the rule has expired.
    // NOTE(review): some TimeValue implementations reject negative durations - confirm for the one used here.
    Supplier<TimeValue> delaySupplier = () -> {
        long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        return new TimeValue(Math.max(0L, duration.millis() - elapsedMillis));
    };

    transport().addConnectBehavior(transportAddress, new StubbableTransport.OpenConnectionBehavior() {
        // released by clearCallback() so that waiting connection attempts stop blocking early
        private final CountDownLatch stopLatch = new CountDownLatch(1);

        @Override
        public void openConnection(Transport transport, DiscoveryNode discoveryNode, ConnectionProfile profile, ActionListener<Transport.Connection> listener) {
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                // the rule has expired: connect normally
                original.openConnection(discoveryNode, profile, listener);
                return;
            }
            // TODO: Replace with proper setting
            TimeValue connectingTimeout = TransportSettings.CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
            try {
                if (delay.millis() < connectingTimeout.millis()) {
                    // the rule ends before the connect timeout would hit: wait it out, then connect
                    stopLatch.await(delay.millis(), TimeUnit.MILLISECONDS);
                    original.openConnection(discoveryNode, profile, listener);
                } else {
                    // the rule outlasts the connect timeout: wait for the timeout, then report a failed connect
                    stopLatch.await(connectingTimeout.millis(), TimeUnit.MILLISECONDS);
                    listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
                }
            } catch (InterruptedException e) {
                // restore the interrupt flag so the calling thread can still observe the interruption
                Thread.currentThread().interrupt();
                listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
            }
        }

        @Override
        public void clearCallback() {
            stopLatch.countDown();
        }
    });

    transport().addSendBehavior(transportAddress, new StubbableTransport.SendRequestBehavior() {
        private final Queue<Runnable> requestsToSendWhenCleared = new LinkedBlockingDeque<>();
        // guarded by synchronized (this)
        private boolean cleared = false;

        @Override
        public void sendRequest(Transport.Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
            // delayed sending - even if larger than the request timeout to simulate a potential late response from target node
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                connection.sendRequest(requestId, action, request, options);
                return;
            }

            // poor mans request cloning...
            RequestHandlerRegistry reg = MockTransportService.this.getRequestHandler(action);
            BytesStreamOutput bStream = new BytesStreamOutput();
            request.writeTo(bStream);
            final TransportRequest clonedRequest = reg.newRequest(bStream.bytes().streamInput());

            Runnable runnable = new AbstractRunnable() {
                // the runnable is both queued and scheduled; this flag makes sure the request goes out only once
                private final AtomicBoolean requestSent = new AtomicBoolean();

                @Override
                public void onFailure(Exception e) {
                    LOGGER.debug("failed to send delayed request", e);
                }

                @Override
                protected void doRun() throws IOException {
                    if (requestSent.compareAndSet(false, true)) {
                        connection.sendRequest(requestId, action, clonedRequest, options);
                    }
                }
            };

            // store the request to send it once the rule is cleared.
            synchronized (this) {
                if (cleared) {
                    runnable.run();
                } else {
                    requestsToSendWhenCleared.add(runnable);
                    threadPool.schedule(runnable, delay, ThreadPool.Names.GENERIC);
                }
            }
        }

        @Override
        public void clearCallback() {
            synchronized (this) {
                assert cleared == false;
                cleared = true;
                requestsToSendWhenCleared.forEach(Runnable::run);
            }
        }
    });
}
Aggregations