use of io.crate.common.unit.TimeValue in project crate by crate.
the class IndexRecoveryIT method testDisconnectsDuringRecovery.
/**
 * Tests the scenario where the recovery target successfully sends the recovery request to the source, but the
 * channel gets closed while the source is still working on the recovery process.
 * <p>
 * The table is first allocated on the "blue" node only. Then, depending on a random choice, either the primary is
 * relocated to the "red" node or a replica is added on it. The first {@code START_RECOVERY} request sent from red
 * to blue is forwarded to blue and afterwards answered with a simulated {@link ConnectTransportException}. The test
 * verifies that the cluster still becomes green and that all documents remain visible.
 */
@Test
public void testDisconnectsDuringRecovery() throws Exception {
    boolean primaryRelocation = randomBoolean();
    final String indexName = IndexParts.toIndexName(sqlExecutor.getCurrentSchema(), "test", null);
    final Settings nodeSettings = Settings.builder()
        .put(
            RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(),
            TimeValue.timeValueMillis(randomIntBetween(0, 100)))
        .build();
    TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));

    // start a master node
    String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
    final String blueNodeName = internalCluster().startNode(
        Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
    final String redNodeName = internalCluster().startNode(
        Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());

    // the single shard is pinned to the blue node via the allocation filter
    execute("CREATE TABLE test (id int) CLUSTERED INTO 1 SHARDS "
        + "WITH ("
        + " number_of_replicas=0,"
        + " \"routing.allocation.include.color\" = 'blue'"
        + ")");

    int numDocs = scaledRandomIntBetween(25, 250);
    var args = new Object[numDocs][];
    for (int i = 0; i < numDocs; i++) {
        args[i] = new Object[] { i };
    }
    execute("INSERT INTO test (id) VALUES (?)", args);
    ensureGreen();
    refresh();

    var searchResponse = execute("SELECT COUNT(*) FROM test");
    assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));

    MockTransportService masterTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
    MockTransportService blueMockTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
    MockTransportService redMockTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);

    // red -> blue: let the first START_RECOVERY request through, then simulate a disconnect
    redMockTransportService.addSendBehavior(blueMockTransportService, new StubbableTransport.SendRequestBehavior() {

        // counts START_RECOVERY requests so that only the first one is disrupted
        private final AtomicInteger count = new AtomicInteger();

        @Override
        public void sendRequest(Transport.Connection connection,
                                long requestId,
                                String action,
                                TransportRequest request,
                                TransportRequestOptions options) throws IOException {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
                // ensures that it's considered as valid recovery attempt by source
                try {
                    assertBusy(() -> assertThat(
                        "Expected there to be some initializing shards",
                        client(blueNodeName).admin().cluster().prepareState().setLocal(true).get()
                            .getState().getRoutingTable().index(indexName).shard(0).getAllInitializingShards(),
                        not(empty())));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
                connection.sendRequest(requestId, action, request, options);
                try {
                    Thread.sleep(disconnectAfterDelay.millis());
                } catch (InterruptedException e) {
                    // restore the interrupt status so that code further up the stack can still observe it
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                throw new ConnectTransportException(
                    connection.getNode(),
                    "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
            } else {
                connection.sendRequest(requestId, action, request, options);
            }
        }
    });

    // blue -> red: remember whether the recovery reached the FINALIZE step
    final AtomicBoolean finalized = new AtomicBoolean();
    blueMockTransportService.addSendBehavior(redMockTransportService, (connection, requestId, action, request, options) -> {
        logger.info("--> sending request {} on {}", action, connection.getNode());
        if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
            finalized.set(true);
        }
        connection.sendRequest(requestId, action, request, options);
    });

    // red/blue -> master: a shard-failed request is only tolerated for a primary relocation that was already finalized
    for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
        mockTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
            logger.info("--> sending request {} on {}", action, connection.getNode());
            if ((primaryRelocation && finalized.get()) == false) {
                assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
            }
            connection.sendRequest(requestId, action, request, options);
        });
    }

    if (primaryRelocation) {
        logger.info("--> starting primary relocation recovery from blue to red");
        execute("ALTER TABLE test SET ("
            + " \"routing.allocation.include.color\" = 'red'"
            + ")");
        // also waits for relocation / recovery to complete
        ensureGreen();
        // if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
        // source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
        // shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
        // will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
        // searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
        refresh();
    } else {
        logger.info("--> starting replica recovery from blue to red");
        execute("ALTER TABLE test SET ("
            + " number_of_replicas=1,"
            + " \"routing.allocation.include.color\" = 'red,blue'"
            + ")");
        ensureGreen();
    }

    // the document count must be stable across repeated queries after the recovery
    for (int i = 0; i < 10; i++) {
        searchResponse = execute("SELECT COUNT(*) FROM test");
        assertThat((long) searchResponse.rows()[0][0], is((long) numDocs));
    }
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class MockTransportService method addUnresponsiveRule.
/**
 * Adds a rule that simulates an unresponsive node at the given address: send requests are held back and
 * connection attempts are delayed or failed while the rule is in effect.
 * <p>
 * The remaining delay is {@code duration} minus the time elapsed since this method was called. Once it drops to
 * zero or below, connections and requests pass through unchanged. Requests that were held back are sent when the
 * delay expires or when the rule is cleared, whichever happens first.
 *
 * @param transportAddress the address whose connections and send requests are affected by the rule
 * @param duration the amount of time to delay sending and connecting.
 */
public void addUnresponsiveRule(TransportAddress transportAddress, final TimeValue duration) {
    final long startTime = System.currentTimeMillis();
    // remaining time until the rule expires; becomes zero or negative once the duration has elapsed
    Supplier<TimeValue> delaySupplier = () -> new TimeValue(duration.millis() - (System.currentTimeMillis() - startTime));

    transport().addConnectBehavior(transportAddress, new StubbableTransport.OpenConnectionBehavior() {

        // released by clearCallback() so that pending connection attempts stop waiting
        private final CountDownLatch stopLatch = new CountDownLatch(1);

        @Override
        public void openConnection(Transport transport,
                                   DiscoveryNode discoveryNode,
                                   ConnectionProfile profile,
                                   ActionListener<Transport.Connection> listener) {
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                original.openConnection(discoveryNode, profile, listener);
                return;
            }
            // TODO: Replace with proper setting
            TimeValue connectingTimeout = TransportSettings.CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
            try {
                if (delay.millis() < connectingTimeout.millis()) {
                    // the rule expires before the connect timeout: wait it out, then connect for real
                    stopLatch.await(delay.millis(), TimeUnit.MILLISECONDS);
                    original.openConnection(discoveryNode, profile, listener);
                } else {
                    // the rule outlasts the connect timeout: wait for the timeout, then report a failure
                    stopLatch.await(connectingTimeout.millis(), TimeUnit.MILLISECONDS);
                    listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
                }
            } catch (InterruptedException e) {
                // restore the interrupt status so that the calling thread can still observe the interruption
                Thread.currentThread().interrupt();
                listener.onFailure(new ConnectTransportException(discoveryNode, "UNRESPONSIVE: simulated"));
            }
        }

        @Override
        public void clearCallback() {
            stopLatch.countDown();
        }
    });

    transport().addSendBehavior(transportAddress, new StubbableTransport.SendRequestBehavior() {

        private final Queue<Runnable> requestsToSendWhenCleared = new LinkedBlockingDeque<>();
        // guarded by synchronized (this)
        private boolean cleared = false;

        @Override
        public void sendRequest(Transport.Connection connection,
                                long requestId,
                                String action,
                                TransportRequest request,
                                TransportRequestOptions options) throws IOException {
            // delayed sending - even if larger than the request timeout, to simulate a potential late response from the target node
            TimeValue delay = delaySupplier.get();
            if (delay.millis() <= 0) {
                connection.sendRequest(requestId, action, request, options);
                return;
            }

            // poor mans request cloning...
            RequestHandlerRegistry reg = MockTransportService.this.getRequestHandler(action);
            BytesStreamOutput bStream = new BytesStreamOutput();
            request.writeTo(bStream);
            final TransportRequest clonedRequest = reg.newRequest(bStream.bytes().streamInput());

            Runnable runnable = new AbstractRunnable() {

                // the runnable is both queued and scheduled; this flag makes sure the request is sent only once
                private final AtomicBoolean requestSent = new AtomicBoolean();

                @Override
                public void onFailure(Exception e) {
                    LOGGER.debug("failed to send delayed request", e);
                }

                @Override
                protected void doRun() throws IOException {
                    if (requestSent.compareAndSet(false, true)) {
                        connection.sendRequest(requestId, action, clonedRequest, options);
                    }
                }
            };

            // store the request to send it once the rule is cleared.
            synchronized (this) {
                if (cleared) {
                    runnable.run();
                } else {
                    requestsToSendWhenCleared.add(runnable);
                    threadPool.schedule(runnable, delay, ThreadPool.Names.GENERIC);
                }
            }
        }

        @Override
        public void clearCallback() {
            synchronized (this) {
                assert cleared == false;
                cleared = true;
                requestsToSendWhenCleared.forEach(Runnable::run);
            }
        }
    });
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class ElectionSchedulerFactoryTests method assertElectionSchedule.
/**
 * Drives the election scheduler on the deterministic task queue and checks the delay before every election.
 * <p>
 * The first election has to respect the random initial grace period and may not exceed it by more than
 * {@code initialTimeout} or {@code maxTimeout}. Every later election is bounded by the linear back-off and by
 * {@code maxTimeout}. The loop ends once at least 1000 elections have run and a delay of at least 99% of
 * {@code maxTimeout} has been observed.
 */
private void assertElectionSchedule(final DeterministicTaskQueue deterministicTaskQueue,
                                    final ElectionSchedulerFactory electionSchedulerFactory,
                                    final long initialTimeout,
                                    final long backOffTime,
                                    final long maxTimeout,
                                    final long duration) {
    final TimeValue gracePeriod = randomGracePeriod();
    final AtomicBoolean electionTriggered = new AtomicBoolean();

    try (Releasable ignored = electionSchedulerFactory.startElectionScheduler(
            gracePeriod, () -> assertTrue(electionTriggered.compareAndSet(false, true)))) {

        long previousFinishTime = deterministicTaskQueue.getCurrentTimeMillis();

        for (int round = 1; ; round++) {
            // run the queue, advancing time only when nothing is runnable, until the scheduler fires
            while (!electionTriggered.get()) {
                if (!deterministicTaskQueue.hasRunnableTasks()) {
                    deterministicTaskQueue.advanceTime();
                }
                deterministicTaskQueue.runAllRunnableTasks();
            }
            assertTrue(electionTriggered.compareAndSet(true, false));

            final long startTime = deterministicTaskQueue.getCurrentTimeMillis();
            final long delay = startTime - previousFinishTime;

            if (round == 1) {
                // Check grace period
                assertThat(delay, greaterThanOrEqualTo(gracePeriod.millis()));
                // Check upper bound
                assertThat(delay, lessThanOrEqualTo(initialTimeout + gracePeriod.millis()));
                assertThat(delay, lessThanOrEqualTo(maxTimeout + gracePeriod.millis()));
            } else {
                // Check upper bound
                assertThat(delay, lessThanOrEqualTo(initialTimeout + backOffTime * (round - 1)));
                assertThat(delay, lessThanOrEqualTo(maxTimeout));
                // Run until we get a delay close to the maximum to show that backing off does work
                if (round >= 1000 && delay >= maxTimeout * 0.99) {
                    break;
                }
            }

            previousFinishTime = startTime + duration;
        }
    }

    // after the scheduler is closed, draining the queue must not start another election
    deterministicTaskQueue.runAllTasks();
    assertFalse(electionTriggered.get());
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class IndexSettingsTests method testUpdateTranslogRetentionSettingsWithSoftDeletesDisabled.
/**
 * With soft deletes disabled, checks that the translog retention age and size reported by {@link IndexSettings}
 * match what was configured, both at construction time and after a metadata update. Each setting is randomly
 * either left untouched or set to a random value in both phases.
 */
@Test
public void testUpdateTranslogRetentionSettingsWithSoftDeletesDisabled() {
    Settings.Builder initialBuilder = Settings.builder()
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false)
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);

    // value the test expects when the age setting is not given explicitly
    TimeValue expectedAge = TimeValue.timeValueHours(12);
    if (randomBoolean()) {
        if (randomBoolean()) {
            expectedAge = TimeValue.MINUS_ONE;
        } else {
            expectedAge = TimeValue.timeValueSeconds(randomIntBetween(0, 60));
        }
        initialBuilder.put(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey(), expectedAge);
    }

    // value the test expects when the size setting is not given explicitly
    ByteSizeValue expectedSize = new ByteSizeValue(512, ByteSizeUnit.MB);
    if (randomBoolean()) {
        if (randomBoolean()) {
            expectedSize = new ByteSizeValue(-1);
        } else {
            expectedSize = new ByteSizeValue(randomIntBetween(0, 1024));
        }
        initialBuilder.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), expectedSize);
    }

    final Settings initialSettings = initialBuilder.build();
    IndexMetadata initialMetadata = newIndexMeta("index", initialSettings);
    IndexSettings indexSettings = new IndexSettings(initialMetadata, Settings.EMPTY);
    assertThat(indexSettings.getTranslogRetentionAge(), equalTo(expectedAge));
    assertThat(indexSettings.getTranslogRetentionSize(), equalTo(expectedSize));

    // second phase: start from the initial settings and randomly override either value again
    Settings.Builder updatedBuilder = Settings.builder().put(initialSettings);
    if (randomBoolean()) {
        if (randomBoolean()) {
            expectedAge = TimeValue.MINUS_ONE;
        } else {
            expectedAge = TimeValue.timeValueSeconds(randomIntBetween(0, 60));
        }
        updatedBuilder.put(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey(), expectedAge);
    }
    if (randomBoolean()) {
        if (randomBoolean()) {
            expectedSize = new ByteSizeValue(-1);
        } else {
            expectedSize = new ByteSizeValue(randomIntBetween(0, 1024));
        }
        updatedBuilder.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), expectedSize);
    }

    IndexMetadata updatedMetadata = newIndexMeta("index", updatedBuilder.build());
    indexSettings.updateIndexMetadata(updatedMetadata);
    assertThat(indexSettings.getTranslogRetentionAge(), equalTo(expectedAge));
    assertThat(indexSettings.getTranslogRetentionSize(), equalTo(expectedSize));
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class TransportHandshakerTests method testHandshakeTimeout.
/**
 * Sends a handshake with a 100ms timeout and never delivers a response. The future is then expected to fail
 * with a {@link ConnectTransportException} mentioning {@code handshake_timeout}, and no handler may remain
 * registered for the request id.
 */
@Test
public void testHandshakeTimeout() throws IOException {
    final long requestId = randomLongBetween(1, 10);
    final PlainActionFuture<Version> handshakeResult = PlainActionFuture.newFuture();
    final TimeValue timeout = new TimeValue(100, TimeUnit.MILLISECONDS);

    handshaker.sendHandshake(requestId, node, channel, timeout, handshakeResult);

    // the handshake request itself must have been handed to the sender
    verify(requestSender).sendRequest(node, channel, requestId, Version.CURRENT.minimumCompatibilityVersion());

    ConnectTransportException timeoutFailure =
        expectThrows(ConnectTransportException.class, () -> handshakeResult.actionGet());
    assertThat(timeoutFailure.getMessage(), containsString("handshake_timeout"));

    // no handler may be left behind for this request id
    assertNull(handshaker.removeHandlerForHandshake(requestId));
}
Aggregations