use of org.elasticsearch.test.transport.MockTransportService in project elasticsearch by elastic.
the class IndexWithShadowReplicasIT method testPrimaryRelocationWhereRecoveryFails.
public void testPrimaryRelocationWhereRecoveryFails() throws Exception {
Path dataPath = createTempDir();
Settings nodeSettings = Settings.builder().put("node.add_lock_id_to_custom_path", false).put(Environment.PATH_SHARED_DATA_SETTING.getKey(), dataPath).build();
String node1 = internalCluster().startNode(nodeSettings);
final String IDX = "test";
Settings idxSettings = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString()).put(IndexMetaData.SETTING_SHADOW_REPLICAS, true).put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true).build();
prepareCreate(IDX).setSettings(idxSettings).addMapping("doc", "foo", "type=text").get();
// Node1 has the primary, now node2 has the replica
String node2 = internalCluster().startNode(nodeSettings);
ensureGreen(IDX);
flushAndRefresh(IDX);
String node3 = internalCluster().startNode(nodeSettings);
final AtomicInteger counter = new AtomicInteger(0);
final CountDownLatch started = new CountDownLatch(1);
final int numPhase1Docs = scaledRandomIntBetween(25, 200);
final int numPhase2Docs = scaledRandomIntBetween(25, 200);
final int numPhase3Docs = scaledRandomIntBetween(25, 200);
final CountDownLatch phase1finished = new CountDownLatch(1);
final CountDownLatch phase2finished = new CountDownLatch(1);
final CountDownLatch phase3finished = new CountDownLatch(1);
final AtomicBoolean keepFailing = new AtomicBoolean(true);
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, node1));
mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, node3), new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
if (keepFailing.get() && action.equals(PeerRecoveryTargetService.Actions.TRANSLOG_OPS)) {
logger.info("--> failing translog ops");
throw new ElasticsearchException("failing on purpose");
}
super.sendRequest(connection, requestId, action, request, options);
}
});
Thread thread = new Thread() {
@Override
public void run() {
started.countDown();
while (counter.get() < (numPhase1Docs + numPhase2Docs + numPhase3Docs)) {
final IndexResponse indexResponse = client().prepareIndex(IDX, "doc", Integer.toString(counter.incrementAndGet())).setSource("foo", "bar").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
final int docCount = counter.get();
if (docCount == numPhase1Docs) {
phase1finished.countDown();
} else if (docCount == (numPhase1Docs + numPhase2Docs)) {
phase2finished.countDown();
}
}
logger.info("--> stopping indexing thread");
phase3finished.countDown();
}
};
thread.start();
started.await();
// wait for a certain number of documents to be indexed
phase1finished.await();
logger.info("--> excluding {} from allocation", node1);
// now prevent primary from being allocated on node 1 move to node_3
Settings build = Settings.builder().put("index.routing.allocation.exclude._name", node1).build();
client().admin().indices().prepareUpdateSettings(IDX).setSettings(build).execute().actionGet();
// wait for more documents to be indexed post-recovery, also waits for
// indexing thread to stop
phase2finished.await();
// stop failing
keepFailing.set(false);
// wait for more docs to be indexed
phase3finished.await();
ensureGreen(IDX);
thread.join();
logger.info("--> performing query");
flushAndRefresh();
SearchResponse resp = client().prepareSearch(IDX).setQuery(matchAllQuery()).get();
assertHitCount(resp, counter.get());
}
use of org.elasticsearch.test.transport.MockTransportService in project elasticsearch by elastic.
the class ExceptionRetryIT method testRetryDueToExceptionOnNetworkLayer.
/**
* Tests retry mechanism when indexing. If an exception occurs when indexing then the indexing request is tried again before finally
* failing. If auto generated ids are used this must not lead to duplicate ids
* see https://github.com/elastic/elasticsearch/issues/8788
*/
public void testRetryDueToExceptionOnNetworkLayer() throws ExecutionException, InterruptedException, IOException {
final AtomicBoolean exceptionThrown = new AtomicBoolean(false);
int numDocs = scaledRandomIntBetween(100, 1000);
Client client = internalCluster().coordOnlyNodeClient();
NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
NodeStats unluckyNode = randomFrom(nodeStats.getNodes().stream().filter((s) -> s.getNode().isDataNode()).collect(Collectors.toList()));
assertAcked(client().admin().indices().prepareCreate("index").setSettings(Settings.builder().put("index.number_of_replicas", 1).put("index.number_of_shards", 5)));
ensureGreen("index");
logger.info("unlucky node: {}", unluckyNode.getNode());
//create a transport service that throws a ConnectTransportException for one bulk request and therefore triggers a retry.
for (NodeStats dataNode : nodeStats.getNodes()) {
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, dataNode.getNode().getName()));
mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()), new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
super.sendRequest(connection, requestId, action, request, options);
if (action.equals(TransportShardBulkAction.ACTION_NAME) && exceptionThrown.compareAndSet(false, true)) {
logger.debug("Throw ConnectTransportException");
throw new ConnectTransportException(connection.getNode(), action);
}
}
});
}
BulkRequestBuilder bulkBuilder = client.prepareBulk();
for (int i = 0; i < numDocs; i++) {
XContentBuilder doc = null;
doc = jsonBuilder().startObject().field("foo", "bar").endObject();
bulkBuilder.add(client.prepareIndex("index", "type").setSource(doc));
}
BulkResponse response = bulkBuilder.get();
if (response.hasFailures()) {
for (BulkItemResponse singleIndexRespons : response.getItems()) {
if (singleIndexRespons.isFailed()) {
fail("None of the bulk items should fail but got " + singleIndexRespons.getFailureMessage());
}
}
}
refresh();
SearchResponse searchResponse = client().prepareSearch("index").setSize(numDocs * 2).addStoredField("_id").get();
Set<String> uniqueIds = new HashSet();
long dupCounter = 0;
boolean found_duplicate_already = false;
for (int i = 0; i < searchResponse.getHits().getHits().length; i++) {
if (!uniqueIds.add(searchResponse.getHits().getHits()[i].getId())) {
if (!found_duplicate_already) {
SearchResponse dupIdResponse = client().prepareSearch("index").setQuery(termQuery("_id", searchResponse.getHits().getHits()[i].getId())).setExplain(true).get();
assertThat(dupIdResponse.getHits().getTotalHits(), greaterThan(1L));
logger.info("found a duplicate id:");
for (SearchHit hit : dupIdResponse.getHits()) {
logger.info("Doc {} was found on shard {}", hit.getId(), hit.getShard().getShardId());
}
logger.info("will not print anymore in case more duplicates are found.");
found_duplicate_already = true;
}
dupCounter++;
}
}
assertSearchResponse(searchResponse);
assertThat(dupCounter, equalTo(0L));
assertHitCount(searchResponse, numDocs);
IndicesStatsResponse index = client().admin().indices().prepareStats("index").clear().setSegments(true).get();
IndexStats indexStats = index.getIndex("index");
long maxUnsafeAutoIdTimestamp = Long.MIN_VALUE;
for (IndexShardStats indexShardStats : indexStats) {
for (ShardStats shardStats : indexShardStats) {
SegmentsStats segments = shardStats.getStats().getSegments();
maxUnsafeAutoIdTimestamp = Math.max(maxUnsafeAutoIdTimestamp, segments.getMaxUnsafeAutoIdTimestamp());
}
}
assertTrue("exception must have been thrown otherwise setup is broken", exceptionThrown.get());
assertTrue("maxUnsafeAutoIdTimestamp must be > than 0 we have at least one retry", maxUnsafeAutoIdTimestamp > -1);
}
use of org.elasticsearch.test.transport.MockTransportService in project elasticsearch by elastic.
the class IndicesStoreIntegrationIT method relocateAndBlockCompletion.
/**
* relocate a shard and block cluster state processing on the relocation target node to activate the shard
*/
public static BlockClusterStateProcessing relocateAndBlockCompletion(Logger logger, String index, int shard, String nodeFrom, String nodeTo) throws InterruptedException {
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodeTo, random());
internalCluster().setDisruptionScheme(disruption);
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeTo);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class, nodeTo);
CountDownLatch beginRelocationLatch = new CountDownLatch(1);
CountDownLatch receivedShardExistsRequestLatch = new CountDownLatch(1);
// use a tracer on the target node to track relocation start and end
transportService.addTracer(new MockTransportService.Tracer() {
@Override
public void receivedRequest(long requestId, String action) {
if (action.equals(PeerRecoveryTargetService.Actions.FILES_INFO)) {
logger.info("received: {}, relocation starts", action);
beginRelocationLatch.countDown();
} else if (action.equals(IndicesStore.ACTION_SHARD_EXISTS)) {
// Whenever a node deletes a shard because it was relocated somewhere else, it first
// checks if enough other copies are started somewhere else. The node sends a ShardActiveRequest
// to the other nodes that should have a copy according to cluster state.
receivedShardExistsRequestLatch.countDown();
logger.info("received: {}, relocation done", action);
} else if (action.equals(PeerRecoveryTargetService.Actions.WAIT_CLUSTERSTATE)) {
logger.info("received: {}, waiting on cluster state", action);
// a race with the BlockClusterStateProcessing block that is added below.
try {
assertBusy(() -> assertTrue(clusterService.state().routingTable().index(index).shard(shard).primaryShard().relocating()));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
});
internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(index, shard, nodeFrom, nodeTo)).get();
logger.info("--> waiting for relocation to start");
beginRelocationLatch.await();
logger.info("--> starting disruption");
disruption.startDisrupting();
logger.info("--> waiting for relocation to finish");
receivedShardExistsRequestLatch.await();
logger.info("--> relocation completed (but cluster state processing block still in place)");
return disruption;
}
use of org.elasticsearch.test.transport.MockTransportService in project elasticsearch by elastic.
the class IndexRecoveryIT method testDisconnectsDuringRecovery.
/**
* Tests scenario where recovery target successfully sends recovery request to source but then the channel gets closed while
* the source is working on the recovery process.
*/
@TestLogging("_root:DEBUG,org.elasticsearch.indices.recovery:TRACE")
public void testDisconnectsDuringRecovery() throws Exception {
boolean primaryRelocation = randomBoolean();
final String indexName = "test";
final Settings nodeSettings = Settings.builder().put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(randomIntBetween(0, 100))).build();
TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));
// start a master node
String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
final String blueNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());
client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue").put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)).get();
List<IndexRequestBuilder> requests = new ArrayList<>();
int numDocs = scaledRandomIntBetween(25, 250);
for (int i = 0; i < numDocs; i++) {
requests.add(client().prepareIndex(indexName, "type").setSource("{}", XContentType.JSON));
}
indexRandom(true, requests);
ensureSearchable(indexName);
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
MockTransportService redMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);
redMockTransportService.addDelegate(blueMockTransportService, new MockTransportService.DelegateTransport(redMockTransportService.original()) {
private final AtomicInteger count = new AtomicInteger();
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
// ensures that it's considered as valid recovery attempt by source
try {
awaitBusy(() -> client(blueNodeName).admin().cluster().prepareState().setLocal(true).get().getState().getRoutingTable().index("test").shard(0).getAllInitializingShards().isEmpty() == false);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
super.sendRequest(connection, requestId, action, request, options);
try {
Thread.sleep(disconnectAfterDelay.millis());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
throw new ConnectTransportException(connection.getNode(), "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
} else {
super.sendRequest(connection, requestId, action, request, options);
}
}
});
final AtomicBoolean finalized = new AtomicBoolean();
blueMockTransportService.addDelegate(redMockTransportService, new MockTransportService.DelegateTransport(blueMockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
finalized.set(true);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
mockTransportService.addDelegate(masterTransportService, new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if ((primaryRelocation && finalized.get()) == false) {
assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
}
if (primaryRelocation) {
logger.info("--> starting primary relocation recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red")).get();
// also waits for relocation / recovery to complete
ensureGreen();
// if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
// source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
// shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
// will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
// searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
client().admin().indices().prepareRefresh(indexName).get();
} else {
logger.info("--> starting replica recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red,blue").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)).get();
ensureGreen();
}
for (int i = 0; i < 10; i++) {
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
}
}
use of org.elasticsearch.test.transport.MockTransportService in project elasticsearch by elastic.
the class IndexRecoveryIT method testDisconnectsWhileRecovering.
public void testDisconnectsWhileRecovering() throws Exception {
final String indexName = "test";
final Settings nodeSettings = Settings.builder().put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms").put(RecoverySettings.INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING.getKey(), "1s").put(MockFSDirectoryService.RANDOM_PREVENT_DOUBLE_WRITE_SETTING.getKey(), // restarted recoveries will delete temp files and write them again
false).build();
// start a master node
internalCluster().startNode(nodeSettings);
final String blueNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());
ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get();
assertThat(response.isTimedOut(), is(false));
client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue").put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)).get();
List<IndexRequestBuilder> requests = new ArrayList<>();
int numDocs = scaledRandomIntBetween(25, 250);
for (int i = 0; i < numDocs; i++) {
requests.add(client().prepareIndex(indexName, "type").setSource("{}", XContentType.JSON));
}
indexRandom(true, requests);
ensureSearchable(indexName);
ClusterStateResponse stateResponse = client().admin().cluster().prepareState().get();
final String blueNodeId = internalCluster().getInstance(ClusterService.class, blueNodeName).localNode().getId();
assertFalse(stateResponse.getState().getRoutingNodes().node(blueNodeId).isEmpty());
SearchResponse searchResponse = client().prepareSearch(indexName).get();
assertHitCount(searchResponse, numDocs);
String[] recoveryActions = new String[] { PeerRecoverySourceService.Actions.START_RECOVERY, PeerRecoveryTargetService.Actions.FILES_INFO, PeerRecoveryTargetService.Actions.FILE_CHUNK, PeerRecoveryTargetService.Actions.CLEAN_FILES, //RecoveryTarget.Actions.TRANSLOG_OPS, <-- may not be sent if already flushed
PeerRecoveryTargetService.Actions.PREPARE_TRANSLOG, PeerRecoveryTargetService.Actions.FINALIZE };
final String recoveryActionToBlock = randomFrom(recoveryActions);
final boolean dropRequests = randomBoolean();
logger.info("--> will {} between blue & red on [{}]", dropRequests ? "drop requests" : "break connection", recoveryActionToBlock);
MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
MockTransportService redMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);
TransportService redTransportService = internalCluster().getInstance(TransportService.class, redNodeName);
TransportService blueTransportService = internalCluster().getInstance(TransportService.class, blueNodeName);
final CountDownLatch requestBlocked = new CountDownLatch(1);
blueMockTransportService.addDelegate(redTransportService, new RecoveryActionBlocker(dropRequests, recoveryActionToBlock, blueMockTransportService.original(), requestBlocked));
redMockTransportService.addDelegate(blueTransportService, new RecoveryActionBlocker(dropRequests, recoveryActionToBlock, redMockTransportService.original(), requestBlocked));
logger.info("--> starting recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red,blue").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)).get();
requestBlocked.await();
logger.info("--> stopping to block recovery");
blueMockTransportService.clearAllRules();
redMockTransportService.clearAllRules();
ensureGreen();
searchResponse = client(redNodeName).prepareSearch(indexName).setPreference("_local").get();
assertHitCount(searchResponse, numDocs);
}
Aggregations