use of org.elasticsearch.transport.TransportRequest in project elasticsearch by elastic.
the class IndexWithShadowReplicasIT method testPrimaryRelocationWhereRecoveryFails.
public void testPrimaryRelocationWhereRecoveryFails() throws Exception {
Path dataPath = createTempDir();
Settings nodeSettings = Settings.builder().put("node.add_lock_id_to_custom_path", false).put(Environment.PATH_SHARED_DATA_SETTING.getKey(), dataPath).build();
String node1 = internalCluster().startNode(nodeSettings);
final String IDX = "test";
Settings idxSettings = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString()).put(IndexMetaData.SETTING_SHADOW_REPLICAS, true).put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true).build();
prepareCreate(IDX).setSettings(idxSettings).addMapping("doc", "foo", "type=text").get();
// Node1 has the primary, now node2 has the replica
String node2 = internalCluster().startNode(nodeSettings);
ensureGreen(IDX);
flushAndRefresh(IDX);
String node3 = internalCluster().startNode(nodeSettings);
final AtomicInteger counter = new AtomicInteger(0);
final CountDownLatch started = new CountDownLatch(1);
final int numPhase1Docs = scaledRandomIntBetween(25, 200);
final int numPhase2Docs = scaledRandomIntBetween(25, 200);
final int numPhase3Docs = scaledRandomIntBetween(25, 200);
final CountDownLatch phase1finished = new CountDownLatch(1);
final CountDownLatch phase2finished = new CountDownLatch(1);
final CountDownLatch phase3finished = new CountDownLatch(1);
final AtomicBoolean keepFailing = new AtomicBoolean(true);
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, node1));
mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, node3), new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
if (keepFailing.get() && action.equals(PeerRecoveryTargetService.Actions.TRANSLOG_OPS)) {
logger.info("--> failing translog ops");
throw new ElasticsearchException("failing on purpose");
}
super.sendRequest(connection, requestId, action, request, options);
}
});
Thread thread = new Thread() {
@Override
public void run() {
started.countDown();
while (counter.get() < (numPhase1Docs + numPhase2Docs + numPhase3Docs)) {
final IndexResponse indexResponse = client().prepareIndex(IDX, "doc", Integer.toString(counter.incrementAndGet())).setSource("foo", "bar").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
final int docCount = counter.get();
if (docCount == numPhase1Docs) {
phase1finished.countDown();
} else if (docCount == (numPhase1Docs + numPhase2Docs)) {
phase2finished.countDown();
}
}
logger.info("--> stopping indexing thread");
phase3finished.countDown();
}
};
thread.start();
started.await();
// wait for a certain number of documents to be indexed
phase1finished.await();
logger.info("--> excluding {} from allocation", node1);
// now prevent primary from being allocated on node 1 move to node_3
Settings build = Settings.builder().put("index.routing.allocation.exclude._name", node1).build();
client().admin().indices().prepareUpdateSettings(IDX).setSettings(build).execute().actionGet();
// wait for more documents to be indexed post-recovery, also waits for
// indexing thread to stop
phase2finished.await();
// stop failing
keepFailing.set(false);
// wait for more docs to be indexed
phase3finished.await();
ensureGreen(IDX);
thread.join();
logger.info("--> performing query");
flushAndRefresh();
SearchResponse resp = client().prepareSearch(IDX).setQuery(matchAllQuery()).get();
assertHitCount(resp, counter.get());
}
use of org.elasticsearch.transport.TransportRequest in project elasticsearch by elastic.
the class ExceptionRetryIT method testRetryDueToExceptionOnNetworkLayer.
/**
* Tests retry mechanism when indexing. If an exception occurs when indexing then the indexing request is tried again before finally
* failing. If auto generated ids are used this must not lead to duplicate ids
* see https://github.com/elastic/elasticsearch/issues/8788
*/
public void testRetryDueToExceptionOnNetworkLayer() throws ExecutionException, InterruptedException, IOException {
final AtomicBoolean exceptionThrown = new AtomicBoolean(false);
int numDocs = scaledRandomIntBetween(100, 1000);
Client client = internalCluster().coordOnlyNodeClient();
NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
NodeStats unluckyNode = randomFrom(nodeStats.getNodes().stream().filter((s) -> s.getNode().isDataNode()).collect(Collectors.toList()));
assertAcked(client().admin().indices().prepareCreate("index").setSettings(Settings.builder().put("index.number_of_replicas", 1).put("index.number_of_shards", 5)));
ensureGreen("index");
logger.info("unlucky node: {}", unluckyNode.getNode());
//create a transport service that throws a ConnectTransportException for one bulk request and therefore triggers a retry.
for (NodeStats dataNode : nodeStats.getNodes()) {
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, dataNode.getNode().getName()));
mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()), new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
super.sendRequest(connection, requestId, action, request, options);
if (action.equals(TransportShardBulkAction.ACTION_NAME) && exceptionThrown.compareAndSet(false, true)) {
logger.debug("Throw ConnectTransportException");
throw new ConnectTransportException(connection.getNode(), action);
}
}
});
}
BulkRequestBuilder bulkBuilder = client.prepareBulk();
for (int i = 0; i < numDocs; i++) {
XContentBuilder doc = null;
doc = jsonBuilder().startObject().field("foo", "bar").endObject();
bulkBuilder.add(client.prepareIndex("index", "type").setSource(doc));
}
BulkResponse response = bulkBuilder.get();
if (response.hasFailures()) {
for (BulkItemResponse singleIndexRespons : response.getItems()) {
if (singleIndexRespons.isFailed()) {
fail("None of the bulk items should fail but got " + singleIndexRespons.getFailureMessage());
}
}
}
refresh();
SearchResponse searchResponse = client().prepareSearch("index").setSize(numDocs * 2).addStoredField("_id").get();
Set<String> uniqueIds = new HashSet();
long dupCounter = 0;
boolean found_duplicate_already = false;
for (int i = 0; i < searchResponse.getHits().getHits().length; i++) {
if (!uniqueIds.add(searchResponse.getHits().getHits()[i].getId())) {
if (!found_duplicate_already) {
SearchResponse dupIdResponse = client().prepareSearch("index").setQuery(termQuery("_id", searchResponse.getHits().getHits()[i].getId())).setExplain(true).get();
assertThat(dupIdResponse.getHits().getTotalHits(), greaterThan(1L));
logger.info("found a duplicate id:");
for (SearchHit hit : dupIdResponse.getHits()) {
logger.info("Doc {} was found on shard {}", hit.getId(), hit.getShard().getShardId());
}
logger.info("will not print anymore in case more duplicates are found.");
found_duplicate_already = true;
}
dupCounter++;
}
}
assertSearchResponse(searchResponse);
assertThat(dupCounter, equalTo(0L));
assertHitCount(searchResponse, numDocs);
IndicesStatsResponse index = client().admin().indices().prepareStats("index").clear().setSegments(true).get();
IndexStats indexStats = index.getIndex("index");
long maxUnsafeAutoIdTimestamp = Long.MIN_VALUE;
for (IndexShardStats indexShardStats : indexStats) {
for (ShardStats shardStats : indexShardStats) {
SegmentsStats segments = shardStats.getStats().getSegments();
maxUnsafeAutoIdTimestamp = Math.max(maxUnsafeAutoIdTimestamp, segments.getMaxUnsafeAutoIdTimestamp());
}
}
assertTrue("exception must have been thrown otherwise setup is broken", exceptionThrown.get());
assertTrue("maxUnsafeAutoIdTimestamp must be > than 0 we have at least one retry", maxUnsafeAutoIdTimestamp > -1);
}
use of org.elasticsearch.transport.TransportRequest in project elasticsearch by elastic.
the class RemoteClusterConnection method getConnection.
/**
* Returns a connection to the remote cluster. This connection might be a proxy connection that redirects internally to the
* given node.
*/
Transport.Connection getConnection(DiscoveryNode remoteClusterNode) {
DiscoveryNode discoveryNode = nodeSupplier.get();
Transport.Connection connection = transportService.getConnection(discoveryNode);
return new Transport.Connection() {
@Override
public DiscoveryNode getNode() {
return remoteClusterNode;
}
@Override
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException, TransportException {
connection.sendRequest(requestId, TransportActionProxy.getProxyAction(action), TransportActionProxy.wrapRequest(remoteClusterNode, request), options);
}
@Override
public void close() throws IOException {
assert false : "proxy connections must not be closed";
}
};
}
use of org.elasticsearch.transport.TransportRequest in project elasticsearch by elastic.
the class IndexRecoveryIT method testDisconnectsDuringRecovery.
/**
* Tests scenario where recovery target successfully sends recovery request to source but then the channel gets closed while
* the source is working on the recovery process.
*/
@TestLogging("_root:DEBUG,org.elasticsearch.indices.recovery:TRACE")
public void testDisconnectsDuringRecovery() throws Exception {
boolean primaryRelocation = randomBoolean();
final String indexName = "test";
final Settings nodeSettings = Settings.builder().put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(randomIntBetween(0, 100))).build();
TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));
// start a master node
String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
final String blueNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());
client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue").put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)).get();
List<IndexRequestBuilder> requests = new ArrayList<>();
int numDocs = scaledRandomIntBetween(25, 250);
for (int i = 0; i < numDocs; i++) {
requests.add(client().prepareIndex(indexName, "type").setSource("{}", XContentType.JSON));
}
indexRandom(true, requests);
ensureSearchable(indexName);
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
MockTransportService redMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);
redMockTransportService.addDelegate(blueMockTransportService, new MockTransportService.DelegateTransport(redMockTransportService.original()) {
private final AtomicInteger count = new AtomicInteger();
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
// ensures that it's considered as valid recovery attempt by source
try {
awaitBusy(() -> client(blueNodeName).admin().cluster().prepareState().setLocal(true).get().getState().getRoutingTable().index("test").shard(0).getAllInitializingShards().isEmpty() == false);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
super.sendRequest(connection, requestId, action, request, options);
try {
Thread.sleep(disconnectAfterDelay.millis());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
throw new ConnectTransportException(connection.getNode(), "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
} else {
super.sendRequest(connection, requestId, action, request, options);
}
}
});
final AtomicBoolean finalized = new AtomicBoolean();
blueMockTransportService.addDelegate(redMockTransportService, new MockTransportService.DelegateTransport(blueMockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
finalized.set(true);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
mockTransportService.addDelegate(masterTransportService, new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if ((primaryRelocation && finalized.get()) == false) {
assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
}
if (primaryRelocation) {
logger.info("--> starting primary relocation recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red")).get();
// also waits for relocation / recovery to complete
ensureGreen();
// if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
// source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
// shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
// will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
// searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
client().admin().indices().prepareRefresh(indexName).get();
} else {
logger.info("--> starting replica recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red,blue").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)).get();
ensureGreen();
}
for (int i = 0; i < 10; i++) {
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
}
}
use of org.elasticsearch.transport.TransportRequest in project elasticsearch by elastic.
the class TruncatedRecoveryIT method testCancelRecoveryAndResume.
/**
* This test tries to truncate some of larger files in the index to trigger leftovers on the recovery
* target. This happens during recovery when the last chunk of the file is transferred to the replica
* we just throw an exception to make sure the recovery fails and we leave some half baked files on the target.
* Later we allow full recovery to ensure we can still recover and don't run into corruptions.
*/
public void testCancelRecoveryAndResume() throws Exception {
assertTrue(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(CHUNK_SIZE_SETTING.getKey(), new ByteSizeValue(randomIntBetween(50, 300), ByteSizeUnit.BYTES))).get().isAcknowledged());
NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
List<NodeStats> dataNodeStats = new ArrayList<>();
for (NodeStats stat : nodeStats.getNodes()) {
if (stat.getNode().isDataNode()) {
dataNodeStats.add(stat);
}
}
assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2));
Collections.shuffle(dataNodeStats, random());
// we use 2 nodes a lucky and unlucky one
// the lucky one holds the primary
// the unlucky one gets the replica and the truncated leftovers
NodeStats primariesNode = dataNodeStats.get(0);
NodeStats unluckyNode = dataNodeStats.get(1);
// create the index and prevent allocation on any other nodes than the lucky one
// we have no replicas so far and make sure that we allocate the primary on the lucky node
assertAcked(prepareCreate("test").addMapping("type1", "field1", "type=text", "the_id", "type=text").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numberOfShards()).put("index.routing.allocation.include._name", // only allocate on the lucky node
primariesNode.getNode().getName())));
// index some docs and check if they are coming back
int numDocs = randomIntBetween(100, 200);
List<IndexRequestBuilder> builder = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
builder.add(client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(i), "the_id", id));
}
indexRandom(true, builder);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
assertHitCount(client().prepareSearch().setQuery(QueryBuilders.termQuery("the_id", id)).get(), 1);
}
ensureGreen();
// ensure we have flushed segments and make them a big one via optimize
client().admin().indices().prepareFlush().setForce(true).get();
client().admin().indices().prepareForceMerge().setMaxNumSegments(1).setFlush(true).get();
final CountDownLatch latch = new CountDownLatch(1);
final AtomicBoolean truncate = new AtomicBoolean(true);
for (NodeStats dataNode : dataNodeStats) {
MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, dataNode.getNode().getName()));
mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()), new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
if (action.equals(PeerRecoveryTargetService.Actions.FILE_CHUNK)) {
RecoveryFileChunkRequest req = (RecoveryFileChunkRequest) request;
logger.debug("file chunk [{}] lastChunk: {}", req, req.lastChunk());
if ((req.name().endsWith("cfs") || req.name().endsWith("fdt")) && req.lastChunk() && truncate.get()) {
latch.countDown();
throw new RuntimeException("Caused some truncated files for fun and profit");
}
}
super.sendRequest(connection, requestId, action, request, options);
}
});
}
//
logger.info("--> bumping replicas to 1");
client().admin().indices().prepareUpdateSettings("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).put(// now allow allocation on all nodes
"index.routing.allocation.include._name", primariesNode.getNode().getName() + "," + unluckyNode.getNode().getName())).get();
latch.await();
// at this point we got some truncated left overs on the replica on the unlucky node
// now we are allowing the recovery to allocate again and finish to see if we wipe the truncated files
truncate.compareAndSet(true, false);
ensureGreen("test");
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
assertHitCount(client().prepareSearch().setQuery(QueryBuilders.termQuery("the_id", id)).get(), 1);
}
}
Aggregations