Use of org.elasticsearch.test.junit.annotations.TestLogging in project elasticsearch by elastic.
From the class RelocationIT, method testIndexAndRelocateConcurrently.
@TestLogging("org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.search:TRACE")
public void testIndexAndRelocateConcurrently() throws ExecutionException, InterruptedException {
int halfNodes = randomIntBetween(1, 3);
Settings[] nodeSettings = Stream.concat(Stream.generate(() -> Settings.builder().put("node.attr.color", "blue").build()).limit(halfNodes), Stream.generate(() -> Settings.builder().put("node.attr.color", "red").build()).limit(halfNodes)).toArray(Settings[]::new);
List<String> nodes = internalCluster().startNodes(nodeSettings);
String[] blueNodes = nodes.subList(0, halfNodes).stream().toArray(String[]::new);
String[] redNodes = nodes.subList(halfNodes, nodes.size()).stream().toArray(String[]::new);
logger.info("blue nodes: {}", (Object) blueNodes);
logger.info("red nodes: {}", (Object) redNodes);
ensureStableCluster(halfNodes * 2);
assertAcked(prepareCreate("test", Settings.builder().put("index.routing.allocation.exclude.color", "blue").put(indexSettings()).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(halfNodes - 1))));
assertAllShardsOnNodes("test", redNodes);
int numDocs = randomIntBetween(100, 150);
ArrayList<String> ids = new ArrayList<>();
logger.info(" --> indexing [{}] docs", numDocs);
IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs];
for (int i = 0; i < numDocs; i++) {
String id = randomRealisticUnicodeOfLength(10) + String.valueOf(i);
ids.add(id);
docs[i] = client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(i));
}
indexRandom(true, docs);
SearchResponse countResponse = client().prepareSearch("test").get();
assertHitCount(countResponse, numDocs);
logger.info(" --> moving index to new nodes");
Settings build = Settings.builder().put("index.routing.allocation.exclude.color", "red").put("index.routing.allocation.include.color", "blue").build();
client().admin().indices().prepareUpdateSettings("test").setSettings(build).execute().actionGet();
// index while relocating
logger.info(" --> indexing [{}] more docs", numDocs);
for (int i = 0; i < numDocs; i++) {
String id = randomRealisticUnicodeOfLength(10) + String.valueOf(numDocs + i);
ids.add(id);
docs[i] = client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(numDocs + i));
}
indexRandom(true, docs);
numDocs *= 2;
logger.info(" --> waiting for relocation to complete");
// move all shards to the new nodes (it waits on relocation)
ensureGreen("test");
final int numIters = randomIntBetween(10, 20);
for (int i = 0; i < numIters; i++) {
logger.info(" --> checking iteration {}", i);
SearchResponse afterRelocation = client().prepareSearch().setSize(ids.size()).get();
assertNoFailures(afterRelocation);
assertSearchHits(afterRelocation, ids.toArray(new String[ids.size()]));
}
}
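For reference, @TestLogging takes a comma-separated list of logger:level pairs; the test framework raises the named loggers to the requested levels for the duration of the annotated test and restores them afterwards. Below is a minimal, hypothetical sketch of the annotation format (MyRelocationIT and testSomethingNoisy are illustrative names, not from the Elasticsearch source):

import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.junit.annotations.TestLogging;

public class MyRelocationIT extends ESIntegTestCase {

    // "_root" addresses the root logger; package-qualified names address individual loggers.
    @TestLogging("_root:DEBUG,org.elasticsearch.indices.recovery:TRACE")
    public void testSomethingNoisy() {
        // assertions that benefit from TRACE-level recovery logging go here
    }
}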
Use of org.elasticsearch.test.junit.annotations.TestLogging in project elasticsearch by elastic.
From the class RelocationIT, method testRelocationWhileIndexingRandom.
@TestLogging("org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.search:TRACE")
public void testRelocationWhileIndexingRandom() throws Exception {
    int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
    int numberOfReplicas = randomBoolean() ? 0 : 1;
    int numberOfNodes = numberOfReplicas == 0 ? 2 : 3;
    logger.info("testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})",
        numberOfRelocations, numberOfReplicas, numberOfNodes);
    String[] nodes = new String[numberOfNodes];
    logger.info("--> starting [node1] ...");
    nodes[0] = internalCluster().startNode();
    logger.info("--> creating test index ...");
    prepareCreate("test", Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", numberOfReplicas)).get();
    for (int i = 2; i <= numberOfNodes; i++) {
        logger.info("--> starting [node{}] ...", i);
        nodes[i - 1] = internalCluster().startNode();
        if (i != numberOfNodes) {
            ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForNodes(Integer.toString(i))
                .setWaitForGreenStatus()
                .execute().actionGet();
            assertThat(healthResponse.isTimedOut(), equalTo(false));
        }
    }
    int numDocs = scaledRandomIntBetween(200, 2500);
    try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type1", client(), numDocs)) {
        logger.info("--> waiting for {} docs to be indexed ...", numDocs);
        waitForDocs(numDocs, indexer);
        logger.info("--> {} docs indexed", numDocs);
        logger.info("--> starting relocations...");
        // if we have replicas, shift the from/to nodes up by one
        int nodeShiftBased = numberOfReplicas;
        for (int i = 0; i < numberOfRelocations; i++) {
            int fromNode = (i % 2);
            int toNode = fromNode == 0 ? 1 : 0;
            fromNode += nodeShiftBased;
            toNode += nodeShiftBased;
            numDocs = scaledRandomIntBetween(200, 1000);
            logger.debug("--> Allow indexer to index [{}] documents", numDocs);
            indexer.continueIndexing(numDocs);
            logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]);
            client().admin().cluster().prepareReroute()
                .add(new MoveAllocationCommand("test", 0, nodes[fromNode], nodes[toNode]))
                .get();
            if (rarely()) {
                logger.debug("--> flushing");
                client().admin().indices().prepareFlush().get();
            }
            ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForNoRelocatingShards(true)
                .setTimeout(ACCEPTABLE_RELOCATION_TIME)
                .execute().actionGet();
            assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
            indexer.pauseIndexing();
            logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode);
        }
        logger.info("--> done relocations");
        logger.info("--> waiting for indexing threads to stop ...");
        indexer.stop();
        logger.info("--> indexing threads stopped");
        logger.info("--> refreshing the index");
        client().admin().indices().prepareRefresh("test").execute().actionGet();
        logger.info("--> searching the index");
        boolean ranOnce = false;
        for (int i = 0; i < 10; i++) {
            logger.info("--> START search test round {}", i + 1);
            SearchHits hits = client().prepareSearch("test")
                .setQuery(matchAllQuery())
                .setSize((int) indexer.totalIndexedDocs())
                .storedFields()
                .execute().actionGet().getHits();
            ranOnce = true;
            if (hits.getTotalHits() != indexer.totalIndexedDocs()) {
                int[] hitIds = new int[(int) indexer.totalIndexedDocs()];
                for (int hit = 0; hit < indexer.totalIndexedDocs(); hit++) {
                    hitIds[hit] = hit + 1;
                }
                IntHashSet set = IntHashSet.from(hitIds);
                for (SearchHit hit : hits.getHits()) {
                    int id = Integer.parseInt(hit.getId());
                    if (!set.remove(id)) {
                        logger.error("Extra id [{}]", id);
                    }
                }
                set.forEach((IntProcedure) value -> {
                    logger.error("Missing id [{}]", value);
                });
            }
            assertThat(hits.getTotalHits(), equalTo(indexer.totalIndexedDocs()));
            logger.info("--> DONE search test round {}", i + 1);
        }
        if (!ranOnce) {
            fail();
        }
    }
}
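The missing/extra-id bookkeeping above relies on HPPC's IntHashSet. Here is a self-contained sketch of the same diff technique using java.util.HashSet, so it runs without the HPPC dependency; ids start at 1, matching the hitIds array in the test (HitDiff and reportMissingAndExtra are illustrative names):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class HitDiff {
    // Prints ids that were expected but not returned, and ids that were
    // returned but never expected - the same bookkeeping as the test's
    // IntHashSet loop, in standalone form.
    static void reportMissingAndExtra(long totalIndexedDocs, List<String> returnedIds) {
        Set<Integer> expected = new HashSet<>();
        for (int id = 1; id <= totalIndexedDocs; id++) {
            expected.add(id); // indexed ids run from 1 to totalIndexedDocs
        }
        for (String returned : returnedIds) {
            int id = Integer.parseInt(returned);
            if (!expected.remove(id)) {
                System.err.println("Extra id [" + id + "]");
            }
        }
        for (int missing : expected) {
            System.err.println("Missing id [" + missing + "]");
        }
    }
}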
Use of org.elasticsearch.test.junit.annotations.TestLogging in project elasticsearch by elastic.
From the class RelocationIT, method testRelocationWhileRefreshing.
@TestLogging("org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.search:TRACE")
public void testRelocationWhileRefreshing() throws Exception {
    int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
    int numberOfReplicas = randomBoolean() ? 0 : 1;
    int numberOfNodes = numberOfReplicas == 0 ? 2 : 3;
    logger.info("testRelocationWhileRefreshing(numRelocations={}, numberOfReplicas={}, numberOfNodes={})",
        numberOfRelocations, numberOfReplicas, numberOfNodes);
    String[] nodes = new String[numberOfNodes];
    logger.info("--> starting [node_0] ...");
    nodes[0] = internalCluster().startNode();
    logger.info("--> creating test index ...");
    prepareCreate("test", Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", numberOfReplicas)
        .put("index.refresh_interval", -1)) // we want to control refreshes
        .get();
    for (int i = 1; i < numberOfNodes; i++) {
        logger.info("--> starting [node_{}] ...", i);
        nodes[i] = internalCluster().startNode();
        if (i != numberOfNodes - 1) {
            ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForNodes(Integer.toString(i + 1))
                .setWaitForGreenStatus()
                .execute().actionGet();
            assertThat(healthResponse.isTimedOut(), equalTo(false));
        }
    }
    final Semaphore postRecoveryShards = new Semaphore(0);
    final IndexEventListener listener = new IndexEventListener() {
        @Override
        public void indexShardStateChanged(IndexShard indexShard, @Nullable IndexShardState previousState,
                                           IndexShardState currentState, @Nullable String reason) {
            if (currentState == IndexShardState.POST_RECOVERY) {
                postRecoveryShards.release();
            }
        }
    };
    for (MockIndexEventListener.TestEventListener eventListener :
            internalCluster().getInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(listener);
    }
    logger.info("--> starting relocations...");
    // if we have replicas, shift the from/to nodes up by one
    int nodeShiftBased = numberOfReplicas;
    for (int i = 0; i < numberOfRelocations; i++) {
        int fromNode = (i % 2);
        int toNode = fromNode == 0 ? 1 : 0;
        fromNode += nodeShiftBased;
        toNode += nodeShiftBased;
        List<IndexRequestBuilder> builders1 = new ArrayList<>();
        for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) {
            builders1.add(client().prepareIndex("test", "type").setSource("{}", XContentType.JSON));
        }
        List<IndexRequestBuilder> builders2 = new ArrayList<>();
        for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) {
            builders2.add(client().prepareIndex("test", "type").setSource("{}", XContentType.JSON));
        }
        logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]);
        client().admin().cluster().prepareReroute()
            .add(new MoveAllocationCommand("test", 0, nodes[fromNode], nodes[toNode]))
            .get();
        logger.debug("--> index [{}] documents", builders1.size());
        indexRandom(false, true, builders1);
        // wait for the relocating shard to reach post recovery
        postRecoveryShards.acquire(1);
        logger.debug("--> index [{}] documents", builders2.size());
        indexRandom(true, true, builders2);
        // verify that the relocation has finished
        assertFalse(client().admin().cluster().prepareHealth()
            .setWaitForNoRelocatingShards(true)
            .setWaitForEvents(Priority.LANGUID)
            .setTimeout("30s")
            .get().isTimedOut());
        logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode);
        logger.debug("--> verifying all searches return the same number of docs");
        long expectedCount = -1;
        for (Client client : clients()) {
            SearchResponse response = client.prepareSearch("test").setPreference("_local").setSize(0).get();
            assertNoFailures(response);
            if (expectedCount < 0) {
                expectedCount = response.getHits().getTotalHits();
            } else {
                assertEquals(expectedCount, response.getHits().getTotalHits());
            }
        }
    }
}
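The coordination trick here is a Semaphore created with zero permits: the listener releases a permit whenever a shard reaches POST_RECOVERY, and the test thread blocks in acquire() until that happens. A stripped-down sketch of the pattern (class and method names are illustrative, not from the Elasticsearch source):

import java.util.concurrent.Semaphore;

public class PostRecoverySignal {
    // Starts with zero permits, so awaitPostRecovery() blocks until the
    // listener thread releases a permit.
    private final Semaphore postRecoveryShards = new Semaphore(0);

    // Called from the event-listener callback, on whatever thread fires it.
    void onShardStateChanged(boolean reachedPostRecovery) {
        if (reachedPostRecovery) {
            postRecoveryShards.release();
        }
    }

    // Called from the test thread: waits for one shard to reach the watched state.
    void awaitPostRecovery() throws InterruptedException {
        postRecoveryShards.acquire(1);
    }
}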
Use of org.elasticsearch.test.junit.annotations.TestLogging in project elasticsearch by elastic.
From the class DiscoveryWithServiceDisruptionsIT, method testAckedIndexing.
/**
 * Tests that we do not lose documents whose indexing request was successful, under a randomly selected disruption scheme.
 * We also collect & report the types of indexing failures that occur.
 * <p>
 * This test is a superset of tests run in the Jepsen test suite, with the exception of versioned updates.
 */
@TestLogging("_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE,discovery:TRACE," +
    "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
    "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE")
public void testAckedIndexing() throws Exception {
    final int seconds = TEST_NIGHTLY && rarely() ? 5 : 1;
    final String timeout = seconds + "s";
    final List<String> nodes = startCluster(rarely() ? 5 : 3);
    assertAcked(prepareCreate("test").setSettings(Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
        .put(IndexSettings.INDEX_SEQ_NO_CHECKPOINT_SYNC_INTERVAL.getKey(), randomBoolean() ? "5s" : "200ms")));
    ensureGreen();
    ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
    logger.info("disruption scheme [{}] added", disruptionScheme);
    // id -> node the request was sent through
    final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>();
    final AtomicBoolean stop = new AtomicBoolean(false);
    List<Thread> indexers = new ArrayList<>(nodes.size());
    List<Semaphore> semaphores = new ArrayList<>(nodes.size());
    final AtomicInteger idGenerator = new AtomicInteger(0);
    final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>();
    final List<Exception> exceptedExceptions = Collections.synchronizedList(new ArrayList<Exception>());
    logger.info("starting indexers");
    try {
        for (final String node : nodes) {
            final Semaphore semaphore = new Semaphore(0);
            semaphores.add(semaphore);
            final Client client = client(node);
            final String name = "indexer_" + indexers.size();
            final int numPrimaries = getNumShards("test").numPrimaries;
            Thread thread = new Thread(() -> {
                while (!stop.get()) {
                    String id = null;
                    try {
                        if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
                            continue;
                        }
                        logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
                        try {
                            id = Integer.toString(idGenerator.incrementAndGet());
                            int shard = Math.floorMod(Murmur3HashFunction.hash(id), numPrimaries);
                            logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
                            IndexResponse response = client.prepareIndex("test", "type", id)
                                .setSource("{}", XContentType.JSON)
                                .setTimeout(timeout)
                                .get(timeout);
                            assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
                            ackedDocs.put(id, node);
                            logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
                        } catch (ElasticsearchException e) {
                            exceptedExceptions.add(e);
                            final String docId = id;
                            logger.trace((Supplier<?>) () -> new ParameterizedMessage("[{}] failed id [{}] through node [{}]", name, docId, node), e);
                        } finally {
                            countDownLatchRef.get().countDown();
                            logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
                        }
                    } catch (InterruptedException e) {
                        // fine - semaphore interrupt
                    } catch (AssertionError | Exception e) {
                        logger.info((Supplier<?>) () -> new ParameterizedMessage("unexpected exception in background thread of [{}]", node), e);
                    }
                }
            });
            thread.setName(name);
            thread.start();
            indexers.add(thread);
        }
        int docsPerIndexer = randomInt(3);
        logger.info("indexing {} docs per indexer before partition", docsPerIndexer);
        countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
        for (Semaphore semaphore : semaphores) {
            semaphore.release(docsPerIndexer);
        }
        assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
        for (int iter = 1 + randomInt(2); iter > 0; iter--) {
            logger.info("starting disruptions & indexing (iteration [{}])", iter);
            disruptionScheme.startDisrupting();
            docsPerIndexer = 1 + randomInt(5);
            logger.info("indexing {} docs per indexer during partition", docsPerIndexer);
            countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
            Collections.shuffle(semaphores, random());
            for (Semaphore semaphore : semaphores) {
                assertThat(semaphore.availablePermits(), equalTo(0));
                semaphore.release(docsPerIndexer);
            }
            logger.info("waiting for indexing requests to complete");
            assertTrue(countDownLatchRef.get().await(docsPerIndexer * seconds * 1000 + 2000, TimeUnit.MILLISECONDS));
            logger.info("stopping disruption");
            disruptionScheme.stopDisrupting();
            for (String node : internalCluster().getNodeNames()) {
                ensureStableCluster(nodes.size(),
                    TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()),
                    true, node);
            }
            ensureGreen("test");
            logger.info("validating successful docs");
            assertBusy(() -> {
                for (String node : nodes) {
                    try {
                        logger.debug("validating through node [{}] ([{}] acked docs)", node, ackedDocs.size());
                        for (String id : ackedDocs.keySet()) {
                            assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found",
                                client(node).prepareGet("test", "type", id).setPreference("_local").get().isExists());
                        }
                    } catch (AssertionError | NoShardAvailableActionException e) {
                        throw new AssertionError(e.getMessage() + " (checked via node [" + node + "])", e);
                    }
                }
            }, 30, TimeUnit.SECONDS);
            logger.info("done validating (iteration [{}])", iter);
        }
    } finally {
        if (exceptedExceptions.size() > 0) {
            StringBuilder sb = new StringBuilder();
            for (Exception e : exceptedExceptions) {
                sb.append("\n").append(e.getMessage());
            }
            logger.debug("Indexing exceptions during disruption: {}", sb);
        }
        logger.info("shutting down indexers");
        stop.set(true);
        for (Thread indexer : indexers) {
            indexer.interrupt();
            indexer.join(60000);
        }
    }
}
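The shard targeted by each document is derived from its id exactly as in the indexer thread above: hash the id, then map it into [0, numPrimaries) with Math.floorMod, which keeps negative hash values on a valid shard. A sketch of that arithmetic with a placeholder hash function, since Murmur3HashFunction is internal to Elasticsearch (RoutingMath and stringHash are illustrative names):

public class RoutingMath {
    // Maps a document id onto one of numPrimaries shards; floorMod (unlike %)
    // never returns a negative result, so any int hash lands on a valid shard.
    static int targetShard(String id, int numPrimaries) {
        return Math.floorMod(stringHash(id), numPrimaries);
    }

    static int stringHash(String id) {
        return id.hashCode(); // assumption: stand-in for Murmur3HashFunction.hash(id)
    }
}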
Use of org.elasticsearch.test.junit.annotations.TestLogging in project elasticsearch by elastic.
From the class CorruptedFileIT, method testCorruptFileThenSnapshotAndRestore.
/**
 * Tests that restoring a corrupted shard fails and we get a partial snapshot.
 * TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several
 * parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard.
 */
@TestLogging("org.elasticsearch.monitor.fs:DEBUG")
public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);
    assertAcked(prepareCreate("test").setSettings(Settings.builder()
        // no replicas for this test
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
        .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
        // no checkindex - we corrupt shards on purpose
        .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
        // no translog based flush - it might change the .liv / segments.N files
        .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    ShardRouting shardRouting = corruptRandomPrimaryFile(false);
    // we don't corrupt segments.gen since S/R doesn't snapshot this file
    // the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before
    // it snapshots and that will write a new segments.X+1 file
    logger.info("--> creating repository");
    assertAcked(client().admin().cluster().preparePutRepository("test-repo")
        .setType("fs")
        .setSettings(Settings.builder()
            .put("location", randomRepoPath().toAbsolutePath())
            .put("compress", randomBoolean())
            .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
    logger.info("--> snapshot");
    CreateSnapshotResponse createSnapshotResponse = client().admin().cluster()
        .prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(true)
        .setIndices("test")
        .get();
    assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
    logger.info("failed during snapshot -- maybe SI file got corrupted");
    final List<Path> files = listShardFiles(shardRouting);
    Path corruptedFile = null;
    for (Path file : files) {
        if (file.getFileName().toString().startsWith("corrupted_")) {
            corruptedFile = file;
            break;
        }
    }
    assertThat(corruptedFile, notNullValue());
}
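When corruption is detected in a shard's store, a corrupted_* marker file is left in the shard directory, and the final loop above scans the shard's files for exactly that prefix. A standalone sketch of the same scan using java.nio.file (CorruptionMarkers and findCorruptionMarker are illustrative names):

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;

public class CorruptionMarkers {
    // Returns the first file in shardDir whose name starts with "corrupted_",
    // mirroring the test's loop over listShardFiles(shardRouting), or null
    // if no marker is present.
    static Path findCorruptionMarker(Path shardDir) throws IOException {
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(shardDir)) {
            for (Path file : stream) {
                if (file.getFileName().toString().startsWith("corrupted_")) {
                    return file;
                }
            }
        }
        return null; // no corruption marker found
    }
}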