Use of java.util.concurrent.CopyOnWriteArrayList in project elasticsearch by elastic.
Class UpdateIT, method testConcurrentUpdateWithRetryOnConflict.
public void testConcurrentUpdateWithRetryOnConflict() throws Exception {
    final boolean useBulkApi = randomBoolean();
    createTestIndex();
    ensureGreen();
    int numberOfThreads = scaledRandomIntBetween(2, 5);
    final CountDownLatch latch = new CountDownLatch(numberOfThreads);
    final CountDownLatch startLatch = new CountDownLatch(1);
    final int numberOfUpdatesPerThread = scaledRandomIntBetween(100, 500);
    final List<Exception> failures = new CopyOnWriteArrayList<>();
    for (int i = 0; i < numberOfThreads; i++) {
        Runnable r = new Runnable() {
            @Override
            public void run() {
                try {
                    startLatch.await();
                    for (int i = 0; i < numberOfUpdatesPerThread; i++) {
                        if (i % 100 == 0) {
                            logger.debug("Client [{}] issued [{}] of [{}] requests", Thread.currentThread().getName(), i, numberOfUpdatesPerThread);
                        }
                        if (useBulkApi) {
                            UpdateRequestBuilder updateRequestBuilder = client().prepareUpdate(indexOrAlias(), "type1", Integer.toString(i))
                                .setScript(new Script(ScriptType.INLINE, "field_inc", "field", Collections.emptyMap()))
                                .setRetryOnConflict(Integer.MAX_VALUE)
                                .setUpsert(jsonBuilder().startObject().field("field", 1).endObject());
                            client().prepareBulk().add(updateRequestBuilder).execute().actionGet();
                        } else {
                            client().prepareUpdate(indexOrAlias(), "type1", Integer.toString(i))
                                .setScript(new Script(ScriptType.INLINE, "field_inc", "field", Collections.emptyMap()))
                                .setRetryOnConflict(Integer.MAX_VALUE)
                                .setUpsert(jsonBuilder().startObject().field("field", 1).endObject())
                                .execute().actionGet();
                        }
                    }
                    logger.info("Client [{}] issued all [{}] requests.", Thread.currentThread().getName(), numberOfUpdatesPerThread);
                } catch (InterruptedException e) {
                    // test infrastructure kills long-running tests by interrupting them, thus we handle this case separately
                    logger.warn("Test was forcefully stopped. Client [{}] may still have outstanding requests.", Thread.currentThread().getName());
                    failures.add(e);
                    Thread.currentThread().interrupt();
                } catch (Exception e) {
                    failures.add(e);
                } finally {
                    latch.countDown();
                }
            }
        };
        Thread updater = new Thread(r);
        updater.setName("UpdateIT-Client-" + i);
        updater.start();
    }
    startLatch.countDown();
    latch.await();
    for (Throwable throwable : failures) {
        logger.info("Captured failure on concurrent update:", throwable);
    }
    assertThat(failures.size(), equalTo(0));
    for (int i = 0; i < numberOfUpdatesPerThread; i++) {
        GetResponse response = client().prepareGet("test", "type1", Integer.toString(i)).execute().actionGet();
        assertThat(response.getId(), equalTo(Integer.toString(i)));
        assertThat(response.isExists(), equalTo(true));
        assertThat(response.getVersion(), equalTo((long) numberOfThreads));
        assertThat((Integer) response.getSource().get("field"), equalTo(numberOfThreads));
    }
}
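Every snippet on this page uses the same idiom: worker threads append any failure they hit to a shared CopyOnWriteArrayList, and the coordinating thread inspects the list once all workers are done. Below is a minimal, self-contained sketch of that idiom (plain Java, no Elasticsearch client; doWork and the thread/iteration counts are placeholder assumptions, not taken from the test above):

    import java.util.List;
    import java.util.concurrent.CopyOnWriteArrayList;
    import java.util.concurrent.CountDownLatch;

    public class FailureCollectingTest {

        // Placeholder for the real work each thread performs (e.g. an update request).
        static void doWork(int iteration) {
            // ... issue a request, may throw ...
        }

        public static void main(String[] args) throws InterruptedException {
            final int numberOfThreads = 4;
            final CountDownLatch startLatch = new CountDownLatch(1);
            final CountDownLatch doneLatch = new CountDownLatch(numberOfThreads);
            // CopyOnWriteArrayList: adds from many threads are safe without external locking.
            final List<Exception> failures = new CopyOnWriteArrayList<>();

            for (int t = 0; t < numberOfThreads; t++) {
                new Thread(() -> {
                    try {
                        startLatch.await();          // all workers start together
                        for (int i = 0; i < 100; i++) {
                            doWork(i);
                        }
                    } catch (Exception e) {
                        failures.add(e);             // record, never swallow
                    } finally {
                        doneLatch.countDown();       // always signal completion
                    }
                }).start();
            }

            startLatch.countDown();                  // release the workers
            doneLatch.await();                       // wait for all of them
            if (!failures.isEmpty()) {
                throw new AssertionError("captured " + failures.size() + " failures", failures.get(0));
            }
        }
    }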
Use of java.util.concurrent.CopyOnWriteArrayList in project elasticsearch by elastic.
Class NodeJoinControllerTests, method testNormalConcurrentJoins.
public void testNormalConcurrentJoins() throws InterruptedException {
    Thread[] threads = new Thread[3 + randomInt(5)];
    ArrayList<DiscoveryNode> nodes = new ArrayList<>();
    nodes.add(clusterService.localNode());
    final CyclicBarrier barrier = new CyclicBarrier(threads.length);
    final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
    for (int i = 0; i < threads.length; i++) {
        final DiscoveryNode node = newNode(i);
        final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
        nodes.add(node);
        threads[i] = new Thread(new AbstractRunnable() {
            @Override
            public void onFailure(Exception e) {
                logger.error("unexpected error in join thread", e);
                backgroundExceptions.add(e);
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                for (int i = 0; i < iterations; i++) {
                    logger.debug("{} joining", node);
                    joinNode(node);
                }
            }
        }, "t_" + i);
        threads[i].start();
    }
    logger.info("--> waiting for joins to complete");
    for (Thread thread : threads) {
        thread.join();
    }
    assertNodesInCurrentState(nodes);
}
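A plain ArrayList would not be safe in the role backgroundExceptions plays here: unsynchronized adds from several join threads can corrupt it, and iterating it while a late thread still appends can throw ConcurrentModificationException. A small sketch, unrelated to the Elasticsearch code, showing the copy-on-write behaviour that makes the shared list safe to read without locks:

    import java.util.List;
    import java.util.concurrent.CopyOnWriteArrayList;

    public class SnapshotIterationDemo {
        public static void main(String[] args) throws InterruptedException {
            List<String> events = new CopyOnWriteArrayList<>();
            events.add("a");
            events.add("b");

            Thread writer = new Thread(() -> {
                // Concurrent writes never invalidate an iterator that is already open.
                for (int i = 0; i < 1000; i++) {
                    events.add("event-" + i);
                }
            });
            writer.start();

            // The loop iterates over a snapshot taken when the iterator was created;
            // no ConcurrentModificationException, no explicit synchronization needed.
            for (String event : events) {
                System.out.println(event);
            }
            writer.join();
            System.out.println("total events: " + events.size());
        }
    }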
Use of java.util.concurrent.CopyOnWriteArrayList in project elasticsearch by elastic.
Class NodeJoinControllerTests, method testElectionWithConcurrentJoins.
public void testElectionWithConcurrentJoins() throws InterruptedException, BrokenBarrierException {
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(clusterService.state().nodes()).masterNodeId(null);
    setState(clusterService, ClusterState.builder(clusterService.state()).nodes(nodesBuilder));
    nodeJoinController.startElectionContext();
    Thread[] threads = new Thread[3 + randomInt(5)];
    final int requiredJoins = randomInt(threads.length);
    ArrayList<DiscoveryNode> nodes = new ArrayList<>();
    nodes.add(clusterService.localNode());
    final CyclicBarrier barrier = new CyclicBarrier(threads.length + 1);
    final List<Throwable> backgroundExceptions = new CopyOnWriteArrayList<>();
    for (int i = 0; i < threads.length; i++) {
        final DiscoveryNode node = newNode(i, true);
        final int iterations = rarely() ? randomIntBetween(1, 4) : 1;
        nodes.add(node);
        threads[i] = new Thread(new AbstractRunnable() {
            @Override
            public void onFailure(Exception e) {
                logger.error("unexpected error in join thread", e);
                backgroundExceptions.add(e);
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                for (int i = 0; i < iterations; i++) {
                    logger.debug("{} joining", node);
                    joinNode(node);
                }
            }
        }, "t_" + i);
        threads[i].start();
    }
    barrier.await();
    logger.info("--> waiting to be elected as master (required joins [{}])", requiredJoins);
    final AtomicReference<Throwable> failure = new AtomicReference<>();
    final CountDownLatch latch = new CountDownLatch(1);
    nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30), new NodeJoinController.ElectionCallback() {
        @Override
        public void onElectedAsMaster(ClusterState state) {
            assertThat("callback called with elected as master, but state disagrees", state.nodes().isLocalNodeElectedMaster(), equalTo(true));
            latch.countDown();
        }

        @Override
        public void onFailure(Throwable t) {
            logger.error("unexpected error while waiting to be elected as master", t);
            failure.set(t);
            latch.countDown();
        }
    });
    latch.await();
    ExceptionsHelper.reThrowIfNotNull(failure.get());
    logger.info("--> waiting for joins to complete");
    for (Thread thread : threads) {
        thread.join();
    }
    assertNodesInCurrentState(nodes);
}
Use of java.util.concurrent.CopyOnWriteArrayList in project elasticsearch by elastic.
Class IndexWithShadowReplicasIT, method testPrimaryRelocationWithConcurrentIndexing.
public void testPrimaryRelocationWithConcurrentIndexing() throws Exception {
    Path dataPath = createTempDir();
    Settings nodeSettings = nodeSettings(dataPath);
    String node1 = internalCluster().startNode(nodeSettings);
    final String IDX = "test";
    Settings idxSettings = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
        .put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString())
        .put(IndexMetaData.SETTING_SHADOW_REPLICAS, true)
        .put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true)
        .build();
    prepareCreate(IDX).setSettings(idxSettings).addMapping("doc", "foo", "type=text").get();
    // Node1 has the primary, now node2 has the replica
    String node2 = internalCluster().startNode(nodeSettings);
    ensureGreen(IDX);
    flushAndRefresh(IDX);
    String node3 = internalCluster().startNode(nodeSettings);
    final AtomicInteger counter = new AtomicInteger(0);
    final CountDownLatch started = new CountDownLatch(1);
    final int numPhase1Docs = scaledRandomIntBetween(25, 200);
    final int numPhase2Docs = scaledRandomIntBetween(25, 200);
    final CountDownLatch phase1finished = new CountDownLatch(1);
    final CountDownLatch phase2finished = new CountDownLatch(1);
    final CopyOnWriteArrayList<Exception> exceptions = new CopyOnWriteArrayList<>();
    Thread thread = new Thread() {
        @Override
        public void run() {
            started.countDown();
            while (counter.get() < (numPhase1Docs + numPhase2Docs)) {
                try {
                    final IndexResponse indexResponse = client().prepareIndex(IDX, "doc", Integer.toString(counter.incrementAndGet())).setSource("foo", "bar").get();
                    assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
                } catch (Exception e) {
                    exceptions.add(e);
                }
                final int docCount = counter.get();
                if (docCount == numPhase1Docs) {
                    phase1finished.countDown();
                }
            }
            logger.info("--> stopping indexing thread");
            phase2finished.countDown();
        }
    };
    thread.start();
    started.await();
    // wait for a certain number of documents to be indexed
    phase1finished.await();
    logger.info("--> excluding {} from allocation", node1);
    // now prevent the primary from being allocated on node 1, so it moves to node_3
    Settings build = Settings.builder().put("index.routing.allocation.exclude._name", node1).build();
    client().admin().indices().prepareUpdateSettings(IDX).setSettings(build).execute().actionGet();
    // wait for more documents to be indexed post-recovery; this also waits for the
    // indexing thread to stop
    phase2finished.await();
    ExceptionsHelper.rethrowAndSuppress(exceptions);
    ensureGreen(IDX);
    thread.join();
    logger.info("--> performing query");
    flushAndRefresh();
    SearchResponse resp = client().prepareSearch(IDX).setQuery(matchAllQuery()).get();
    assertHitCount(resp, counter.get());
    assertHitCount(resp, numPhase1Docs + numPhase2Docs);
}
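ExceptionsHelper.rethrowAndSuppress collapses the collected exceptions into a single failure. As a rough sketch only, not the Elasticsearch implementation, an equivalent helper could throw the first exception and attach the rest via Throwable.addSuppressed:

    import java.util.List;

    final class Rethrow {
        private Rethrow() {}

        // Sketch: throw the first collected exception, attaching the others as suppressed;
        // do nothing if no exceptions were collected.
        static void rethrowAndSuppress(List<Exception> exceptions) throws Exception {
            Exception main = null;
            for (Exception e : exceptions) {
                if (main == null) {
                    main = e;
                } else {
                    main.addSuppressed(e);
                }
            }
            if (main != null) {
                throw main;
            }
        }
    }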
Use of java.util.concurrent.CopyOnWriteArrayList in project elasticsearch by elastic.
Class CorruptedFileIT, method testCorruptFileAndRecover.
/**
* Tests that we can actually recover from a corruption on the primary given that we have replica shards around.
*/
public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    // have enough space for 3 copies
    internalCluster().ensureAtLeastNumDataNodes(3);
    if (cluster().numDataNodes() == 3) {
        logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
    }
    assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));
    assertAcked(prepareCreate("test").setSettings(Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1")
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1")
        .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
        // no checkindex - we corrupt shards on purpose
        .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
        // no translog based flush - it might change the .liv / segments.N files
        .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    disableAllocation("test");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    final int numShards = numShards("test");
    ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
    logger.info("--> {} corrupted", corruptedShardRouting);
    enableAllocation("test");
    /*
     * we corrupted the primary shard - now let's make sure we never recover from it successfully
     */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    ClusterHealthResponse health = client().admin().cluster().health(Requests.clusterHealthRequest("test")
        .waitForGreenStatus()
        // sometimes due to cluster rebalancing and random settings the default timeout is just not enough
        .timeout("5m")
        .waitForNoRelocatingShards(true)).actionGet();
    if (health.isTimedOut()) {
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
        assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
    }
    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
        SearchResponse response = client().prepareSearch().setSize(numDocs).get();
        assertHitCount(response, numDocs);
    }
    /*
     * now hook into the IndicesService and register a close listener to
     * run the checkindex. if the corruption is still there we will catch it.
     */
    // primary + 2 replicas
    final CountDownLatch latch = new CountDownLatch(numShards * 3);
    final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
    final IndexEventListener listener = new IndexEventListener() {
        @Override
        public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
            if (indexShard != null) {
                Store store = indexShard.store();
                store.incRef();
                try {
                    if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) {
                        return;
                    }
                    try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
                        BytesStreamOutput os = new BytesStreamOutput();
                        PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                        checkIndex.setInfoStream(out);
                        out.flush();
                        CheckIndex.Status status = checkIndex.checkIndex();
                        if (!status.clean) {
                            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                            throw new IOException("index check failure");
                        }
                    }
                } catch (Exception e) {
                    exception.add(e);
                } finally {
                    store.decRef();
                    latch.countDown();
                }
            }
        }
    };
    for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(listener);
    }
    try {
        client().admin().indices().prepareDelete("test").get();
        latch.await();
        assertThat(exception, empty());
    } finally {
        for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
            eventListener.setNewDelegate(null);
        }
    }
}
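The listener above runs Lucene's CheckIndex against each shard's store as the shard closes, and records any corruption it finds in the CopyOnWriteArrayList. As a minimal sketch of that check in isolation (the directory path is a placeholder, and this is not how the test itself invokes it), CheckIndex can be pointed directly at an index directory:

    import java.nio.file.Paths;
    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class CheckIndexDemo {
        public static void main(String[] args) throws Exception {
            // Placeholder path; point it at a real Lucene index directory.
            try (Directory dir = FSDirectory.open(Paths.get("/tmp/shard0/index"));
                 CheckIndex checkIndex = new CheckIndex(dir)) {
                checkIndex.setInfoStream(System.out);   // print per-segment diagnostics
                CheckIndex.Status status = checkIndex.checkIndex();
                if (!status.clean) {
                    throw new RuntimeException("index is corrupted");
                }
                System.out.println("index is clean");
            }
        }
    }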