Use of java.util.concurrent.BrokenBarrierException in project elasticsearch by elastic.
From class IndexShardTests, method testRelocatedShardCanNotBeRevivedConcurrently:
public void testRelocatedShardCanNotBeRevivedConcurrently() throws IOException, InterruptedException, BrokenBarrierException {
    final IndexShard shard = newStartedShard(true);
    final ShardRouting originalRouting = shard.routingEntry();
    shard.updateRoutingEntry(ShardRoutingHelper.relocate(originalRouting, "other_node"));
    CyclicBarrier cyclicBarrier = new CyclicBarrier(3);
    AtomicReference<Exception> relocationException = new AtomicReference<>();
    Thread relocationThread = new Thread(new AbstractRunnable() {
        @Override
        public void onFailure(Exception e) {
            relocationException.set(e);
        }

        @Override
        protected void doRun() throws Exception {
            cyclicBarrier.await();
            shard.relocated("test");
        }
    });
    relocationThread.start();
    AtomicReference<Exception> cancellingException = new AtomicReference<>();
    Thread cancellingThread = new Thread(new AbstractRunnable() {
        @Override
        public void onFailure(Exception e) {
            cancellingException.set(e);
        }

        @Override
        protected void doRun() throws Exception {
            cyclicBarrier.await();
            shard.updateRoutingEntry(originalRouting);
        }
    });
    cancellingThread.start();
    cyclicBarrier.await();
    relocationThread.join();
    cancellingThread.join();
    if (shard.state() == IndexShardState.RELOCATED) {
        logger.debug("shard was relocated successfully");
        assertThat(cancellingException.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(true));
        assertThat(relocationException.get(), nullValue());
    } else {
        logger.debug("shard relocation was cancelled");
        assertThat(relocationException.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(false));
        assertThat(cancellingException.get(), nullValue());
    }
    closeShards(shard);
}
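The test above coordinates two racing operations with a three-party CyclicBarrier: the relocation thread, the cancelling thread, and the test's main thread all call await() so both operations are released at the same instant. Here is a minimal, self-contained sketch of that coordination pattern; everything in it is illustrative and none of it is Elasticsearch API:

import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicReference;

public class BarrierRaceDemo {
    public static void main(String[] args) throws Exception {
        // three parties: two racing threads plus the main thread
        CyclicBarrier barrier = new CyclicBarrier(3);
        AtomicReference<Exception> error = new AtomicReference<>();
        Runnable racer = () -> {
            try {
                barrier.await(); // blocks until all three parties arrive
                // ... the racing operation would go here ...
            } catch (Exception e) {
                error.set(e);
            }
        };
        Thread a = new Thread(racer);
        Thread b = new Thread(racer);
        a.start();
        b.start();
        barrier.await(); // the main thread releases both racers at once
        a.join();
        b.join();
        if (error.get() != null) {
            throw error.get();
        }
        // exactly one of the two racing operations is expected to win
    }
}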
Use of java.util.concurrent.BrokenBarrierException in project elasticsearch by elastic.
From class TranslogTests, method testConcurrentWriteViewsAndSnapshot:
/**
* Tests that concurrent readers and writes maintain view and snapshot semantics
*/
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
    final Thread[] writers = new Thread[randomIntBetween(1, 10)];
    final Thread[] readers = new Thread[randomIntBetween(1, 10)];
    final int flushEveryOps = randomIntBetween(5, 100);
    // used to notify the main thread that enough operations have been written for it to simulate a flush
    final AtomicReference<CountDownLatch> writtenOpsLatch = new AtomicReference<>(new CountDownLatch(0));
    final AtomicLong idGenerator = new AtomicLong();
    final CyclicBarrier barrier = new CyclicBarrier(writers.length + readers.length + 1);
    // a map of all written ops and their returned location
    final Map<Translog.Operation, Translog.Location> writtenOps = ConcurrentCollections.newConcurrentMap();
    // a signal for all threads to stop
    final AtomicBoolean run = new AtomicBoolean(true);
    // any errors on threads
    final List<Exception> errors = new CopyOnWriteArrayList<>();
    logger.debug("using [{}] readers. [{}] writers. flushing every ~[{}] ops.", readers.length, writers.length, flushEveryOps);
    for (int i = 0; i < writers.length; i++) {
        final String threadName = "writer_" + i;
        final int threadId = i;
        writers[i] = new Thread(new AbstractRunnable() {
            @Override
            public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
                barrier.await();
                int counter = 0;
                while (run.get()) {
                    long id = idGenerator.incrementAndGet();
                    final Translog.Operation op;
                    final Translog.Operation.Type type =
                        Translog.Operation.Type.values()[((int) (id % Translog.Operation.Type.values().length))];
                    switch (type) {
                        case CREATE:
                        case INDEX:
                            op = new Translog.Index("type", "" + id, new byte[] { (byte) id });
                            break;
                        case DELETE:
                            op = new Translog.Delete(newUid("" + id));
                            break;
                        case NO_OP:
                            op = new Translog.NoOp(id, id, Long.toString(id));
                            break;
                        default:
                            throw new AssertionError("unsupported operation type [" + type + "]");
                    }
                    Translog.Location location = translog.add(op);
                    Translog.Location existing = writtenOps.put(op, location);
                    if (existing != null) {
                        fail("duplicate op [" + op + "], old entry at " + existing);
                    }
                    if (id % writers.length == threadId) {
                        translog.ensureSynced(location);
                    }
                    writtenOpsLatch.get().countDown();
                    counter++;
                }
                logger.debug("--> [{}] done. wrote [{}] ops.", threadName, counter);
            }

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> writer [{}] had an error", threadName), e);
                errors.add(e);
            }
        }, threadName);
        writers[i].start();
    }
    for (int i = 0; i < readers.length; i++) {
        final String threadId = "reader_" + i;
        readers[i] = new Thread(new AbstractRunnable() {
            Translog.View view = null;
            Set<Translog.Operation> writtenOpsAtView;

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> reader [{}] had an error", threadId), e);
                errors.add(e);
                try {
                    closeView();
                } catch (IOException inner) {
                    inner.addSuppressed(e);
                    logger.error("unexpected error while closing view, after failure", inner);
                }
            }

            void closeView() throws IOException {
                if (view != null) {
                    view.close();
                }
            }

            void newView() throws IOException {
                closeView();
                view = translog.newView();
                // captures the currently written ops so we know what to expect from the view
                writtenOpsAtView = new HashSet<>(writtenOps.keySet());
                logger.debug("--> [{}] opened view from [{}]", threadId, view.minTranslogGeneration());
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                int iter = 0;
                while (run.get()) {
                    if (iter++ % 10 == 0) {
                        newView();
                    }
                    // captures all ops written since the view was created (with a small caveat, see below).
                    // these are what we expect the snapshot to return (and potentially some more).
                    Set<Translog.Operation> expectedOps = new HashSet<>(writtenOps.keySet());
                    expectedOps.removeAll(writtenOpsAtView);
                    Translog.Snapshot snapshot = view.snapshot();
                    Translog.Operation op;
                    while ((op = snapshot.next()) != null) {
                        expectedOps.remove(op);
                    }
                    if (expectedOps.isEmpty() == false) {
                        StringBuilder missed = new StringBuilder("missed ").append(expectedOps.size()).append(" operations");
                        boolean failed = false;
                        for (Translog.Operation expectedOp : expectedOps) {
                            final Translog.Location loc = writtenOps.get(expectedOp);
                            if (loc.generation < view.minTranslogGeneration()) {
                                // the caveat: writtenOps is updated only after the op is added to the translog,
                                // so an op written before the view was created may be missing from
                                // writtenOpsAtView, meaning we would erroneously expect it; skip it
                                continue;
                            }
                            failed = true;
                            missed.append("\n --> [").append(expectedOp).append("] written at ").append(loc);
                        }
                        if (failed) {
                            fail(missed.toString());
                        }
                    }
                    // slow down things a bit and spread out testing..
                    writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS);
                }
                closeView();
                logger.debug("--> [{}] done. tested [{}] snapshots", threadId, iter);
            }
        }, threadId);
        readers[i].start();
    }
    barrier.await();
    try {
        for (int iterations = scaledRandomIntBetween(10, 200); iterations > 0 && errors.isEmpty(); iterations--) {
            writtenOpsLatch.set(new CountDownLatch(flushEveryOps));
            while (writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS) == false) {
                if (errors.size() > 0) {
                    break;
                }
            }
            translog.commit();
        }
    } finally {
        run.set(false);
        logger.debug("--> waiting for threads to stop");
        for (Thread thread : writers) {
            thread.join();
        }
        for (Thread thread : readers) {
            thread.join();
        }
        if (errors.size() > 0) {
            Throwable e = errors.get(0);
            for (Throwable suppress : errors.subList(1, errors.size())) {
                e.addSuppressed(suppress);
            }
            throw e;
        }
        logger.info("--> test done. total ops written [{}]", writtenOps.size());
    }
}
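One detail worth pulling out of the test above: a CountDownLatch cannot be reset, so the test wraps it in an AtomicReference and arms a fresh latch for each simulated flush while the writers keep counting down whichever latch is currently installed. A minimal sketch of that resettable-latch pattern (all names here are illustrative, not from the Elasticsearch codebase):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

public class ResettableLatchDemo {
    public static void main(String[] args) throws InterruptedException {
        // start with an already-released latch so the producer never blocks before the first round
        AtomicReference<CountDownLatch> latch = new AtomicReference<>(new CountDownLatch(0));
        AtomicBoolean run = new AtomicBoolean(true);
        Thread producer = new Thread(() -> {
            while (run.get()) {
                // ... do one unit of work ...
                latch.get().countDown(); // signal progress against whatever latch is currently armed
            }
        });
        producer.start();
        for (int round = 0; round < 3; round++) {
            latch.set(new CountDownLatch(100)); // arm a fresh latch; CountDownLatch itself cannot be reset
            while (!latch.get().await(200, TimeUnit.MILLISECONDS)) {
                // poll until the batch completes (real code would also check an error list here)
            }
            // ... simulate a flush/commit here ...
        }
        run.set(false);
        producer.join();
    }
}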
Use of java.util.concurrent.BrokenBarrierException in project elasticsearch by elastic.
From class IndexStatsIT, method testConcurrentIndexingAndStatsRequests:
/**
* Test that we can safely concurrently index and get stats. This test was inspired by a serialization issue that arose due to a race
* getting doc stats during heavy indexing. The race could lead to deleted docs being negative which would then be serialized as a
* variable-length long. Since serialization of negative longs using a variable-length format was unsupported
* ({@link org.elasticsearch.common.io.stream.StreamOutput#writeVLong(long)}), the stream would become corrupted. Here, we want to test
* that we can continue to get stats while indexing.
*/
public void testConcurrentIndexingAndStatsRequests() throws BrokenBarrierException, InterruptedException, ExecutionException {
    final AtomicInteger idGenerator = new AtomicInteger();
    final int numberOfIndexingThreads = Runtime.getRuntime().availableProcessors();
    final int numberOfStatsThreads = 4 * numberOfIndexingThreads;
    final CyclicBarrier barrier = new CyclicBarrier(1 + numberOfIndexingThreads + numberOfStatsThreads);
    final AtomicBoolean stop = new AtomicBoolean();
    final List<Thread> threads = new ArrayList<>(numberOfIndexingThreads + numberOfStatsThreads);
    final CountDownLatch latch = new CountDownLatch(1);
    final AtomicBoolean failed = new AtomicBoolean();
    final AtomicReference<List<ShardOperationFailedException>> shardFailures = new AtomicReference<>(new CopyOnWriteArrayList<>());
    final AtomicReference<List<Exception>> executionFailures = new AtomicReference<>(new CopyOnWriteArrayList<>());
    // increasing the number of shards increases the number of chances any one stats request will hit a race
    final CreateIndexRequest createIndexRequest =
        new CreateIndexRequest("test", Settings.builder().put("index.number_of_shards", 10).build());
    client().admin().indices().create(createIndexRequest).get();
    // start threads that will index concurrently with stats requests
    for (int i = 0; i < numberOfIndexingThreads; i++) {
        final Thread thread = new Thread(() -> {
            try {
                barrier.await();
            } catch (final BrokenBarrierException | InterruptedException e) {
                failed.set(true);
                executionFailures.get().add(e);
                latch.countDown();
            }
            while (!stop.get()) {
                final String id = Integer.toString(idGenerator.incrementAndGet());
                final IndexResponse response = client().prepareIndex("test", "type", id).setSource("{}", XContentType.JSON).get();
                assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED));
            }
        });
        thread.setName("indexing-" + i);
        threads.add(thread);
        thread.start();
    }
    // start threads that will get stats concurrently with indexing
    for (int i = 0; i < numberOfStatsThreads; i++) {
        final Thread thread = new Thread(() -> {
            try {
                barrier.await();
            } catch (final BrokenBarrierException | InterruptedException e) {
                failed.set(true);
                executionFailures.get().add(e);
                latch.countDown();
            }
            final IndicesStatsRequest request = new IndicesStatsRequest();
            request.all();
            request.indices(new String[0]);
            while (!stop.get()) {
                try {
                    final IndicesStatsResponse response = client().admin().indices().stats(request).get();
                    if (response.getFailedShards() > 0) {
                        failed.set(true);
                        shardFailures.get().addAll(Arrays.asList(response.getShardFailures()));
                        latch.countDown();
                    }
                } catch (final ExecutionException | InterruptedException e) {
                    failed.set(true);
                    executionFailures.get().add(e);
                    latch.countDown();
                }
            }
        });
        thread.setName("stats-" + i);
        threads.add(thread);
        thread.start();
    }
    // release the hounds
    barrier.await();
    // wait for a failure, or for fifteen seconds to elapse
    latch.await(15, TimeUnit.SECONDS);
    // stop all threads and wait for them to complete
    stop.set(true);
    for (final Thread thread : threads) {
        thread.join();
    }
    assertThat(shardFailures.get(), emptyCollectionOf(ShardOperationFailedException.class));
    assertThat(executionFailures.get(), emptyCollectionOf(Exception.class));
}
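The race described in the Javadoc is worth unpacking. In a 7-bits-per-byte variable-length encoding, a negative long has all of its high bits set, so every 7-bit group is non-zero and the value balloons to the maximum 10 bytes; a writer that assumes non-negative input, as writeVLong did, leaves the stream in a state the reader can no longer decode. The following is a minimal LEB128-style encoder to illustrate the effect; it is an assumption-laden sketch, not the actual StreamOutput implementation:

import java.io.ByteArrayOutputStream;

public class VLongDemo {
    // illustrative variable-length encoder, similar in spirit to StreamOutput#writeVLong
    static byte[] writeVLong(long value) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        while ((value & ~0x7FL) != 0L) {
            out.write((byte) ((value & 0x7F) | 0x80)); // low 7 bits, continuation bit set
            value >>>= 7; // unsigned shift: for a negative input the high bits are all ones
        }
        out.write((byte) value);
        return out.toByteArray();
    }

    public static void main(String[] args) {
        System.out.println(writeVLong(5).length);  // 1 byte
        System.out.println(writeVLong(-1).length); // 10 bytes: every 7-bit group of a negative long is non-zero
    }
}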
Use of java.util.concurrent.BrokenBarrierException in project elasticsearch by elastic.
From class ClusterServiceTests, method testClusterStateBatchedUpdates:
public void testClusterStateBatchedUpdates() throws BrokenBarrierException, InterruptedException {
    AtomicInteger counter = new AtomicInteger();
    class Task {
        private AtomicBoolean state = new AtomicBoolean();
        private final int id;

        Task(int id) {
            this.id = id;
        }

        public void execute() {
            if (!state.compareAndSet(false, true)) {
                throw new IllegalStateException();
            } else {
                counter.incrementAndGet();
            }
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            Task task = (Task) o;
            return id == task.id;
        }

        @Override
        public int hashCode() {
            return id;
        }

        @Override
        public String toString() {
            return Integer.toString(id);
        }
    }
    int numberOfThreads = randomIntBetween(2, 8);
    int taskSubmissionsPerThread = randomIntBetween(1, 64);
    int numberOfExecutors = Math.max(1, numberOfThreads / 4);
    final Semaphore semaphore = new Semaphore(numberOfExecutors);
    class TaskExecutor implements ClusterStateTaskExecutor<Task> {
        private final List<Set<Task>> taskGroups;
        private AtomicInteger counter = new AtomicInteger();
        private AtomicInteger batches = new AtomicInteger();
        private AtomicInteger published = new AtomicInteger();

        TaskExecutor(List<Set<Task>> taskGroups) {
            this.taskGroups = taskGroups;
        }

        @Override
        public ClusterTasksResult<Task> execute(ClusterState currentState, List<Task> tasks) throws Exception {
            for (Set<Task> expectedSet : taskGroups) {
                long count = tasks.stream().filter(expectedSet::contains).count();
                assertThat("batched set should be executed together or not at all. Expected " + expectedSet + "s. Executing " + tasks,
                    count, anyOf(equalTo(0L), equalTo((long) expectedSet.size())));
            }
            tasks.forEach(Task::execute);
            counter.addAndGet(tasks.size());
            ClusterState maybeUpdatedClusterState = currentState;
            if (randomBoolean()) {
                maybeUpdatedClusterState = ClusterState.builder(currentState).build();
                batches.incrementAndGet();
                semaphore.acquire();
            }
            return ClusterTasksResult.<Task>builder().successes(tasks).build(maybeUpdatedClusterState);
        }

        @Override
        public void clusterStatePublished(ClusterChangedEvent clusterChangedEvent) {
            published.incrementAndGet();
            semaphore.release();
        }
    }
    ConcurrentMap<String, AtomicInteger> processedStates = new ConcurrentHashMap<>();
    List<Set<Task>> taskGroups = new ArrayList<>();
    List<TaskExecutor> executors = new ArrayList<>();
    for (int i = 0; i < numberOfExecutors; i++) {
        executors.add(new TaskExecutor(taskGroups));
    }
    // randomly assign tasks to executors
    List<Tuple<TaskExecutor, Set<Task>>> assignments = new ArrayList<>();
    int taskId = 0;
    for (int i = 0; i < numberOfThreads; i++) {
        for (int j = 0; j < taskSubmissionsPerThread; j++) {
            TaskExecutor executor = randomFrom(executors);
            Set<Task> tasks = new HashSet<>();
            for (int t = randomInt(3); t >= 0; t--) {
                tasks.add(new Task(taskId++));
            }
            taskGroups.add(tasks);
            assignments.add(Tuple.tuple(executor, tasks));
        }
    }
    Map<TaskExecutor, Integer> counts = new HashMap<>();
    int totalTaskCount = 0;
    for (Tuple<TaskExecutor, Set<Task>> assignment : assignments) {
        final int taskCount = assignment.v2().size();
        counts.merge(assignment.v1(), taskCount, (previous, count) -> previous + count);
        totalTaskCount += taskCount;
    }
    final CountDownLatch updateLatch = new CountDownLatch(totalTaskCount);
    final ClusterStateTaskListener listener = new ClusterStateTaskListener() {
        @Override
        public void onFailure(String source, Exception e) {
            fail(ExceptionsHelper.detailedMessage(e));
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            processedStates.computeIfAbsent(source, key -> new AtomicInteger()).incrementAndGet();
            updateLatch.countDown();
        }
    };
    final ConcurrentMap<String, AtomicInteger> submittedTasksPerThread = new ConcurrentHashMap<>();
    CyclicBarrier barrier = new CyclicBarrier(1 + numberOfThreads);
    for (int i = 0; i < numberOfThreads; i++) {
        final int index = i;
        Thread thread = new Thread(() -> {
            final String threadName = Thread.currentThread().getName();
            try {
                barrier.await();
                for (int j = 0; j < taskSubmissionsPerThread; j++) {
                    Tuple<TaskExecutor, Set<Task>> assignment = assignments.get(index * taskSubmissionsPerThread + j);
                    final Set<Task> tasks = assignment.v2();
                    submittedTasksPerThread.computeIfAbsent(threadName, key -> new AtomicInteger()).addAndGet(tasks.size());
                    final TaskExecutor executor = assignment.v1();
                    if (tasks.size() == 1) {
                        clusterService.submitStateUpdateTask(threadName, tasks.stream().findFirst().get(),
                            ClusterStateTaskConfig.build(randomFrom(Priority.values())), executor, listener);
                    } else {
                        Map<Task, ClusterStateTaskListener> taskListeners = new HashMap<>();
                        tasks.forEach(t -> taskListeners.put(t, listener));
                        clusterService.submitStateUpdateTasks(threadName, taskListeners,
                            ClusterStateTaskConfig.build(randomFrom(Priority.values())), executor);
                    }
                }
                barrier.await();
            } catch (BrokenBarrierException | InterruptedException e) {
                throw new AssertionError(e);
            }
        });
        thread.start();
    }
    // wait for all threads to be ready
    barrier.await();
    // wait for all threads to finish
    barrier.await();
    // wait until all the cluster state updates have been processed
    updateLatch.await();
    // and until all of the publication callbacks have completed
    semaphore.acquire(numberOfExecutors);
    // assert the number of executed tasks is correct
    assertEquals(totalTaskCount, counter.get());
    // assert each executor executed the correct number of tasks
    for (TaskExecutor executor : executors) {
        if (counts.containsKey(executor)) {
            assertEquals((int) counts.get(executor), executor.counter.get());
            assertEquals(executor.batches.get(), executor.published.get());
        }
    }
    // assert the correct number of clusterStateProcessed events were triggered
    for (Map.Entry<String, AtomicInteger> entry : processedStates.entrySet()) {
        assertThat(submittedTasksPerThread, hasKey(entry.getKey()));
        assertEquals("not all tasks submitted by " + entry.getKey() + " received a processed event",
            entry.getValue().get(), submittedTasksPerThread.get(entry.getKey()).get());
    }
}
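A second coordination trick in the test above: the semaphore starts with one permit per executor, each state-changing batch takes a permit in execute, and clusterStatePublished returns it, so semaphore.acquire(numberOfExecutors) at the end blocks until every publication callback has fired. A stripped-down sketch of that accounting follows; the Thread.sleep stands in for the publication round-trip and nothing here is the actual cluster-state machinery:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;

public class PublicationTrackingDemo {
    public static void main(String[] args) throws InterruptedException {
        final int numberOfExecutors = 3;
        final Semaphore semaphore = new Semaphore(numberOfExecutors);
        final CountDownLatch executed = new CountDownLatch(numberOfExecutors);
        for (int i = 0; i < numberOfExecutors; i++) {
            new Thread(() -> {
                try {
                    semaphore.acquire();   // "execute": a state-changing batch takes a permit
                    executed.countDown();
                    Thread.sleep(100);     // stand-in for the publication round-trip
                    semaphore.release();   // "clusterStatePublished": the callback returns the permit
                } catch (InterruptedException e) {
                    throw new AssertionError(e);
                }
            }).start();
        }
        executed.await(); // mirrors updateLatch.await(): all batches have executed
        // blocks until all permits are back, i.e. every publication callback has completed
        semaphore.acquire(numberOfExecutors);
        System.out.println("all publications observed");
    }
}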
Use of java.util.concurrent.BrokenBarrierException in project elasticsearch by elastic.
From class ClusterServiceTests, method testClusterStateUpdateTasksAreExecutedInOrder:
// test that for a single thread, tasks are executed in the order
// that they are submitted
public void testClusterStateUpdateTasksAreExecutedInOrder() throws BrokenBarrierException, InterruptedException {
    class TaskExecutor implements ClusterStateTaskExecutor<Integer> {
        List<Integer> tasks = new ArrayList<>();

        @Override
        public ClusterTasksResult<Integer> execute(ClusterState currentState, List<Integer> tasks) throws Exception {
            this.tasks.addAll(tasks);
            return ClusterTasksResult.<Integer>builder().successes(tasks).build(ClusterState.builder(currentState).build());
        }
    }
    int numberOfThreads = randomIntBetween(2, 8);
    TaskExecutor[] executors = new TaskExecutor[numberOfThreads];
    for (int i = 0; i < numberOfThreads; i++) {
        executors[i] = new TaskExecutor();
    }
    int tasksSubmittedPerThread = randomIntBetween(2, 1024);
    CopyOnWriteArrayList<Tuple<String, Throwable>> failures = new CopyOnWriteArrayList<>();
    CountDownLatch updateLatch = new CountDownLatch(numberOfThreads * tasksSubmittedPerThread);
    ClusterStateTaskListener listener = new ClusterStateTaskListener() {
        @Override
        public void onFailure(String source, Exception e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure: [{}]", source), e);
            failures.add(new Tuple<>(source, e));
            updateLatch.countDown();
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            updateLatch.countDown();
        }
    };
    CyclicBarrier barrier = new CyclicBarrier(1 + numberOfThreads);
    for (int i = 0; i < numberOfThreads; i++) {
        final int index = i;
        Thread thread = new Thread(() -> {
            try {
                barrier.await();
                for (int j = 0; j < tasksSubmittedPerThread; j++) {
                    clusterService.submitStateUpdateTask("[" + index + "][" + j + "]", j,
                        ClusterStateTaskConfig.build(randomFrom(Priority.values())), executors[index], listener);
                }
                barrier.await();
            } catch (InterruptedException | BrokenBarrierException e) {
                throw new AssertionError(e);
            }
        });
        thread.start();
    }
    // wait for all threads to be ready
    barrier.await();
    // wait for all threads to finish
    barrier.await();
    updateLatch.await();
    assertThat(failures, empty());
    for (int i = 0; i < numberOfThreads; i++) {
        assertEquals(tasksSubmittedPerThread, executors[i].tasks.size());
        for (int j = 0; j < tasksSubmittedPerThread; j++) {
            assertNotNull(executors[i].tasks.get(j));
            assertEquals("cluster state update task executed out of order", j, (int) executors[i].tasks.get(j));
        }
    }
}
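Both ClusterServiceTests methods use the same two-phase barrier idiom: N submitter threads plus the main thread share a CyclicBarrier(N + 1); the first await releases all submitters at once, and the second tells the main thread that every submitter has finished queueing work. A minimal sketch of the idiom; submitWork is a placeholder, not a ClusterService call:

import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;

public class TwoPhaseBarrierDemo {
    public static void main(String[] args) throws Exception {
        final int numberOfThreads = 4;
        // one extra party for the coordinating main thread
        final CyclicBarrier barrier = new CyclicBarrier(1 + numberOfThreads);
        for (int i = 0; i < numberOfThreads; i++) {
            final int index = i;
            new Thread(() -> {
                try {
                    barrier.await(); // phase 1: wait until everyone is ready
                    submitWork(index); // placeholder for the real submissions
                    barrier.await(); // phase 2: signal that this thread is done submitting
                } catch (BrokenBarrierException | InterruptedException e) {
                    throw new AssertionError(e);
                }
            }).start();
        }
        barrier.await(); // release all threads together
        barrier.await(); // returns only after every thread has finished submitting
        // ... now wait on latches/semaphores for the submitted work itself to complete ...
    }

    static void submitWork(int index) {
        System.out.println("thread " + index + " submitted its tasks");
    }
}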