use of org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse in project elasticsearch by elastic.
the class CancellableTasksTests method testChildTasksCancellation.
public void testChildTasksCancellation() throws Exception {
setupTestNodes(Settings.EMPTY);
connectNodes(testNodes);
CountDownLatch responseLatch = new CountDownLatch(1);
final AtomicReference<NodesResponse> responseReference = new AtomicReference<>();
final AtomicReference<Throwable> throwableReference = new AtomicReference<>();
Task mainTask = startCancellableTestNodesAction(true, nodesCount, new ActionListener<NodesResponse>() {
@Override
public void onResponse(NodesResponse listTasksResponse) {
responseReference.set(listTasksResponse);
responseLatch.countDown();
}
@Override
public void onFailure(Exception e) {
throwableReference.set(e);
responseLatch.countDown();
}
});
// Cancel all child tasks without cancelling the main task, which should quit on its own
CancelTasksRequest request = new CancelTasksRequest();
request.setReason("Testing Cancellation");
request.setParentTaskId(new TaskId(testNodes[0].getNodeId(), mainTask.getId()));
// And send the cancellation request to a random node
CancelTasksResponse response = testNodes[randomIntBetween(1, testNodes.length - 1)].transportCancelTasksAction.execute(request).get();
// Awaiting for the main task to finish
responseLatch.await();
// Should have cancelled tasks on all nodes
assertThat(response.getTasks().size(), equalTo(testNodes.length));
assertBusy(() -> {
try {
// Make sure that main task is no longer running
ListTasksResponse listTasksResponse = testNodes[randomIntBetween(0, testNodes.length - 1)].transportListTasksAction.execute(new ListTasksRequest().setTaskId(new TaskId(testNodes[0].getNodeId(), mainTask.getId()))).get();
assertEquals(0, listTasksResponse.getTasks().size());
} catch (ExecutionException | InterruptedException ex) {
throw new RuntimeException(ex);
}
});
}
use of org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse in project elasticsearch by elastic.
the class CancellableTasksTests method testBasicTaskCancellation.
public void testBasicTaskCancellation() throws Exception {
setupTestNodes(Settings.EMPTY);
connectNodes(testNodes);
CountDownLatch responseLatch = new CountDownLatch(1);
boolean waitForActionToStart = randomBoolean();
logger.info("waitForActionToStart is set to {}", waitForActionToStart);
final AtomicReference<NodesResponse> responseReference = new AtomicReference<>();
final AtomicReference<Throwable> throwableReference = new AtomicReference<>();
int blockedNodesCount = randomIntBetween(0, nodesCount);
Task mainTask = startCancellableTestNodesAction(waitForActionToStart, blockedNodesCount, new ActionListener<NodesResponse>() {
@Override
public void onResponse(NodesResponse listTasksResponse) {
responseReference.set(listTasksResponse);
responseLatch.countDown();
}
@Override
public void onFailure(Exception e) {
throwableReference.set(e);
responseLatch.countDown();
}
});
// Cancel main task
CancelTasksRequest request = new CancelTasksRequest();
request.setReason("Testing Cancellation");
request.setTaskId(new TaskId(testNodes[0].getNodeId(), mainTask.getId()));
// And send the cancellation request to a random node
CancelTasksResponse response = testNodes[randomIntBetween(0, testNodes.length - 1)].transportCancelTasksAction.execute(request).get();
// Awaiting for the main task to finish
responseLatch.await();
if (response.getTasks().size() == 0) {
// We didn't cancel the request and it finished successfully
// That should be rare and can be only in case we didn't block on a single node
assertEquals(0, blockedNodesCount);
// Make sure that the request was successful
assertNull(throwableReference.get());
assertNotNull(responseReference.get());
assertEquals(nodesCount, responseReference.get().getNodes().size());
assertEquals(0, responseReference.get().failureCount());
} else {
// We canceled the request, in this case it should have fail, but we should get partial response
assertNull(throwableReference.get());
assertEquals(nodesCount, responseReference.get().failureCount() + responseReference.get().getNodes().size());
// and we should have at least as many failures as the number of blocked operations
// (we might have cancelled some non-blocked operations before they even started and that's ok)
assertThat(responseReference.get().failureCount(), greaterThanOrEqualTo(blockedNodesCount));
// We should have the information about the cancelled task in the cancel operation response
assertEquals(1, response.getTasks().size());
assertEquals(mainTask.getId(), response.getTasks().get(0).getId());
// Verify that all cancelled tasks reported that they support cancellation
for (TaskInfo taskInfo : response.getTasks()) {
assertTrue(taskInfo.isCancellable());
}
}
// Make sure that tasks are no longer running
ListTasksResponse listTasksResponse = testNodes[randomIntBetween(0, testNodes.length - 1)].transportListTasksAction.execute(new ListTasksRequest().setTaskId(new TaskId(testNodes[0].getNodeId(), mainTask.getId()))).get();
assertEquals(0, listTasksResponse.getTasks().size());
// Make sure that there are no leftover bans, the ban removal is async, so we might return from the cancellation
// while the ban is still there, but it should disappear shortly
assertBusy(() -> {
for (int i = 0; i < testNodes.length; i++) {
assertEquals("No bans on the node " + i, 0, testNodes[i].transportService.getTaskManager().getBanCount());
}
});
}
use of org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse in project elasticsearch by elastic.
the class CancelTests method testCancel.
/**
* Executes the cancellation test
*/
private void testCancel(String action, AbstractBulkByScrollRequestBuilder<?, ?> builder, CancelAssertion assertion, Matcher<String> taskDescriptionMatcher) throws Exception {
createIndex(INDEX);
// Total number of documents created for this test (~10 per primary shard per shard)
int numDocs = getNumShards(INDEX).numPrimaries * 10 * builder.request().getSlices();
ALLOWED_OPERATIONS.release(numDocs);
indexRandom(true, false, true, IntStream.range(0, numDocs).mapToObj(i -> client().prepareIndex(INDEX, TYPE, String.valueOf(i)).setSource("n", i)).collect(Collectors.toList()));
// Checks that the all documents have been indexed and correctly counted
assertHitCount(client().prepareSearch(INDEX).setSize(0).get(), numDocs);
assertThat(ALLOWED_OPERATIONS.drainPermits(), equalTo(0));
// Scroll by 1 so that cancellation is easier to control
builder.source().setSize(1);
/* Allow a random number of the documents less the number of workers to be modified by the reindex action. That way at least one
* worker is blocked. */
int numModifiedDocs = randomIntBetween(builder.request().getSlices() * 2, numDocs);
ALLOWED_OPERATIONS.release(numModifiedDocs - builder.request().getSlices());
// Now execute the reindex action...
ListenableActionFuture<? extends BulkByScrollResponse> future = builder.execute();
/* ... and waits for the indexing operation listeners to block. It is important to realize that some of the workers might have
* exhausted their slice while others might have quite a bit left to work on. We can't control that. */
awaitBusy(() -> ALLOWED_OPERATIONS.hasQueuedThreads() && ALLOWED_OPERATIONS.availablePermits() == 0);
// Status should show the task running
TaskInfo mainTask = findTaskToCancel(action, builder.request().getSlices());
BulkByScrollTask.Status status = (BulkByScrollTask.Status) mainTask.getStatus();
assertNull(status.getReasonCancelled());
// Description shouldn't be empty
assertThat(mainTask.getDescription(), taskDescriptionMatcher);
// Cancel the request while the reindex action is blocked by the indexing operation listeners.
// This will prevent further requests from being sent.
ListTasksResponse cancelTasksResponse = client().admin().cluster().prepareCancelTasks().setTaskId(mainTask.getTaskId()).get();
cancelTasksResponse.rethrowFailures("Cancel");
assertThat(cancelTasksResponse.getTasks(), hasSize(1));
// The status should now show canceled. The request will still be in the list because it is (or its children are) still blocked.
mainTask = client().admin().cluster().prepareGetTask(mainTask.getTaskId()).get().getTask().getTask();
status = (BulkByScrollTask.Status) mainTask.getStatus();
assertEquals(CancelTasksRequest.DEFAULT_REASON, status.getReasonCancelled());
if (builder.request().getSlices() > 1) {
boolean foundCancelled = false;
ListTasksResponse sliceList = client().admin().cluster().prepareListTasks().setParentTaskId(mainTask.getTaskId()).setDetailed(true).get();
sliceList.rethrowFailures("Fetch slice tasks");
for (TaskInfo slice : sliceList.getTasks()) {
BulkByScrollTask.Status sliceStatus = (BulkByScrollTask.Status) slice.getStatus();
if (sliceStatus.getReasonCancelled() == null)
continue;
assertEquals(CancelTasksRequest.DEFAULT_REASON, sliceStatus.getReasonCancelled());
foundCancelled = true;
}
assertTrue("Didn't find at least one sub task that was cancelled", foundCancelled);
}
// Unblock the last operations
ALLOWED_OPERATIONS.release(builder.request().getSlices());
// Checks that no more operations are executed
assertBusy(() -> {
if (builder.request().getSlices() == 1) {
/* We can only be sure that we've drained all the permits if we only use a single worker. Otherwise some worker may have
* exhausted all of its documents before we blocked. */
assertEquals(0, ALLOWED_OPERATIONS.availablePermits());
}
assertEquals(0, ALLOWED_OPERATIONS.getQueueLength());
});
// And check the status of the response
BulkByScrollResponse response = future.get();
assertThat(response.getReasonCancelled(), equalTo("by user request"));
assertThat(response.getBulkFailures(), emptyIterable());
assertThat(response.getSearchFailures(), emptyIterable());
if (builder.request().getSlices() >= 1) {
// If we have more than one worker we might not have made all the modifications
numModifiedDocs -= ALLOWED_OPERATIONS.availablePermits();
}
flushAndRefresh(INDEX);
assertion.assertThat(response, numDocs, numModifiedDocs);
}
use of org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse in project elasticsearch by elastic.
the class CancelTests method findTaskToCancel.
private TaskInfo findTaskToCancel(String actionName, int workerCount) {
ListTasksResponse tasks;
long start = System.nanoTime();
do {
tasks = client().admin().cluster().prepareListTasks().setActions(actionName).setDetailed(true).get();
tasks.rethrowFailures("Find tasks to cancel");
for (TaskInfo taskInfo : tasks.getTasks()) {
// Skip tasks with a parent because those are children of the task we want to cancel
if (false == taskInfo.getParentTaskId().isSet()) {
return taskInfo;
}
}
} while (System.nanoTime() - start < TimeUnit.SECONDS.toNanos(10));
throw new AssertionError("Couldn't find task to rethrottle after waiting tasks=" + tasks.getTasks());
}
use of org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse in project elasticsearch by elastic.
the class InternalTestCluster method assertShardIndexCounter.
private void assertShardIndexCounter() throws Exception {
assertBusy(() -> {
final Collection<NodeAndClient> nodesAndClients = nodes.values();
for (NodeAndClient nodeAndClient : nodesAndClients) {
IndicesService indexServices = getInstance(IndicesService.class, nodeAndClient.name);
for (IndexService indexService : indexServices) {
for (IndexShard indexShard : indexService) {
int activeOperationsCount = indexShard.getActiveOperationsCount();
if (activeOperationsCount > 0) {
TaskManager taskManager = getInstance(TransportService.class, nodeAndClient.name).getTaskManager();
DiscoveryNode localNode = getInstance(ClusterService.class, nodeAndClient.name).localNode();
List<TaskInfo> taskInfos = taskManager.getTasks().values().stream().filter(task -> task instanceof ReplicationTask).map(task -> task.taskInfo(localNode.getId(), true)).collect(Collectors.toList());
ListTasksResponse response = new ListTasksResponse(taskInfos, Collections.emptyList(), Collections.emptyList());
try {
XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint().value(response);
throw new AssertionError("expected index shard counter on shard " + indexShard.shardId() + " on node " + nodeAndClient.name + " to be 0 but was " + activeOperationsCount + ". Current replication tasks on node:\n" + builder.string());
} catch (IOException e) {
throw new RuntimeException("caught exception while building response [" + response + "]", e);
}
}
}
}
}
});
}
Aggregations