use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.
the class SubtasksTimesHandler method createSubtaskTimesJson.
public static String createSubtaskTimesJson(AccessExecutionJobVertex jobVertex) throws IOException {
final long now = System.currentTimeMillis();
StringWriter writer = new StringWriter();
JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
gen.writeStartObject();
gen.writeStringField("id", jobVertex.getJobVertexId().toString());
gen.writeStringField("name", jobVertex.getName());
gen.writeNumberField("now", now);
gen.writeArrayFieldStart("subtasks");
int num = 0;
for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
long[] timestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
ExecutionState status = vertex.getExecutionState();
long scheduledTime = timestamps[ExecutionState.SCHEDULED.ordinal()];
long start = scheduledTime > 0 ? scheduledTime : -1;
long end = status.isTerminal() ? timestamps[status.ordinal()] : now;
long duration = start >= 0 ? end - start : -1L;
gen.writeStartObject();
gen.writeNumberField("subtask", num++);
TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
String locationString = location == null ? "(unassigned)" : location.getHostname();
gen.writeStringField("host", locationString);
gen.writeNumberField("duration", duration);
gen.writeObjectFieldStart("timestamps");
for (ExecutionState state : ExecutionState.values()) {
gen.writeNumberField(state.name(), timestamps[state.ordinal()]);
}
gen.writeEndObject();
gen.writeEndObject();
}
gen.writeEndArray();
gen.writeEndObject();
gen.close();
return writer.toString();
}
use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.
the class SubtaskExecutionAttemptDetailsHandler method createAttemptDetailsJson.
public static String createAttemptDetailsJson(AccessExecution execAttempt, String jobID, String vertexID, @Nullable MetricFetcher fetcher) throws IOException {
StringWriter writer = new StringWriter();
JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
final ExecutionState status = execAttempt.getState();
final long now = System.currentTimeMillis();
TaskManagerLocation location = execAttempt.getAssignedResourceLocation();
String locationString = location == null ? "(unassigned)" : location.getHostname();
long startTime = execAttempt.getStateTimestamp(ExecutionState.DEPLOYING);
if (startTime == 0) {
startTime = -1;
}
long endTime = status.isTerminal() ? execAttempt.getStateTimestamp(status) : -1;
long duration = startTime > 0 ? ((endTime > 0 ? endTime : now) - startTime) : -1;
gen.writeStartObject();
gen.writeNumberField("subtask", execAttempt.getParallelSubtaskIndex());
gen.writeStringField("status", status.name());
gen.writeNumberField("attempt", execAttempt.getAttemptNumber());
gen.writeStringField("host", locationString);
gen.writeNumberField("start-time", startTime);
gen.writeNumberField("end-time", endTime);
gen.writeNumberField("duration", duration);
MutableIOMetrics counts = new MutableIOMetrics();
counts.addIOMetrics(execAttempt, fetcher, jobID, vertexID);
counts.writeIOMetricsAsJson(gen);
gen.writeEndObject();
gen.close();
return writer.toString();
}
use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.
the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.
/**
* Tests the JobManager response when the execution is not registered with
* the ExecutionGraph.
*/
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
new JavaTestKit(system) {
{
new Within(duration("15 seconds")) {
@Override
protected void run() {
// Setup
TestingCluster cluster = null;
try {
cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
final IntermediateDataSetID rid = new IntermediateDataSetID();
// Create a task
final JobVertex sender = new JobVertex("Sender");
sender.setParallelism(1);
// just finish
sender.setInvokableClass(NoOpInvokable.class);
sender.createAndAddResultDataSet(rid, PIPELINED);
final JobVertex sender2 = new JobVertex("Blocking Sender");
sender2.setParallelism(1);
// just block
sender2.setInvokableClass(BlockingNoOpInvokable.class);
sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
final JobID jid = jobGraph.getJobID();
final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
// we can set the leader session ID to None because we don't use this gateway to send messages
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
// Submit the job and wait for all vertices to be running
jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
expectMsgClass(JobSubmitSuccess.class);
jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
expectMsgClass(AllVerticesRunning.class);
Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
while (vertex.getExecutionState() != ExecutionState.FINISHED) {
Thread.sleep(1);
}
IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
// Producer finished, request state
Object request = new RequestPartitionProducerState(jid, rid, partitionId);
Future<ExecutionState> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
};
}
};
}
use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.
the class JobManagerTest method testRequestPartitionState.
/**
* Tests responses to partition state requests.
*/
@Test
public void testRequestPartitionState() throws Exception {
new JavaTestKit(system) {
{
new Within(duration("15 seconds")) {
@Override
protected void run() {
// Setup
TestingCluster cluster = null;
try {
cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
final IntermediateDataSetID rid = new IntermediateDataSetID();
// Create a task
final JobVertex sender = new JobVertex("Sender");
sender.setParallelism(1);
// just block
sender.setInvokableClass(BlockingNoOpInvokable.class);
sender.createAndAddResultDataSet(rid, PIPELINED);
final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
final JobID jid = jobGraph.getJobID();
final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
// we can set the leader session ID to None because we don't use this gateway to send messages
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
// Submit the job and wait for all vertices to be running
jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
expectMsgClass(JobSubmitSuccess.class);
jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
expectMsgClass(AllVerticesRunning.class);
// This is the mock execution ID of the task requesting the state of the partition
final ExecutionAttemptID receiver = new ExecutionAttemptID();
// Request the execution graph to get the runtime info
jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class).executionGraph();
final ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
final IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
// - The test ----------------------------------------------------------------------
// 1. All execution states
RequestPartitionProducerState request = new RequestPartitionProducerState(jid, rid, partitionId);
for (ExecutionState state : ExecutionState.values()) {
ExecutionGraphTestUtils.setVertexState(vertex, state);
Future<ExecutionState> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
assertEquals(state, resp);
}
// 2. Non-existing execution
request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());
Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
try {
Await.result(futurePartitionState, getRemainingTime());
fail("Did not fail with expected RuntimeException");
} catch (RuntimeException e) {
assertEquals(IllegalArgumentException.class, e.getCause().getClass());
}
// 3. Non-existing job
request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
try {
Await.result(futurePartitionState, getRemainingTime());
fail("Did not fail with expected IllegalArgumentException");
} catch (IllegalArgumentException ignored) {
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
};
}
};
}
use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.
the class Execution method deployToSlot.
public void deployToSlot(final SimpleSlot slot) throws JobException {
checkNotNull(slot);
// The more general check is the timeout of the deployment call
if (!slot.isAlive()) {
throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
}
// make sure exactly one deployment call happens from the correct state
// note: the transition from CREATED to DEPLOYING is for testing purposes only
ExecutionState previous = this.state;
if (previous == SCHEDULED || previous == CREATED) {
if (!transitionState(previous, DEPLOYING)) {
// this should actually not happen and indicates a race somewhere else
throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
}
} else {
// vertex may have been cancelled, or it was already scheduled
throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
}
try {
// good, we are allowed to deploy
if (!slot.setExecutedVertex(this)) {
throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
}
this.assignedResource = slot;
// race double check, did we fail/cancel and do we need to release the slot?
if (this.state != DEPLOYING) {
slot.releaseSlot();
return;
}
if (LOG.isInfoEnabled()) {
LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(), attemptNumber, getAssignedResourceLocation().getHostname()));
}
final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(attemptId, slot, taskState, attemptNumber);
// register this execution at the execution graph, to receive call backs
vertex.getExecutionGraph().registerExecution(this);
final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
final Future<Acknowledge> submitResultFuture = taskManagerGateway.submitTask(deployment, timeout);
submitResultFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {
@Override
public Void apply(Throwable failure) {
if (failure instanceof TimeoutException) {
String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
markFailed(new Exception("Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a timeout of " + timeout, failure));
} else {
markFailed(failure);
}
return null;
}
}, executor);
} catch (Throwable t) {
markFailed(t);
ExceptionUtils.rethrow(t);
}
}
Aggregations