
Example 21 with JobID

Use of org.apache.flink.api.common.JobID in project flink by apache.

The class JobClient, method submitJobDetached.

/**
	 * Submits a job in detached mode. The method sends the JobGraph to the
	 * JobManager and waits for its answer as to whether the job could be started.
	 *
	 * @param jobManagerGateway Gateway to the JobManager which will execute the job
	 * @param config The cluster-wide configuration.
	 * @param jobGraph The job to submit.
	 * @param timeout Timeout within which the JobManager must respond.
	 * @param classLoader Class loader used to deserialize a failure cause reported by the JobManager.
	 */
public static void submitJobDetached(ActorGateway jobManagerGateway, Configuration config, JobGraph jobGraph, FiniteDuration timeout, ClassLoader classLoader) throws JobExecutionException {
    checkNotNull(jobManagerGateway, "The jobManagerGateway must not be null.");
    checkNotNull(jobGraph, "The jobGraph must not be null.");
    checkNotNull(timeout, "The timeout must not be null.");
    LOG.info("Checking and uploading JAR files");
    try {
        jobGraph.uploadUserJars(jobManagerGateway, timeout, config);
    } catch (IOException e) {
        throw new JobSubmissionException(jobGraph.getJobID(), "Could not upload the program's JAR files to the JobManager.", e);
    }
    Object result;
    try {
        Future<Object> future = jobManagerGateway.ask(
            new JobManagerMessages.SubmitJob(
                jobGraph,
                // only receive the Acknowledge for the job submission message
                ListeningBehaviour.DETACHED),
            timeout);
        result = Await.result(future, timeout);
    } catch (TimeoutException e) {
        throw new JobTimeoutException(jobGraph.getJobID(), "JobManager did not respond within " + timeout.toString(), e);
    } catch (Throwable t) {
        throw new JobSubmissionException(jobGraph.getJobID(), "Failed to send job to JobManager: " + t.getMessage(), t.getCause());
    }
    if (result instanceof JobManagerMessages.JobSubmitSuccess) {
        JobID respondedID = ((JobManagerMessages.JobSubmitSuccess) result).jobId();
        // validate response
        if (!respondedID.equals(jobGraph.getJobID())) {
            throw new JobExecutionException(jobGraph.getJobID(), "JobManager responded for wrong Job. This Job: " + jobGraph.getJobID() + ", response: " + respondedID);
        }
    } else if (result instanceof JobManagerMessages.JobResultFailure) {
        try {
            SerializedThrowable t = ((JobManagerMessages.JobResultFailure) result).cause();
            throw t.deserializeError(classLoader);
        } catch (JobExecutionException e) {
            throw e;
        } catch (Throwable t) {
            throw new JobExecutionException(jobGraph.getJobID(), "JobSubmission failed: " + t.getMessage(), t);
        }
    } else {
        throw new JobExecutionException(jobGraph.getJobID(), "Unexpected response from JobManager: " + result);
    }
}
Also used : JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) IOException(java.io.IOException) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException)
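
A minimal call-site sketch for submitJobDetached. The surrounding names (jobManagerGateway, config, jobGraph, LOG, MyProgram) and the one-minute timeout are assumptions for illustration, not part of the code above:

try {
    JobClient.submitJobDetached(
        jobManagerGateway,
        config,
        jobGraph,
        new FiniteDuration(60, TimeUnit.SECONDS),
        MyProgram.class.getClassLoader());
    LOG.info("Job {} submitted in detached mode.", jobGraph.getJobID());
} catch (JobExecutionException e) {
    // JobSubmissionException and JobTimeoutException are subclasses, so they land here too
    LOG.error("Detached job submission failed.", e);
}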

Example 22 with JobID

Use of org.apache.flink.api.common.JobID in project flink by apache.

The class JobClient, method awaitJobResult.

/**
	 * Given a JobListeningContext, awaits the result of the job execution that this context is bound to.
	 *
	 * @param listeningContext The listening context of the job execution
	 * @return The result of the execution
	 * @throws JobExecutionException if anything goes wrong while monitoring the job
	 */
public static JobExecutionResult awaitJobResult(JobListeningContext listeningContext) throws JobExecutionException {
    final JobID jobID = listeningContext.getJobID();
    final ActorRef jobClientActor = listeningContext.getJobClientActor();
    final Future<Object> jobSubmissionFuture = listeningContext.getJobResultFuture();
    final FiniteDuration askTimeout = listeningContext.getTimeout();
    // retrieves class loader if necessary
    final ClassLoader classLoader = listeningContext.getClassLoader();
    // ping the JobClientActor from time to time to check if it is still running
    while (!jobSubmissionFuture.isCompleted()) {
        try {
            Await.ready(jobSubmissionFuture, askTimeout);
        } catch (InterruptedException e) {
            throw new JobExecutionException(jobID, "Interrupted while waiting for job completion.");
        } catch (TimeoutException e) {
            try {
                // ping the actor to see if it is still alive
                Await.result(
                    Patterns.ask(jobClientActor, new Identify(true), Timeout.durationToTimeout(askTimeout)),
                    askTimeout);
                // we got a reply, continue waiting for the job result
            } catch (Exception eInner) {
                // the ping failed: the JobClientActor is presumably dead, and
                // thus the health check failed
                if (!jobSubmissionFuture.isCompleted()) {
                    throw new JobExecutionException(jobID, "JobClientActor seems to have died before the JobExecutionResult could be retrieved.", eInner);
                }
            }
        }
    }
    final Object answer;
    try {
        // we have already awaited the result, zero time to wait here
        answer = Await.result(jobSubmissionFuture, Duration.Zero());
    } catch (Throwable throwable) {
        throw new JobExecutionException(jobID, "Couldn't retrieve the JobExecutionResult from the JobManager.", throwable);
    } finally {
        // failsafe shutdown of the client actor
        jobClientActor.tell(PoisonPill.getInstance(), ActorRef.noSender());
    }
    // second block: handle the actual response from the JobManager
    if (answer instanceof JobManagerMessages.JobResultSuccess) {
        LOG.info("Job execution complete");
        SerializedJobExecutionResult result = ((JobManagerMessages.JobResultSuccess) answer).result();
        if (result != null) {
            try {
                return result.toJobExecutionResult(classLoader);
            } catch (Throwable t) {
                throw new JobExecutionException(jobID, "Job was successfully executed but JobExecutionResult could not be deserialized.");
            }
        } else {
            throw new JobExecutionException(jobID, "Job was successfully executed but result contained a null JobExecutionResult.");
        }
    } else if (answer instanceof JobManagerMessages.JobResultFailure) {
        LOG.info("Job execution failed");
        SerializedThrowable serThrowable = ((JobManagerMessages.JobResultFailure) answer).cause();
        if (serThrowable != null) {
            Throwable cause = serThrowable.deserializeError(classLoader);
            if (cause instanceof JobExecutionException) {
                throw (JobExecutionException) cause;
            } else {
                throw new JobExecutionException(jobID, "Job execution failed", cause);
            }
        } else {
            throw new JobExecutionException(jobID, "Job execution failed with null as failure cause.");
        }
    } else if (answer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(((JobManagerMessages.JobNotFound) answer).jobID(), "Couldn't retrieve Job " + jobID + " because it was not running.");
    } else {
        throw new JobExecutionException(jobID, "Unknown answer from JobManager after submitting the job: " + answer);
    }
}
Also used : ActorRef(akka.actor.ActorRef) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) Identify(akka.actor.Identify) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable) JobID(org.apache.flink.api.common.JobID)
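
A small helper sketch showing how awaitJobResult pairs with a prior submission. The JobListeningContext is assumed to come from whichever submission path created the job (the exact factory method varies across Flink versions), and LOG is a placeholder logger:

static JobExecutionResult waitForJob(JobListeningContext listeningContext) throws JobExecutionException {
    // blocks until the job finishes, fails, or the JobClientActor dies
    JobExecutionResult result = JobClient.awaitJobResult(listeningContext);
    LOG.info("Job {} finished in {} ms.", listeningContext.getJobID(), result.getNetRuntime());
    return result;
}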

Example 23 with JobID

Use of org.apache.flink.api.common.JobID in project flink by apache.

The class ExecutionGraphBuilder, method buildGraph.

/**
	 * Builds the ExecutionGraph from the JobGraph.
	 * If a prior execution graph exists, the JobGraph will be attached to it. If no prior
	 * execution graph exists, the JobGraph will be attached to a new, empty execution graph.
	 */
public static ExecutionGraph buildGraph(@Nullable ExecutionGraph prior, JobGraph jobGraph, Configuration jobManagerConfig, ScheduledExecutorService futureExecutor, Executor ioExecutor, SlotProvider slotProvider, ClassLoader classLoader, CheckpointRecoveryFactory recoveryFactory, Time timeout, RestartStrategy restartStrategy, MetricGroup metrics, int parallelismForAutoMax, Logger log) throws JobExecutionException, JobException {
    checkNotNull(jobGraph, "job graph cannot be null");
    final String jobName = jobGraph.getName();
    final JobID jobId = jobGraph.getJobID();
    // create a new execution graph, if none exists so far
    final ExecutionGraph executionGraph;
    try {
        executionGraph = (prior != null) ? prior : new ExecutionGraph(futureExecutor, ioExecutor, jobId, jobName, jobGraph.getJobConfiguration(), jobGraph.getSerializedExecutionConfig(), timeout, restartStrategy, jobGraph.getUserJarBlobKeys(), jobGraph.getClasspaths(), slotProvider, classLoader, metrics);
    } catch (IOException e) {
        throw new JobException("Could not create the execution graph.", e);
    }
    // set the basic properties
    executionGraph.setScheduleMode(jobGraph.getScheduleMode());
    executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling());
    try {
        executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph));
    } catch (Throwable t) {
        log.warn("Cannot create JSON plan for job", t);
        // give the graph an empty plan
        executionGraph.setJsonPlan("{}");
    }
    // initialize the vertices that have a master initialization hook
    // file output formats create directories here, input formats create splits
    final long initMasterStart = System.nanoTime();
    log.info("Running initialization on master for job {} ({}).", jobName, jobId);
    for (JobVertex vertex : jobGraph.getVertices()) {
        String executableClass = vertex.getInvokableClassName();
        if (executableClass == null || executableClass.isEmpty()) {
            throw new JobSubmissionException(jobId, "The vertex " + vertex.getID() + " (" + vertex.getName() + ") has no invokable class.");
        }
        if (vertex.getParallelism() == ExecutionConfig.PARALLELISM_AUTO_MAX) {
            vertex.setParallelism(parallelismForAutoMax);
        }
        try {
            vertex.initializeOnMaster(classLoader);
        } catch (Throwable t) {
            throw new JobExecutionException(jobId, "Cannot initialize task '" + vertex.getName() + "': " + t.getMessage(), t);
        }
    }
    log.info("Successfully ran initialization on master in {} ms.", (System.nanoTime() - initMasterStart) / 1_000_000);
    // topologically sort the job vertices and attach the graph to the existing one
    List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
    if (log.isDebugEnabled()) {
        log.debug("Adding {} vertices from job graph {} ({}).", sortedTopology.size(), jobName, jobId);
    }
    executionGraph.attachJobGraph(sortedTopology);
    if (log.isDebugEnabled()) {
        log.debug("Successfully created execution graph from job graph {} ({}).", jobName, jobId);
    }
    // configure the state checkpointing
    JobSnapshottingSettings snapshotSettings = jobGraph.getSnapshotSettings();
    if (snapshotSettings != null) {
        List<ExecutionJobVertex> triggerVertices = idToVertex(snapshotSettings.getVerticesToTrigger(), executionGraph);
        List<ExecutionJobVertex> ackVertices = idToVertex(snapshotSettings.getVerticesToAcknowledge(), executionGraph);
        List<ExecutionJobVertex> confirmVertices = idToVertex(snapshotSettings.getVerticesToConfirm(), executionGraph);
        CompletedCheckpointStore completedCheckpoints;
        CheckpointIDCounter checkpointIdCounter;
        try {
            int maxNumberOfCheckpointsToRetain = jobManagerConfig.getInteger(CoreOptions.MAX_RETAINED_CHECKPOINTS);
            if (maxNumberOfCheckpointsToRetain <= 0) {
                // warn and use the default value (1) if the configured
                // state.checkpoints.max-retained-checkpoints is not greater than 0
                log.warn("The setting for '{} : {}' is invalid. Using default value of {}", CoreOptions.MAX_RETAINED_CHECKPOINTS.key(), maxNumberOfCheckpointsToRetain, CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue());
                maxNumberOfCheckpointsToRetain = CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue();
            }
            completedCheckpoints = recoveryFactory.createCheckpointStore(jobId, maxNumberOfCheckpointsToRetain, classLoader);
            checkpointIdCounter = recoveryFactory.createCheckpointIDCounter(jobId);
        } catch (Exception e) {
            throw new JobExecutionException(jobId, "Failed to initialize high-availability checkpoint handler", e);
        }
        // Maximum number of remembered checkpoints
        int historySize = jobManagerConfig.getInteger(ConfigConstants.JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE, ConfigConstants.DEFAULT_JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE);
        CheckpointStatsTracker checkpointStatsTracker = new CheckpointStatsTracker(historySize, ackVertices, snapshotSettings, metrics);
        // The default directory for externalized checkpoints
        String externalizedCheckpointsDir = jobManagerConfig.getString(ConfigConstants.CHECKPOINTS_DIRECTORY_KEY, null);
        // load the state backend for checkpoint metadata.
        // if specified in the application, use from there, otherwise load from configuration
        final StateBackend metadataBackend;
        final StateBackend applicationConfiguredBackend = snapshotSettings.getDefaultStateBackend();
        if (applicationConfiguredBackend != null) {
            metadataBackend = applicationConfiguredBackend;
            log.info("Using application-defined state backend for checkpoint/savepoint metadata: {}.", applicationConfiguredBackend);
        } else {
            try {
                metadataBackend = AbstractStateBackend.loadStateBackendFromConfigOrCreateDefault(jobManagerConfig, classLoader, log);
            } catch (IllegalConfigurationException | IOException | DynamicCodeLoadingException e) {
                throw new JobExecutionException(jobId, "Could not instantiate configured state backend", e);
            }
        }
        executionGraph.enableCheckpointing(snapshotSettings.getCheckpointInterval(), snapshotSettings.getCheckpointTimeout(), snapshotSettings.getMinPauseBetweenCheckpoints(), snapshotSettings.getMaxConcurrentCheckpoints(), snapshotSettings.getExternalizedCheckpointSettings(), triggerVertices, ackVertices, confirmVertices, checkpointIdCounter, completedCheckpoints, externalizedCheckpointsDir, metadataBackend, checkpointStatsTracker);
    }
    return executionGraph;
}
Also used : CheckpointStatsTracker(org.apache.flink.runtime.checkpoint.CheckpointStatsTracker) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) IOException(java.io.IOException) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) JobException(org.apache.flink.runtime.JobException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) DynamicCodeLoadingException(org.apache.flink.util.DynamicCodeLoadingException) StateBackend(org.apache.flink.runtime.state.StateBackend) AbstractStateBackend(org.apache.flink.runtime.state.AbstractStateBackend) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) JobID(org.apache.flink.api.common.JobID) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore)
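
A hedged invocation sketch for buildGraph. Every argument here (the executors, slot provider, recovery factory, metrics group, and so on) is an assumed in-scope variable; passing null as the prior graph creates a fresh ExecutionGraph, per the code above:

ExecutionGraph executionGraph = ExecutionGraphBuilder.buildGraph(
    null,                   // no prior graph: build a new, empty one
    jobGraph,
    jobManagerConfig,
    futureExecutor,         // ScheduledExecutorService for future callbacks
    ioExecutor,
    slotProvider,
    userCodeClassLoader,
    checkpointRecoveryFactory,
    Time.seconds(10),       // assumed RPC timeout
    restartStrategy,
    jobMetricGroup,
    numberOfTaskSlots,      // parallelism substituted for PARALLELISM_AUTO_MAX vertices
    log);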

Example 24 with JobID

Use of org.apache.flink.api.common.JobID in project flink by apache.

The class LocalInputChannelTest, method testConcurrentConsumeMultiplePartitions.

/**
	 * Tests the consumption of multiple subpartitions via local input channels.
	 *
	 * <p> Multiple producer tasks produce pipelined partitions, which are consumed by multiple
	 * tasks via local input channels.
	 */
@Test
public void testConcurrentConsumeMultiplePartitions() throws Exception {
    // Config
    final int parallelism = 32;
    final int producerBufferPoolSize = parallelism + 1;
    final int numberOfBuffersPerChannel = 1024;
    checkArgument(parallelism >= 1);
    checkArgument(producerBufferPoolSize >= parallelism);
    checkArgument(numberOfBuffersPerChannel >= 1);
    // Setup
    // One thread per produced partition and one per consumer
    final ExecutorService executor = Executors.newFixedThreadPool(2 * parallelism);
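    // Sizing note: each of the `parallelism` producers registers a pool of
    // `producerBufferPoolSize` buffers, and each of the `parallelism` consumers
    // creates a pool of `parallelism` buffers, hence the total below.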
    final NetworkBufferPool networkBuffers = new NetworkBufferPool((parallelism * producerBufferPoolSize) + (parallelism * parallelism), TestBufferFactory.BUFFER_SIZE, MemoryType.HEAP);
    final ResultPartitionConsumableNotifier partitionConsumableNotifier = mock(ResultPartitionConsumableNotifier.class);
    final TaskActions taskActions = mock(TaskActions.class);
    final IOManager ioManager = mock(IOManager.class);
    final JobID jobId = new JobID();
    final ResultPartitionManager partitionManager = new ResultPartitionManager();
    final ResultPartitionID[] partitionIds = new ResultPartitionID[parallelism];
    final TestPartitionProducer[] partitionProducers = new TestPartitionProducer[parallelism];
    // Create all partitions
    for (int i = 0; i < parallelism; i++) {
        partitionIds[i] = new ResultPartitionID();
        final ResultPartition partition = new ResultPartition("Test Name", taskActions, jobId, partitionIds[i], ResultPartitionType.PIPELINED, parallelism, parallelism, partitionManager, partitionConsumableNotifier, ioManager, true);
        // Create a buffer pool for this partition
        partition.registerBufferPool(networkBuffers.createBufferPool(producerBufferPoolSize, producerBufferPoolSize));
        // Create the producer
        partitionProducers[i] = new TestPartitionProducer(partition, false, new TestPartitionProducerBufferSource(parallelism, partition.getBufferProvider(), numberOfBuffersPerChannel));
        // Register with the partition manager in order to allow the local input channels to
        // request their respective partitions.
        partitionManager.registerResultPartition(partition);
    }
    // Test
    try {
        // Submit producer tasks
        List<Future<?>> results = Lists.newArrayListWithCapacity(2 * parallelism);
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(partitionProducers[i]));
        }
        // Submit consumers
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(new TestLocalInputChannelConsumer(i, parallelism, numberOfBuffersPerChannel, networkBuffers.createBufferPool(parallelism, parallelism), partitionManager, new TaskEventDispatcher(), partitionIds)));
        }
        // Wait for all to finish
        for (Future<?> result : results) {
            result.get();
        }
    } finally {
        networkBuffers.destroy();
        executor.shutdown();
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TestPartitionProducer(org.apache.flink.runtime.io.network.util.TestPartitionProducer) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 25 with JobID

Use of org.apache.flink.api.common.JobID in project flink by apache.

The class LocalInputChannelTest, method testConcurrentReleaseAndRetriggerPartitionRequest.

/**
	 * Verifies that concurrent release via the SingleInputGate and re-triggering
	 * of a partition request works smoothly.
	 *
	 * - SingleInputGate acquires its request lock and tries to release all
	 * registered channels. When releasing a channel, it needs to acquire
	 * the channel's shared request-release lock.
	 * - If a LocalInputChannel concurrently retriggers a partition request via
	 * a Timer Thread it acquires the channel's request-release lock and calls
	 * the retrigger callback on the SingleInputGate, which again tries to
	 * acquire the gate's request lock.
	 *
	 * For certain timings this obviously leads to a deadlock. This test reliably
	 * reproduced such a timing (reported in FLINK-5228). It pretty much tests the
	 * buggy implementation and has little general value beyond that. If it
	 * becomes obsolete at some point (future greatness ;)), feel free to remove it.
	 *
	 * The fix in the end was to not acquire the channel's lock when releasing it
	 * and/or to not make any input gate callbacks while holding the channel's lock.
	 * I decided to do both.
	 */
@Test
public void testConcurrentReleaseAndRetriggerPartitionRequest() throws Exception {
    final SingleInputGate gate = new SingleInputGate("test task name", new JobID(), new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, 1, mock(TaskActions.class), new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
    when(partitionManager.createSubpartitionView(any(ResultPartitionID.class), anyInt(), any(BufferProvider.class), any(BufferAvailabilityListener.class))).thenAnswer(new Answer<ResultSubpartitionView>() {

        @Override
        public ResultSubpartitionView answer(InvocationOnMock invocationOnMock) throws Throwable {
            // Sleep here a little to give the releaser Thread
            // time to acquire the input gate lock. We throw
            // the Exception to retrigger the request.
            Thread.sleep(100);
            throw new PartitionNotFoundException(new ResultPartitionID());
        }
    });
    final LocalInputChannel channel = new LocalInputChannel(gate, 0, new ResultPartitionID(), partitionManager, new TaskEventDispatcher(), 1, 1, new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    gate.setInputChannel(new IntermediateResultPartitionID(), channel);
    Thread releaser = new Thread() {

        @Override
        public void run() {
            try {
                gate.releaseAllResources();
            } catch (IOException ignored) {
            }
        }
    };
    Thread requester = new Thread() {

        @Override
        public void run() {
            try {
                channel.requestSubpartition(0);
            } catch (IOException | InterruptedException ignored) {
            }
        }
    };
    requester.start();
    releaser.start();
    releaser.join();
    requester.join();
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ResultSubpartitionView(org.apache.flink.runtime.io.network.partition.ResultSubpartitionView) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) IOException(java.io.IOException) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) InvocationOnMock(org.mockito.invocation.InvocationOnMock) BufferAvailabilityListener(org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener) BufferProvider(org.apache.flink.runtime.io.network.buffer.BufferProvider) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
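
The deadlock described in the comment above is a classic lock-ordering inversion. A self-contained sketch with hypothetical gateLock/channelLock monitors (standing in for the gate's request lock and the channel's request-release lock; this is not Flink code):

public class LockOrderingDeadlock {

    static final Object gateLock = new Object();    // ~ SingleInputGate's request lock
    static final Object channelLock = new Object(); // ~ channel's request-release lock

    public static void main(String[] args) {
        Thread releaser = new Thread(() -> {
            synchronized (gateLock) {        // the gate starts releasing its channels...
                sleepQuietly(100);
                synchronized (channelLock) { // ...and needs each channel's lock
                }
            }
        });
        Thread retrigger = new Thread(() -> {
            synchronized (channelLock) {     // the timer thread holds the channel's lock...
                sleepQuietly(100);
                synchronized (gateLock) {    // ...and calls back into the gate
                }
            }
        });
        releaser.start();
        retrigger.start(); // with these timings, both threads block forever
    }

    private static void sleepQuietly(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException ignored) {
        }
    }
}

Breaking either acquisition order (releasing without holding the channel's lock, or making no gate callbacks while holding it) removes the cycle, which is exactly the fix the comment describes.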

Aggregations

JobID (org.apache.flink.api.common.JobID): 335
Test (org.junit.Test): 274
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 88
IOException (java.io.IOException): 74
Configuration (org.apache.flink.configuration.Configuration): 72
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 61
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 48
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 47
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 44
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 42
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 38
ArrayList (java.util.ArrayList): 37
MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry): 32
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange): 31
HashMap (java.util.HashMap): 29
AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 29
FiniteDuration (scala.concurrent.duration.FiniteDuration): 28
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID): 24
File (java.io.File): 23
UUID (java.util.UUID): 23