Use of org.apache.flink.api.common.JobID in project flink by apache.
The class TaskManagerJobGroupTest, method testCreateQueryServiceMetricInfo.
@Test
public void testCreateQueryServiceMetricInfo() {
    JobID jid = new JobID();
    MetricRegistry registry = new MetricRegistry(MetricRegistryConfiguration.defaultMetricRegistryConfiguration());
    TaskManagerMetricGroup tm = new TaskManagerMetricGroup(registry, "host", "id");
    TaskManagerJobMetricGroup job = new TaskManagerJobMetricGroup(registry, tm, jid, "jobname");

    QueryScopeInfo.JobQueryScopeInfo info = job.createQueryServiceMetricInfo(new DummyCharacterFilter());
    assertEquals("", info.scope);
    assertEquals(jid.toString(), info.jobID);
}
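The DummyCharacterFilter used above passes metric names through unchanged. As a minimal sketch (an addition for illustration, not part of the test), a real CharacterFilter only needs to implement filterCharacters, for example to rewrite characters a metrics backend cannot store:

// Hedged sketch: a CharacterFilter that rewrites characters a reporter cannot store.
CharacterFilter underscoreFilter = new CharacterFilter() {

    @Override
    public String filterCharacters(String input) {
        return input.replace('.', '_');
    }
};
QueryScopeInfo.JobQueryScopeInfo filteredInfo = job.createQueryServiceMetricInfo(underscoreFilter);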
Use of org.apache.flink.api.common.JobID in project flink by apache.
The class RescalingITCase, method testSavepointRescalingPartitionedOperatorState.
/**
 * Tests rescaling of partitioned operator state. More specifically, we test the mechanism with
 * {@link ListCheckpointed}, as it subsumes {@link org.apache.flink.streaming.api.checkpoint.CheckpointedFunction}.
*/
public void testSavepointRescalingPartitionedOperatorState(boolean scaleOut, OperatorCheckpointMethod checkpointMethod) throws Exception {
    final int parallelism = scaleOut ? numSlots : numSlots / 2;
    final int parallelism2 = scaleOut ? numSlots / 2 : numSlots;
    final int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    JobID jobID = null;
    ActorGateway jobManager = null;

    int counterSize = Math.max(parallelism, parallelism2);

    if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION || checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
        PartitionedStateSource.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSource.CHECK_CORRECT_RESTORE = new int[counterSize];
    } else {
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE = new int[counterSize];
    }

    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());

        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, checkpointMethod);
        jobID = jobGraph.getJobID();

        cluster.submitJobDetached(jobGraph);

        Object savepointResponse = null;

        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();

        // retry triggering the savepoint until it succeeds or the deadline expires
        while (deadline.hasTimeLeft()) {
            Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
            FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
            savepointResponse = Await.result(savepointPathFuture, waitingTime);

            if (savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess) {
                break;
            }
            System.out.println(savepointResponse);
        }

        assertTrue(savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);

        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();

        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());

        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);

        Await.ready(jobRemovedFuture, deadline.timeLeft());

        // job successfully removed
        jobID = null;

        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, checkpointMethod);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();

        cluster.submitJobAndWait(scaledJobGraph, false);

        int sumExp = 0;
        int sumAct = 0;

        if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        } else if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
            // with broadcast state, every subtask restores the full snapshot
            sumExp *= parallelism2;
        } else {
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        }

        assertEquals(sumExp, sumAct);
        jobID = null;
    } finally {
        // clear any leftovers from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
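For context, a minimal sketch of the kind of ListCheckpointed source this test rescales; the class and field names are illustrative, not the test's actual PartitionedStateSourceListCheckpointed. On rescaling, the runtime redistributes the individual list elements returned by snapshotState across the new set of subtasks:

import java.util.Collections;
import java.util.List;
import org.apache.flink.streaming.api.checkpoint.ListCheckpointed;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

// Hedged sketch of a ListCheckpointed source; names are illustrative.
public class CountingSource extends RichParallelSourceFunction<Integer>
        implements ListCheckpointed<Integer> {

    private volatile boolean running = true;
    private int counter;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                counter++;
                ctx.collect(counter);
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

    @Override
    public List<Integer> snapshotState(long checkpointId, long timestamp) throws Exception {
        // each list element can be redistributed independently on rescaling
        return Collections.singletonList(counter);
    }

    @Override
    public void restoreState(List<Integer> state) throws Exception {
        // a subtask may receive zero, one, or several list entries after rescaling
        for (Integer value : state) {
            counter += value;
        }
    }
}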
Use of org.apache.flink.api.common.JobID in project flink by apache.
The class SavepointITCase, method testCanRestoreWithModifiedStatelessOperators.
/**
* FLINK-5985
*
* This test ensures we can restore from a savepoint under modifications to the job graph that only concern
* stateless operators.
*/
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;

    // Test deadline
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();

    final File tmpDir = CommonTestUtils.createTempDirectory();
    final File savepointDir = new File(tmpDir, "savepoints");

    TestingCluster flink = null;
    String savepointPath;

    try {
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        LOG.info("Flink configuration: " + config + ".");

        // Start Flink
        flink = new TestingCluster(config);
        LOG.info("Starting Flink cluster.");
        flink.start(true);

        // Retrieve the JobManager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");

        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 4 * value;
            }
        }).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 2 * value;
            }
        }).addSink(new DiscardingSink<Integer>());

        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();

        JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
        JobID jobID = submissionResult.getJobID();

        // wait for the tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();

        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");

        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
    } finally {
        // guard against an NPE if the cluster could not be created
        if (flink != null) {
            flink.shutdown();
            flink.awaitTermination();
        }
    }

    try {
        LOG.info("Restarting Flink cluster.");
        flink.start(true);

        // Retrieve the JobManager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");

        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);

        // generate a modified job graph with changed stateless operators around the same stateful map
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).addSink(new DiscardingSink<Integer>());

        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();

        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

        LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with savepoint path " + savepointPath + " in detached mode.");

        // Submit the job
        flink.submitJobDetached(modifiedJobGraph);

        // Await until the state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

        // Await some progress after the restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        if (flink != null) {
            flink.shutdown();
            flink.awaitTermination();
        }
    }
}
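The restore succeeds because the stateful operator carries the same uid("statefulCounter") in both graphs; savepoint state is matched by uid, so the surrounding stateless operators can change freely. Conversely, if the modified graph removed a stateful operator, the restore would fail unless non-restored state were explicitly allowed. A hedged sketch of that variant:

// Hedged sketch: tolerate savepoint state that no longer maps to any operator
// (only needed when the modified graph removes stateful operators).
modifiedJobGraph.setSavepointRestoreSettings(
        SavepointRestoreSettings.forPath(savepointPath, true)); // true = allow non-restored state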
Use of org.apache.flink.api.common.JobID in project flink by apache.
The class KeyedOneInputStreamOperatorTestHarness, method snapshotLegacy.
/**
 * Takes a snapshot via an in-memory checkpoint stream and returns a handle to the written state.
 */
@Override
public StreamStateHandle snapshotLegacy(long checkpointId, long timestamp) throws Exception {
    // simply use an in-memory handle
    MemoryStateBackend backend = new MemoryStateBackend();
    CheckpointStreamFactory streamFactory = backend.createStreamFactory(new JobID(), "test_op");
    CheckpointStreamFactory.CheckpointStateOutputStream outStream = streamFactory.createCheckpointStateOutputStream(checkpointId, timestamp);

    if (operator instanceof StreamCheckpointedOperator) {
        ((StreamCheckpointedOperator) operator).snapshotState(outStream, checkpointId, timestamp);
    }

    if (keyedStateBackend != null) {
        RunnableFuture<KeyGroupsStateHandle> keyedSnapshotRunnable = keyedStateBackend.snapshot(checkpointId, timestamp, streamFactory, CheckpointOptions.forFullCheckpoint());
        if (!keyedSnapshotRunnable.isDone()) {
            // the backend returned an asynchronous snapshot; run it to completion
            Thread runner = new Thread(keyedSnapshotRunnable);
            runner.start();
        }
        // flag byte 1: keyed state follows, serialized via ObjectOutputStream
        outStream.write(1);
        ObjectOutputStream oos = new ObjectOutputStream(outStream);
        oos.writeObject(keyedSnapshotRunnable.get());
        oos.flush();
    } else {
        // flag byte 0: no keyed state
        outStream.write(0);
    }
    return outStream.closeAndGetHandle();
}
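As a standalone sketch of the in-memory checkpoint stream used above (assuming only the APIs that already appear in the method), the factory writes into heap memory and closeAndGetHandle returns a handle over those bytes:

// Hedged sketch: the MemoryStateBackend stream factory in isolation.
MemoryStateBackend backend = new MemoryStateBackend();
CheckpointStreamFactory factory = backend.createStreamFactory(new JobID(), "sketch_op");
CheckpointStreamFactory.CheckpointStateOutputStream out =
        factory.createCheckpointStateOutputStream(1L, System.currentTimeMillis());
out.write(new byte[] { 42 });                       // state bytes stay on the heap
StreamStateHandle handle = out.closeAndGetHandle(); // readable again via handle.openInputStream()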
Use of org.apache.flink.api.common.JobID in project flink by apache.
The class JobClient, method submitJobDetached.
/**
 * Submits a job in detached mode. The method sends the JobGraph to the
 * JobManager and waits for the answer whether the job could be started or not.
 *
 * @param jobManagerGateway Gateway to the JobManager which will execute the job
 * @param config The cluster-wide configuration.
 * @param jobGraph The job
 * @param timeout Timeout in which the JobManager must have responded.
 * @param classLoader Class loader used to deserialize a failure cause reported by the JobManager.
 * @throws JobExecutionException if the job could not be submitted or was rejected
 */
public static void submitJobDetached(ActorGateway jobManagerGateway, Configuration config, JobGraph jobGraph, FiniteDuration timeout, ClassLoader classLoader) throws JobExecutionException {
    checkNotNull(jobManagerGateway, "The jobManagerGateway must not be null.");
    checkNotNull(jobGraph, "The jobGraph must not be null.");
    checkNotNull(timeout, "The timeout must not be null.");

    LOG.info("Checking and uploading JAR files");
    try {
        jobGraph.uploadUserJars(jobManagerGateway, timeout, config);
    } catch (IOException e) {
        throw new JobSubmissionException(jobGraph.getJobID(), "Could not upload the program's JAR files to the JobManager.", e);
    }

    Object result;
    try {
        // ListeningBehaviour.DETACHED: only receive the Acknowledge for the job submission message
        Future<Object> future = jobManagerGateway.ask(new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED), timeout);
        result = Await.result(future, timeout);
    } catch (TimeoutException e) {
        throw new JobTimeoutException(jobGraph.getJobID(), "JobManager did not respond within " + timeout.toString(), e);
    } catch (Throwable t) {
        throw new JobSubmissionException(jobGraph.getJobID(), "Failed to send job to JobManager: " + t.getMessage(), t.getCause());
    }

    if (result instanceof JobManagerMessages.JobSubmitSuccess) {
        JobID respondedID = ((JobManagerMessages.JobSubmitSuccess) result).jobId();

        // validate the response
        if (!respondedID.equals(jobGraph.getJobID())) {
            throw new JobExecutionException(jobGraph.getJobID(), "JobManager responded for wrong Job. This Job: " + jobGraph.getJobID() + ", response: " + respondedID);
        }
    } else if (result instanceof JobManagerMessages.JobResultFailure) {
        try {
            SerializedThrowable t = ((JobManagerMessages.JobResultFailure) result).cause();
            throw t.deserializeError(classLoader);
        } catch (JobExecutionException e) {
            throw e;
        } catch (Throwable t) {
            throw new JobExecutionException(jobGraph.getJobID(), "JobSubmission failed: " + t.getMessage(), t);
        }
    } else {
        throw new JobExecutionException(jobGraph.getJobID(), "Unexpected response from JobManager: " + result);
    }
}
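A hedged sketch of a caller; the gateway, configuration, and job graph are assumed to come from the surrounding context, and the timeout value is illustrative:

// Hedged sketch: invoking the detached submission above; inputs are illustrative.
static void submitDetached(ActorGateway gateway, Configuration config, JobGraph graph) throws JobExecutionException {
    FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
    JobClient.submitJobDetached(gateway, config, graph, timeout,
            Thread.currentThread().getContextClassLoader());
}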