Example 1 with JobException

Use of org.apache.flink.runtime.JobException in project flink by apache.

From the class Execution, the method deployToSlot:

public void deployToSlot(final SimpleSlot slot) throws JobException {
    checkNotNull(slot);
    // Check if the TaskManager died in the meantime. This only speeds up the
    // response to TaskManagers failing concurrently to deployments; the more
    // general check is the timeout of the deployment call.
    if (!slot.isAlive()) {
        throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
    }
    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    ExecutionState previous = this.state;
    if (previous == SCHEDULED || previous == CREATED) {
        if (!transitionState(previous, DEPLOYING)) {
            // this should actually not happen and indicates a race somewhere else
            throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
        }
    } else {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
    }
    try {
        // good, we are allowed to deploy
        if (!slot.setExecutedVertex(this)) {
            throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
        }
        this.assignedResource = slot;
        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot();
            return;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(), attemptNumber, getAssignedResourceLocation().getHostname()));
        }
        final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(attemptId, slot, taskState, attemptNumber);
        // register this execution at the execution graph, to receive callbacks
        vertex.getExecutionGraph().registerExecution(this);
        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final Future<Acknowledge> submitResultFuture = taskManagerGateway.submitTask(deployment, timeout);
        submitResultFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

            @Override
            public Void apply(Throwable failure) {
                if (failure instanceof TimeoutException) {
                    String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
                    markFailed(new Exception("Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a timeout of " + timeout, failure));
                } else {
                    markFailed(failure);
                }
                return null;
            }
        }, executor);
    } catch (Throwable t) {
        markFailed(t);
        ExceptionUtils.rethrow(t);
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) TimeoutException(java.util.concurrent.TimeoutException) JobException(org.apache.flink.runtime.JobException) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)
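
The interesting pattern in deployToSlot is the guarded state transition: atomically move from SCHEDULED or CREATED to DEPLOYING, then double-check the state again after the resource has been acquired. A minimal self-contained sketch of that pattern using plain Java atomics (an illustration only, not Flink's Execution class):

import java.util.concurrent.atomic.AtomicReference;

// Sketch of the "transition, then double-check" guard used in deployToSlot.
// Hypothetical stand-in class; the real code also acquires and releases a slot.
public class DeploymentGuardSketch {

    enum State { CREATED, SCHEDULED, DEPLOYING, CANCELED, FAILED }

    private final AtomicReference<State> state = new AtomicReference<>(State.SCHEDULED);

    boolean tryDeploy() {
        State previous = state.get();
        if (previous != State.SCHEDULED && previous != State.CREATED) {
            return false; // vertex was cancelled or already deployed
        }
        if (!state.compareAndSet(previous, State.DEPLOYING)) {
            return false; // a concurrent deployment call won the race
        }
        // ... assign the slot here ...
        // race double-check: did a concurrent cancel/fail overtake us?
        if (state.get() != State.DEPLOYING) {
            return false; // back out and release the slot again
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(new DeploymentGuardSketch().tryDeploy()); // true
    }
}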

Example 2 with JobException

Use of org.apache.flink.runtime.JobException in project flink by apache.

From the class ExecutionGraphBuilder, the method buildGraph:

/**
	 * Builds the ExecutionGraph from the JobGraph.
	 * If a prior execution graph exists, the JobGraph will be attached to it. If no prior
	 * execution graph exists, the JobGraph will be attached to a new, empty execution graph.
	 */
public static ExecutionGraph buildGraph(@Nullable ExecutionGraph prior, JobGraph jobGraph, Configuration jobManagerConfig, ScheduledExecutorService futureExecutor, Executor ioExecutor, SlotProvider slotProvider, ClassLoader classLoader, CheckpointRecoveryFactory recoveryFactory, Time timeout, RestartStrategy restartStrategy, MetricGroup metrics, int parallelismForAutoMax, Logger log) throws JobExecutionException, JobException {
    checkNotNull(jobGraph, "job graph cannot be null");
    final String jobName = jobGraph.getName();
    final JobID jobId = jobGraph.getJobID();
    // create a new execution graph, if none exists so far
    final ExecutionGraph executionGraph;
    try {
        executionGraph = (prior != null) ? prior : new ExecutionGraph(futureExecutor, ioExecutor, jobId, jobName, jobGraph.getJobConfiguration(), jobGraph.getSerializedExecutionConfig(), timeout, restartStrategy, jobGraph.getUserJarBlobKeys(), jobGraph.getClasspaths(), slotProvider, classLoader, metrics);
    } catch (IOException e) {
        throw new JobException("Could not create the execution graph.", e);
    }
    // set the basic properties
    executionGraph.setScheduleMode(jobGraph.getScheduleMode());
    executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling());
    try {
        executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph));
    } catch (Throwable t) {
        log.warn("Cannot create JSON plan for job", t);
        // give the graph an empty plan
        executionGraph.setJsonPlan("{}");
    }
    // initialize the vertices that have a master initialization hook
    // file output formats create directories here, input formats create splits
    final long initMasterStart = System.nanoTime();
    log.info("Running initialization on master for job {} ({}).", jobName, jobId);
    for (JobVertex vertex : jobGraph.getVertices()) {
        String executableClass = vertex.getInvokableClassName();
        if (executableClass == null || executableClass.isEmpty()) {
            throw new JobSubmissionException(jobId, "The vertex " + vertex.getID() + " (" + vertex.getName() + ") has no invokable class.");
        }
        if (vertex.getParallelism() == ExecutionConfig.PARALLELISM_AUTO_MAX) {
            vertex.setParallelism(parallelismForAutoMax);
        }
        try {
            vertex.initializeOnMaster(classLoader);
        } catch (Throwable t) {
            throw new JobExecutionException(jobId, "Cannot initialize task '" + vertex.getName() + "': " + t.getMessage(), t);
        }
    }
    log.info("Successfully ran initialization on master in {} ms.", (System.nanoTime() - initMasterStart) / 1_000_000);
    // topologically sort the job vertices and attach the graph to the existing one
    List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
    if (log.isDebugEnabled()) {
        log.debug("Adding {} vertices from job graph {} ({}).", sortedTopology.size(), jobName, jobId);
    }
    executionGraph.attachJobGraph(sortedTopology);
    if (log.isDebugEnabled()) {
        log.debug("Successfully created execution graph from job graph {} ({}).", jobName, jobId);
    }
    // configure the state checkpointing
    JobSnapshottingSettings snapshotSettings = jobGraph.getSnapshotSettings();
    if (snapshotSettings != null) {
        List<ExecutionJobVertex> triggerVertices = idToVertex(snapshotSettings.getVerticesToTrigger(), executionGraph);
        List<ExecutionJobVertex> ackVertices = idToVertex(snapshotSettings.getVerticesToAcknowledge(), executionGraph);
        List<ExecutionJobVertex> confirmVertices = idToVertex(snapshotSettings.getVerticesToConfirm(), executionGraph);
        CompletedCheckpointStore completedCheckpoints;
        CheckpointIDCounter checkpointIdCounter;
        try {
            int maxNumberOfCheckpointsToRetain = jobManagerConfig.getInteger(CoreOptions.MAX_RETAINED_CHECKPOINTS);
            if (maxNumberOfCheckpointsToRetain <= 0) {
                // log a warning and fall back to the default value if the configured
                // state.checkpoints.max-retained-checkpoints is not greater than 0
                log.warn("The setting for '{} : {}' is invalid. Using default value of {}", CoreOptions.MAX_RETAINED_CHECKPOINTS.key(), maxNumberOfCheckpointsToRetain, CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue());
                maxNumberOfCheckpointsToRetain = CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue();
            }
            completedCheckpoints = recoveryFactory.createCheckpointStore(jobId, maxNumberOfCheckpointsToRetain, classLoader);
            checkpointIdCounter = recoveryFactory.createCheckpointIDCounter(jobId);
        } catch (Exception e) {
            throw new JobExecutionException(jobId, "Failed to initialize high-availability checkpoint handler", e);
        }
        // Maximum number of remembered checkpoints
        int historySize = jobManagerConfig.getInteger(ConfigConstants.JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE, ConfigConstants.DEFAULT_JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE);
        CheckpointStatsTracker checkpointStatsTracker = new CheckpointStatsTracker(historySize, ackVertices, snapshotSettings, metrics);
        // The default directory for externalized checkpoints
        String externalizedCheckpointsDir = jobManagerConfig.getString(ConfigConstants.CHECKPOINTS_DIRECTORY_KEY, null);
        // load the state backend for checkpoint metadata.
        // if specified in the application, use from there, otherwise load from configuration
        final StateBackend metadataBackend;
        final StateBackend applicationConfiguredBackend = snapshotSettings.getDefaultStateBackend();
        if (applicationConfiguredBackend != null) {
            metadataBackend = applicationConfiguredBackend;
            log.info("Using application-defined state backend for checkpoint/savepoint metadata: {}.", applicationConfiguredBackend);
        } else {
            try {
                metadataBackend = AbstractStateBackend.loadStateBackendFromConfigOrCreateDefault(jobManagerConfig, classLoader, log);
            } catch (IllegalConfigurationException | IOException | DynamicCodeLoadingException e) {
                throw new JobExecutionException(jobId, "Could not instantiate configured state backend", e);
            }
        }
        executionGraph.enableCheckpointing(snapshotSettings.getCheckpointInterval(), snapshotSettings.getCheckpointTimeout(), snapshotSettings.getMinPauseBetweenCheckpoints(), snapshotSettings.getMaxConcurrentCheckpoints(), snapshotSettings.getExternalizedCheckpointSettings(), triggerVertices, ackVertices, confirmVertices, checkpointIdCounter, completedCheckpoints, externalizedCheckpointsDir, metadataBackend, checkpointStatsTracker);
    }
    return executionGraph;
}
Also used : CheckpointStatsTracker(org.apache.flink.runtime.checkpoint.CheckpointStatsTracker) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) IOException(java.io.IOException) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) JobException(org.apache.flink.runtime.JobException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) DynamicCodeLoadingException(org.apache.flink.util.DynamicCodeLoadingException) StateBackend(org.apache.flink.runtime.state.StateBackend) AbstractStateBackend(org.apache.flink.runtime.state.AbstractStateBackend) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) JobID(org.apache.flink.api.common.JobID) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore)
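
The loop over jobGraph.getVertices() illustrates the recurring pattern in buildGraph: validate each vertex, run its master-side initialization hook, and wrap any failure in a job-scoped exception so the client sees which job and task broke. A minimal sketch of that pattern with plain stand-in types (hypothetical names, not Flink's classes):

import java.util.Arrays;
import java.util.List;

// Sketch of the master-side vertex validation/initialization loop:
// fail fast on missing invokable classes, wrap init errors with job context.
public class VertexInitSketch {

    static class Vertex {
        final String name;
        final String invokableClass; // may be null or empty
        Vertex(String name, String invokableClass) {
            this.name = name;
            this.invokableClass = invokableClass;
        }
        void initializeOnMaster() throws Exception {
            // e.g. output formats create directories, input formats create splits
        }
    }

    static void initializeOnMaster(String jobId, List<Vertex> vertices) throws Exception {
        for (Vertex vertex : vertices) {
            if (vertex.invokableClass == null || vertex.invokableClass.isEmpty()) {
                throw new Exception("Job " + jobId + ": vertex " + vertex.name + " has no invokable class.");
            }
            try {
                vertex.initializeOnMaster();
            } catch (Throwable t) {
                throw new Exception("Job " + jobId + ": cannot initialize task '" + vertex.name + "'", t);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        initializeOnMaster("job-1", Arrays.asList(new Vertex("source", "com.example.Source")));
        System.out.println("all vertices initialized");
    }
}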

Example 3 with JobException

Use of org.apache.flink.runtime.JobException in project flink by apache.

From the class ExecutionJobVertex, the method computeLocalInputSplitsPerTask:

// --------------------------------------------------------------------------------------------
//  Static / pre-assigned input splits
// --------------------------------------------------------------------------------------------
private List<LocatableInputSplit>[] computeLocalInputSplitsPerTask(InputSplit[] splits) throws JobException {
    final int numSubTasks = getParallelism();
    // sanity check
    if (numSubTasks > splits.length) {
        throw new JobException("Strictly local assignment requires at least as many splits as subtasks.");
    }
    // group the splits by host while preserving order per host
    Map<String, List<LocatableInputSplit>> splitsByHost = new HashMap<String, List<LocatableInputSplit>>();
    for (InputSplit split : splits) {
        // check that split has exactly one local host
        if (!(split instanceof LocatableInputSplit)) {
            throw new JobException("Invalid InputSplit type " + split.getClass().getCanonicalName() + ". " + "Strictly local assignment requires LocatableInputSplit");
        }
        LocatableInputSplit lis = (LocatableInputSplit) split;
        if (lis.getHostnames() == null) {
            throw new JobException("LocatableInputSplit has no host information. " + "Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        } else if (lis.getHostnames().length != 1) {
            throw new JobException("Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }
        String hostName = lis.getHostnames()[0];
        if (hostName == null) {
            throw new JobException("For strictly local input split assignment, no null host names are allowed.");
        }
        List<LocatableInputSplit> hostSplits = splitsByHost.get(hostName);
        if (hostSplits == null) {
            hostSplits = new ArrayList<LocatableInputSplit>();
            splitsByHost.put(hostName, hostSplits);
        }
        hostSplits.add(lis);
    }
    int numHosts = splitsByHost.size();
    if (numSubTasks < numHosts) {
        throw new JobException("Strictly local split assignment requires at least as " + "many parallel subtasks as distinct split hosts. Please increase the parallelism " + "of DataSource " + this.getJobVertex().getName() + " to at least " + numHosts + ".");
    }
    // get list of hosts in deterministic order
    List<String> hosts = new ArrayList<String>(splitsByHost.keySet());
    Collections.sort(hosts);
    @SuppressWarnings("unchecked") List<LocatableInputSplit>[] subTaskSplitAssignment = (List<LocatableInputSplit>[]) new List<?>[numSubTasks];
    final int subtasksPerHost = numSubTasks / numHosts;
    final int hostsWithOneMore = numSubTasks % numHosts;
    int subtaskNum = 0;
    // go over all hosts and distribute their splits over the subtasks
    for (int hostNum = 0; hostNum < numHosts; hostNum++) {
        String host = hosts.get(hostNum);
        List<LocatableInputSplit> splitsOnHost = splitsByHost.get(host);
        int numSplitsOnHost = splitsOnHost.size();
        // the number of subtasks to split this over.
        // NOTE: if the host has few splits, some subtasks will not get anything.
        int subtasks = Math.min(numSplitsOnHost, hostNum < hostsWithOneMore ? subtasksPerHost + 1 : subtasksPerHost);
        int splitsPerSubtask = numSplitsOnHost / subtasks;
        int subtasksWithOneMore = numSplitsOnHost % subtasks;
        int splitnum = 0;
        // go over the subtasks and grab a subrange of the input splits
        for (int i = 0; i < subtasks; i++) {
            int numSplitsForSubtask = (i < subtasksWithOneMore ? splitsPerSubtask + 1 : splitsPerSubtask);
            List<LocatableInputSplit> splitList;
            if (numSplitsForSubtask == numSplitsOnHost) {
                splitList = splitsOnHost;
            } else {
                splitList = new ArrayList<LocatableInputSplit>(numSplitsForSubtask);
                for (int k = 0; k < numSplitsForSubtask; k++) {
                    splitList.add(splitsOnHost.get(splitnum++));
                }
            }
            subTaskSplitAssignment[subtaskNum++] = splitList;
        }
    }
    return subTaskSplitAssignment;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JobException(org.apache.flink.runtime.JobException) LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) List(java.util.List) InputSplit(org.apache.flink.core.io.InputSplit)
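
Stripped of the host grouping, the distribution logic above reduces to integer division with the remainder spread over the first few buckets: n items over k buckets gives each bucket n / k items, and the first n % k buckets get one extra. A self-contained sketch of that arithmetic (illustrative only, no Flink types):

import java.util.Arrays;

// Sketch of the even-distribution arithmetic used for splits and subtasks above.
public class SplitDistributionSketch {

    // Distribute `items` as evenly as possible over `buckets`:
    // the first (items % buckets) buckets get one extra item.
    static int[] distribute(int items, int buckets) {
        int base = items / buckets;
        int withOneMore = items % buckets;
        int[] counts = new int[buckets];
        for (int i = 0; i < buckets; i++) {
            counts[i] = i < withOneMore ? base + 1 : base;
        }
        return counts;
    }

    public static void main(String[] args) {
        // 10 splits over 4 subtasks -> [3, 3, 2, 2]
        System.out.println(Arrays.toString(distribute(10, 4)));
    }
}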

Example 4 with JobException

Use of org.apache.flink.runtime.JobException in project flink by apache.

From the class ExecutionGraphMetricsTest, the method testExecutionGraphRestartTimeMetric:

/**
	 * Tests that the restarting time metric correctly reports restarting times.
	 */
@Test
public void testExecutionGraphRestartTimeMetric() throws JobException, IOException, InterruptedException {
    final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    try {
        // setup execution graph with mocked scheduling logic
        int parallelism = 1;
        JobVertex jobVertex = new JobVertex("TestVertex");
        jobVertex.setParallelism(parallelism);
        jobVertex.setInvokableClass(NoOpInvokable.class);
        JobGraph jobGraph = new JobGraph("Test Job", jobVertex);
        Configuration config = new Configuration();
        config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
        config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestingReporter.class.getName());
        Configuration jobConfig = new Configuration();
        Time timeout = Time.seconds(10L);
        MetricRegistry metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
        assertTrue(metricRegistry.getReporters().size() == 1);
        MetricReporter reporter = metricRegistry.getReporters().get(0);
        assertTrue(reporter instanceof TestingReporter);
        TestingReporter testingReporter = (TestingReporter) reporter;
        MetricGroup metricGroup = new JobManagerMetricGroup(metricRegistry, "localhost");
        Scheduler scheduler = mock(Scheduler.class);
        ResourceID taskManagerId = ResourceID.generate();
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(taskManagerId);
        when(taskManagerLocation.getHostname()).thenReturn("localhost");
        TaskManagerGateway taskManagerGateway = mock(TaskManagerGateway.class);
        Instance instance = mock(Instance.class);
        when(instance.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(instance.getTaskManagerID()).thenReturn(taskManagerId);
        when(instance.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        Slot rootSlot = mock(Slot.class);
        AllocatedSlot mockAllocatedSlot = mock(AllocatedSlot.class);
        when(mockAllocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
        SimpleSlot simpleSlot = mock(SimpleSlot.class);
        when(simpleSlot.isAlive()).thenReturn(true);
        when(simpleSlot.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(simpleSlot.getTaskManagerID()).thenReturn(taskManagerId);
        when(simpleSlot.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        when(simpleSlot.setExecutedVertex(Matchers.any(Execution.class))).thenReturn(true);
        when(simpleSlot.getRoot()).thenReturn(rootSlot);
        when(simpleSlot.getAllocatedSlot()).thenReturn(mockAllocatedSlot);
        FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
        future.complete(simpleSlot);
        when(scheduler.allocateSlot(any(ScheduledUnit.class), anyBoolean())).thenReturn(future);
        when(rootSlot.getSlotNumber()).thenReturn(0);
        when(taskManagerGateway.submitTask(any(TaskDeploymentDescriptor.class), any(Time.class))).thenReturn(FlinkCompletableFuture.completed(Acknowledge.get()));
        TestingRestartStrategy testingRestartStrategy = new TestingRestartStrategy();
        ExecutionGraph executionGraph = new ExecutionGraph(executor, executor, jobGraph.getJobID(), jobGraph.getName(), jobConfig, new SerializedValue<ExecutionConfig>(null), timeout, testingRestartStrategy, Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), scheduler, getClass().getClassLoader(), metricGroup);
        // get restarting time metric
        Metric metric = testingReporter.getMetric(ExecutionGraph.RESTARTING_TIME_METRIC_NAME);
        assertNotNull(metric);
        assertTrue(metric instanceof Gauge);
        @SuppressWarnings("unchecked") Gauge<Long> restartingTime = (Gauge<Long>) metric;
        // check that the restarting time is 0 since it's the initial start
        assertTrue(0L == restartingTime.getValue());
        executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
        // start execution
        executionGraph.scheduleForExecution();
        assertTrue(0L == restartingTime.getValue());
        List<ExecutionAttemptID> executionIDs = new ArrayList<>();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }
        // tell execution graph that the tasks are in state running --> job status switches to state running
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(0L == restartingTime.getValue());
        // fail the job so that it goes into state restarting
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long firstRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        // wait some time so that the restarting time gauge shows a value different from 0
        Thread.sleep(50);
        long previousRestartingTime = restartingTime.getValue();
        // check that the restarting time is monotonically increasing
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        // check that we have measured some restarting time
        assertTrue(previousRestartingTime > 0);
        // restart job
        testingRestartStrategy.restartExecutionGraph();
        executionIDs.clear();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(firstRestartingTimestamp != 0);
        previousRestartingTime = restartingTime.getValue();
        // check that the restarting time does not increase after we've reached the running state
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        // fail job again
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long secondRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        assertTrue(firstRestartingTimestamp != secondRestartingTimestamp);
        Thread.sleep(50);
        previousRestartingTime = restartingTime.getValue();
        // check that the restarting time is increasing again
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        assertTrue(previousRestartingTime > 0);
        // now let's fail the job while it is restarting and check that the restarting time stops increasing;
        // for this to work, we have to use a SuppressRestartsException
        executionGraph.fail(new SuppressRestartsException(new Exception()));
        assertEquals(JobStatus.FAILED, executionGraph.getState());
        previousRestartingTime = restartingTime.getValue();
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
    } finally {
        executor.shutdownNow();
    }
}
Also used : JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) MetricGroup(org.apache.flink.metrics.MetricGroup) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) ArrayList(java.util.ArrayList) Time(org.apache.flink.api.common.time.Time) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) Gauge(org.apache.flink.metrics.Gauge) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) JobException(org.apache.flink.runtime.JobException) IOException(java.io.IOException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Slot(org.apache.flink.runtime.instance.Slot) Metric(org.apache.flink.metrics.Metric) Test(org.junit.Test)
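
The gauge polled by this test behaves like a stopwatch: it reads 0 before any restart, increases while the job is RESTARTING, and freezes once the job leaves that state. A minimal stand-alone sketch of such a gauge, assuming only the org.apache.flink.metrics.Gauge interface (an illustration of the metric's contract, not the actual ExecutionGraph implementation):

import org.apache.flink.metrics.Gauge;

// Sketch: a gauge that measures the time spent in a "restarting" phase.
public class RestartingTimeGauge implements Gauge<Long> {

    private volatile long restartingStart = -1L; // -1: never restarted
    private volatile long restartingEnd = -1L;   // -1: still restarting

    void onEnterRestarting() {
        restartingStart = System.currentTimeMillis();
        restartingEnd = -1L;
    }

    void onLeaveRestarting() {
        restartingEnd = System.currentTimeMillis();
    }

    @Override
    public Long getValue() {
        if (restartingStart < 0) {
            return 0L; // initial start: no restart has happened yet
        }
        long end = restartingEnd < 0 ? System.currentTimeMillis() : restartingEnd;
        return end - restartingStart; // grows while restarting, frozen afterwards
    }

    public static void main(String[] args) throws InterruptedException {
        RestartingTimeGauge gauge = new RestartingTimeGauge();
        System.out.println(gauge.getValue());           // 0: never restarted
        gauge.onEnterRestarting();
        Thread.sleep(50);
        System.out.println(gauge.getValue() > 0);       // true: clock is running
        gauge.onLeaveRestarting();
        long frozen = gauge.getValue();
        Thread.sleep(50);
        System.out.println(gauge.getValue() == frozen); // true: frozen after leaving
    }
}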

Example 5 with JobException

Use of org.apache.flink.runtime.JobException in project flink by apache.

From the class PointwisePatternTest, the method testLowToHigh:

private void testLowToHigh(int lowDop, int highDop) throws Exception {
    if (highDop < lowDop) {
        throw new IllegalArgumentException();
    }
    final int factor = highDop / lowDop;
    final int delta = highDop % lowDop == 0 ? 0 : 1;
    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");
    v1.setParallelism(lowDop);
    v2.setParallelism(highDop);
    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);
    v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2));
    ExecutionGraph eg = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), jobId, jobName, cfg, new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy(), new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }
    ExecutionJobVertex target = eg.getAllVertices().get(v2.getID());
    int[] timesUsed = new int[lowDop];
    for (ExecutionVertex ev : target.getTaskVertices()) {
        assertEquals(1, ev.getNumberOfInputs());
        ExecutionEdge[] inEdges = ev.getInputEdges(0);
        assertEquals(1, inEdges.length);
        timesUsed[inEdges[0].getSource().getPartitionNumber()]++;
    }
    for (int used : timesUsed) {
        assertTrue(used >= factor && used <= factor + delta);
    }
}
Also used : Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobException(org.apache.flink.runtime.JobException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex)
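
The final assertion encodes the expected fan-out of a POINTWISE connection from lowDop producers to highDop consumers: each producer is used either highDop / lowDop times or one more. A self-contained sketch of that bound under a canonical floor-based assignment (illustrative arithmetic only, not necessarily Flink's exact mapping):

import java.util.Arrays;

// Sketch: verify the pointwise low-to-high fan-out bound on a floor-based mapping.
public class PointwiseFanOutSketch {

    // timesUsed[producer] when consumer i is wired to producer floor(i * lowDop / highDop)
    static int[] fanOut(int lowDop, int highDop) {
        int[] timesUsed = new int[lowDop];
        for (int consumer = 0; consumer < highDop; consumer++) {
            timesUsed[consumer * lowDop / highDop]++;
        }
        return timesUsed;
    }

    public static void main(String[] args) {
        int lowDop = 3, highDop = 7;
        int factor = highDop / lowDop;             // 2
        int delta = highDop % lowDop == 0 ? 0 : 1; // 1
        int[] used = fanOut(lowDop, highDop);
        for (int u : used) {
            if (u < factor || u > factor + delta) {
                throw new AssertionError("producer used " + u + " times, expected ["
                        + factor + ", " + (factor + delta) + "]");
            }
        }
        System.out.println(Arrays.toString(used)); // [3, 2, 2]
    }
}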

Aggregations

JobException (org.apache.flink.runtime.JobException): 20 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 17 usages
ArrayList (java.util.ArrayList): 15 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 15 usages
Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler): 15 usages
NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy): 14 usages
Test (org.junit.Test): 13 usages
JobID (org.apache.flink.api.common.JobID): 8 usages
Configuration (org.apache.flink.configuration.Configuration): 8 usages
IOException (java.io.IOException): 2 usages
HashMap (java.util.HashMap): 2 usages
InputSplit (org.apache.flink.core.io.InputSplit): 2 usages
TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor): 2 usages
IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet): 2 usages
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 2 usages
TaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway): 2 usages
URL (java.net.URL): 1 usage
HashSet (java.util.HashSet): 1 usage
List (java.util.List): 1 usage
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 1 usage