Search in sources :

Example 6 with ProgramDescriptor

use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.

the class ProgramNotificationSubscriberService method handleClusterEvent.

/**
 * Processes a cluster lifecycle notification for a program run.
 *
 * @param programRunId the program run the event refers to
 * @param clusterStatus the cluster status carried by the event
 * @param notification the notification being processed
 * @param messageIdBytes the unique ID of the notification message
 * @param appMetadataStore the app metadata store used to record the state change
 * @param context the table context for performing table operations
 * @return an {@link Optional} holding a {@link Runnable} task to execute once handling of this event has completed,
 *         or an empty {@link Optional} when there is no such task. See {@link #postProcess()} for details.
 * @throws IOException if failed to read/write to the app metadata store.
 */
private Optional<Runnable> handleClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus,
                                              Notification notification, byte[] messageIdBytes,
                                              AppMetadataStore appMetadataStore,
                                              StructuredTableContext context) throws IOException {
    // Everything below is decoded up front, for every cluster status, before any state is recorded.
    Map<String, String> props = notification.getProperties();
    ProgramOptions options = ProgramOptions.fromNotification(notification, GSON);
    String user = props.get(ProgramOptionConstants.USER_ID);
    long clusterEndSecs = getTimeSeconds(props, ProgramOptionConstants.CLUSTER_END_TIME);
    String descriptorJson = props.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR);
    ProgramDescriptor descriptor = GSON.fromJson(descriptorJson, ProgramDescriptor.class);

    switch (clusterStatus) {
        case PROVISIONING: {
            appMetadataStore.recordProgramProvisioning(programRunId,
                                                       options.getUserArguments().asMap(),
                                                       options.getArguments().asMap(),
                                                       messageIdBytes,
                                                       descriptor.getArtifactId().toApiArtifactId());
            ProvisionRequest request = new ProvisionRequest(programRunId, options, descriptor, user);
            return Optional.of(provisioningService.provision(request, context));
        }
        case PROVISIONED: {
            String clusterJson = props.get(ProgramOptionConstants.CLUSTER);
            Cluster provisionedCluster = GSON.fromJson(clusterJson, Cluster.class);
            appMetadataStore.recordProgramProvisioned(programRunId, provisionedCluster.getNodes().size(),
                                                      messageIdBytes);

            // Extend the system arguments with the information needed for program execution
            Map<String, String> systemArgs = new HashMap<>(options.getArguments().asMap());
            systemArgs.put(ProgramOptionConstants.USER_ID, user);
            systemArgs.put(ProgramOptionConstants.CLUSTER, clusterJson);
            systemArgs.put(ProgramOptionConstants.SECURE_KEYS_DIR, props.get(ProgramOptionConstants.SECURE_KEYS_DIR));
            ProgramOptions executionOptions = new SimpleProgramOptions(options.getProgramId(),
                                                                       new BasicArguments(systemArgs),
                                                                       options.getUserArguments());

            // The STARTING state is published before the program itself is started
            programStateWriter.start(programRunId, executionOptions, null, descriptor);

            // Provisioning time metric: seconds elapsed since the time encoded in the run id
            long nowSecs = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
            long provisioningSecs = nowSecs - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
            SystemArguments.getProfileIdFromArgs(programRunId.getNamespaceId(), systemArgs)
                .ifPresent(profileId -> emitProvisioningTimeMetric(programRunId, profileId, options, provisioningSecs));
            return Optional.empty();
        }
        case DEPROVISIONING: {
            RunRecordDetail recorded = appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
            // A null record means the status was not recorded (presumably a duplicate message
            // or an invalid state transition). In both cases, we should not try to deprovision the cluster.
            if (recorded == null) {
                return Optional.empty();
            }
            return Optional.of(provisioningService.deprovision(programRunId, context));
        }
        case DEPROVISIONED: {
            appMetadataStore.recordProgramDeprovisioned(programRunId, clusterEndSecs, messageIdBytes);
            return Optional.empty();
        }
        case ORPHANED: {
            appMetadataStore.recordProgramOrphaned(programRunId, clusterEndSecs, messageIdBytes);
            return Optional.empty();
        }
    }
    // Any other cluster status requires no handling and no follow-up task
    return Optional.empty();
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProvisionRequest(io.cdap.cdap.internal.provision.ProvisionRequest) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions)

Example 7 with ProgramDescriptor

use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.

the class ProgramNotificationSubscriberService method handleProgramEvent.

/**
 * Handles a program status notification by recording the corresponding state change in the app metadata store
 * and the program heartbeat table, and then publishing the recorded status.
 *
 * <p>For end states other than {@code REJECTED}, the run is additionally either marked as deprovisioned directly
 * (when it ran inside a workflow or with provisioning skipped) or a deprovisioning notification is sent through
 * the provisioner notifier.
 *
 * @param programRunId program run id from the event
 * @param programRunStatus program run status from the event
 * @param notification the notification to process
 * @param messageIdBytes the unique ID for the notification message
 * @param appMetadataStore the app metadata store to record the state change in
 * @param programHeartbeatTable the heartbeat table to which recorded run records are written
 * @param runnables a list to which follow-up tasks are appended; this method never executes them itself
 * @throws Exception if handling of the notification failed
 */
private void handleProgramEvent(ProgramRunId programRunId, ProgramRunStatus programRunStatus, Notification notification, byte[] messageIdBytes, AppMetadataStore appMetadataStore, ProgramHeartbeatTable programHeartbeatTable, List<Runnable> runnables) throws Exception {
    LOG.trace("Processing program status notification: {}", notification);
    Map<String, String> properties = notification.getProperties();
    String twillRunId = notification.getProperties().get(ProgramOptionConstants.TWILL_RUN_ID);
    // Set by every case that falls through to the publishing logic after the switch; may be null.
    RunRecordDetail recordedRunRecord;
    switch(programRunStatus) {
        case STARTING:
            try {
                RunRecordDetail runRecordDetail = appMetadataStore.getRun(programRunId);
                if (runRecordDetail != null && runRecordDetail.getStatus() != ProgramRunStatus.PENDING && runRecordDetail.getStatus() != ProgramRunStatus.STARTING) {
                    // This is an invalid state transition happening. Valid state transitions are:
                    // PENDING => STARTING : normal state transition
                    // STARTING => STARTING : state transition after app-fabric restart
                    LOG.debug("Ignoring unexpected request to transition program run {} from {} state to program " + "STARTING state.", programRunId, runRecordDetail.getStatus());
                    return;
                }
            } catch (IllegalStateException ex) {
                // The failure is only logged; processing of the STARTING notification continues below.
                LOG.error("Request to transition program run {} from non-existent state to program STARTING state " + "but multiple run IDs exist.", programRunId);
            }
            String systemArgumentsString = properties.get(ProgramOptionConstants.SYSTEM_OVERRIDES);
            Map<String, String> systemArguments = systemArgumentsString == null ? Collections.emptyMap() : GSON.fromJson(systemArgumentsString, STRING_STRING_MAP);
            boolean isInWorkflow = systemArguments.containsKey(ProgramOptionConstants.WORKFLOW_NAME);
            boolean skipProvisioning = Boolean.parseBoolean(systemArguments.get(ProgramOptionConstants.SKIP_PROVISIONING));
            ProgramOptions prgOptions = ProgramOptions.fromNotification(notification, GSON);
            ProgramDescriptor prgDescriptor = GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);
            // For a program inside a workflow, or one with provisioning skipped, the provisioning and provisioned
            // states are recorded directly here. Otherwise a task that starts the program is queued in 'runnables'.
            // NOTE(review): the original comment here was truncated ("... state changes into Starting."); presumably
            // the queued task is meant to run after the state changes into Starting - confirm against the caller.
            if (isInWorkflow || skipProvisioning) {
                appMetadataStore.recordProgramProvisioning(programRunId, prgOptions.getUserArguments().asMap(), prgOptions.getArguments().asMap(), messageIdBytes, prgDescriptor.getArtifactId().toApiArtifactId());
                appMetadataStore.recordProgramProvisioned(programRunId, 0, messageIdBytes);
            } else {
                runnables.add(() -> {
                    // Start the program with the user id taken from the program options, restoring the
                    // previous user id afterwards regardless of the outcome.
                    String oldUser = SecurityRequestContext.getUserId();
                    try {
                        SecurityRequestContext.setUserId(prgOptions.getArguments().getOption(ProgramOptionConstants.USER_ID));
                        try {
                            programLifecycleService.startInternal(prgDescriptor, prgOptions, programRunId);
                        } catch (Exception e) {
                            // A failure to start is logged and reported through the program state writer
                            LOG.error("Failed to start program {}", programRunId, e);
                            programStateWriter.error(programRunId, e);
                        }
                    } finally {
                        SecurityRequestContext.setUserId(oldUser);
                    }
                });
            }
            recordedRunRecord = appMetadataStore.recordProgramStart(programRunId, twillRunId, systemArguments, messageIdBytes);
            writeToHeartBeatTable(recordedRunRecord, RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS), programHeartbeatTable);
            break;
        case RUNNING:
            long logicalStartTimeSecs = getTimeSeconds(notification.getProperties(), ProgramOptionConstants.LOGICAL_START_TIME);
            // A value of -1 is treated as "no logical start time specified"; such notifications are ignored
            if (logicalStartTimeSecs == -1) {
                LOG.warn("Ignore program running notification for program {} without {} specified, {}", programRunId, ProgramOptionConstants.LOGICAL_START_TIME, notification);
                return;
            }
            recordedRunRecord = appMetadataStore.recordProgramRunning(programRunId, logicalStartTimeSecs, twillRunId, messageIdBytes);
            writeToHeartBeatTable(recordedRunRecord, logicalStartTimeSecs, programHeartbeatTable);
            runRecordMonitorService.removeRequest(programRunId, true);
            // The starting-time metric is the logical start time minus the time encoded in the run id
            long startDelayTime = logicalStartTimeSecs - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
            emitStartingTimeMetric(programRunId, startDelayTime);
            break;
        case SUSPENDED:
            long suspendTime = getTimeSeconds(notification.getProperties(), ProgramOptionConstants.SUSPEND_TIME);
            // since we are adding suspend time recently, there might be old suspended notifications for which time
            // can be -1.
            recordedRunRecord = appMetadataStore.recordProgramSuspend(programRunId, messageIdBytes, suspendTime);
            writeToHeartBeatTable(recordedRunRecord, suspendTime, programHeartbeatTable);
            break;
        case RESUMING:
            long resumeTime = getTimeSeconds(notification.getProperties(), ProgramOptionConstants.RESUME_TIME);
            // As with the suspend time above, the resume time is not validated here; presumably it can be -1 for
            // older resuming notifications that carry no resume time.
            recordedRunRecord = appMetadataStore.recordProgramResumed(programRunId, messageIdBytes, resumeTime);
            writeToHeartBeatTable(recordedRunRecord, resumeTime, programHeartbeatTable);
            break;
        case STOPPING:
            Map<String, String> notificationProperties = notification.getProperties();
            long stoppingTsSecs = getTimeSeconds(notificationProperties, ProgramOptionConstants.STOPPING_TIME);
            // A value of -1 is treated as "no stopping time specified"; such notifications are ignored
            if (stoppingTsSecs == -1L) {
                LOG.warn("Ignore program stopping notification for program {} without {} specified, {}", programRunId, ProgramOptionConstants.STOPPING_TIME, notification);
                return;
            }
            // Unlike the stopping time, the terminate time is passed on without being validated
            long terminateTsSecs = getTimeSeconds(notificationProperties, ProgramOptionConstants.TERMINATE_TIME);
            recordedRunRecord = appMetadataStore.recordProgramStopping(programRunId, messageIdBytes, stoppingTsSecs, terminateTsSecs);
            writeToHeartBeatTable(recordedRunRecord, stoppingTsSecs, programHeartbeatTable);
            break;
        case COMPLETED:
        case KILLED:
        case FAILED:
            // All three statuses are delegated to the same completion handler
            recordedRunRecord = handleProgramCompletion(appMetadataStore, programHeartbeatTable, programRunId, programRunStatus, notification, messageIdBytes, runnables);
            break;
        case REJECTED:
            ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
            ProgramDescriptor programDescriptor = GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);
            recordedRunRecord = appMetadataStore.recordProgramRejected(programRunId, programOptions.getUserArguments().asMap(), programOptions.getArguments().asMap(), messageIdBytes, programDescriptor.getArtifactId().toApiArtifactId());
            writeToHeartBeatTable(recordedRunRecord, RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS), programHeartbeatTable);
            // Queue the rejected-runs metric task, if one is provided, as a follow-up task
            getEmitMetricsRunnable(programRunId, recordedRunRecord, Constants.Metrics.Program.PROGRAM_REJECTED_RUNS, null).ifPresent(runnables::add);
            runRecordMonitorService.removeRequest(programRunId, true);
            break;
        default:
            // This should not happen
            LOG.error("Unsupported program status {} for program {}, {}", programRunStatus, programRunId, notification);
            return;
    }
    // Nothing is published and no deprovisioning happens when no run record was recorded
    if (recordedRunRecord != null) {
        // We need to publish the message so that the trigger subscriber can pick it up and start the trigger if
        // necessary
        publishRecordedStatus(notification, programRunId, recordedRunRecord.getStatus());
        // publish the deprovisioning event(s).
        if (programRunStatus.isEndState() && programRunStatus != ProgramRunStatus.REJECTED) {
            // if this is a preview run or a program within a workflow, we don't actually need to de-provision the cluster.
            // instead, we just record the state as deprovisioned without notifying the provisioner
            // and we will emit the program status metrics for it
            boolean isInWorkflow = recordedRunRecord.getSystemArgs().containsKey(ProgramOptionConstants.WORKFLOW_NAME);
            boolean skipProvisioning = Boolean.parseBoolean(recordedRunRecord.getSystemArgs().get(ProgramOptionConstants.SKIP_PROVISIONING));
            if (isInWorkflow || skipProvisioning) {
                appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
                appMetadataStore.recordProgramDeprovisioned(programRunId, null, messageIdBytes);
            } else {
                provisionerNotifier.deprovisioning(programRunId);
            }
        }
    }
}
Also used : RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) JsonSyntaxException(com.google.gson.JsonSyntaxException) IOException(java.io.IOException) TableNotFoundException(io.cdap.cdap.spi.data.TableNotFoundException)

Example 8 with ProgramDescriptor

use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.

the class CoreSchedulerServiceTest method testProgramEvents.

/**
 * Verifies that program status events published while the schedule is disabled do not trigger the scheduled
 * workflow, and that once the schedule is enabled a completed run of the triggering workflow starts the triggered
 * workflow with the expected workflow token entries.
 */
@Test
@Category(XSlowTests.class)
public void testProgramEvents() throws Exception {
    // Deploy the application under test
    deploy(AppWithMultipleSchedules.class, 200);

    CConfiguration conf = getInjector().getInstance(CConfiguration.class);
    String statusTopicName = conf.get(Constants.AppFabric.PROGRAM_STATUS_RECORD_EVENT_TOPIC);
    TopicId statusTopic = NamespaceId.SYSTEM.topic(statusTopicName);
    ProgramStateWriter stateWriter = new MessagingProgramStateWriter(conf, messagingService);

    // None of the notifications published below should trigger the program
    ProgramRunId failedRun = ANOTHER_WORKFLOW.run(RunIds.generate());
    ArtifactId testArtifact = ANOTHER_WORKFLOW.getNamespaceId().artifact("test", "1.0").toApiArtifactId();
    ApplicationSpecification appSpec = new DefaultApplicationSpecification(
        AppWithMultipleSchedules.NAME, ApplicationId.DEFAULT_VERSION, ProjectInfo.getVersion().toString(), "desc",
        null, testArtifact,
        Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
        Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
        Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
    ProgramDescriptor failedRunDescriptor = new ProgramDescriptor(failedRun.getParent(), appSpec);
    BasicArguments skipProvisioningArgs = new BasicArguments(
        ImmutableMap.of(ProgramOptionConstants.SKIP_PROVISIONING, Boolean.TRUE.toString()));
    ProgramOptions failedRunOptions =
        new SimpleProgramOptions(failedRun.getParent(), skipProvisioningArgs, new BasicArguments(), false);

    // First run: start, run, then error out
    stateWriter.start(failedRun, failedRunOptions, null, failedRunDescriptor);
    stateWriter.running(failedRun, null);
    long beforeErrorSecs = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    stateWriter.error(failedRun, null);
    waitUntilProcessed(statusTopic, beforeErrorSecs);

    // Second run: start, run, then kill
    ProgramRunId killedRun = SOME_WORKFLOW.run(RunIds.generate());
    ProgramDescriptor killedRunDescriptor = new ProgramDescriptor(killedRun.getParent(), appSpec);
    SimpleProgramOptions killedRunOptions =
        new SimpleProgramOptions(killedRun.getParent(), skipProvisioningArgs, new BasicArguments());
    stateWriter.start(killedRun, killedRunOptions, null, killedRunDescriptor);
    stateWriter.running(killedRun, null);
    long beforeKillSecs = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    stateWriter.killed(killedRun);
    waitUntilProcessed(statusTopic, beforeKillSecs);

    // The triggered workflow must not have run so far
    Assert.assertEquals(0, getRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.ALL));

    // Enable the schedule
    scheduler.enableSchedule(APP_MULT_ID.schedule(AppWithMultipleSchedules.WORKFLOW_COMPLETED_SCHEDULE));

    // Start the triggering workflow with user arguments
    Map<String, String> userArgs = ImmutableMap.of(AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_KEY,
                                                   AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_VALUE);
    startProgram(ANOTHER_WORKFLOW, userArgs, 200);

    // Wait for a completed run record of the triggered workflow
    waitForCompleteRuns(1, TRIGGERED_WORKFLOW);
    assertProgramRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.COMPLETED, 1);
    RunRecord completedRun = getProgramRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.COMPLETED).get(0);
    Map<String, List<WorkflowTokenDetail.NodeValueDetail>> tokenData =
        getWorkflowToken(TRIGGERED_WORKFLOW, completedRun.getPid(), null, null).getTokenData();

    // Exactly two entries are expected in the token data
    Assert.assertEquals(2, tokenData.size());

    // TRIGGERED_RUNTIME_ARG_KEY must carry ANOTHER_RUNTIME_ARG_VALUE from the triggering workflow
    List<WorkflowTokenDetail.NodeValueDetail> runtimeArgValues =
        tokenData.get(AppWithMultipleSchedules.TRIGGERED_RUNTIME_ARG_KEY);
    Assert.assertEquals(AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_VALUE, runtimeArgValues.get(0).getValue());

    // TRIGGERED_TOKEN_KEY must carry ANOTHER_TOKEN_VALUE from the triggering workflow
    List<WorkflowTokenDetail.NodeValueDetail> tokenValues =
        tokenData.get(AppWithMultipleSchedules.TRIGGERED_TOKEN_KEY);
    Assert.assertEquals(AppWithMultipleSchedules.ANOTHER_TOKEN_VALUE, tokenValues.get(0).getValue());
}
Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) RunRecord(io.cdap.cdap.proto.RunRecord) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) TopicId(io.cdap.cdap.proto.id.TopicId) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) WorkflowTokenDetail(io.cdap.cdap.proto.WorkflowTokenDetail) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 9 with ProgramDescriptor

use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.

the class MapReduceTaskContextProvider method createProgram.

/**
 * Builds the {@link Program} described by the given {@link MapReduceContextConfig}, backed by the supplied
 * program ClassLoader.
 *
 * @param contextConfig provides the program jar name, the program id and the application specification
 * @param programClassLoader the ClassLoader handed to the created program
 * @return a new {@link DefaultProgram} located at the absolute path of the program jar
 */
private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) {
    LocationFactory localFactory = new LocalLocationFactory();
    // The program jar location is valid in both local and distributed mode, so it is always resolved the same way
    File programJar = new File(contextConfig.getProgramJarName()).getAbsoluteFile();
    Location jarLocation = localFactory.create(programJar.toURI());
    ProgramDescriptor descriptor = new ProgramDescriptor(contextConfig.getProgramId(),
                                                         contextConfig.getApplicationSpecification());
    return new DefaultProgram(descriptor, jarLocation, programClassLoader);
}
Also used : DefaultProgram(io.cdap.cdap.app.program.DefaultProgram) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) File(java.io.File) Location(org.apache.twill.filesystem.Location) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory)

Example 10 with ProgramDescriptor

use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.

the class ProgramNotificationSubscriberServiceTest method testWorkflowInnerPrograms.

/**
 * Verifies the run records of programs inside a workflow when the workflow errors out: an inner program that is
 * still running ends up FAILED, an inner program that already completed stays COMPLETED, and an inner program
 * that never started has no run record.
 */
@Test
public void testWorkflowInnerPrograms() throws Exception {
    AppFabricTestHelper.deployApplication(Id.Namespace.DEFAULT, ProgramStateWorkflowApp.class, null, cConf);

    String appName = ProgramStateWorkflowApp.class.getSimpleName();
    String workflowName = ProgramStateWorkflowApp.ProgramStateWorkflow.class.getSimpleName();
    ProgramRunId workflowRunId = NamespaceId.DEFAULT.app(appName).workflow(workflowName).run(RunIds.generate());
    ProgramId workflowId = workflowRunId.getParent();

    // A block-bodied lambda is kept here on purpose, so that it can only match a value-returning callback
    ApplicationSpecification appSpec = TransactionRunners.run(transactionRunner, context -> {
        AppMetadataStore store = AppMetadataStore.create(context);
        return store.getApplication(workflowId.getParent()).getSpec();
    });
    ProgramDescriptor descriptor = new ProgramDescriptor(workflowId, appSpec);

    // Start and run the workflow
    Map<String, String> sysArgs = new HashMap<>();
    sysArgs.put(ProgramOptionConstants.SKIP_PROVISIONING, Boolean.TRUE.toString());
    sysArgs.put(SystemArguments.PROFILE_NAME, ProfileId.NATIVE.getScopedName());
    SimpleProgramOptions workflowOptions =
        new SimpleProgramOptions(workflowId, new BasicArguments(sysArgs), new BasicArguments());
    programStateWriter.start(workflowRunId, workflowOptions, null, descriptor);
    programStateWriter.running(workflowRunId, null);

    String mrName = ProgramStateWorkflowApp.ProgramStateMR.class.getSimpleName();
    String sparkName = ProgramStateWorkflowApp.ProgramStateSpark.class.getSimpleName();
    String spark2Name = ProgramStateWorkflowApp.ProgramStateSpark2.class.getSimpleName();
    ProgramRunId mrRunId = workflowId.getParent().mr(mrName).run(RunIds.generate());
    ProgramRunId sparkRunId = workflowId.getParent().spark(sparkName).run(RunIds.generate());
    ProgramId sparkId2 = workflowId.getParent().spark(spark2Name);

    // Start and run the MR and the Spark program inside the workflow
    for (ProgramRunId innerRunId : Arrays.asList(mrRunId, sparkRunId)) {
        String nodeId = innerRunId.getProgram();
        workflowStateWriter.addWorkflowNodeState(workflowRunId,
                                                 new WorkflowNodeStateDetail(nodeId, NodeStatus.STARTING));
        workflowStateWriter.addWorkflowNodeState(workflowRunId,
                                                 new WorkflowNodeStateDetail(nodeId, NodeStatus.RUNNING));

        // Each inner program gets a fresh copy of the arguments carrying its own workflow information
        sysArgs = new HashMap<>(sysArgs);
        sysArgs.put(ProgramOptionConstants.RUN_ID, innerRunId.getRun());
        sysArgs.put(ProgramOptionConstants.WORKFLOW_NAME, workflowRunId.getProgram());
        sysArgs.put(ProgramOptionConstants.WORKFLOW_RUN_ID, workflowRunId.getRun());
        sysArgs.put(ProgramOptionConstants.WORKFLOW_NODE_ID, innerRunId.getProgram());
        sysArgs.put(ProgramOptionConstants.PROGRAM_NAME_IN_WORKFLOW, innerRunId.getProgram());
        SimpleProgramOptions innerOptions =
            new SimpleProgramOptions(innerRunId.getParent(), new BasicArguments(sysArgs), new BasicArguments());
        programStateWriter.start(innerRunId, innerOptions, null, descriptor);
        programStateWriter.running(innerRunId, null);

        // Wait until the inner program is recorded as RUNNING
        Tasks.waitFor(ProgramRunStatus.RUNNING, () -> TransactionRunners.run(transactionRunner, context -> {
            RunRecordDetail record = AppMetadataStore.create(context).getRun(innerRunId);
            return record == null ? null : record.getStatus();
        }), 10, TimeUnit.SECONDS);
    }

    // Stop the Spark program normally
    programStateWriter.completed(sparkRunId);
    // Error out the workflow without stopping the MR
    programStateWriter.error(workflowRunId, new IllegalStateException("Explicitly error out"));

    // Wait until the workflow is recorded as FAILED
    // NOTE(review): this timeout is 10000 seconds while the wait above uses 10 seconds - confirm it is intended.
    Tasks.waitFor(ProgramRunStatus.FAILED, () -> TransactionRunners.run(transactionRunner, context -> {
        RunRecordDetail record = AppMetadataStore.create(context).getRun(workflowRunId);
        return record == null ? null : record.getStatus();
    }), 10000, TimeUnit.SECONDS);

    // The MR run record should have been changed to FAILED as well (without race)
    TransactionRunners.run(transactionRunner, context -> {
        RunRecordDetail record = AppMetadataStore.create(context).getRun(mrRunId);
        Assert.assertNotNull(record);
        Assert.assertEquals(ProgramRunStatus.FAILED, record.getStatus());
    });

    // The Spark run record should stay as COMPLETED
    TransactionRunners.run(transactionRunner, context -> {
        RunRecordDetail record = AppMetadataStore.create(context).getRun(sparkRunId);
        Assert.assertNotNull(record);
        Assert.assertEquals(ProgramRunStatus.COMPLETED, record.getStatus());
    });

    // Since the Spark2 program has never been executed, it should have no run record
    TransactionRunners.run(transactionRunner, context -> {
        AppMetadataStore store = AppMetadataStore.create(context);
        Map<ProgramRunId, RunRecordDetail> spark2Runs =
            store.getRuns(sparkId2, ProgramRunStatus.ALL, 0, Long.MAX_VALUE, 100, null);
        Assert.assertTrue(spark2Runs.isEmpty());
    });
}
Also used : RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) Arrays(java.util.Arrays) TransactionRunners(io.cdap.cdap.spi.data.transaction.TransactionRunners) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) TimeoutException(java.util.concurrent.TimeoutException) NodeStatus(io.cdap.cdap.api.workflow.NodeStatus) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) AppFabricTestHelper(io.cdap.cdap.internal.AppFabricTestHelper) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) After(org.junit.After) Map(java.util.Map) RunId(org.apache.twill.api.RunId) Tasks(io.cdap.cdap.common.utils.Tasks) AfterClass(org.junit.AfterClass) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) Id(io.cdap.cdap.common.id.Id) List(java.util.List) AggregationFunction(io.cdap.cdap.api.dataset.lib.cube.AggregationFunction) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) Constants(io.cdap.cdap.common.conf.Constants) ProfileId(io.cdap.cdap.proto.id.ProfileId) ProgramOptionConstants(io.cdap.cdap.internal.app.runtime.ProgramOptionConstants) BeforeClass(org.junit.BeforeClass) MetricStore(io.cdap.cdap.api.metrics.MetricStore) HashMap(java.util.HashMap) ProgramType(io.cdap.cdap.proto.ProgramType) ArrayList(java.util.ArrayList) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProgramHeartbeatTable(io.cdap.cdap.reporting.ProgramHeartbeatTable) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) Profile(io.cdap.cdap.proto.profile.Profile) MetricDataQuery(io.cdap.cdap.api.metrics.MetricDataQuery) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) WorkflowNodeStateDetail(io.cdap.cdap.proto.WorkflowNodeStateDetail) AppMetadataStore(io.cdap.cdap.internal.app.store.AppMetadataStore) 
DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) WorkflowStateWriter(io.cdap.cdap.internal.app.runtime.workflow.WorkflowStateWriter) ProfileService(io.cdap.cdap.internal.profile.ProfileService) RunIds(io.cdap.cdap.common.app.RunIds) ProgramId(io.cdap.cdap.proto.id.ProgramId) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) Test(org.junit.Test) MetricTimeSeries(io.cdap.cdap.api.metrics.MetricTimeSeries) ProjectInfo(io.cdap.cdap.common.utils.ProjectInfo) ProgramRunClusterStatus(io.cdap.cdap.proto.ProgramRunClusterStatus) Injector(com.google.inject.Injector) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue) Assert(org.junit.Assert) Collections(java.util.Collections) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) AppMetadataStore(io.cdap.cdap.internal.app.store.AppMetadataStore) HashMap(java.util.HashMap) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowNodeStateDetail(io.cdap.cdap.proto.WorkflowNodeStateDetail) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Test(org.junit.Test)

Aggregations

ProgramDescriptor (io.cdap.cdap.app.program.ProgramDescriptor)66 BasicArguments (io.cdap.cdap.internal.app.runtime.BasicArguments)34 SimpleProgramOptions (io.cdap.cdap.internal.app.runtime.SimpleProgramOptions)34 Test (org.junit.Test)32 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)30 ProgramId (io.cdap.cdap.proto.id.ProgramId)30 ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions)26 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)26 File (java.io.File)24 HashMap (java.util.HashMap)20 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)18 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)18 IOException (java.io.IOException)18 Injector (com.google.inject.Injector)14 SystemArguments (io.cdap.cdap.internal.app.runtime.SystemArguments)14 ProgramType (io.cdap.cdap.proto.ProgramType)14 NamespaceId (io.cdap.cdap.proto.id.NamespaceId)14 Collections (java.util.Collections)14 Location (org.apache.twill.filesystem.Location)14 Program (io.cdap.cdap.app.program.Program)12