Search in sources :

Example 6 with ProgramOptions

use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.

the class ProvisioningService method provision.

/**
 * Records that a cluster will be provisioned for a program run and returns a Runnable that will actually perform
 * the cluster provisioning. This method must be run within a transaction.
 * The task returned should only be executed after the transaction that ran this method has completed.
 * Running the returned Runnable will start the actual task using an executor within this service so that it can be
 * tracked and optionally cancelled using {@link #cancelProvisionTask(ProgramRunId)}. The caller does not need to
 * submit the runnable using their own executor.
 *
 * <p>If the provisioner named in the program's system arguments does not exist, or its capabilities do not satisfy
 * the plugin requirements carried in the system arguments, no cluster creation is attempted: the problem is logged,
 * the run is marked as errored, a deprovisioned notification is sent, and a Runnable that does nothing is returned.
 *
 * @param provisionRequest the provision request
 * @param context context for the transaction
 * @return runnable that will actually execute the cluster provisioning, or a no-op runnable if the request was
 *         rejected as described above
 * @throws IOException if one of the underlying operations fails with an I/O error
 * @throws InterruptedException if the calling thread is interrupted, e.g. while waiting on the initialization latch
 */
public Runnable provision(ProvisionRequest provisionRequest, StructuredTableContext context) throws IOException, InterruptedException {
    // NOTE(review): the outcome of this bounded wait is not inspected, so execution continues even if the latch
    // was not released within 120 seconds -- confirm that this best-effort behavior is intended.
    initializeLatch.await(120, TimeUnit.SECONDS);
    ProgramRunId programRunId = provisionRequest.getProgramRunId();
    ProgramOptions programOptions = provisionRequest.getProgramOptions();
    Map<String, String> args = programOptions.getArguments().asMap();
    String name = SystemArguments.getProfileProvisioner(args);
    Provisioner provisioner = provisionerInfo.get().provisioners.get(name);
    // any errors seen here will transition the state straight to deprovisioned since no cluster create was attempted
    if (provisioner == null) {
        runWithProgramLogging(programRunId, args, () -> LOG.error("Could not provision cluster for the run because provisioner {} does not exist.", name));
        programStateWriter.error(programRunId, new IllegalStateException("Provisioner does not exist."));
        provisionerNotifier.deprovisioned(programRunId);
        return () -> {
        };
    }
    // get plugin requirement information and check for capability to run on the provisioner
    Set<PluginRequirement> requirements = GSON.fromJson(args.get(ProgramOptionConstants.PLUGIN_REQUIREMENTS), PLUGIN_REQUIREMENT_SET_TYPE);
    // a null result means no requirement information could be read from the arguments, so there is nothing to check
    if (requirements != null) {
        Set<PluginRequirement> unfulfilledRequirements = getUnfulfilledRequirements(provisioner.getCapabilities(), requirements);
        if (!unfulfilledRequirements.isEmpty()) {
            // parameterized logging, consistent with the other log statement in this method
            runWithProgramLogging(programRunId, args, () -> LOG.error("'{}' cannot be run using profile '{}' because the profile does not meet all plugin requirements. Following requirements were not met by the listed plugins: '{}'", programRunId.getProgram(), name, groupByRequirement(unfulfilledRequirements)));
            programStateWriter.error(programRunId, new IllegalArgumentException("Provisioner does not meet all the " + "requirements for the program to run."));
            provisionerNotifier.deprovisioned(programRunId);
            return () -> {
            };
        }
    }
    Map<String, String> properties = SystemArguments.getProfileProperties(args);
    ProvisioningOp provisioningOp = new ProvisioningOp(ProvisioningOp.Type.PROVISION, ProvisioningOp.Status.REQUESTING_CREATE);
    ProvisioningTaskInfo provisioningTaskInfo = new ProvisioningTaskInfo(programRunId, provisionRequest.getProgramDescriptor(), programOptions, properties, name, provisionRequest.getUser(), provisioningOp, createKeysDirectory(programRunId).toURI(), null);
    // persist the task info through the supplied transaction context before handing back the task
    ProvisionerTable provisionerTable = new ProvisionerTable(context);
    provisionerTable.putTaskInfo(provisioningTaskInfo);
    return createProvisionTask(provisioningTaskInfo, provisioner);
}
Also used : PluginRequirement(io.cdap.cdap.internal.pipeline.PluginRequirement) Provisioner(io.cdap.cdap.runtime.spi.provisioner.Provisioner) TetheringProvisioner(io.cdap.cdap.internal.tethering.runtime.spi.provisioner.TetheringProvisioner) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions)

Example 7 with ProgramOptions

use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.

the class ProvisioningService method createProvisionTask.

/**
 * Builds the Runnable that submits a provision task for the given task info to the task executor.
 *
 * <p>If the provisioner context cannot be created because of an {@link IOException} or an
 * {@link InvalidMacroException}, the failure is logged, the run is marked as errored, a deprovisioning
 * notification is sent, and a Runnable that does nothing is returned instead.
 *
 * @param taskInfo information about the provisioning task to run
 * @param provisioner the provisioner handed to the provision task
 * @return a Runnable that submits the task to the executor when run, or a no-op Runnable if setup failed
 */
private Runnable createProvisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner) {
    ProgramRunId programRunId = taskInfo.getProgramRunId();
    ProgramOptions options = taskInfo.getProgramOptions();
    Map<String, String> args = options.getArguments().asMap();

    ProvisionerContext provisionerContext;
    try {
        SSHContext ssh = new DefaultSSHContext(
            Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS),
            locationFactory.create(taskInfo.getSecureKeysDir()),
            createSSHKeyPair(taskInfo));
        provisionerContext = createContext(
            cConf, options, programRunId, taskInfo.getUser(), taskInfo.getProvisionerProperties(), ssh);
    } catch (IOException e) {
        runWithProgramLogging(programRunId, args,
            () -> LOG.error("Failed to load ssh key. The run will be marked as failed.", e));
        programStateWriter.error(programRunId, new IllegalStateException("Failed to load ssh key.", e));
        provisionerNotifier.deprovisioning(programRunId);
        return () -> {
        };
    } catch (InvalidMacroException e) {
        runWithProgramLogging(programRunId, args,
            () -> LOG.error("Could not evaluate macros while provisoning. The run will be marked as failed.", e));
        programStateWriter.error(programRunId,
            new IllegalStateException("Could not evaluate macros while provisioning", e));
        provisionerNotifier.deprovisioning(programRunId);
        return () -> {
        };
    }

    // TODO: (CDAP-13246) pick up timeout from profile instead of hardcoding
    ProvisioningTask provisionTask = new ProvisionTask(
        taskInfo, transactionRunner, provisioner, provisionerContext, provisionerNotifier, programStateWriter, 300);
    ProvisioningTaskKey key = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.PROVISION);

    // Nothing is submitted until the caller runs the returned Runnable.
    return () -> taskExecutor.submit(key, () -> callWithProgramLogging(programRunId, args, () -> {
        try {
            return provisionTask.executeOnce();
        } catch (InterruptedException e) {
            // interruption is only logged at debug level and then rethrown
            LOG.debug("Provision task for program run {} interrupted.", programRunId);
            throw e;
        } catch (Exception e) {
            LOG.info("Provision task for program run {} failed.", programRunId, e);
            throw e;
        }
    }));
}
Also used : SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) ProvisioningTask(io.cdap.cdap.internal.provision.task.ProvisioningTask) IOException(java.io.IOException) ProvisionTask(io.cdap.cdap.internal.provision.task.ProvisionTask) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) NotFoundException(io.cdap.cdap.common.NotFoundException) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId)

Example 8 with ProgramOptions

use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.

the class RuntimeServiceMainTest method testRuntimeService.

/**
 * Publishes program state events for a single run and checks that the runtime store follows them:
 * a STARTING record, then a RUNNING record, and finally no record once the run has completed.
 */
@Test
public void testRuntimeService() throws Exception {
    ArtifactId artifact = NamespaceId.DEFAULT.artifact("test", "1.0");
    ProgramRunId programRunId = NamespaceId.DEFAULT.app("app").worker("worker").run(RunIds.generate());
    Map<String, String> sysArgs = ImmutableMap.of(
        SystemArguments.PROFILE_PROVISIONER, NativeProvisioner.SPEC.getName(),
        SystemArguments.PROFILE_NAME, "default");
    ProgramOptions options = new SimpleProgramOptions(
        programRunId.getParent(), new BasicArguments(sysArgs), new BasicArguments());
    ProgramDescriptor descriptor = new ProgramDescriptor(programRunId.getParent(), null, artifact);

    // Simulate a program start by publishing the state event through the app fabric messaging service
    Injector appFabric = getServiceMainInstance(AppFabricServiceMain.class).getInjector();
    CConfiguration cConf = appFabric.getInstance(CConfiguration.class);
    ProgramStatePublisher statePublisher = new MessagingProgramStatePublisher(
        appFabric.getInstance(MessagingService.class),
        NamespaceId.SYSTEM.topic(cConf.get(Constants.AppFabric.PROGRAM_STATUS_RECORD_EVENT_TOPIC)),
        RetryStrategies.fromConfiguration(cConf, "system.program.state."));
    new MessagingProgramStateWriter(statePublisher).start(programRunId, options, null, descriptor);

    Injector runtimeInjector = getServiceMainInstance(RuntimeServiceMain.class).getInjector();
    TransactionRunner transactionRunner = runtimeInjector.getInstance(TransactionRunner.class);

    // The runtime store is expected to show a STARTING record
    Tasks.waitFor(ProgramRunStatus.STARTING, () -> {
        RunRecordDetail record = TransactionRunners.run(transactionRunner, context -> {
            return AppMetadataStore.create(context).getRun(programRunId);
        });
        if (record == null) {
            return null;
        }
        return record.getStatus();
    }, 5, TimeUnit.SECONDS);

    ProgramStateWriter stateWriter = createProgramStateWriter(runtimeInjector, programRunId);

    // After writing a running state, the runtime store is expected to show a RUNNING record
    stateWriter.running(programRunId, null);
    Tasks.waitFor(ProgramRunStatus.RUNNING, () -> {
        RunRecordDetail record = TransactionRunners.run(transactionRunner, context -> {
            return AppMetadataStore.create(context).getRun(programRunId);
        });
        if (record == null) {
            return null;
        }
        return record.getStatus();
    }, 5, TimeUnit.SECONDS);

    // After writing a complete state, the run record is expected to disappear from the runtime store
    stateWriter.completed(programRunId);
    Tasks.waitFor(
        true,
        () -> TransactionRunners.run(
            transactionRunner, context -> AppMetadataStore.create(context).getRun(programRunId) == null),
        5, TimeUnit.SECONDS);
}
Also used : Retries(io.cdap.cdap.common.service.Retries) RetryStrategy(io.cdap.cdap.common.service.RetryStrategy) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) TransactionRunners(io.cdap.cdap.spi.data.transaction.TransactionRunners) BeforeClass(org.junit.BeforeClass) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) Notification(io.cdap.cdap.proto.Notification) Bytes(io.cdap.cdap.api.common.Bytes) RetryStrategies(io.cdap.cdap.common.service.RetryStrategies) TopicId(io.cdap.cdap.proto.id.TopicId) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) Gson(com.google.gson.Gson) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) Map(java.util.Map) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) NativeProvisioner(io.cdap.cdap.internal.provision.NativeProvisioner) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) AppMetadataStore(io.cdap.cdap.internal.app.store.AppMetadataStore) Tasks(io.cdap.cdap.common.utils.Tasks) Message(io.cdap.cdap.api.messaging.Message) ImmutableMap(com.google.common.collect.ImmutableMap) RunIds(io.cdap.cdap.common.app.RunIds) ProgramStatePublisher(io.cdap.cdap.internal.app.program.ProgramStatePublisher) RetryableException(io.cdap.cdap.api.retry.RetryableException) MessagingService(io.cdap.cdap.messaging.MessagingService) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) MessagingProgramStatePublisher(io.cdap.cdap.internal.app.program.MessagingProgramStatePublisher) RuntimeClient(io.cdap.cdap.internal.app.runtime.monitor.RuntimeClient) Test(org.junit.Test) IOException(java.io.IOException) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) StandardCharsets(java.nio.charset.StandardCharsets) MessageId(io.cdap.cdap.messaging.data.MessageId) Injector(com.google.inject.Injector) 
TimeUnit(java.util.concurrent.TimeUnit) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) Constants(io.cdap.cdap.common.conf.Constants) Collections(java.util.Collections) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) MessagingService(io.cdap.cdap.messaging.MessagingService) MessagingProgramStatePublisher(io.cdap.cdap.internal.app.program.MessagingProgramStatePublisher) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) Injector(com.google.inject.Injector) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) ProgramStatePublisher(io.cdap.cdap.internal.app.program.ProgramStatePublisher) MessagingProgramStatePublisher(io.cdap.cdap.internal.app.program.MessagingProgramStatePublisher) Test(org.junit.Test)

Example 9 with ProgramOptions

use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.

the class AbstractInMemoryProgramRunner method startAll.

/**
 * Starts all instances of a Program component.
 *
 * <p>If starting any instance fails, the failure is logged, every controller started so far is asked to stop
 * (waiting for the stop calls to finish), and the startup failure is rethrown. A failure while stopping is logged
 * and attached to the startup failure as a suppressed exception, so the startup failure is never lost.
 *
 * @param program The program to run
 * @param options options for the program
 * @param numInstances number of component instances to start
 * @return a controller created from the table of started instance controllers
 * @throws RuntimeException if any instance fails to start; produced by {@code Throwables.propagate} from the
 *         startup failure
 */
protected final ProgramController startAll(Program program, ProgramOptions options, int numInstances) {
    RunId runId = ProgramRunners.getRunId(options);
    Table<String, Integer, ProgramController> components = HashBasedTable.create();
    try {
        for (int instanceId = 0; instanceId < numInstances; instanceId++) {
            ProgramOptions componentOptions = createComponentOptions(instanceId, numInstances, runId, options);
            ProgramController controller = createProgramRunner().run(program, componentOptions);
            components.put(program.getName(), instanceId, controller);
        }
        return new InMemoryProgramController(components, program, options);
    } catch (Throwable t) {
        LOG.error("Failed to start all program instances", t);
        try {
            // Need to stop all started components
            Futures.successfulAsList(Iterables.transform(components.values(), new Function<ProgramController, ListenableFuture<?>>() {

                @Override
                public ListenableFuture<?> apply(ProgramController controller) {
                    return controller.stop();
                }
            })).get();
        } catch (Exception e) {
            LOG.error("Failed to stop all program instances upon startup failure.", e);
            if (e instanceof InterruptedException) {
                // keep the interruption visible to callers further up the stack
                Thread.currentThread().interrupt();
            }
            // keep the stop failure attached without hiding the startup failure
            t.addSuppressed(e);
        }
        // Rethrow outside the inner try so the startup failure is not caught by the handler above and
        // misreported as a failure to stop the instances.
        throw Throwables.propagate(t);
    }
}
Also used : Function(com.google.common.base.Function) ProgramController(io.cdap.cdap.app.runtime.ProgramController) AbstractProgramController(io.cdap.cdap.internal.app.runtime.AbstractProgramController) RunId(org.apache.twill.api.RunId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions)

Example 10 with ProgramOptions

use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.

the class ProgramNotificationSubscriberService method handleClusterEvent.

/**
 * Processes a cluster-related notification for a program run.
 *
 * <p>Depending on the cluster status, this records the transition in the app metadata store and may start
 * provisioning, publish the program start, or start deprovisioning.
 *
 * @param programRunId program run id from the event
 * @param clusterStatus cluster status from the event
 * @param notification the notification to process
 * @param messageIdBytes the unique ID for the notification message
 * @param appMetadataStore the data table to use
 * @param context the table context for performing table operations
 * @return an {@link Optional} of {@link Runnable} to carry a task to execute after handling of this event completed.
 *         See {@link #postProcess()} for details.
 * @throws IOException if failed to read/write to the app metadata store.
 */
private Optional<Runnable> handleClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus, Notification notification, byte[] messageIdBytes, AppMetadataStore appMetadataStore, StructuredTableContext context) throws IOException, InterruptedException {
    Map<String, String> props = notification.getProperties();
    ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
    String userId = props.get(ProgramOptionConstants.USER_ID);
    long endTs = getTimeSeconds(props, ProgramOptionConstants.CLUSTER_END_TIME);
    ProgramDescriptor programDescriptor = GSON.fromJson(props.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);

    switch (clusterStatus) {
        case PROVISIONING: {
            appMetadataStore.recordProgramProvisioning(
                programRunId,
                programOptions.getUserArguments().asMap(),
                programOptions.getArguments().asMap(),
                messageIdBytes,
                programDescriptor.getArtifactId().toApiArtifactId());
            // The provisioning Runnable is handed back to the caller instead of being run here.
            return Optional.of(provisioningService.provision(
                new ProvisionRequest(programRunId, programOptions, programDescriptor, userId), context));
        }
        case PROVISIONED: {
            Cluster cluster = GSON.fromJson(props.get(ProgramOptionConstants.CLUSTER), Cluster.class);
            RunRecordDetail runRecord = appMetadataStore.recordProgramProvisioned(
                programRunId, cluster.getNodes().size(), messageIdBytes);
            // Do not start the program when no run record came back from the store,
            // or when the recorded run is already STOPPING or KILLED.
            if (runRecord == null) {
                return Optional.empty();
            }
            ProgramRunStatus recordedStatus = runRecord.getStatus();
            if (recordedStatus == ProgramRunStatus.STOPPING || recordedStatus == ProgramRunStatus.KILLED) {
                return Optional.empty();
            }
            // Extend the system arguments with the information needed for program execution
            Map<String, String> systemArgs = new HashMap<>(programOptions.getArguments().asMap());
            systemArgs.put(ProgramOptionConstants.USER_ID, userId);
            systemArgs.put(ProgramOptionConstants.CLUSTER, props.get(ProgramOptionConstants.CLUSTER));
            systemArgs.put(ProgramOptionConstants.SECURE_KEYS_DIR, props.get(ProgramOptionConstants.SECURE_KEYS_DIR));
            ProgramOptions startOptions = new SimpleProgramOptions(
                programOptions.getProgramId(), new BasicArguments(systemArgs), programOptions.getUserArguments());
            // Publish the program STARTING state before starting the program
            programStateWriter.start(programRunId, startOptions, null, programDescriptor);
            // Emit the provisioning time metric: seconds elapsed between the time encoded in the run id and now
            long provisioningTime = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())
                - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
            SystemArguments.getProfileIdFromArgs(programRunId.getNamespaceId(), systemArgs)
                .ifPresent(profileId ->
                    emitProvisioningTimeMetric(programRunId, profileId, programOptions, provisioningTime));
            return Optional.empty();
        }
        case DEPROVISIONING: {
            RunRecordDetail recordedMeta = appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
            // Deprovision only when the store returned a record for this transition.
            if (recordedMeta == null) {
                return Optional.empty();
            }
            return Optional.of(provisioningService.deprovision(programRunId, context));
        }
        case DEPROVISIONED: {
            appMetadataStore.recordProgramDeprovisioned(programRunId, endTs, messageIdBytes);
            return Optional.empty();
        }
        case ORPHANED: {
            appMetadataStore.recordProgramOrphaned(programRunId, endTs, messageIdBytes);
            return Optional.empty();
        }
    }
    // Any status not listed above needs no follow-up task.
    return Optional.empty();
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProvisionRequest(io.cdap.cdap.internal.provision.ProvisionRequest) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions)

Aggregations

ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions)68 SimpleProgramOptions (io.cdap.cdap.internal.app.runtime.SimpleProgramOptions)52 BasicArguments (io.cdap.cdap.internal.app.runtime.BasicArguments)40 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)40 ProgramDescriptor (io.cdap.cdap.app.program.ProgramDescriptor)32 HashMap (java.util.HashMap)26 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)24 IOException (java.io.IOException)24 ProgramId (io.cdap.cdap.proto.id.ProgramId)22 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)20 Test (org.junit.Test)14 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)12 Program (io.cdap.cdap.app.program.Program)12 ProgramStateWriter (io.cdap.cdap.app.runtime.ProgramStateWriter)12 SystemArguments (io.cdap.cdap.internal.app.runtime.SystemArguments)12 Map (java.util.Map)12 Injector (com.google.inject.Injector)10 MetricsCollectionService (io.cdap.cdap.api.metrics.MetricsCollectionService)10 ClusterMode (io.cdap.cdap.app.guice.ClusterMode)10 MessagingService (io.cdap.cdap.messaging.MessagingService)10