Search in sources :

Example 1 with Program

use of co.cask.cdap.app.program.Program in project cdap by caskdata.

the class SparkRuntimeContextProvider method createIfNotExists.

/**
 * Creates the singleton {@link SparkRuntimeContext}, or returns the existing one if it was already created.
 * <p>
 * The method is {@code static synchronized}, so concurrent callers are serialized and the context is built at
 * most once. It has assumption on file location that are localized by the SparkRuntimeService.
 * <p>
 * Side effects of the first successful call: the log appender, ZooKeeper, Kafka, metrics and stream coordinator
 * services are started, a JVM shutdown hook is registered to stop them again, and the logging context of the
 * new runtime context is installed through {@link LoggingContextAccessor}.
 *
 * @return the process-wide {@link SparkRuntimeContext}
 * @throws RuntimeException if any step of the creation fails; the failure is rethrown through
 *                          {@link Throwables#propagate(Throwable)}. This includes the precondition failure raised
 *                          when the configuration is local or the {@code SPARK_YARN_MODE} environment variable
 *                          does not parse to {@code true}.
 */
private static synchronized SparkRuntimeContext createIfNotExists() {
    if (sparkRuntimeContext != null) {
        return sparkRuntimeContext;
    }
    try {
        CConfiguration cConf = createCConf();
        Configuration hConf = createHConf();
        SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(hConf);
        // Should be yarn only and only for executor node, not the driver node.
        Preconditions.checkState(!contextConfig.isLocal() && Boolean.parseBoolean(System.getenv("SPARK_YARN_MODE")), "SparkContextProvider.getSparkContext should only be called in Spark executor process.");
        // Create the program
        Program program = createProgram(cConf, contextConfig);
        Injector injector = createInjector(cConf, hConf, contextConfig.getProgramId(), contextConfig.getProgramOptions());
        Service logAppenderService = new LogAppenderService(injector.getInstance(LogAppenderInitializer.class), contextConfig.getProgramOptions());
        ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
        KafkaClientService kafkaClientService = injector.getInstance(KafkaClientService.class);
        MetricsCollectionService metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
        StreamCoordinatorClient streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
        SparkServiceAnnouncer serviceAnnouncer = injector.getInstance(SparkServiceAnnouncer.class);
        // Use the shutdown hook to shutdown services, since this class should only be loaded from System classloader
        // of the spark executor, hence there should be exactly one instance only.
        // The problem with not shutting down nicely is that some logs/metrics might be lost
        Services.chainStart(logAppenderService, zkClientService, kafkaClientService, metricsCollectionService, streamCoordinatorClient);
        Runtime.getRuntime().addShutdownHook(new Thread() {

            @Override
            public void run() {
                // The logger may have already been shut down. Use System.out/err instead
                System.out.println("Shutting SparkClassLoader services");
                try {
                    serviceAnnouncer.close();
                } catch (Exception e) {
                    // A failure to close the announcer must not abort the hook; otherwise the services below
                    // would never be stopped and pending logs/metrics would be lost.
                    System.err.println("Exception when closing service announcer");
                    e.printStackTrace(System.err);
                }
                // Services are stopped in a different order than they were started: log appender first,
                // ZooKeeper client last.
                Future<List<ListenableFuture<Service.State>>> future = Services.chainStop(logAppenderService, streamCoordinatorClient, metricsCollectionService, kafkaClientService, zkClientService);
                try {
                    // Bound the wait so that a stuck service cannot hold up JVM exit indefinitely
                    List<ListenableFuture<Service.State>> futures = future.get(5, TimeUnit.SECONDS);
                    System.out.println("SparkClassLoader services shutdown completed: " + futures);
                } catch (Exception e) {
                    System.err.println("Exception when shutting down services");
                    e.printStackTrace(System.err);
                }
            }
        });
        // Construct the DatasetFramework
        DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
        WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
        // When running inside a workflow, wrap the framework with the workflow's dataset name mapping
        DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, contextConfig.getApplicationSpecification());
        // Setup dataset framework context, if required
        if (programDatasetFramework instanceof ProgramContextAware) {
            ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
            ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
        }
        PluginInstantiator pluginInstantiator = createPluginInstantiator(cConf, contextConfig, program.getClassLoader());
        // Create the context object
        sparkRuntimeContext = new SparkRuntimeContext(contextConfig.getConfiguration(), program, contextConfig.getProgramOptions(), cConf, getHostname(), injector.getInstance(TransactionSystemClient.class), programDatasetFramework, injector.getInstance(DiscoveryServiceClient.class), metricsCollectionService, injector.getInstance(StreamAdmin.class), workflowInfo, pluginInstantiator, injector.getInstance(SecureStore.class), injector.getInstance(SecureStoreManager.class), injector.getInstance(AuthorizationEnforcer.class), injector.getInstance(AuthenticationContext.class), injector.getInstance(MessagingService.class), serviceAnnouncer, injector.getInstance(PluginFinder.class), injector.getInstance(LocationFactory.class));
        LoggingContextAccessor.setLoggingContext(sparkRuntimeContext.getLoggingContext());
        return sparkRuntimeContext;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) NameMappedDatasetFramework(co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) LogAppenderInitializer(co.cask.cdap.logging.appender.LogAppenderInitializer) Injector(com.google.inject.Injector) List(java.util.List) Program(co.cask.cdap.app.program.Program) DefaultProgram(co.cask.cdap.app.program.DefaultProgram) KafkaClientService(org.apache.twill.kafka.client.KafkaClientService) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) MessagingService(co.cask.cdap.messaging.MessagingService) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) AbstractService(com.google.common.util.concurrent.AbstractService) ZKClientService(org.apache.twill.zookeeper.ZKClientService) ZKDiscoveryService(org.apache.twill.discovery.ZKDiscoveryService) Service(com.google.common.util.concurrent.Service) KafkaClientService(org.apache.twill.kafka.client.KafkaClientService) StreamCoordinatorClient(co.cask.cdap.data.stream.StreamCoordinatorClient) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) CConfiguration(co.cask.cdap.common.conf.CConfiguration) InvocationTargetException(java.lang.reflect.InvocationTargetException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ZKClientService(org.apache.twill.zookeeper.ZKClientService) WorkflowProgramInfo(co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) Future(java.util.concurrent.Future) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) PluginInstantiator(co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) 
ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Example 2 with Program

use of co.cask.cdap.app.program.Program in project cdap by caskdata.

the class MapReduceTaskContextProvider method createCacheLoader.

/**
 * Builds the {@link CacheLoader} used by the task context cache.
 * <p>
 * Each {@code load} call reads the transaction stored in the job's transaction file and assembles a new
 * {@link BasicMapReduceTaskContext} around it. The {@link Program} instance is created lazily and then shared
 * by every context produced by the returned loader.
 *
 * @param injector the injector used to look up the services required by the task contexts
 * @return a loader that creates a {@link BasicMapReduceTaskContext} for a {@link ContextCacheKey}
 */
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
    // Looked up once here and captured by the loader below.
    final DiscoveryServiceClient discoveryClient = injector.getInstance(DiscoveryServiceClient.class);
    final DatasetFramework baseDatasetFramework = injector.getInstance(DatasetFramework.class);
    final SecureStore store = injector.getInstance(SecureStore.class);
    final SecureStoreManager storeManager = injector.getInstance(SecureStoreManager.class);
    final MessagingService messaging = injector.getInstance(MessagingService.class);
    // Multiple instances of BasicMapReduceTaskContext can share the same program.
    final AtomicReference<Program> sharedProgram = new AtomicReference<>();

    return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {

        @Override
        public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
            // The attempt id can be null when the context is requested from a
            // org.apache.hadoop.mapreduce.Partitioner or a org.apache.hadoop.io.RawComparator; the job id is then
            // taken from the configuration. For OutputCommitter#setupJob the job id is not in the configuration,
            // so the one carried by the attempt id is used.
            TaskAttemptID attemptId = key.getTaskAttemptID();
            Path txPath = MainOutputCommitter.getTxFile(key.getConfiguration(), attemptId == null ? null : attemptId.getJobID());
            FileSystem fileSystem = txPath.getFileSystem(key.getConfiguration());
            Preconditions.checkArgument(fileSystem.exists(txPath));

            // Decode the transaction persisted in the transaction file.
            Transaction transaction;
            try (FSDataInputStream txInput = fileSystem.open(txPath)) {
                byte[] encodedTx = ByteStreams.toByteArray(txInput);
                transaction = new TransactionCodec().decode(encodedTx);
            }

            MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
            MapReduceClassLoader taskClassLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());

            Program program = sharedProgram.get();
            if (program == null) {
                // Creation of program is relatively cheap, so just create one and let compare-and-set pick a winner.
                sharedProgram.compareAndSet(null, createProgram(contextConfig, taskClassLoader.getProgramClassLoader()));
                program = sharedProgram.get();
            }

            // Inside a workflow the dataset framework is wrapped with the workflow's name mapping.
            WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
            DatasetFramework programDatasetFramework;
            if (workflowInfo == null) {
                programDatasetFramework = baseDatasetFramework;
            } else {
                programDatasetFramework = NameMappedDatasetFramework.createFromWorkflowProgramInfo(baseDatasetFramework, workflowInfo, program.getApplicationSpecification());
            }
            // Setup dataset framework context, if required
            if (programDatasetFramework instanceof ProgramContextAware) {
                ProgramRunId runId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
                ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(runId));
            }

            MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());

            // These stay null unless a task attempt id is available.
            MetricsCollectionService metrics = null;
            MapReduceMetrics.TaskType taskType = null;
            String taskId = null;
            ProgramOptions options = contextConfig.getProgramOptions();

            if (attemptId != null) {
                taskId = attemptId.getTaskID().toString();
                // If this is not for a mapper or a reducer, we don't need the metrics collection service
                // and the program options are left untouched.
                if (MapReduceMetrics.TaskType.hasType(attemptId.getTaskType())) {
                    taskType = MapReduceMetrics.TaskType.from(attemptId.getTaskType());
                    metrics = injector.getInstance(MetricsCollectionService.class);
                    // Replace the user arguments with the ones extracted for the "task" scope of this task type.
                    options = new SimpleProgramOptions(
                        options.getProgramId(),
                        options.getArguments(),
                        new BasicArguments(RuntimeArguments.extractScope("task", taskType.toString().toLowerCase(), contextConfig.getProgramOptions().getUserArguments().asMap())),
                        options.isDebug());
                }
            }

            CConfiguration cConf = injector.getInstance(CConfiguration.class);
            TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
            return new BasicMapReduceTaskContext(program, options, cConf, taskType, taskId, spec, workflowInfo, discoveryClient, metrics, txClient, transaction, programDatasetFramework, taskClassLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(), store, storeManager, authorizationEnforcer, authenticationContext, messaging, mapReduceClassLoader);
        }
    };
}
Also used : DiscoveryServiceClient(org.apache.twill.discovery.DiscoveryServiceClient) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) NameMappedDatasetFramework(co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) FileSystem(org.apache.hadoop.fs.FileSystem) SecureStoreManager(co.cask.cdap.api.security.store.SecureStoreManager) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) MapReduceMetrics(co.cask.cdap.app.metrics.MapReduceMetrics) Path(org.apache.hadoop.fs.Path) Program(co.cask.cdap.app.program.Program) DefaultProgram(co.cask.cdap.app.program.DefaultProgram) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) MapReduceSpecification(co.cask.cdap.api.mapreduce.MapReduceSpecification) AtomicReference(java.util.concurrent.atomic.AtomicReference) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) SecureStore(co.cask.cdap.api.security.store.SecureStore) CConfiguration(co.cask.cdap.common.conf.CConfiguration) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(co.cask.cdap.app.runtime.ProgramOptions) MessagingService(co.cask.cdap.messaging.MessagingService) Transaction(org.apache.tephra.Transaction) WorkflowProgramInfo(co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) TransactionCodec(org.apache.tephra.TransactionCodec) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CacheLoader(com.google.common.cache.CacheLoader) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Example 3 with Program

use of co.cask.cdap.app.program.Program in project cdap by caskdata.

the class DefaultProgramWorkflowRunner method create.

/**
 * Creates a {@link Runnable} for the program with the given name.
 * <p>
 * The program id is derived from the parent of the workflow program's id, using this runner's program type.
 * If anything fails after the {@link ProgramRunner} has been created, the runner is handed to
 * {@code closeProgramRunner} before the failure is rethrown.
 *
 * @param name name of the program to create a runnable for
 * @return the runnable returned by {@code getProgramRunnable} for that program
 */
@Override
public Runnable create(String name) {
    ProgramRunner runner = programRunnerFactory.create(programType);
    try {
        ProgramId targetId = workflowProgram.getId().getParent().program(programType, name);
        return getProgramRunnable(name, runner, Programs.create(cConf, workflowProgram, targetId, runner));
    } catch (Exception failure) {
        // Do not leave the freshly created runner behind when the program cannot be set up.
        closeProgramRunner(runner);
        throw Throwables.propagate(failure);
    }
}
Also used : Program(co.cask.cdap.app.program.Program) ProgramRunner(co.cask.cdap.app.runtime.ProgramRunner) ProgramId(co.cask.cdap.proto.id.ProgramId) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with Program

use of co.cask.cdap.app.program.Program in project cdap by caskdata.

the class AppFabricTestHelper method submit.

/**
 * Submits a program execution.
 * <p>
 * The first program of the application whose specification class name equals {@code programClassName} is
 * selected; the call fails an assertion if there is no such program.
 *
 * @param app the application containing the program
 * @param programClassName name of the program class
 * @param userArgs runtime arguments
 * @param folderSupplier a Supplier of temporary folder
 * @return a {@link ProgramController} for controlling the program execution.
 */
public static ProgramController submit(ApplicationWithPrograms app, String programClassName, Arguments userArgs, Supplier<File> folderSupplier) throws Exception {
    ProgramRunnerFactory factory = injector.getInstance(ProgramRunnerFactory.class);

    ProgramRunner programRunner = null;
    Program matched = null;
    for (ProgramDescriptor descriptor : app.getPrograms()) {
        if (!descriptor.getSpecification().getClassName().equals(programClassName)) {
            continue;
        }
        // Only the first matching program is used.
        programRunner = factory.create(descriptor.getProgramId().getType());
        matched = createProgram(descriptor, app.getArtifactLocation(), programRunner, folderSupplier);
        break;
    }
    Assert.assertNotNull(matched);

    // System arguments: a fresh run id, the loopback host name and the artifact id parts joined with ':'.
    String runId = RunIds.generate().getId();
    String host = InetAddress.getLoopbackAddress().getCanonicalHostName();
    String artifact = Joiner.on(":").join(app.getArtifactId().toIdParts());
    BasicArguments systemArgs = new BasicArguments(ImmutableMap.of(
        ProgramOptionConstants.RUN_ID, runId,
        ProgramOptionConstants.HOST, host,
        ProgramOptionConstants.ARTIFACT_ID, artifact));

    return programRunner.run(matched, new SimpleProgramOptions(matched.getId(), systemArgs, userArgs));
}
Also used : Program(co.cask.cdap.app.program.Program) ProgramDescriptor(co.cask.cdap.app.program.ProgramDescriptor) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) ProgramRunner(co.cask.cdap.app.runtime.ProgramRunner) ProgramRunnerFactory(co.cask.cdap.app.runtime.ProgramRunnerFactory)

Example 5 with Program

use of co.cask.cdap.app.program.Program in project cdap by caskdata.

the class AbstractProgramRuntimeService method run.

/**
 * Starts a run of the program described by {@code programDescriptor}.
 * <p>
 * A new run id is generated and the starting state is published before the program is created. If anything
 * fails inside the guarded section, the error state is published, the cleanup task is run, the failure is
 * logged and then rethrown.
 *
 * @param programDescriptor describes the program to run
 * @param options the program options supplied by the caller
 * @return the {@link RuntimeInfo} of the started run
 */
@Override
public final RuntimeInfo run(ProgramDescriptor programDescriptor, ProgramOptions options) {
    ProgramId programId = programDescriptor.getProgramId();
    RunId runId = RunIds.generate();

    // Publish the program's starting state. We don't know the Twill RunId yet, hence always passing in null.
    programStateWriter.start(programId.run(runId), options, null);

    ProgramRunner programRunner = programRunnerFactory.create(programId.getType());
    File runDir = createTempDirectory(programId, runId);
    Runnable cleanup = createCleanupTask(runDir, programRunner);
    try {
        // Get the artifact details and save it into the program options.
        ArtifactId artifactId = programDescriptor.getArtifactId();
        ArtifactDetail artifactDetail = getArtifactDetail(artifactId);
        ProgramOptions updatedOptions = updateProgramOptions(artifactId, programId, options, runId);

        // Take a snapshot of all the plugin artifacts used by the program
        ProgramOptions pluginAwareOptions = createPluginSnapshot(updatedOptions, programId, runDir, programDescriptor.getApplicationSpecification());

        // Create the program; from here on the cleanup task also covers the created program.
        Program program = createProgram(cConf, programRunner, programDescriptor, artifactDetail, runDir);
        cleanup = createCleanupTask(cleanup, program);

        // Run the program and start monitoring it.
        RuntimeInfo info = createRuntimeInfo(programRunner.run(program, pluginAwareOptions), programId, cleanup);
        monitorProgram(info, cleanup);
        return info;
    } catch (Exception failure) {
        // Set the program state to an error when an exception is thrown
        programStateWriter.error(programId.run(runId), failure);
        cleanup.run();
        LOG.error("Exception while trying to run program", failure);
        throw Throwables.propagate(failure);
    }
}
Also used : Program(co.cask.cdap.app.program.Program) ArtifactId(co.cask.cdap.proto.id.ArtifactId) SimpleRuntimeInfo(co.cask.cdap.internal.app.runtime.service.SimpleRuntimeInfo) ProgramId(co.cask.cdap.proto.id.ProgramId) RunId(org.apache.twill.api.RunId) File(java.io.File) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) ArtifactNotFoundException(co.cask.cdap.common.ArtifactNotFoundException) IOException(java.io.IOException) ArtifactDetail(co.cask.cdap.internal.app.runtime.artifact.ArtifactDetail)

Aggregations

Program (co.cask.cdap.app.program.Program)8 SimpleProgramOptions (co.cask.cdap.internal.app.runtime.SimpleProgramOptions)6 ProgramId (co.cask.cdap.proto.id.ProgramId)5 IOException (java.io.IOException)5 CConfiguration (co.cask.cdap.common.conf.CConfiguration)4 ProgramDescriptor (co.cask.cdap.app.program.ProgramDescriptor)3 ArtifactNotFoundException (co.cask.cdap.common.ArtifactNotFoundException)3 BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments)3 ArtifactDetail (co.cask.cdap.internal.app.runtime.artifact.ArtifactDetail)3 ArtifactId (co.cask.cdap.proto.id.ArtifactId)3 File (java.io.File)3 ExecutionException (java.util.concurrent.ExecutionException)3 ArtifactVersion (co.cask.cdap.api.artifact.ArtifactVersion)2 MetricsCollectionService (co.cask.cdap.api.metrics.MetricsCollectionService)2 DefaultProgram (co.cask.cdap.app.program.DefaultProgram)2 ProgramRunner (co.cask.cdap.app.runtime.ProgramRunner)2 ProgramContextAware (co.cask.cdap.data.ProgramContextAware)2 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)2 BasicProgramContext (co.cask.cdap.internal.app.runtime.BasicProgramContext)2 ArtifactDescriptor (co.cask.cdap.internal.app.runtime.artifact.ArtifactDescriptor)2