Use of co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class MapReduceProgramRunner, method run.
@Override
public ProgramController run(final Program program, ProgramOptions options) {
  // Extract and verify parameters
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE,
                              "Only MAPREDUCE process type is supported.");
  MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
  DatasetFramework programDatasetFramework = workflowInfo == null
    ? datasetFramework
    : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
  // Setup dataset framework context, if required
  if (programDatasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }
  MapReduce mapReduce;
  try {
    mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
  } catch (Exception e) {
    LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
    throw Throwables.propagate(e);
  }
  // List of all Closeable resources that need to be cleaned up
  List<Closeable> closeables = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.add(pluginInstantiator);
    }
    final BasicMapReduceContext context =
      new BasicMapReduceContext(program, options, cConf, spec, workflowInfo, discoveryServiceClient,
                                metricsCollectionService, txSystemClient, programDatasetFramework,
                                streamAdmin, getPluginArchive(options), pluginInstantiator,
                                secureStore, secureStoreManager, messagingService);
    Reflections.visit(mapReduce, mapReduce.getClass(),
                      new PropertyFieldSetter(context.getSpecification().getProperties()),
                      new MetricsFieldSetter(context.getMetrics()),
                      new DataSetFieldSetter(context));
    // note: this sets the logging context at the thread level
    LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
    // Set the job queue in hConf if it is provided
    Configuration hConf = new Configuration(this.hConf);
    String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
    if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
      hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
    }
    Service mapReduceRuntimeService =
      new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(),
                                  locationFactory, streamAdmin, txSystemClient,
                                  authorizationEnforcer, authenticationContext);
    mapReduceRuntimeService.addListener(
      createRuntimeServiceListener(program.getId(), runId, closeables, arguments, options.getUserArguments()),
      Threads.SAME_THREAD_EXECUTOR);
    final ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
    LOG.debug("Starting MapReduce Job: {}", context);
    // In distributed mode without Kerberos, the job is started as the configured HDFS user, who will
    // be running the job, but the data directory will be owned by cdap.
    if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
      mapReduceRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
    }
    return controller;
  } catch (Exception e) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(e);
  }
}
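The workflowInfo null check is the pivot of this runner: WorkflowProgramInfo.create(arguments) only yields a value when the MapReduce program was launched by a Workflow, and in that case every dataset access goes through a name-mapping wrapper. A minimal, self-contained sketch of that selection pattern, using simplified stand-in types rather than the real DatasetFramework and NameMappedDatasetFramework APIs:

import java.util.Map;

// A sketch of the decorator selection used above; all types here are
// simplified stand-ins, not the actual CDAP classes.
final class DatasetSelectionSketch {
  interface Datasets {
    Object get(String name);
  }

  static final class NameMappedDatasets implements Datasets {
    private final Datasets delegate;
    private final Map<String, String> mapping;

    NameMappedDatasets(Datasets delegate, Map<String, String> mapping) {
      this.delegate = delegate;
      this.mapping = mapping;
    }

    @Override
    public Object get(String name) {
      // Translate the program-local dataset name to its workflow-scoped name.
      return delegate.get(mapping.getOrDefault(name, name));
    }
  }

  // A null mapping plays the role of a null WorkflowProgramInfo: the program
  // was not started by a Workflow, so the base framework is used directly.
  static Datasets forRun(Datasets base, Map<String, String> workflowMapping) {
    return workflowMapping == null ? base : new NameMappedDatasets(base, workflowMapping);
  }
}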
Use of co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class MapReduceTaskContextProvider, method createCacheLoader.
/**
* Creates a {@link CacheLoader} for the task context cache.
*/
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
  final DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
  final DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  final SecureStore secureStore = injector.getInstance(SecureStore.class);
  final SecureStoreManager secureStoreManager = injector.getInstance(SecureStoreManager.class);
  final MessagingService messagingService = injector.getInstance(MessagingService.class);
  // Multiple instances of BasicMapReduceTaskContext can share the same program.
  final AtomicReference<Program> programRef = new AtomicReference<>();
  return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {
    @Override
    public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
      MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
      MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());
      Program program = programRef.get();
      if (program == null) {
        // Creating a program is relatively cheap, so just create one and do a compare-and-set.
        programRef.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
        program = programRef.get();
      }
      WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
      DatasetFramework programDatasetFramework = workflowInfo == null
        ? datasetFramework
        : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                   program.getApplicationSpecification());
      // Setup dataset framework context, if required
      if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
      }
      MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());
      MetricsCollectionService metricsCollectionService = null;
      MapReduceMetrics.TaskType taskType = null;
      String taskId = null;
      TaskAttemptID taskAttemptId = key.getTaskAttemptID();
      // taskAttemptId can be null, e.g. when the context is requested
      // from a org.apache.hadoop.io.RawComparator
      if (taskAttemptId != null) {
        taskId = taskAttemptId.getTaskID().toString();
        if (MapReduceMetrics.TaskType.hasType(taskAttemptId.getTaskType())) {
          taskType = MapReduceMetrics.TaskType.from(taskAttemptId.getTaskType());
          // the metrics collection service is only needed for mapper and reducer tasks
          metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
        }
      }
      CConfiguration cConf = injector.getInstance(CConfiguration.class);
      TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
      return new BasicMapReduceTaskContext(
        program, contextConfig.getProgramOptions(), cConf, taskType, taskId, spec, workflowInfo,
        discoveryServiceClient, metricsCollectionService, txClient, contextConfig.getTx(),
        programDatasetFramework, classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(),
        secureStore, secureStoreManager, authorizationEnforcer, authenticationContext, messagingService);
    }
  };
}
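The loader returned here is meant to back a Guava LoadingCache keyed by ContextCacheKey, so each task context is built once per key and then reused. A hedged sketch of how such a loader is typically wired up; the key/value types, size, and expiry below are illustrative, not the settings CDAP actually uses:

import java.util.concurrent.TimeUnit;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

final class ContextCacheSketch {
  static LoadingCache<String, String> buildCache(final CacheLoader<String, String> loader) {
    // Illustrative bounds; the real cache would be keyed by ContextCacheKey.
    return CacheBuilder.newBuilder()
      .maximumSize(100)
      .expireAfterAccess(1, TimeUnit.HOURS)
      .build(loader);
  }

  public static void main(String[] args) {
    LoadingCache<String, String> cache = buildCache(new CacheLoader<String, String>() {
      @Override
      public String load(String key) {
        // Stand-in for the expensive BasicMapReduceTaskContext construction.
        return "context-for-" + key;
      }
    });
    // get()/getUnchecked() invoke load() on a miss and cache the result.
    System.out.println(cache.getUnchecked("task-attempt-1"));
  }
}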
Use of co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class SparkRuntimeContextProvider, method createIfNotExists.
/**
 * Creates a singleton {@link SparkRuntimeContext}.
 * It assumes the file locations that were localized by the SparkRuntimeService.
 */
private static synchronized SparkRuntimeContext createIfNotExists() {
  if (sparkRuntimeContext != null) {
    return sparkRuntimeContext;
  }
  try {
    CConfiguration cConf = createCConf();
    Configuration hConf = createHConf();
    SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(hConf);
    // This should only happen in YARN mode, and only on an executor node, never the driver node.
    Preconditions.checkState(!contextConfig.isLocal() && Boolean.parseBoolean(System.getenv("SPARK_YARN_MODE")),
                             "SparkContextProvider.getSparkContext should only be called in Spark executor process.");
    // Create the program
    Program program = createProgram(cConf, contextConfig);
    Injector injector = createInjector(cConf, hConf, contextConfig.getProgramId(),
                                       contextConfig.getProgramOptions());
    final Service logAppenderService =
      new LogAppenderService(injector.getInstance(LogAppenderInitializer.class), contextConfig.getProgramOptions());
    final ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
    final KafkaClientService kafkaClientService = injector.getInstance(KafkaClientService.class);
    final MetricsCollectionService metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
    final StreamCoordinatorClient streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
    // Use a shutdown hook to stop the services: this class should only be loaded from the System
    // classloader of the Spark executor, hence there should be exactly one instance of it.
    // The problem with not shutting down nicely is that some logs/metrics might be lost.
    Services.chainStart(logAppenderService, zkClientService, kafkaClientService,
                        metricsCollectionService, streamCoordinatorClient);
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        // The logger may already have been shut down; use System.out/err instead.
        System.out.println("Shutting down SparkClassLoader services");
        Future<List<ListenableFuture<Service.State>>> future =
          Services.chainStop(logAppenderService, streamCoordinatorClient, metricsCollectionService,
                             kafkaClientService, zkClientService);
        try {
          List<ListenableFuture<Service.State>> futures = future.get(5, TimeUnit.SECONDS);
          System.out.println("SparkClassLoader services shutdown completed: " + futures);
        } catch (Exception e) {
          System.err.println("Exception when shutting down services");
          e.printStackTrace(System.err);
        }
      }
    });
    // Construct the DatasetFramework
    DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                 contextConfig.getApplicationSpecification());
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
    }
    PluginInstantiator pluginInstantiator = createPluginInstantiator(cConf, contextConfig, program.getClassLoader());
    // Create the context object
    sparkRuntimeContext = new SparkRuntimeContext(
      contextConfig.getConfiguration(), program, contextConfig.getProgramOptions(), cConf, getHostname(),
      injector.getInstance(TransactionSystemClient.class), programDatasetFramework,
      injector.getInstance(DiscoveryServiceClient.class), metricsCollectionService,
      injector.getInstance(StreamAdmin.class), contextConfig.getWorkflowProgramInfo(), pluginInstantiator,
      injector.getInstance(SecureStore.class), injector.getInstance(SecureStoreManager.class),
      injector.getInstance(AuthorizationEnforcer.class), injector.getInstance(AuthenticationContext.class),
      injector.getInstance(MessagingService.class));
    LoggingContextAccessor.setLoggingContext(sparkRuntimeContext.getLoggingContext());
    return sparkRuntimeContext;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
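The shutdown hook above is the standard idiom for a JVM whose lifecycle is owned by someone else, as a Spark executor's is: stop the services with a bounded wait so that buffered logs and metrics get a chance to flush. A generic, self-contained sketch of the same idiom, where the ExecutorService merely stands in for the chained CDAP services:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public final class ShutdownHookSketch {
  public static void main(String[] args) {
    // Stand-in for the chained services started above.
    final ExecutorService service = Executors.newSingleThreadExecutor();
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        // Loggers may already be stopped inside a shutdown hook, so use stdout/stderr.
        System.out.println("Shutting down services");
        service.shutdown();
        try {
          // Bound the wait, mirroring the 5-second timeout above.
          if (!service.awaitTermination(5, TimeUnit.SECONDS)) {
            System.err.println("Services did not stop within the timeout");
          }
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
      }
    });
    service.submit(new Runnable() {
      @Override
      public void run() {
        System.out.println("doing work");
      }
    });
  }
}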
Use of co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class SparkProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  // Get the RunId first. It is used for the creation of the ClassLoader closing thread.
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  Deque<Closeable> closeables = new LinkedList<>();
  try {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");
    SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());
    String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
    Preconditions.checkArgument(host != null, "No hostname is provided");
    // Get the WorkflowProgramInfo if the program was started by a Workflow
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramId programId = program.getId();
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.addFirst(pluginInstantiator);
    }
    SparkRuntimeContext runtimeContext =
      new SparkRuntimeContext(new Configuration(hConf), program, options, cConf, host, txClient,
                              programDatasetFramework, discoveryServiceClient, metricsCollectionService,
                              streamAdmin, workflowInfo, pluginInstantiator, secureStore, secureStoreManager,
                              authorizationEnforcer, authenticationContext, messagingService);
    closeables.addFirst(runtimeContext);
    Spark spark;
    try {
      spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }
    SparkSubmitter submitter = SparkRuntimeContextConfig.isLocal(hConf)
      ? new LocalSparkSubmitter()
      : new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext,
                                      options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
    Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options),
                                                          runtimeContext, submitter);
    sparkRuntimeService.addListener(
      createRuntimeServiceListener(program.getId(), runId, arguments, options.getUserArguments(),
                                   closeables, runtimeStore),
      Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);
    LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
    if (SparkRuntimeContextConfig.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
      sparkRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
    }
    return controller;
  } catch (Throwable t) {
    closeAll(closeables);
    throw Throwables.propagate(t);
  }
}
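Unlike the MapReduce runner's ArrayList, this runner keeps its Closeables in a Deque and registers each with addFirst, so iterating front-to-back closes resources in reverse order of creation: the runtime context goes down before the plugin instantiator it depends on. A small sketch of the pattern; closeAll here is a hypothetical stand-in for the runner's private helper:

import java.io.Closeable;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;

public final class LifoCleanupSketch {
  // Hypothetical stand-in for the runner's closeAll helper: iterate front-to-back,
  // closing last-registered resources first and never letting one failure stop the rest.
  static void closeAll(Deque<Closeable> closeables) {
    for (Closeable closeable : closeables) {
      try {
        closeable.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  public static void main(String[] args) {
    Deque<Closeable> closeables = new LinkedList<>();
    closeables.addFirst(new Closeable() {
      @Override
      public void close() {
        System.out.println("closing plugin instantiator");
      }
    });
    closeables.addFirst(new Closeable() {
      @Override
      public void close() {
        System.out.println("closing runtime context"); // registered last, closed first
      }
    });
    closeAll(closeables);
  }
}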
Use of co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class MapReduceClassLoader, method createMapReduceLoggingContext.
/**
 * Creates the logging context for a MapReduce program. If the program was started
 * by a Workflow, an instance of {@link WorkflowProgramLoggingContext} is returned;
 * otherwise an instance of {@link MapReduceLoggingContext} is returned.
 */
private LoggingContext createMapReduceLoggingContext() {
  MapReduceContextConfig contextConfig = new MapReduceContextConfig(parameters.getHConf());
  ProgramId programId = contextConfig.getProgramId();
  RunId runId = ProgramRunners.getRunId(contextConfig.getProgramOptions());
  WorkflowProgramInfo workflowProgramInfo = contextConfig.getWorkflowProgramInfo();
  if (workflowProgramInfo == null) {
    return new MapReduceLoggingContext(programId.getNamespace(), programId.getApplication(),
                                       programId.getProgram(), runId.getId());
  }
  String workflowId = workflowProgramInfo.getName();
  String workflowRunId = workflowProgramInfo.getRunId().getId();
  return new WorkflowProgramLoggingContext(programId.getNamespace(), programId.getApplication(),
                                           workflowId, workflowRunId, ProgramType.MAPREDUCE,
                                           programId.getProgram(), runId.getId());
}
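The same null-gated branch recurs here: the presence of a WorkflowProgramInfo decides whether log entries are attributed to the parent workflow run or only to the MapReduce run itself. A compressed sketch of that decision with hypothetical stand-in types; the real classes are MapReduceLoggingContext and WorkflowProgramLoggingContext:

final class LoggingContextSelectionSketch {
  interface LogContext {
    String partition();
  }

  static LogContext contextFor(final String namespace, final String app, final String program,
                               final String run, final String workflowName, final String workflowRun) {
    if (workflowName == null) {
      // Standalone run: logs are grouped under the MapReduce program itself.
      return new LogContext() {
        @Override
        public String partition() {
          return namespace + ":" + app + ":" + program + ":" + run;
        }
      };
    }
    // Workflow-launched run: logs are grouped under the parent workflow,
    // with the MapReduce identity carried alongside.
    return new LogContext() {
      @Override
      public String partition() {
        return namespace + ":" + app + ":" + workflowName + ":" + workflowRun
          + " (mapreduce=" + program + ", run=" + run + ")";
      }
    };
  }
}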