use of co.cask.cdap.data.ProgramContextAware in project cdap by caskdata.
the class MapReduceProgramRunner method run.
/**
 * Starts a MapReduce program run and returns the controller for it.
 *
 * <p>The method validates the program, instantiates the program's main class as a {@link MapReduce},
 * builds a {@link BasicMapReduceContext}, injects fields into the MapReduce instance and finally starts
 * a {@link MapReduceRuntimeService}. Resources collected during setup are closed if the setup fails.
 *
 * @param program the program to run; its type must be {@link ProgramType#MAPREDUCE}
 * @param options the options carrying the system and user arguments of the run
 * @return a {@link MapReduceProgramController} wrapping the started runtime service
 */
@Override
public ProgramController run(final Program program, ProgramOptions options) {
  // Validate the program and look up its specification
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");

  ProgramType programType = program.getType();
  Preconditions.checkNotNull(programType, "Missing processor type.");
  Preconditions.checkArgument(programType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");

  MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());

  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);

  // Without workflow information the shared framework is used directly; otherwise it is wrapped
  // in a name-mapped framework created from that workflow information.
  DatasetFramework programDatasetFramework;
  if (workflowInfo == null) {
    programDatasetFramework = datasetFramework;
  } else {
    programDatasetFramework =
      NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
  }

  // Let a context-aware dataset framework know which program run it is serving
  if (programDatasetFramework instanceof ProgramContextAware) {
    ((ProgramContextAware) programDatasetFramework)
      .setContext(new BasicProgramContext(program.getId().run(runId)));
  }

  // Instantiate the program's main class
  MapReduce mapReduce;
  try {
    InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
    TypeToken<MapReduce> mainClassType = TypeToken.of(program.<MapReduce>getMainClass());
    mapReduce = instantiatorFactory.get(mainClassType).create();
  } catch (Exception e) {
    LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
    throw Throwables.propagate(e);
  }

  // Everything added here is closed if the remaining setup fails
  List<Closeable> resources = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      resources.add(pluginInstantiator);
    }

    final BasicMapReduceContext mrContext = new BasicMapReduceContext(
      program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService,
      txSystemClient, programDatasetFramework, streamAdmin, getPluginArchive(options), pluginInstantiator,
      secureStore, secureStoreManager, messagingService);

    // Inject properties, metrics and datasets into the fields of the MapReduce instance
    PropertyFieldSetter propertySetter = new PropertyFieldSetter(mrContext.getSpecification().getProperties());
    MetricsFieldSetter metricsSetter = new MetricsFieldSetter(mrContext.getMetrics());
    DataSetFieldSetter datasetSetter = new DataSetFieldSetter(mrContext);
    Reflections.visit(mapReduce, mapReduce.getClass(), propertySetter, metricsSetter, datasetSetter);

    // note: this sets logging context on the thread level
    LoggingContextAccessor.setLoggingContext(mrContext.getLoggingContext());

    // Work on a copy of the Hadoop configuration and set the job queue on it when one is provided
    Configuration jobConf = new Configuration(this.hConf);
    String queueName = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
    if (queueName != null && !queueName.isEmpty()) {
      jobConf.set(JobContext.QUEUE_NAME, queueName);
    }

    Service runtimeService = new MapReduceRuntimeService(
      injector, cConf, jobConf, mapReduce, spec, mrContext, program.getJarLocation(), locationFactory,
      streamAdmin, txSystemClient, authorizationEnforcer, authenticationContext);
    runtimeService.addListener(
      createRuntimeServiceListener(program.getId(), runId, resources, arguments, options.getUserArguments()),
      Threads.SAME_THREAD_EXECUTOR);

    final ProgramController controller = new MapReduceProgramController(runtimeService, mrContext);
    LOG.debug("Starting MapReduce Job: {}", mrContext);

    // In local mode, or when Hadoop security is enabled, the service is started directly; otherwise it
    // is started as the user configured under Constants.CFG_HDFS_USER.
    // NOTE(review): the original comment here was truncated; its remaining fragment refers to the data
    // directory being owned by cdap - confirm the full rationale against the upstream source.
    boolean startDirectly = MapReduceTaskContextProvider.isLocal(jobConf) || UserGroupInformation.isSecurityEnabled();
    if (startDirectly) {
      runtimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), runtimeService);
    }
    return controller;
  } catch (Exception e) {
    closeAllQuietly(resources);
    throw Throwables.propagate(e);
  }
}
use of co.cask.cdap.data.ProgramContextAware in project cdap by caskdata.
the class MapReduceTaskContextProvider method createCacheLoader.
/**
 * Builds the {@link CacheLoader} that creates a {@link BasicMapReduceTaskContext} for a cache key.
 *
 * @param injector the injector from which the services needed by the task contexts are obtained
 * @return a loader that constructs a new task context for every {@link ContextCacheKey} it loads
 */
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
  final DiscoveryServiceClient discoveryClient = injector.getInstance(DiscoveryServiceClient.class);
  final DatasetFramework baseDatasetFramework = injector.getInstance(DatasetFramework.class);
  final SecureStore store = injector.getInstance(SecureStore.class);
  final SecureStoreManager storeManager = injector.getInstance(SecureStoreManager.class);
  final MessagingService messaging = injector.getInstance(MessagingService.class);

  // Multiple instances of BasicMapReduceTaskContext can share the same program.
  final AtomicReference<Program> sharedProgram = new AtomicReference<>();

  return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {
    @Override
    public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
      MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
      MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());

      Program program = sharedProgram.get();
      if (program == null) {
        // Creation of program is relatively cheap, so just create one and let compare-and-set decide
        // which instance is kept; the stored instance is then read back.
        sharedProgram.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
        program = sharedProgram.get();
      }

      WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();

      // Without workflow information the injected framework is used directly; otherwise it is wrapped
      DatasetFramework programDatasetFramework;
      if (workflowInfo == null) {
        programDatasetFramework = baseDatasetFramework;
      } else {
        programDatasetFramework = NameMappedDatasetFramework.createFromWorkflowProgramInfo(
          baseDatasetFramework, workflowInfo, program.getApplicationSpecification());
      }

      // Let a context-aware dataset framework know which program run it is serving
      if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramRunId runOfProgram = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(runOfProgram));
      }

      MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());

      // Task related values stay null unless the key carries a task attempt id
      MetricsCollectionService metricsService = null;
      MapReduceMetrics.TaskType taskType = null;
      String taskId = null;

      // NOTE(review): the original comment here was truncated; it mentions the key being used from an
      // org.apache.hadoop.io.RawComparator, presumably the case in which the attempt id is null - confirm.
      TaskAttemptID attemptId = key.getTaskAttemptID();
      if (attemptId != null) {
        taskId = attemptId.getTaskID().toString();
        // The metrics collection service is only obtained for task types known to MapReduceMetrics.TaskType
        // (per the original note: if this is not for a mapper or a reducer, the service is not needed).
        if (MapReduceMetrics.TaskType.hasType(attemptId.getTaskType())) {
          taskType = MapReduceMetrics.TaskType.from(attemptId.getTaskType());
          metricsService = injector.getInstance(MetricsCollectionService.class);
        }
      }

      CConfiguration cConf = injector.getInstance(CConfiguration.class);
      TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);

      return new BasicMapReduceTaskContext(
        program, contextConfig.getProgramOptions(), cConf, taskType, taskId, spec, workflowInfo,
        discoveryClient, metricsService, txClient, contextConfig.getTx(), programDatasetFramework,
        classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(), store, storeManager,
        authorizationEnforcer, authenticationContext, messaging);
    }
  };
}
use of co.cask.cdap.data.ProgramContextAware in project cdap by caskdata.
the class WorkflowProgramRunner method run.
/**
 * Starts a Workflow program run and returns the controller for it.
 *
 * <p>After validating the program, a {@link WorkflowDriver} is created and wrapped in a
 * {@link WorkflowProgramController}. A listener registered on the controller records the state changes
 * of the run (start, suspend, resume and stop) in the runtime store, retrying each update with a fixed delay.
 *
 * <p>Resources registered in {@code closeables} (the plugin instantiator, when one is created) are closed
 * when the run reaches any terminal state (completed, killed or error) and when the setup in this method fails.
 *
 * @param program the program to run; its type must be {@link ProgramType#WORKFLOW}
 * @param options the options carrying the system and user arguments of the run
 * @return the controller of the started workflow driver
 */
@Override
public ProgramController run(final Program program, final ProgramOptions options) {
  // Extract and verify options
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.WORKFLOW, "Only WORKFLOW process type is supported.");
  WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(program.getName());
  Preconditions.checkNotNull(workflowSpec, "Missing WorkflowSpecification for %s", program.getName());
  final RunId runId = ProgramRunners.getRunId(options);

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }

  // List of all Closeable resources that need to be cleaned up
  final List<Closeable> closeables = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.add(pluginInstantiator);
    }
    WorkflowDriver driver = new WorkflowDriver(program, options, hostname, workflowSpec, programRunnerFactory,
                                               metricsCollectionService, datasetFramework,
                                               discoveryServiceClient, txClient, runtimeStore, cConf,
                                               pluginInstantiator, secureStore, secureStoreManager,
                                               messagingService);

    // Controller needs to be created before starting the driver so that the state change of the driver
    // service can be fully captured by the controller.
    final ProgramController controller = new WorkflowProgramController(program, driver, serviceAnnouncer, runId);
    final String twillRunId = options.getArguments().getOption(ProgramOptionConstants.TWILL_RUN_ID);
    controller.addListener(new AbstractListener() {
      @Override
      public void init(ProgramController.State state, @Nullable Throwable cause) {
        // Get start time from RunId
        long startTimeInSeconds = RunIds.getTime(controller.getRunId(), TimeUnit.SECONDS);
        if (startTimeInSeconds == -1) {
          // If RunId is not time-based, use current time as start time
          startTimeInSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
        }
        final long finalStartTimeInSeconds = startTimeInSeconds;
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setStart(program.getId(), runId.getId(), finalStartTimeInSeconds, twillRunId,
                                  options.getUserArguments().asMap(), options.getArguments().asMap());
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
        // This can happen if there is a delay in calling the init listener
        if (state == ProgramController.State.COMPLETED) {
          completed();
        }
        // This can happen if there is a delay in calling the init listener
        if (state == ProgramController.State.ERROR) {
          error(controller.getFailureCause());
        }
      }

      @Override
      public void completed() {
        LOG.debug("Program {} with run id {} completed successfully.", program.getId(), runId.getId());
        // Release the run's resources on successful completion as well, not only on error
        closeAllQuietly(closeables);
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setStop(program.getId(), runId.getId(),
                                 TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                                 ProgramController.State.COMPLETED.getRunStatus());
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
      }

      @Override
      public void killed() {
        LOG.debug("Program {} with run id {} killed.", program.getId(), runId.getId());
        // A killed run is terminal too, so its resources must be released here
        closeAllQuietly(closeables);
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setStop(program.getId(), runId.getId(),
                                 TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                                 ProgramController.State.KILLED.getRunStatus());
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
      }

      @Override
      public void suspended() {
        LOG.debug("Suspending Program {} with run id {}.", program.getId(), runId.getId());
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setSuspend(program.getId(), runId.getId());
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
      }

      @Override
      public void resuming() {
        LOG.debug("Resuming Program {} {}.", program.getId(), runId.getId());
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setResume(program.getId(), runId.getId());
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
      }

      @Override
      public void error(final Throwable cause) {
        LOG.info("Program {} with run id {} stopped because of error {}.", program.getId(), runId.getId(), cause);
        closeAllQuietly(closeables);
        Retries.supplyWithRetries(new Supplier<Void>() {
          @Override
          public Void get() {
            runtimeStore.setStop(program.getId(), runId.getId(),
                                 TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
                                 ProgramController.State.ERROR.getRunStatus(), new BasicThrowable(cause));
            return null;
          }
        }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
      }
    }, Threads.SAME_THREAD_EXECUTOR);

    driver.start();
    return controller;
  } catch (Exception e) {
    // Setup failed before the listener could take over the cleanup
    closeAllQuietly(closeables);
    throw Throwables.propagate(e);
  }
}
use of co.cask.cdap.data.ProgramContextAware in project cdap by caskdata.
the class ServiceProgramRunner method run.
/**
 * Starts one instance of a Service program and returns the controller for it.
 *
 * <p>The instance id, the instance count and the host name are read from the system arguments in
 * {@code options}. A {@link ServiceHttpServer} is created for the service specification of the program
 * and started; the plugin instantiator created for the run is closed when that server terminates or
 * fails, or when the setup in this method fails.
 *
 * @param program the program to run; its type must be {@link ProgramType#SERVICE}
 * @param options the options carrying the system and user arguments of the run
 * @return a controller adapting the started {@link ServiceHttpServer}
 * @throws IllegalArgumentException if the instance id, instance count or host name is missing or invalid,
 *                                  or if the program is not a service
 * @throws NullPointerException if the application specification, the program type or the service
 *                              specification of the program is missing
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
  int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
  Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
  int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
  Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
  RunId runId = ProgramRunners.getRunId(options);
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType programType = program.getType();
  Preconditions.checkNotNull(programType, "Missing processor type.");
  Preconditions.checkArgument(programType == ProgramType.SERVICE, "Only Service process type is supported.");
  ServiceSpecification spec = appSpec.getServices().get(program.getName());
  // Fail fast with a descriptive message, before any resource is created, instead of hitting a
  // message-less NullPointerException further down when the specification is dereferenced.
  Preconditions.checkNotNull(spec, "Missing ServiceSpecification for %s", program.getName());
  String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
  Preconditions.checkArgument(host != null, "No hostname is provided");

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }

  final PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
  try {
    ServiceHttpServer component = new ServiceHttpServer(host, program, options, cConf, spec, instanceId,
                                                        instanceCount, serviceAnnouncer,
                                                        metricsCollectionService, datasetFramework, txClient,
                                                        discoveryServiceClient, pluginInstantiator, secureStore,
                                                        secureStoreManager, messagingService,
                                                        defaultArtifactManager);

    // Add a service listener to make sure the plugin instantiator is closed when the http server is finished.
    component.addListener(new ServiceListenerAdapter() {
      @Override
      public void terminated(Service.State from) {
        Closeables.closeQuietly(pluginInstantiator);
      }

      @Override
      public void failed(Service.State from, Throwable failure) {
        Closeables.closeQuietly(pluginInstantiator);
      }
    }, Threads.SAME_THREAD_EXECUTOR);

    ProgramController controller = new ServiceProgramControllerAdapter(component, program.getId(), runId,
                                                                       spec.getName() + "-" + instanceId);
    component.start();
    return controller;
  } catch (Throwable t) {
    // The listener may not have been registered yet, so release the instantiator here
    Closeables.closeQuietly(pluginInstantiator);
    throw t;
  }
}
use of co.cask.cdap.data.ProgramContextAware in project cdap by caskdata.
the class SparkRuntimeContextProvider method createIfNotExists.
/**
 * Returns the singleton {@link SparkRuntimeContext}, creating it on the first call.
 * Creation relies on the assumption that files have been localized by the SparkRuntimeService.
 *
 * <p>On creation, a set of services is started and a JVM shutdown hook is registered to stop them.
 *
 * @return the existing context if one was already created, otherwise the newly created one
 */
private static synchronized SparkRuntimeContext createIfNotExists() {
  if (sparkRuntimeContext != null) {
    return sparkRuntimeContext;
  }

  try {
    CConfiguration cConf = createCConf();
    Configuration hConf = createHConf();
    SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(hConf);

    // Should be yarn only and only for executor node, not the driver node.
    boolean inYarnExecutor = !contextConfig.isLocal() && Boolean.parseBoolean(System.getenv("SPARK_YARN_MODE"));
    Preconditions.checkState(
      inYarnExecutor, "SparkContextProvider.getSparkContext should only be called in Spark executor process.");

    // Create the program
    Program program = createProgram(cConf, contextConfig);

    Injector injector = createInjector(cConf, hConf, contextConfig.getProgramId(), contextConfig.getProgramOptions());

    final Service logAppender = new LogAppenderService(
      injector.getInstance(LogAppenderInitializer.class), contextConfig.getProgramOptions());
    final ZKClientService zkClient = injector.getInstance(ZKClientService.class);
    final KafkaClientService kafkaClient = injector.getInstance(KafkaClientService.class);
    final MetricsCollectionService metricsService = injector.getInstance(MetricsCollectionService.class);
    final StreamCoordinatorClient streamCoordinator = injector.getInstance(StreamCoordinatorClient.class);

    // Use the shutdown hook to shutdown services, since this class should only be loaded from System classloader
    // of the spark executor, hence there should be exactly one instance only.
    // The problem with not shutting down nicely is that some logs/metrics might be lost
    Services.chainStart(logAppender, zkClient, kafkaClient, metricsService, streamCoordinator);
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        // The logger may already been shutdown. Use System.out/err instead
        System.out.println("Shutting SparkClassLoader services");
        Future<List<ListenableFuture<Service.State>>> stopFuture =
          Services.chainStop(logAppender, streamCoordinator, metricsService, kafkaClient, zkClient);
        try {
          // Wait at most five seconds for the chained stop to finish
          List<ListenableFuture<Service.State>> stopResults = stopFuture.get(5, TimeUnit.SECONDS);
          System.out.println("SparkClassLoader services shutdown completed: " + stopResults);
        } catch (Exception e) {
          System.err.println("Exception when shutting down services");
          e.printStackTrace(System.err);
        }
      }
    });

    // Construct the DatasetFramework; it is wrapped in a name-mapped one when workflow information exists
    DatasetFramework baseDatasetFramework = injector.getInstance(DatasetFramework.class);
    WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
    DatasetFramework programDatasetFramework;
    if (workflowInfo == null) {
      programDatasetFramework = baseDatasetFramework;
    } else {
      programDatasetFramework = NameMappedDatasetFramework.createFromWorkflowProgramInfo(
        baseDatasetFramework, workflowInfo, contextConfig.getApplicationSpecification());
    }

    // Let a context-aware dataset framework know which program run it is serving
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramRunId runOfProgram = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(runOfProgram));
    }

    PluginInstantiator pluginInstantiator = createPluginInstantiator(cConf, contextConfig, program.getClassLoader());

    // Create the context object and publish it through the static field
    SparkRuntimeContext created = new SparkRuntimeContext(
      contextConfig.getConfiguration(),
      program,
      contextConfig.getProgramOptions(),
      cConf,
      getHostname(),
      injector.getInstance(TransactionSystemClient.class),
      programDatasetFramework,
      injector.getInstance(DiscoveryServiceClient.class),
      metricsService,
      injector.getInstance(StreamAdmin.class),
      contextConfig.getWorkflowProgramInfo(),
      pluginInstantiator,
      injector.getInstance(SecureStore.class),
      injector.getInstance(SecureStoreManager.class),
      injector.getInstance(AuthorizationEnforcer.class),
      injector.getInstance(AuthenticationContext.class),
      injector.getInstance(MessagingService.class));
    sparkRuntimeContext = created;

    LoggingContextAccessor.setLoggingContext(created.getLoggingContext());
    return created;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
Aggregations