Use of io.cdap.cdap.data.ProgramContextAware in project cdap by caskdata.
From class ServiceProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
  Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
  int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
  Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
  RunId runId = ProgramRunners.getRunId(options);
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType programType = program.getType();
  Preconditions.checkNotNull(programType, "Missing processor type.");
  Preconditions.checkArgument(programType == ProgramType.SERVICE, "Only Service process type is supported.");
  ServiceSpecification spec = appSpec.getServices().get(program.getName());
  String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
  Preconditions.checkArgument(host != null, "No hostname is provided");

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }

  final PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
  try {
    RetryStrategy retryStrategy =
      SystemArguments.getRetryStrategy(options.getUserArguments().asMap(), program.getType(), cConf);
    ArtifactManager artifactManager =
      artifactManagerFactory.create(program.getId().getNamespaceId(), retryStrategy);
    ServiceHttpServer component =
      new ServiceHttpServer(host, program, options, cConf, spec, instanceId, instanceCount, serviceAnnouncer,
                            metricsCollectionService, datasetFramework, txClient, discoveryServiceClient,
                            pluginInstantiator, secureStore, secureStoreManager, messagingService, artifactManager,
                            metadataReader, metadataPublisher, namespaceQueryAdmin, pluginFinder, fieldLineageWriter,
                            transactionRunner, preferencesFetcher, remoteClientFactory, contextAccessEnforcer);

    // Add a service listener to make sure the plugin instantiator is closed when the http server is finished.
    component.addListener(createRuntimeServiceListener(Collections.singleton(pluginInstantiator)),
                          Threads.SAME_THREAD_EXECUTOR);

    ProgramController controller = new ServiceProgramControllerAdapter(component, program.getId().run(runId));
    component.start();
    return controller;
  } catch (Throwable t) {
    Closeables.closeQuietly(pluginInstantiator);
    throw t;
  }
}
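The pattern shared by all of the runners in this listing is visible above: when the injected DatasetFramework implements ProgramContextAware, the runner hands it the current ProgramRunId through setContext before the program starts, so later dataset operations can be attributed to that run. Below is a minimal sketch of what an implementation of that contract might look like, assuming ProgramContextAware exposes only the setContext(ProgramContext) method used in these snippets; the class and field names are hypothetical (a real dataset framework would also implement DatasetFramework itself).

// Hypothetical sketch of a ProgramContextAware implementation; illustrates only how the
// context handed over by the runners above could be retained and used.
public class ProgramScopedDatasetFramework implements ProgramContextAware {

  // Context of the program run currently using this framework instance; null until set.
  private volatile ProgramContext programContext;

  @Override
  public void setContext(ProgramContext context) {
    // Called by the program runner before the program starts, e.g. with
    // new BasicProgramContext(programId.run(runId)) as in the snippets above.
    this.programContext = context;
  }

  private void recordUsage(String datasetName) {
    // Dataset operations can now be attributed to the owning run, if a context was provided,
    // e.g. by emitting lineage or usage metadata tagged with the run id from the context.
    if (programContext != null) {
      // attribution logic would go here
    }
  }
}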
Use of io.cdap.cdap.data.ProgramContextAware in project cdap by caskdata.
From class MapReduceTaskContextProvider, method createCacheLoader.
/**
 * Creates a {@link CacheLoader} for the task context cache.
 */
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
  DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
  DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  SecureStore secureStore = injector.getInstance(SecureStore.class);
  SecureStoreManager secureStoreManager = injector.getInstance(SecureStoreManager.class);
  MessagingService messagingService = injector.getInstance(MessagingService.class);
  // Multiple instances of BasicMapReduceTaskContext can share the same program.
  AtomicReference<Program> programRef = new AtomicReference<>();
  MetadataReader metadataReader = injector.getInstance(MetadataReader.class);
  MetadataPublisher metadataPublisher = injector.getInstance(MetadataPublisher.class);
  FieldLineageWriter fieldLineageWriter = injector.getInstance(FieldLineageWriter.class);
  RemoteClientFactory remoteClientFactory = injector.getInstance(RemoteClientFactory.class);

  return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {
    @Override
    public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
      TaskAttemptID taskAttemptId = key.getTaskAttemptID();
      // taskAttemptId could be null if used from an org.apache.hadoop.mapreduce.Partitioner or
      // from an org.apache.hadoop.io.RawComparator, in which case we can get the JobId from the conf.
      // Note that the JobId isn't in the conf for the OutputCommitter#setupJob method, in which case
      // we use the taskAttemptId.
      Path txFile = MainOutputCommitter.getTxFile(key.getConfiguration(),
                                                  taskAttemptId != null ? taskAttemptId.getJobID() : null);
      FileSystem fs = txFile.getFileSystem(key.getConfiguration());
      Transaction transaction = null;
      if (fs.exists(txFile)) {
        try (FSDataInputStream txFileInputStream = fs.open(txFile)) {
          transaction = new TransactionCodec().decode(ByteStreams.toByteArray(txFileInputStream));
        }
      }
      MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
      MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());
      Program program = programRef.get();
      if (program == null) {
        // Creation of program is relatively cheap, so just create and do compare and set.
        programRef.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
        program = programRef.get();
      }
      WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
      DatasetFramework programDatasetFramework = workflowInfo == null
        ? datasetFramework
        : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                   program.getApplicationSpecification());
      // Setup dataset framework context, if required
      if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
      }
      MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());
      MetricsCollectionService metricsCollectionService = null;
      MapReduceMetrics.TaskType taskType = null;
      String taskId = null;
      ProgramOptions options = contextConfig.getProgramOptions();
      // taskAttemptId can be null when invoked from a Partitioner or a RawComparator (see above).
      if (taskAttemptId != null) {
        taskId = taskAttemptId.getTaskID().toString();
        if (MapReduceMetrics.TaskType.hasType(taskAttemptId.getTaskType())) {
          taskType = MapReduceMetrics.TaskType.from(taskAttemptId.getTaskType());
          // Only mapper and reducer tasks need the metrics collection service.
          metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
          options = new SimpleProgramOptions(options.getProgramId(), options.getArguments(),
                                             new BasicArguments(RuntimeArguments.extractScope(
                                               "task", taskType.toString().toLowerCase(),
                                               contextConfig.getProgramOptions().getUserArguments().asMap())),
                                             options.isDebug());
        }
      }
      CConfiguration cConf = injector.getInstance(CConfiguration.class);
      TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
      NamespaceQueryAdmin namespaceQueryAdmin = injector.getInstance(NamespaceQueryAdmin.class);
      return new BasicMapReduceTaskContext(program, options, cConf, taskType, taskId, spec, workflowInfo,
                                           discoveryServiceClient, metricsCollectionService, txClient, transaction,
                                           programDatasetFramework, classLoader.getPluginInstantiator(),
                                           contextConfig.getLocalizedResources(), secureStore, secureStoreManager,
                                           accessEnforcer, authenticationContext, messagingService,
                                           mapReduceClassLoader, metadataReader, metadataPublisher,
                                           namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
    }
  };
}
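The CacheLoader returned above only becomes useful once it is wrapped in a Guava LoadingCache, which invokes load(key) on a cache miss. Below is a minimal sketch of how the provider might build and query such a cache using the standard Guava CacheBuilder API; the expiry setting, field name, and the availability of an injector field are assumptions for illustration only, not the actual MapReduceTaskContextProvider configuration.

// Hypothetical sketch: wrapping the CacheLoader in a LoadingCache (Guava CacheBuilder API).
private final LoadingCache<ContextCacheKey, BasicMapReduceTaskContext> taskContexts =
  CacheBuilder.newBuilder()
    .expireAfterAccess(1, TimeUnit.HOURS)   // assumed expiry: drop contexts for idle tasks
    .build(createCacheLoader(injector));

BasicMapReduceTaskContext getContext(ContextCacheKey key) throws ExecutionException {
  // get() invokes CacheLoader.load(key) on a miss and caches the resulting task context.
  return taskContexts.get(key);
}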
Use of io.cdap.cdap.data.ProgramContextAware in project cdap by caskdata.
From class SparkProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  LOG.trace("Starting Spark program {} with SparkProgramRunner of ClassLoader {}",
            program.getId(), getClass().getClassLoader());

  // Get the RunId first. It is used for the creation of the ClassLoader closing thread.
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  Deque<Closeable> closeables = new LinkedList<>();
  try {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");
    SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());
    String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
    Preconditions.checkArgument(host != null, "No hostname is provided");

    // Get the WorkflowProgramInfo if it is started by Workflow
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);

    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramId programId = program.getId();
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }

    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.addFirst(pluginInstantiator);
    }

    SparkRuntimeContext runtimeContext =
      new SparkRuntimeContext(new Configuration(hConf), program, options, cConf, host, txClient,
                              programDatasetFramework, metricsCollectionService, workflowInfo, pluginInstantiator,
                              secureStore, secureStoreManager, accessEnforcer, authenticationContext,
                              messagingService, serviceAnnouncer, pluginFinder, locationFactory, metadataReader,
                              metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory,
                              () -> { });
    closeables.addFirst(runtimeContext);

    Spark spark;
    try {
      spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }

    boolean isLocal = SparkRuntimeContextConfig.isLocal(options);
    SparkSubmitter submitter;
    // If MasterEnvironment is not available, use non-master env spark submitters
    MasterEnvironment masterEnv = MasterEnvironments.getMasterEnvironment();
    if (masterEnv != null && cConf.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
      submitter = new MasterEnvironmentSparkSubmitter(cConf, locationFactory, host, runtimeContext, masterEnv);
    } else {
      submitter = isLocal
        ? new LocalSparkSubmitter()
        : new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext,
                                        options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
    }

    Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options), runtimeContext,
                                                          submitter, locationFactory, isLocal, fieldLineageWriter,
                                                          masterEnv);
    sparkRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);

    LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
    if (isLocal || UserGroupInformation.isSecurityEnabled()) {
      sparkRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
    }
    return controller;
  } catch (Throwable t) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(t);
  }
}
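The Spark runner above collects its Closeables in a Deque (adding each with addFirst) and hands them to createRuntimeServiceListener, so that resources are released in reverse order of creation once the runtime service stops. The source for that helper is not shown here, so the following is only a minimal sketch of what such a listener could look like, assuming its job is to close the collected resources when the underlying Guava Service terminates or fails; the real CDAP helper may also handle logging and program state. It uses com.google.common.util.concurrent.Service and java.io.Closeable.

// Hypothetical sketch of the createRuntimeServiceListener(...) helper used by the runners above.
private Service.Listener createRuntimeServiceListener(final Collection<? extends Closeable> closeables) {
  return new Service.Listener() {
    @Override
    public void terminated(Service.State from) {
      closeAll();
    }

    @Override
    public void failed(Service.State from, Throwable failure) {
      closeAll();
    }

    private void closeAll() {
      // The Spark runner adds resources with closeables.addFirst(...), so iterating the Deque
      // in order closes them in reverse order of creation (last created, first closed).
      for (Closeable closeable : closeables) {
        try {
          closeable.close();
        } catch (IOException e) {
          // Cleanup is best-effort; a failure to close one resource should not block the others.
        }
      }
    }
  };
}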
Use of io.cdap.cdap.data.ProgramContextAware in project cdap by caskdata.
From class WorkerProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
  Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
  int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
  Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
  RunId runId = ProgramRunners.getRunId(options);
  ProgramType programType = program.getType();
  Preconditions.checkNotNull(programType, "Missing processor type.");
  Preconditions.checkArgument(programType == ProgramType.WORKER, "Only Worker process type is supported.");
  WorkerSpecification workerSpec = appSpec.getWorkers().get(program.getName());
  Preconditions.checkArgument(workerSpec != null, "Missing Worker specification for %s", program.getId());
  String instances = options.getArguments().getOption(ProgramOptionConstants.INSTANCES,
                                                      String.valueOf(workerSpec.getInstances()));
  WorkerSpecification newWorkerSpec =
    new WorkerSpecification(workerSpec.getClassName(), workerSpec.getName(), workerSpec.getDescription(),
                            workerSpec.getProperties(), workerSpec.getDatasets(), workerSpec.getResources(),
                            Integer.parseInt(instances), workerSpec.getPlugins());

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }

  final PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
  try {
    BasicWorkerContext context =
      new BasicWorkerContext(newWorkerSpec, program, options, cConf, instanceId, instanceCount,
                             metricsCollectionService, datasetFramework, txClient, discoveryServiceClient,
                             pluginInstantiator, secureStore, secureStoreManager, messagingService, metadataReader,
                             metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
    WorkerDriver worker = new WorkerDriver(program, newWorkerSpec, context);

    // Add a service listener to make sure the plugin instantiator is closed when the worker driver finishes.
    worker.addListener(createRuntimeServiceListener(Collections.singleton(pluginInstantiator)),
                       Threads.SAME_THREAD_EXECUTOR);

    ProgramController controller = new WorkerControllerServiceAdapter(worker, program.getId().run(runId));
    worker.start();
    return controller;
  } catch (Throwable t) {
    Closeables.closeQuietly(pluginInstantiator);
    throw t;
  }
}
Use of io.cdap.cdap.data.ProgramContextAware in project cdap by caskdata.
From class WorkflowProgramRunner, method run.
@Override
public ProgramController run(final Program program, final ProgramOptions options) {
  // Extract and verify options
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.WORKFLOW, "Only WORKFLOW process type is supported.");
  WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(program.getName());
  Preconditions.checkNotNull(workflowSpec, "Missing WorkflowSpecification for %s", program.getName());
  final RunId runId = ProgramRunners.getRunId(options);

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }

  // List of all Closeable resources that need to be cleaned up
  final List<Closeable> closeables = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.add(pluginInstantiator);
    }
    WorkflowDriver driver =
      new WorkflowDriver(program, options, workflowSpec, programRunnerFactory, metricsCollectionService,
                         datasetFramework, discoveryServiceClient, txClient, workflowStateWriter, cConf,
                         pluginInstantiator, secureStore, secureStoreManager, messagingService, programStateWriter,
                         metadataReader, metadataPublisher, fieldLineageWriter, namespaceQueryAdmin,
                         remoteClientFactory);

    // Controller needs to be created before starting the driver so that the state change of the driver
    // service can be fully captured by the controller.
    ProgramController controller = new WorkflowProgramController(program.getId().run(runId), driver);
    driver.start();
    return controller;
  } catch (Exception e) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(e);
  }
}
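The comment in the snippet above about creating the controller before starting the driver reflects a general property of Guava services: a listener registered before the service starts observes every subsequent state transition, while one added later may miss transitions that have already happened. The following is a minimal, self-contained sketch of that ordering using the plain Guava Service API (the CDAP code uses its own driver and Twill's Threads.SAME_THREAD_EXECUTOR instead); all names are hypothetical and only illustrate the register-then-start pattern.

// Hypothetical sketch; uses com.google.common.util.concurrent.AbstractService, Service, MoreExecutors.
AbstractService driver = new AbstractService() {
  @Override
  protected void doStart() {
    notifyStarted();
  }

  @Override
  protected void doStop() {
    notifyStopped();
  }
};

// Register the listener first, as the controller effectively does when it is constructed...
driver.addListener(new Service.Listener() {
  @Override
  public void running() {
    System.out.println("driver is running");  // controller state would be updated here
  }
}, MoreExecutors.directExecutor());

// ...and only then start the service, so the RUNNING transition is not missed.
driver.startAsync().awaitRunning();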