Use of io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by cdapio.
The class SparkRuntimeService, method emitFieldLineage.
private boolean emitFieldLineage() {
  if (runtimeContext.getFieldLineageOperations().isEmpty()) {
    // no operations to emit
    return false;
  }
  ProgramStatus status = context.getState().getStatus();
  if (ProgramStatus.COMPLETED != status) {
    // Spark program failed, so field operations won't be emitted
    return false;
  }
  WorkflowProgramInfo workflowInfo = runtimeContext.getWorkflowInfo();
  return workflowInfo == null || !workflowInfo.fieldLineageConsolidationEnabled();
}
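
Taken on its own, the gate above encodes a small decision table. Below is a minimal, self-contained sketch of that table with the CDAP runtime types flattened into plain booleans (the class and method names here are hypothetical, not part of CDAP): lineage is emitted only for completed runs that produced operations, and a workflow-launched program additionally defers to the workflow when lineage consolidation is enabled.

public final class FieldLineageEmitDecision {

  // Mirrors the three checks above: getFieldLineageOperations().isEmpty(),
  // the COMPLETED status check, and fieldLineageConsolidationEnabled().
  // A null third argument stands for "not launched from a workflow".
  static boolean shouldEmit(boolean hasOperations, boolean completed,
                            Boolean workflowConsolidationEnabled) {
    if (!hasOperations) {
      return false;  // no operations to emit
    }
    if (!completed) {
      return false;  // failed runs don't emit field operations
    }
    // Outside a workflow, emit directly; inside one, defer when the
    // workflow consolidates lineage itself.
    return workflowConsolidationEnabled == null || !workflowConsolidationEnabled;
  }

  public static void main(String[] args) {
    System.out.println(shouldEmit(true, true, null));   // true: standalone run
    System.out.println(shouldEmit(true, true, true));   // false: workflow consolidates
    System.out.println(shouldEmit(true, true, false));  // true: workflow, no consolidation
    System.out.println(shouldEmit(true, false, null));  // false: program failed
  }
}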
Use of io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by cdapio.
The class SparkProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  LOG.trace("Starting Spark program {} with SparkProgramRunner of ClassLoader {}",
            program.getId(), getClass().getClassLoader());
  // Get the RunId first. It is used for the creation of the ClassLoader closing thread.
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  Deque<Closeable> closeables = new LinkedList<>();
  try {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");
    SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());
    String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
    Preconditions.checkArgument(host != null, "No hostname is provided");
    // Get the WorkflowProgramInfo if it is started by Workflow
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramId programId = program.getId();
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.addFirst(pluginInstantiator);
    }
    SparkRuntimeContext runtimeContext = new SparkRuntimeContext(
      new Configuration(hConf), program, options, cConf, host, txClient, programDatasetFramework,
      metricsCollectionService, workflowInfo, pluginInstantiator, secureStore, secureStoreManager,
      accessEnforcer, authenticationContext, messagingService, serviceAnnouncer, pluginFinder,
      locationFactory, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter,
      remoteClientFactory, () -> { });
    closeables.addFirst(runtimeContext);
    Spark spark;
    try {
      spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }
    boolean isLocal = SparkRuntimeContextConfig.isLocal(options);
    SparkSubmitter submitter;
    // If MasterEnvironment is not available, use non-master env spark submitters
    MasterEnvironment masterEnv = MasterEnvironments.getMasterEnvironment();
    if (masterEnv != null && cConf.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
      submitter = new MasterEnvironmentSparkSubmitter(cConf, locationFactory, host, runtimeContext, masterEnv);
    } else {
      submitter = isLocal
        ? new LocalSparkSubmitter()
        : new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext,
                                        options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
    }
    Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options), runtimeContext,
                                                          submitter, locationFactory, isLocal, fieldLineageWriter,
                                                          masterEnv);
    sparkRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);
    LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
    if (isLocal || UserGroupInformation.isSecurityEnabled()) {
      sparkRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
    }
    return controller;
  } catch (Throwable t) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(t);
  }
}
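
The Deque<Closeable> handling above is worth noting: each acquired resource is pushed with addFirst, so on failure closeAllQuietly can tear them down in reverse acquisition order. The sketch below illustrates that ordering; closeAllQuietly itself is not shown in the snippet, so this version is an assumption about its behavior, and the class name is made up.

import java.io.Closeable;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;

public final class CloseablesSketch {

  // Assumed behavior of closeAllQuietly: close everything, swallow
  // exceptions so cleanup cannot mask the original failure.
  static void closeAllQuietly(Deque<Closeable> closeables) {
    for (Closeable closeable : closeables) {
      try {
        closeable.close();
      } catch (IOException e) {
        // quietly ignore
      }
    }
  }

  public static void main(String[] args) {
    Deque<Closeable> closeables = new LinkedList<>();
    closeables.addFirst(() -> System.out.println("closing resource A"));
    closeables.addFirst(() -> System.out.println("closing resource B"));
    // Iteration starts at the head, so the most recently added resource
    // closes first: prints B, then A.
    closeAllQuietly(closeables);
  }
}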
Use of io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class RuntimeMonitors, method setupMonitoring.
/**
 * Sets up the monitoring routes and proxy for runtime monitoring.
 */
public static void setupMonitoring(Injector injector, ProgramOptions programOpts) throws Exception {
  CConfiguration cConf = injector.getInstance(CConfiguration.class);
  RuntimeMonitorType monitorType = injector.getInstance(RuntimeMonitorType.class);
  if (monitorType == RuntimeMonitorType.URL) {
    // This shouldn't be null, otherwise the type won't be URL.
    String monitorURL = cConf.get(Constants.RuntimeMonitor.MONITOR_URL);
    monitorURL = monitorURL.endsWith("/") ? monitorURL : monitorURL + "/";
    ProgramRunId programRunId = injector.getInstance(ProgramRunId.class);
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(programOpts.getArguments());
    if (workflowInfo != null) {
      // If the program is launched by a Workflow, use the Workflow run id to make service requests.
      programRunId = new ProgramRunId(programRunId.getNamespace(), programRunId.getApplication(),
                                      ProgramType.WORKFLOW, workflowInfo.getName(),
                                      workflowInfo.getRunId().getId());
    }
    URI runtimeServiceBaseURI = URI.create(monitorURL).resolve(
      String.format("v3Internal/runtime/namespaces/%s/apps/%s/versions/%s/%s/%s/runs/%s/services/",
                    programRunId.getNamespace(), programRunId.getApplication(), programRunId.getVersion(),
                    programRunId.getType().getCategoryName(), programRunId.getProgram(), programRunId.getRun()));
    System.setProperty(RemoteClient.RUNTIME_SERVICE_ROUTING_BASE_URI, runtimeServiceBaseURI.toString());
    LOG.debug("Setting runtime service routing base URI to {}", runtimeServiceBaseURI);
  } else {
    Authenticator.setDefault(injector.getInstance(Authenticator.class));
    ProxySelector.setDefault(injector.getInstance(ProxySelector.class));
  }
}
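
The interesting part of setupMonitoring is the base-URI construction, including the swap to the Workflow's own run id when WorkflowProgramInfo is present, so that programs launched by a workflow route service requests through the workflow run. Below is a self-contained sketch of the URI assembly with the ProgramRunId fields flattened to plain strings; all sample values (host, app, version, category, run id) are invented.

import java.net.URI;

public final class RuntimeServiceUriSketch {

  public static void main(String[] args) {
    String monitorUrl = "https://monitor.example.com"; // hypothetical monitor URL
    // Mirror of: monitorURL.endsWith("/") ? monitorURL : monitorURL + "/"
    monitorUrl = monitorUrl.endsWith("/") ? monitorUrl : monitorUrl + "/";

    // Same path template as the format string above, with made-up values;
    // "workflows" stands in for getType().getCategoryName() of a Workflow run.
    URI base = URI.create(monitorUrl).resolve(String.format(
        "v3Internal/runtime/namespaces/%s/apps/%s/versions/%s/%s/%s/runs/%s/services/",
        "default", "MyApp", "-SNAPSHOT", "workflows", "MyWorkflow",
        "2f8ba38e-0000-0000-0000-000000000000"));

    // Prints the fully resolved routing base URI.
    System.out.println(base);
  }
}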
Use of io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class MapReduceProgramRunner, method run.
@Override
public ProgramController run(final Program program, ProgramOptions options) {
  // Extract and verify parameters
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
  MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
  DatasetFramework programDatasetFramework = workflowInfo == null
    ? datasetFramework
    : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
  // Setup dataset framework context, if required
  if (programDatasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }
  MapReduce mapReduce;
  try {
    mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
  } catch (Exception e) {
    LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
    throw Throwables.propagate(e);
  }
  // List of all Closeable resources that need to be cleaned up
  List<Closeable> closeables = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.add(pluginInstantiator);
    }
    final BasicMapReduceContext context = new BasicMapReduceContext(
      program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService,
      txSystemClient, programDatasetFramework, getPluginArchive(options), pluginInstantiator, secureStore,
      secureStoreManager, messagingService, metadataReader, metadataPublisher, namespaceQueryAdmin,
      fieldLineageWriter, remoteClientFactory);
    closeables.add(context);
    Reflections.visit(mapReduce, mapReduce.getClass(),
                      new PropertyFieldSetter(context.getSpecification().getProperties()),
                      new MetricsFieldSetter(context.getMetrics()),
                      new DataSetFieldSetter(context));
    // note: this sets logging context on the thread level
    LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
    // Set the job queue to hConf if it is provided
    Configuration hConf = new Configuration(this.hConf);
    String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
    if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
      hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
    }
    ClusterMode clusterMode = ProgramRunners.getClusterMode(options);
    Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context,
                                                                  program.getJarLocation(), locationFactory,
                                                                  clusterMode, fieldLineageWriter);
    mapReduceRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
    LOG.debug("Starting MapReduce Job: {}", context);
    // When security is not enabled, start the job as the configured HDFS user; otherwise the program
    // runner's user (e.g. the YARN user) would be running the job, but the data directory will be owned by cdap.
    if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
      mapReduceRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
    }
    return controller;
  } catch (Exception e) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(e);
  }
}
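
One small detail above: the scheduler queue travels from the program options into the Hadoop Configuration under JobContext.QUEUE_NAME. A minimal sketch of that wiring follows, with the Configuration stood in by a Map; the literal key "mapreduce.job.queuename" is what JobContext.QUEUE_NAME resolves to in Hadoop 2.x (stated here as an assumption), and the class name is hypothetical.

import java.util.HashMap;
import java.util.Map;

public final class SchedulerQueueSketch {

  // Mirror of: if (schedulerQueue != null && !schedulerQueue.isEmpty()) hConf.set(...)
  static void applyQueue(Map<String, String> hConf, String schedulerQueue) {
    if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
      hConf.put("mapreduce.job.queuename", schedulerQueue);
    }
  }

  public static void main(String[] args) {
    Map<String, String> hConf = new HashMap<>();
    applyQueue(hConf, "etl");   // option provided: queue is set
    applyQueue(hConf, null);    // option absent: no-op
    System.out.println(hConf);  // {mapreduce.job.queuename=etl}
  }
}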
Use of io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo in project cdap by caskdata.
The class MapReduceContextConfig, method getWorkflowProgramInfo.
/**
 * Returns the {@link WorkflowProgramInfo} if the program is running inside a Workflow, or {@code null} if not.
 */
@Nullable
WorkflowProgramInfo getWorkflowProgramInfo() {
  String info = hConf.get(HCONF_ATTR_WORKFLOW_INFO);
  if (info == null) {
    return null;
  }
  WorkflowProgramInfo workflowProgramInfo = GSON.fromJson(info, WorkflowProgramInfo.class);
  workflowProgramInfo.getWorkflowToken().disablePut();
  return workflowProgramInfo;
}
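
The method above is a plain lookup-then-deserialize: an absent key means the program was not launched from a workflow; otherwise the JSON is decoded with Gson and the embedded workflow token is made read-only. Below is a self-contained sketch of the same pattern, with the Hadoop Configuration replaced by a Map and a stripped-down info class; the configuration key and field names are invented, and the token handling is omitted.

import com.google.gson.Gson;

import java.util.HashMap;
import java.util.Map;

public final class WorkflowInfoConfigSketch {

  // Stripped-down stand-in for WorkflowProgramInfo; field names are hypothetical.
  static final class WorkflowInfo {
    String name;
    String runId;
  }

  private static final Gson GSON = new Gson();

  static WorkflowInfo getWorkflowInfo(Map<String, String> conf, String key) {
    String info = conf.get(key);
    if (info == null) {
      return null;  // not launched from a workflow
    }
    return GSON.fromJson(info, WorkflowInfo.class);
  }

  public static void main(String[] args) {
    Map<String, String> conf = new HashMap<>();
    System.out.println(getWorkflowInfo(conf, "workflow.info"));  // null: key absent
    conf.put("workflow.info", "{\"name\":\"MyWorkflow\",\"runId\":\"run-1\"}");
    WorkflowInfo info = getWorkflowInfo(conf, "workflow.info");
    System.out.println(info.name + " / " + info.runId);          // MyWorkflow / run-1
  }
}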