use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.
In class CustomActionExecutor, method createCustomAction.
@SuppressWarnings("unchecked")
private CustomAction createCustomAction(BasicCustomActionContext context, InstantiatorFactory instantiator,
                                         ClassLoader classLoader) throws Exception {
  Class<?> clz = Class.forName(context.getSpecification().getClassName(), true, classLoader);
  Preconditions.checkArgument(CustomAction.class.isAssignableFrom(clz), "%s is not a CustomAction.", clz);
  CustomAction action = instantiator.get(TypeToken.of((Class<? extends CustomAction>) clz)).create();
  Reflections.visit(action, action.getClass(),
                    new PropertyFieldSetter(context.getSpecification().getProperties()),
                    new DataSetFieldSetter(context),
                    new MetricsFieldSetter(context.getMetrics()));
  return action;
}
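
For reference, DataSetFieldSetter is the field visitor that resolves dataset fields while Reflections.visit walks the action instance. A minimal sketch of a custom action whose field would be filled in by the call above, assuming the standard @UseDataSet annotation and KeyValueTable dataset API; the class name MarkerAction and dataset name "markers" are hypothetical:

import io.cdap.cdap.api.annotation.UseDataSet;
import io.cdap.cdap.api.customaction.AbstractCustomAction;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;

public class MarkerAction extends AbstractCustomAction {

  // Populated by DataSetFieldSetter during Reflections.visit(action, ...):
  // the setter looks up the named dataset in the action's context and assigns it to this field.
  @UseDataSet("markers")
  private KeyValueTable markers;

  @Override
  public void run() throws Exception {
    // The injected dataset is ready to use by the time run() is invoked.
    markers.write("last.run", Long.toString(System.currentTimeMillis()));
  }
}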
use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.
In class MapReduceProgramRunner, method run.
@Override
public ProgramController run(final Program program, ProgramOptions options) {
  // Extract and verify parameters
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");
  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
  MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
  DatasetFramework programDatasetFramework = workflowInfo == null
    ? datasetFramework
    : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
  // Setup dataset framework context, if required
  if (programDatasetFramework instanceof ProgramContextAware) {
    ProgramId programId = program.getId();
    ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
  }
  MapReduce mapReduce;
  try {
    mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
  } catch (Exception e) {
    LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
    throw Throwables.propagate(e);
  }
  // List of all Closeable resources that need to be cleaned up
  List<Closeable> closeables = new ArrayList<>();
  try {
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.add(pluginInstantiator);
    }
    final BasicMapReduceContext context = new BasicMapReduceContext(
      program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService,
      txSystemClient, programDatasetFramework, getPluginArchive(options), pluginInstantiator,
      secureStore, secureStoreManager, messagingService, metadataReader, metadataPublisher,
      namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
    closeables.add(context);
    Reflections.visit(mapReduce, mapReduce.getClass(),
                      new PropertyFieldSetter(context.getSpecification().getProperties()),
                      new MetricsFieldSetter(context.getMetrics()),
                      new DataSetFieldSetter(context));
    // note: this sets logging context on the thread level
    LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
    // Set the job queue to hConf if it is provided
    Configuration hConf = new Configuration(this.hConf);
    String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
    if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
      hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
    }
    ClusterMode clusterMode = ProgramRunners.getClusterMode(options);
    Service mapReduceRuntimeService = new MapReduceRuntimeService(
      injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(), locationFactory,
      clusterMode, fieldLineageWriter);
    mapReduceRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
    LOG.debug("Starting MapReduce Job: {}", context);
    // In distributed, non-secure mode, start the job as the configured HDFS user; otherwise the program
    // runner's user would be running the job, but the data directory will be owned by cdap.
    if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
      mapReduceRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
    }
    return controller;
  } catch (Exception e) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(e);
  }
}
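
The closeAllQuietly helper called in the catch block is not part of this snippet. A plausible minimal sketch, assuming it only needs to close every collected resource while swallowing individual failures so the original exception still propagates (LOG is assumed to be the class logger):

private static void closeAllQuietly(Iterable<Closeable> closeables) {
  for (Closeable closeable : closeables) {
    try {
      closeable.close();
    } catch (Exception e) {
      // Swallow so that the exception from the try block above is the one that propagates.
      LOG.warn("Failed to close {}", closeable, e);
    }
  }
}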
use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.
In class ReducerWrapper, method run.
@SuppressWarnings("unchecked")
@Override
public void run(Context context) throws IOException, InterruptedException {
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
  ClassLoader weakReferenceClassLoader = new WeakReferenceDelegatorClassLoader(classLoader);
  BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);
  long metricsReportInterval = basicMapReduceContext.getMetricsReportIntervalMillis();
  final ReduceTaskMetricsWriter reduceTaskMetricsWriter =
    new ReduceTaskMetricsWriter(basicMapReduceContext.getProgramMetrics(), context);
  // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
  WrappedReducer.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext, reduceTaskMetricsWriter);
  basicMapReduceContext.setHadoopContext(flushingContext);
  String userReducer = context.getConfiguration().get(ATTR_REDUCER_CLASS);
  ClassLoader programClassLoader = classLoader.getProgramClassLoader();
  Reducer delegate = createReducerInstance(programClassLoader, userReducer);
  // injecting runtime components, like datasets, etc.
  try {
    Reflections.visit(delegate, delegate.getClass(),
                      new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()),
                      new MetricsFieldSetter(basicMapReduceContext.getMetrics()),
                      new DataSetFieldSetter(basicMapReduceContext));
  } catch (Throwable t) {
    LOG.error("Failed to inject fields to {}.", delegate.getClass(), t);
    throw Throwables.propagate(t);
  }
  ClassLoader oldClassLoader;
  if (delegate instanceof ProgramLifecycle) {
    oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
    try {
      ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
    } catch (Exception e) {
      LOG.error("Failed to initialize reducer with {}", basicMapReduceContext, e);
      throw Throwables.propagate(e);
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }
  }
  oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
  try {
    delegate.run(flushingContext);
  } finally {
    ClassLoaders.setContextClassLoader(oldClassLoader);
  }
  // Flush any dataset operations that are still buffered in memory (e.g. by the tx agent)
  try {
    basicMapReduceContext.flushOperations();
  } catch (Exception e) {
    LOG.error("Failed to flush operations at the end of reducer of " + basicMapReduceContext, e);
    throw Throwables.propagate(e);
  }
  // Close all writers created by MultipleOutputs
  basicMapReduceContext.closeMultiOutputs();
  if (delegate instanceof ProgramLifecycle) {
    oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
    try {
      ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
    } catch (Exception e) {
      LOG.error("Error during destroy of reducer {}", basicMapReduceContext, e);
      // Do nothing, try to finish
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }
  }
  reduceTaskMetricsWriter.reportMetrics();
}
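
The delegate above is an application-supplied Hadoop Reducer; the wrapper only injects its fields and drives its optional ProgramLifecycle callbacks. A hedged sketch of such a reducer, assuming the standard @UseDataSet and ProgramLifecycle APIs; the class name WordCountReducer and dataset name "counts" are made up for illustration:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import io.cdap.cdap.api.ProgramLifecycle;
import io.cdap.cdap.api.annotation.UseDataSet;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.api.mapreduce.MapReduceTaskContext;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
  implements ProgramLifecycle<MapReduceTaskContext> {

  // Injected by DataSetFieldSetter in ReducerWrapper.run() before initialize() is called.
  @UseDataSet("counts")
  private KeyValueTable counts;

  @Override
  public void initialize(MapReduceTaskContext context) throws Exception {
    // Invoked by the wrapper through ProgramLifecycle.initialize(...)
  }

  @Override
  protected void reduce(Text word, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    // The injected dataset can be used alongside the normal Hadoop output.
    counts.write(word.toString(), Integer.toString(sum));
    context.write(word, new IntWritable(sum));
  }

  @Override
  public void destroy() {
    // Invoked by the wrapper after the reduce phase completes.
  }
}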
use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.
In class SparkRuntimeService, method startUp.
@Override
protected void startUp() throws Exception {
  // additional Spark job initialization at run-time
  // This context is for calling initialize and onFinish on the Spark program.
  // Fields injection for the Spark program.
  // It has to be done here instead of in SparkProgramRunner for the @UseDataSet injection,
  // since the dataset cache being used in Spark is a MultiThreadDatasetCache.
  // The AbstractExecutionThreadService guarantees that startUp(), run() and shutDown() all happen in the same thread.
  Reflections.visit(spark, spark.getClass(),
                    new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()),
                    new DataSetFieldSetter(runtimeContext.getDatasetCache()),
                    new MetricsFieldSetter(runtimeContext));
  // Since we are updating cConf, make a copy and apply the updates to the copy.
  CConfiguration cConfCopy = CConfiguration.copy(cConf);
  // Creates a temporary directory locally for storing all generated files.
  File tempDir = DirUtils.createTempDir(new File(cConfCopy.get(Constants.CFG_LOCAL_DATA_DIR),
                                                 cConfCopy.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
  tempDir.mkdirs();
  this.cleanupTask = createCleanupTask(tempDir, System.getProperties());
  try {
    initialize();
    SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(runtimeContext.getConfiguration());
    final List<LocalizeResource> localizeResources = new ArrayList<>();
    final URI jobFile = context.isPySpark() ? getPySparkScript(tempDir) : createJobJar(tempDir);
    List<File> extraPySparkFiles = new ArrayList<>();
    String metricsConfPath;
    String classpath = "";
    // Setup the SparkConf with properties from spark-defaults.conf
    Properties sparkDefaultConf = SparkPackageUtils.getSparkDefaultConf();
    for (String key : sparkDefaultConf.stringPropertyNames()) {
      SparkRuntimeEnv.setProperty(key, sparkDefaultConf.getProperty(key));
    }
    if (masterEnv != null) {
      // Add cConf, hConf, metrics.properties and logback for the master environment
      localizeResources.add(new LocalizeResource(saveCConf(cConfCopy, tempDir)));
      Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
      localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));
      File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
      metricsConfPath = metricsConf.getAbsolutePath();
      localizeResources.add(new LocalizeResource(metricsConf));
      File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
      if (logbackJar != null) {
        localizeResources.add(new LocalizeResource(logbackJar, true));
      }
      // Localize all the files from user resources
      List<File> files = copyUserResources(context.getLocalizeResources(), tempDir);
      for (File file : files) {
        localizeResources.add(new LocalizeResource(file));
      }
      if (cConfCopy.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
        // In case of spark-on-k8s, artifactFetcherService is used by Spark drivers for fetching the artifacts bundle.
        Location location = createBundle(new File("./artifacts").getAbsoluteFile().toPath());
        artifactFetcherService = new ArtifactFetcherService(cConf, location);
        artifactFetcherService.startAndWait();
      }
    } else if (isLocal) {
      // In local mode, always copy (or link if local) user requested resources
      copyUserResources(context.getLocalizeResources(), tempDir);
      File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
      metricsConfPath = metricsConf.getAbsolutePath();
      extractPySparkLibrary(tempDir, extraPySparkFiles);
    } else {
      // Localize all user requested files in distributed mode
      distributedUserResources(context.getLocalizeResources(), localizeResources);
      // Localize the program jar and the expanded program jar
      File programJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(),
                                             new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_NAME));
      File expandedProgramJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(),
                                                     new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_EXPANDED_NAME));
      // Localize both the unexpanded and expanded program jar
      localizeResources.add(new LocalizeResource(programJar));
      localizeResources.add(new LocalizeResource(expandedProgramJar, true));
      if (pluginArchive != null) {
        localizeResources.add(new LocalizeResource(pluginArchive, true));
      }
      // Create and localize the launcher jar, which sets up services and the classloader for Spark containers
      localizeResources.add(new LocalizeResource(createLauncherJar(tempDir)));
      // Create the metrics conf file in the current directory since the same value for the
      // "spark.metrics.conf" config needs to be used for both driver and executor processes.
      // Also localize the metrics conf file to the executor nodes.
      File metricsConf = SparkMetricsSink.writeConfig(new File(CDAP_METRICS_PROPERTIES));
      metricsConfPath = metricsConf.getName();
      localizeResources.add(new LocalizeResource(metricsConf));
      prepareHBaseDDLExecutorResources(tempDir, cConfCopy, localizeResources);
      // Localize the cConf file
      localizeResources.add(new LocalizeResource(saveCConf(cConfCopy, tempDir)));
      // Preserve and localize runtime information in the hConf
      Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
      localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));
      // Joiner for creating the classpath for Spark containers
      Joiner joiner = Joiner.on(File.pathSeparator).skipNulls();
      // Localize the spark.jar archive, which contains all CDAP and dependency jars
      File sparkJar = new File(tempDir, CDAP_SPARK_JAR);
      classpath = joiner.join(Iterables.transform(buildDependencyJar(sparkJar), new Function<String, String>() {
        @Override
        public String apply(String name) {
          return Paths.get("$PWD", CDAP_SPARK_JAR, name).toString();
        }
      }));
      localizeResources.add(new LocalizeResource(sparkJar, true));
      // Localize logback if there is one. It is placed at the beginning of the classpath
      File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
      if (logbackJar != null) {
        localizeResources.add(new LocalizeResource(logbackJar));
        classpath = joiner.join(Paths.get("$PWD", logbackJar.getName()), classpath);
      }
      // Localize extra jars and append them to the end of the classpath
      List<String> extraJars = new ArrayList<>();
      for (URI jarURI : CConfigurationUtil.getExtraJars(cConfCopy)) {
        extraJars.add(Paths.get("$PWD", LocalizationUtils.getLocalizedName(jarURI)).toString());
        localizeResources.add(new LocalizeResource(jarURI, false));
      }
      classpath = joiner.join(classpath, joiner.join(extraJars));
    }
    Iterable<URI> pyFiles = Collections.emptyList();
    if (context.isPySpark()) {
      extraPySparkFiles.add(PySparkUtil.createPySparkLib(tempDir));
      pyFiles = Iterables.concat(Iterables.transform(extraPySparkFiles, FILE_TO_URI),
                                 context.getAdditionalPythonLocations());
    }
    final Map<String, String> configs = createSubmitConfigs(tempDir, metricsConfPath, classpath,
                                                            context.getLocalizeResources(), isLocal, pyFiles);
    submitSpark = new Callable<ListenableFuture<RunId>>() {
      @Override
      public ListenableFuture<RunId> call() throws Exception {
        // This happens when stop() was called while starting
        if (!isRunning()) {
          return immediateCancelledFuture();
        }
        return sparkSubmitter.submit(runtimeContext, configs, localizeResources, jobFile, runtimeContext.getRunId());
      }
    };
  } catch (Throwable t) {
    cleanupTask.run();
    if (t instanceof Error) {
      // Guava 15.0+ has this fixed, so wrapping is no longer needed once we upgrade to a later Guava.
      throw new Exception(t);
    }
    throw t;
  }
}
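
The createCleanupTask helper installed near the top of startUp() is not shown in this snippet. A rough sketch under the assumption that it only needs to snapshot the given system properties, restore them on cleanup, and wipe the temporary directory (DirUtils.deleteDirectoryContents is assumed to be available alongside the DirUtils.createTempDir call used above, and LOG is assumed to be the class logger):

private Runnable createCleanupTask(File tempDir, Properties systemProperties) {
  // Snapshot the properties so anything set while preparing the Spark submission can be rolled back.
  Properties snapshot = new Properties();
  snapshot.putAll(systemProperties);
  return () -> {
    System.setProperties(snapshot);
    try {
      DirUtils.deleteDirectoryContents(tempDir);
    } catch (IOException e) {
      LOG.warn("Failed to delete temporary directory {}", tempDir, e);
    }
  };
}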