Example 6 with DataSetFieldSetter

Use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.

From class CustomActionExecutor, method createCustomAction:

@SuppressWarnings("unchecked")
private CustomAction createCustomAction(BasicCustomActionContext context, InstantiatorFactory instantiator, ClassLoader classLoader) throws Exception {
    Class<?> clz = Class.forName(context.getSpecification().getClassName(), true, classLoader);
    Preconditions.checkArgument(CustomAction.class.isAssignableFrom(clz), "%s is not a CustomAction.", clz);
    CustomAction action = instantiator.get(TypeToken.of((Class<? extends CustomAction>) clz)).create();
    Reflections.visit(action, action.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new DataSetFieldSetter(context), new MetricsFieldSetter(context.getMetrics()));
    return action;
}
Also used: PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter), MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter), AbstractCustomAction (io.cdap.cdap.api.customaction.AbstractCustomAction), CustomAction (io.cdap.cdap.api.customaction.CustomAction), DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter)
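
For context, the snippet below sketches the kind of CustomAction these visitors target: DataSetFieldSetter fills fields annotated with @UseDataSet, PropertyFieldSetter fills @Property fields from the specification's properties, and MetricsFieldSetter fills fields of type Metrics. This is a minimal sketch; the class name, the dataset name "events", and the property key "threshold" are hypothetical, not taken from the CDAP sources.

import io.cdap.cdap.api.annotation.Property;
import io.cdap.cdap.api.annotation.UseDataSet;
import io.cdap.cdap.api.customaction.AbstractCustomAction;
import io.cdap.cdap.api.dataset.table.Put;
import io.cdap.cdap.api.dataset.table.Table;
import io.cdap.cdap.api.metrics.Metrics;

public class SampleAction extends AbstractCustomAction {

    // Filled by DataSetFieldSetter from the action's runtime context;
    // "events" is a hypothetical dataset name
    @UseDataSet("events")
    private Table events;

    // Filled by PropertyFieldSetter from the specification's properties;
    // "threshold" is a hypothetical property key
    @Property
    private String threshold;

    // Filled by MetricsFieldSetter, matched by the field's type
    private Metrics metrics;

    @Override
    public void run() throws Exception {
        metrics.count("action.runs", 1);
        events.put(new Put("last-run").add("threshold", threshold));
    }
}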

Example 7 with DataSetFieldSetter

Use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.

From class MapReduceProgramRunner, method run:

@Override
public ProgramController run(final Program program, ProgramOptions options) {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
    MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
    Arguments arguments = options.getArguments();
    RunId runId = ProgramRunners.getRunId(options);
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    MapReduce mapReduce;
    try {
        mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
    } catch (Exception e) {
        LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
        throw Throwables.propagate(e);
    }
    // List of all Closeable resources that need to be cleaned up
    List<Closeable> closeables = new ArrayList<>();
    try {
        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            closeables.add(pluginInstantiator);
        }
        final BasicMapReduceContext context = new BasicMapReduceContext(program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txSystemClient, programDatasetFramework, getPluginArchive(options), pluginInstantiator, secureStore, secureStoreManager, messagingService, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
        closeables.add(context);
        Reflections.visit(mapReduce, mapReduce.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new MetricsFieldSetter(context.getMetrics()), new DataSetFieldSetter(context));
        // note: this sets logging context on the thread level
        LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
        // Set the job queue to hConf if it is provided
        Configuration hConf = new Configuration(this.hConf);
        String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
        if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
            hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
        }
        ClusterMode clusterMode = ProgramRunners.getClusterMode(options);
        Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(), locationFactory, clusterMode, fieldLineageWriter);
        mapReduceRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
        LOG.debug("Starting MapReduce Job: {}", context);
        // In local mode, or when Kerberos security is enabled, the service can be started directly.
        // Otherwise the MapReduce job would be launched as the user running the program runner
        // (typically the YARN user): that user would be running the job, but the data directory
        // will be owned by cdap, causing permission issues. So start the service as the configured
        // HDFS user instead.
        if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
            mapReduceRuntimeService.start();
        } else {
            ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
        }
        return controller;
    } catch (Exception e) {
        closeAllQuietly(closeables);
        throw Throwables.propagate(e);
    }
}
Also used: ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification), Configuration (org.apache.hadoop.conf.Configuration), CConfiguration (io.cdap.cdap.common.conf.CConfiguration), ClusterMode (io.cdap.cdap.app.guice.ClusterMode), Closeable (java.io.Closeable), ArrayList (java.util.ArrayList), MapReduce (io.cdap.cdap.api.mapreduce.MapReduce), DatasetFramework (io.cdap.cdap.data2.dataset2.DatasetFramework), NameMappedDatasetFramework (io.cdap.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework), InstantiatorFactory (io.cdap.cdap.common.lang.InstantiatorFactory), MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter), ProgramType (io.cdap.cdap.proto.ProgramType), RunId (org.apache.twill.api.RunId), ProgramController (io.cdap.cdap.app.runtime.ProgramController), MapReduceSpecification (io.cdap.cdap.api.mapreduce.MapReduceSpecification), Arguments (io.cdap.cdap.app.runtime.Arguments), MessagingService (io.cdap.cdap.messaging.MessagingService), Service (com.google.common.util.concurrent.Service), MetricsCollectionService (io.cdap.cdap.api.metrics.MetricsCollectionService), ProgramId (io.cdap.cdap.proto.id.ProgramId), BasicProgramContext (io.cdap.cdap.internal.app.runtime.BasicProgramContext), DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter), PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter), WorkflowProgramInfo (io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo), PluginInstantiator (io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator), ProgramContextAware (io.cdap.cdap.data.ProgramContextAware)
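
The listener registered via createRuntimeServiceListener(closeables) is what eventually releases the accumulated Closeable resources once the runtime service stops. Below is a minimal sketch of such a listener against Guava's Service.Listener contract; it is illustrative only, not CDAP's actual implementation, which likely handles more state transitions.

import com.google.common.util.concurrent.Service;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;

final class ClosingListener extends Service.Listener {
    private final List<Closeable> closeables;

    ClosingListener(List<Closeable> closeables) {
        this.closeables = closeables;
    }

    @Override
    public void terminated(Service.State from) {
        closeAll();
    }

    @Override
    public void failed(Service.State from, Throwable failure) {
        closeAll();
    }

    private void closeAll() {
        for (Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (IOException e) {
                // Swallow so one failing resource does not block the rest
            }
        }
    }
}

Registered with addListener(listener, Threads.SAME_THREAD_EXECUTOR) as in the run() method above, the callbacks fire synchronously on the thread that drives the service's state transitions.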

Example 8 with DataSetFieldSetter

Use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.

From class ReducerWrapper, method run:

@SuppressWarnings("unchecked")
@Override
public void run(Context context) throws IOException, InterruptedException {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    ClassLoader weakReferenceClassLoader = new WeakReferenceDelegatorClassLoader(classLoader);
    BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);
    long metricsReportInterval = basicMapReduceContext.getMetricsReportIntervalMillis();
    final ReduceTaskMetricsWriter reduceTaskMetricsWriter = new ReduceTaskMetricsWriter(basicMapReduceContext.getProgramMetrics(), context);
    // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
    WrappedReducer.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext, reduceTaskMetricsWriter);
    basicMapReduceContext.setHadoopContext(flushingContext);
    String userReducer = context.getConfiguration().get(ATTR_REDUCER_CLASS);
    ClassLoader programClassLoader = classLoader.getProgramClassLoader();
    Reducer delegate = createReducerInstance(programClassLoader, userReducer);
    // injecting runtime components, like datasets, etc.
    try {
        Reflections.visit(delegate, delegate.getClass(), new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()), new MetricsFieldSetter(basicMapReduceContext.getMetrics()), new DataSetFieldSetter(basicMapReduceContext));
    } catch (Throwable t) {
        LOG.error("Failed to inject fields to {}.", delegate.getClass(), t);
        throw Throwables.propagate(t);
    }
    ClassLoader oldClassLoader;
    if (delegate instanceof ProgramLifecycle) {
        oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
        try {
            ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
        } catch (Exception e) {
            LOG.error("Failed to initialize reducer with {}", basicMapReduceContext, e);
            throw Throwables.propagate(e);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
    }
    oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
    try {
        delegate.run(flushingContext);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }
    // The transaction is not yet finished, but we want all operations to be dispatched
    // (some could be buffered in memory by the tx agent)
    try {
        basicMapReduceContext.flushOperations();
    } catch (Exception e) {
        LOG.error("Failed to flush operations at the end of reducer of " + basicMapReduceContext, e);
        throw Throwables.propagate(e);
    }
    // Close all writers created by MultipleOutputs
    basicMapReduceContext.closeMultiOutputs();
    if (delegate instanceof ProgramLifecycle) {
        oldClassLoader = ClassLoaders.setContextClassLoader(weakReferenceClassLoader);
        try {
            ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
        } catch (Exception e) {
            LOG.error("Error during destroy of reducer {}", basicMapReduceContext, e);
        // Do nothing, try to finish
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
    }
    reduceTaskMetricsWriter.reportMetrics();
}
Also used: ProgramLifecycle (io.cdap.cdap.api.ProgramLifecycle), DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter), IOException (java.io.IOException), WeakReferenceDelegatorClassLoader (io.cdap.cdap.common.lang.WeakReferenceDelegatorClassLoader), PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter), MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter), WrappedReducer (org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer), Reducer (org.apache.hadoop.mapreduce.Reducer), RuntimeContext (io.cdap.cdap.api.RuntimeContext)
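
The wrapper above only calls initialize and destroy when the delegate implements ProgramLifecycle. The following sketch shows the kind of user reducer it delegates to; the class name, the dataset name "aggregates", and the key/value types are hypothetical, chosen only to illustrate the injection and lifecycle hooks.

import io.cdap.cdap.api.ProgramLifecycle;
import io.cdap.cdap.api.annotation.UseDataSet;
import io.cdap.cdap.api.dataset.table.Put;
import io.cdap.cdap.api.dataset.table.Table;
import io.cdap.cdap.api.mapreduce.MapReduceTaskContext;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SampleReducer extends Reducer<Text, LongWritable, Text, LongWritable>
    implements ProgramLifecycle<MapReduceTaskContext<Text, LongWritable>> {

    // Populated by DataSetFieldSetter before initialize() is called
    @UseDataSet("aggregates")
    private Table aggregates;

    @Override
    public void initialize(MapReduceTaskContext<Text, LongWritable> context) throws Exception {
        // Runs under the program classloader, as arranged by ReducerWrapper
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        // Dataset writes are buffered and flushed by the auto-flushing context
        aggregates.put(new Put(key.toString()).add("sum", sum));
        context.write(key, new LongWritable(sum));
    }

    @Override
    public void destroy() {
        // Cleanup; ReducerWrapper invokes this after the run completes
    }
}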

Example 9 with DataSetFieldSetter

Use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by caskdata.

From class SparkRuntimeService, method startUp:

@Override
protected void startUp() throws Exception {
    // additional spark job initialization at run-time
    // This context is for calling initialize and onFinish on the Spark program
    // Fields injection for the Spark program
    // It has to be done here instead of in SparkProgramRunner for the @UseDataSet injection,
    // since the dataset cache being used in Spark is a MultiThreadDatasetCache.
    // The AbstractExecutionThreadService guarantees that startUp(), run() and shutDown() all happen in the same thread
    Reflections.visit(spark, spark.getClass(), new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()), new DataSetFieldSetter(runtimeContext.getDatasetCache()), new MetricsFieldSetter(runtimeContext));
    // Since we are updating cConf, make a copy of it to which updates will be made.
    CConfiguration cConfCopy = CConfiguration.copy(cConf);
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = DirUtils.createTempDir(new File(cConfCopy.get(Constants.CFG_LOCAL_DATA_DIR), cConfCopy.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
    tempDir.mkdirs();
    this.cleanupTask = createCleanupTask(tempDir, System.getProperties());
    try {
        initialize();
        SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(runtimeContext.getConfiguration());
        final List<LocalizeResource> localizeResources = new ArrayList<>();
        final URI jobFile = context.isPySpark() ? getPySparkScript(tempDir) : createJobJar(tempDir);
        List<File> extraPySparkFiles = new ArrayList<>();
        String metricsConfPath;
        String classpath = "";
        // Setup the SparkConf with properties from spark-defaults.conf
        Properties sparkDefaultConf = SparkPackageUtils.getSparkDefaultConf();
        for (String key : sparkDefaultConf.stringPropertyNames()) {
            SparkRuntimeEnv.setProperty(key, sparkDefaultConf.getProperty(key));
        }
        if (masterEnv != null) {
            // Add cconf, hconf, metrics.properties, logback for master environment
            localizeResources.add(new LocalizeResource(saveCConf(cConfCopy, tempDir)));
            Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
            localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));
            File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
            metricsConfPath = metricsConf.getAbsolutePath();
            localizeResources.add(new LocalizeResource(metricsConf));
            File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
            if (logbackJar != null) {
                localizeResources.add(new LocalizeResource(logbackJar, true));
            }
            // Localize all the files from user resources
            List<File> files = copyUserResources(context.getLocalizeResources(), tempDir);
            for (File file : files) {
                localizeResources.add(new LocalizeResource(file));
            }
            if (cConfCopy.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
                // In case of spark-on-k8s, artifactFetcherService is used by spark-drivers for fetching artifacts bundle.
                Location location = createBundle(new File("./artifacts").getAbsoluteFile().toPath());
                artifactFetcherService = new ArtifactFetcherService(cConf, location);
                artifactFetcherService.startAndWait();
            }
        } else if (isLocal) {
            // In local mode, always copy (or link if local) user requested resources
            copyUserResources(context.getLocalizeResources(), tempDir);
            File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
            metricsConfPath = metricsConf.getAbsolutePath();
            extractPySparkLibrary(tempDir, extraPySparkFiles);
        } else {
            // Localize all user requested files in distributed mode
            distributedUserResources(context.getLocalizeResources(), localizeResources);
            // Localize the program jar and the expanded program jar
            File programJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(), new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_NAME));
            File expandedProgramJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(), new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_EXPANDED_NAME));
            // Localize both the unexpanded and expanded program jar
            localizeResources.add(new LocalizeResource(programJar));
            localizeResources.add(new LocalizeResource(expandedProgramJar, true));
            if (pluginArchive != null) {
                localizeResources.add(new LocalizeResource(pluginArchive, true));
            }
            // Create and localize the launcher jar, which is for setting up services and classloader for spark containers
            localizeResources.add(new LocalizeResource(createLauncherJar(tempDir)));
            // Create metrics conf file in the current directory since
            // the same value for the "spark.metrics.conf" config needs to be used for both driver and executor processes
            // Also localize the metrics conf file to the executor nodes
            File metricsConf = SparkMetricsSink.writeConfig(new File(CDAP_METRICS_PROPERTIES));
            metricsConfPath = metricsConf.getName();
            localizeResources.add(new LocalizeResource(metricsConf));
            prepareHBaseDDLExecutorResources(tempDir, cConfCopy, localizeResources);
            // Localize the cConf file
            localizeResources.add(new LocalizeResource(saveCConf(cConfCopy, tempDir)));
            // Preserve and localize runtime information in the hConf
            Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
            localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));
            // Joiner for creating classpath for spark containers
            Joiner joiner = Joiner.on(File.pathSeparator).skipNulls();
            // Localize the spark.jar archive, which contains all CDAP and dependency jars
            File sparkJar = new File(tempDir, CDAP_SPARK_JAR);
            classpath = joiner.join(Iterables.transform(buildDependencyJar(sparkJar), new Function<String, String>() {

                @Override
                public String apply(String name) {
                    return Paths.get("$PWD", CDAP_SPARK_JAR, name).toString();
                }
            }));
            localizeResources.add(new LocalizeResource(sparkJar, true));
            // Localize logback if there is one. It is placed at the beginning of the classpath
            File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
            if (logbackJar != null) {
                localizeResources.add(new LocalizeResource(logbackJar));
                classpath = joiner.join(Paths.get("$PWD", logbackJar.getName()), classpath);
            }
            // Localize extra jars and append to the end of the classpath
            List<String> extraJars = new ArrayList<>();
            for (URI jarURI : CConfigurationUtil.getExtraJars(cConfCopy)) {
                extraJars.add(Paths.get("$PWD", LocalizationUtils.getLocalizedName(jarURI)).toString());
                localizeResources.add(new LocalizeResource(jarURI, false));
            }
            classpath = joiner.join(classpath, joiner.join(extraJars));
        }
        Iterable<URI> pyFiles = Collections.emptyList();
        if (context.isPySpark()) {
            extraPySparkFiles.add(PySparkUtil.createPySparkLib(tempDir));
            pyFiles = Iterables.concat(Iterables.transform(extraPySparkFiles, FILE_TO_URI), context.getAdditionalPythonLocations());
        }
        final Map<String, String> configs = createSubmitConfigs(tempDir, metricsConfPath, classpath, context.getLocalizeResources(), isLocal, pyFiles);
        submitSpark = new Callable<ListenableFuture<RunId>>() {

            @Override
            public ListenableFuture<RunId> call() throws Exception {
                // This happens when stop() was called while starting
                if (!isRunning()) {
                    return immediateCancelledFuture();
                }
                return sparkSubmitter.submit(runtimeContext, configs, localizeResources, jobFile, runtimeContext.getRunId());
            }
        };
    } catch (Throwable t) {
        cleanupTask.run();
        if (t instanceof Error) {
            // Guava 15.0+ has this condition fixed, hence wrapping will no longer be needed after upgrading to a later Guava.
            throw new Exception(t);
        }
        throw t;
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), CConfiguration (io.cdap.cdap.common.conf.CConfiguration), ArrayList (java.util.ArrayList), Properties (java.util.Properties), URI (java.net.URI), Function (com.google.common.base.Function), MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter), ArtifactFetcherService (io.cdap.cdap.app.runtime.spark.service.ArtifactFetcherService), List (java.util.List), Joiner (com.google.common.base.Joiner), DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter), URISyntaxException (java.net.URISyntaxException), IOException (java.io.IOException), PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter), LocalizeResource (io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), File (java.io.File), Location (org.apache.twill.filesystem.Location)
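
The classpath assembly in the distributed branch relies on "$PWD"-relative paths that YARN resolves inside each container's working directory. Here is a standalone sketch of that Joiner-based pattern; the archive and jar names are hypothetical stand-ins for the localized resources above.

import com.google.common.base.Joiner;
import java.io.File;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

public class ClasspathSketch {
    public static void main(String[] args) {
        Joiner joiner = Joiner.on(File.pathSeparator).skipNulls();
        List<String> jarNames = Arrays.asList("cdap-api.jar", "guava.jar");

        // Each entry points inside the localized spark jar archive, resolved
        // against the container's working directory ($PWD) at runtime
        String classpath = joiner.join(
            jarNames.stream()
                .map(name -> Paths.get("$PWD", "spark.jar", name).toString())
                .iterator());

        // A logback jar, if present, is prepended so it wins on the classpath
        classpath = joiner.join(Paths.get("$PWD", "logback.xml.jar"), classpath);
        System.out.println(classpath);
    }
}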

Example 10 with DataSetFieldSetter

Use of io.cdap.cdap.internal.app.runtime.DataSetFieldSetter in project cdap by cdapio.

From class CustomActionExecutor, method createCustomAction:

@SuppressWarnings("unchecked")
private CustomAction createCustomAction(BasicCustomActionContext context, InstantiatorFactory instantiator, ClassLoader classLoader) throws Exception {
    Class<?> clz = Class.forName(context.getSpecification().getClassName(), true, classLoader);
    Preconditions.checkArgument(CustomAction.class.isAssignableFrom(clz), "%s is not a CustomAction.", clz);
    CustomAction action = instantiator.get(TypeToken.of((Class<? extends CustomAction>) clz)).create();
    Reflections.visit(action, action.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new DataSetFieldSetter(context), new MetricsFieldSetter(context.getMetrics()));
    return action;
}
Also used: PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter), MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter), AbstractCustomAction (io.cdap.cdap.api.customaction.AbstractCustomAction), CustomAction (io.cdap.cdap.api.customaction.CustomAction), DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter)
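
All of these examples funnel through Reflections.visit, which walks the instance's class hierarchy and offers each declared field to every visitor. A simplified sketch of that mechanism follows; it assumes each visitor inspects a field's annotations or type before injecting, and it is not the actual CDAP implementation (the type and method names here are illustrative).

import java.lang.reflect.Field;

interface SimpleFieldVisitor {
    void visit(Object instance, Field field) throws IllegalAccessException;
}

final class SimpleReflections {
    static void visit(Object instance, Class<?> type, SimpleFieldVisitor... visitors)
        throws IllegalAccessException {
        // Climb from the concrete class up through its superclasses
        for (Class<?> current = type; current != null && current != Object.class;
             current = current.getSuperclass()) {
            for (Field field : current.getDeclaredFields()) {
                field.setAccessible(true);
                for (SimpleFieldVisitor visitor : visitors) {
                    // A real visitor checks the field first, e.g. for @UseDataSet
                    // or @Property annotations, or for the Metrics field type,
                    // and only then injects a value
                    visitor.visit(instance, field);
                }
            }
        }
    }
}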

Aggregations

PropertyFieldSetter (io.cdap.cdap.common.lang.PropertyFieldSetter): 12 usages
DataSetFieldSetter (io.cdap.cdap.internal.app.runtime.DataSetFieldSetter): 12 usages
MetricsFieldSetter (io.cdap.cdap.internal.app.runtime.MetricsFieldSetter): 12 usages
IOException (java.io.IOException): 6 usages
ProgramLifecycle (io.cdap.cdap.api.ProgramLifecycle): 4 usages
RuntimeContext (io.cdap.cdap.api.RuntimeContext): 4 usages
CConfiguration (io.cdap.cdap.common.conf.CConfiguration): 4 usages
WeakReferenceDelegatorClassLoader (io.cdap.cdap.common.lang.WeakReferenceDelegatorClassLoader): 4 usages
ArrayList (java.util.ArrayList): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
Function (com.google.common.base.Function): 2 usages
Joiner (com.google.common.base.Joiner): 2 usages
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2 usages
Service (com.google.common.util.concurrent.Service): 2 usages
TransactionControl (io.cdap.cdap.api.annotation.TransactionControl): 2 usages
ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification): 2 usages
AbstractCustomAction (io.cdap.cdap.api.customaction.AbstractCustomAction): 2 usages
CustomAction (io.cdap.cdap.api.customaction.CustomAction): 2 usages
MapReduce (io.cdap.cdap.api.mapreduce.MapReduce): 2 usages
MapReduceSpecification (io.cdap.cdap.api.mapreduce.MapReduceSpecification): 2 usages