Search in sources :

Example 6 with MapReduceSpecification

use of io.cdap.cdap.api.mapreduce.MapReduceSpecification in project cdap by caskdata.

the class MapReduceSpecificationCodec method deserialize.

@Override
public MapReduceSpecification deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
    JsonObject jsonObj = json.getAsJsonObject();
    String className = jsonObj.get("className").getAsString();
    String name = jsonObj.get("name").getAsString();
    String description = jsonObj.get("description").getAsString();
    Map<String, Plugin> plugins = deserializeMap(jsonObj.get("plugins"), context, Plugin.class);
    Resources driverResources = deserializeResources(jsonObj, "driver", context);
    Resources mapperResources = deserializeResources(jsonObj, "mapper", context);
    Resources reducerResources = deserializeResources(jsonObj, "reducer", context);
    JsonElement inputDataSetElem = jsonObj.get("inputDataSet");
    String inputDataSet = inputDataSetElem == null ? null : inputDataSetElem.getAsString();
    JsonElement outputDataSetElem = jsonObj.get("outputDataSet");
    String outputDataSet = outputDataSetElem == null ? null : outputDataSetElem.getAsString();
    Set<String> dataSets = deserializeSet(jsonObj.get("datasets"), context, String.class);
    Map<String, String> properties = deserializeMap(jsonObj.get("properties"), context, String.class);
    return new MapReduceSpecification(className, name, description, inputDataSet, outputDataSet, dataSets, properties, driverResources, mapperResources, reducerResources, plugins);
}
Also used : JsonElement(com.google.gson.JsonElement) MapReduceSpecification(io.cdap.cdap.api.mapreduce.MapReduceSpecification) JsonObject(com.google.gson.JsonObject) Resources(io.cdap.cdap.api.Resources) Plugin(io.cdap.cdap.api.plugin.Plugin)

Example 7 with MapReduceSpecification

use of io.cdap.cdap.api.mapreduce.MapReduceSpecification in project cdap by cdapio.

the class MapReduceProgramRunner method run.

@Override
public ProgramController run(final Program program, ProgramOptions options) {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
    MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
    Arguments arguments = options.getArguments();
    RunId runId = ProgramRunners.getRunId(options);
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    MapReduce mapReduce;
    try {
        mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
    } catch (Exception e) {
        LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
        throw Throwables.propagate(e);
    }
    // List of all Closeable resources that needs to be cleanup
    List<Closeable> closeables = new ArrayList<>();
    try {
        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            closeables.add(pluginInstantiator);
        }
        final BasicMapReduceContext context = new BasicMapReduceContext(program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txSystemClient, programDatasetFramework, getPluginArchive(options), pluginInstantiator, secureStore, secureStoreManager, messagingService, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
        closeables.add(context);
        Reflections.visit(mapReduce, mapReduce.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new MetricsFieldSetter(context.getMetrics()), new DataSetFieldSetter(context));
        // note: this sets logging context on the thread level
        LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
        // Set the job queue to hConf if it is provided
        Configuration hConf = new Configuration(this.hConf);
        String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
        if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
            hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
        }
        ClusterMode clusterMode = ProgramRunners.getClusterMode(options);
        Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(), locationFactory, clusterMode, fieldLineageWriter);
        mapReduceRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
        LOG.debug("Starting MapReduce Job: {}", context);
        // be running the job, but the data directory will be owned by cdap.
        if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
            mapReduceRuntimeService.start();
        } else {
            ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
        }
        return controller;
    } catch (Exception e) {
        closeAllQuietly(closeables);
        throw Throwables.propagate(e);
    }
}
Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) Configuration(org.apache.hadoop.conf.Configuration) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) ClusterMode(io.cdap.cdap.app.guice.ClusterMode) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) MapReduce(io.cdap.cdap.api.mapreduce.MapReduce) DatasetFramework(io.cdap.cdap.data2.dataset2.DatasetFramework) NameMappedDatasetFramework(io.cdap.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) InstantiatorFactory(io.cdap.cdap.common.lang.InstantiatorFactory) MetricsFieldSetter(io.cdap.cdap.internal.app.runtime.MetricsFieldSetter) ProgramType(io.cdap.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramController(io.cdap.cdap.app.runtime.ProgramController) MapReduceSpecification(io.cdap.cdap.api.mapreduce.MapReduceSpecification) Arguments(io.cdap.cdap.app.runtime.Arguments) MessagingService(io.cdap.cdap.messaging.MessagingService) Service(com.google.common.util.concurrent.Service) MetricsCollectionService(io.cdap.cdap.api.metrics.MetricsCollectionService) ProgramId(io.cdap.cdap.proto.id.ProgramId) BasicProgramContext(io.cdap.cdap.internal.app.runtime.BasicProgramContext) DataSetFieldSetter(io.cdap.cdap.internal.app.runtime.DataSetFieldSetter) PropertyFieldSetter(io.cdap.cdap.common.lang.PropertyFieldSetter) WorkflowProgramInfo(io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) PluginInstantiator(io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramContextAware(io.cdap.cdap.data.ProgramContextAware)

Example 8 with MapReduceSpecification

use of io.cdap.cdap.api.mapreduce.MapReduceSpecification in project cdap by cdapio.

the class DefaultMapReduceConfigurer method createSpecification.

public MapReduceSpecification createSpecification() {
    Set<String> datasets = new HashSet<>();
    Reflections.visit(mapReduce, mapReduce.getClass(), new PropertyFieldExtractor(properties), new DataSetFieldExtractor(datasets));
    return new MapReduceSpecification(mapReduce.getClass().getName(), name, description, inputDataset, outputDataset, datasets, properties, driverResources, mapperResources, reducerResources, getPlugins());
}
Also used : MapReduceSpecification(io.cdap.cdap.api.mapreduce.MapReduceSpecification) PropertyFieldExtractor(io.cdap.cdap.internal.specification.PropertyFieldExtractor) DataSetFieldExtractor(io.cdap.cdap.internal.specification.DataSetFieldExtractor) HashSet(java.util.HashSet)

Example 9 with MapReduceSpecification

use of io.cdap.cdap.api.mapreduce.MapReduceSpecification in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testMapReduceDriverResources.

@Test
public void testMapReduceDriverResources() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    MapReduceSpecification mrSpec = app.getSpecification().getMapReduce().get(AppWithMapReduce.ClassicWordCount.class.getSimpleName());
    Assert.assertEquals(AppWithMapReduce.ClassicWordCount.MEMORY_MB, mrSpec.getDriverResources().getMemoryMB());
}
Also used : ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) MapReduceSpecification(io.cdap.cdap.api.mapreduce.MapReduceSpecification) Test(org.junit.Test)

Example 10 with MapReduceSpecification

use of io.cdap.cdap.api.mapreduce.MapReduceSpecification in project cdap by caskdata.

the class MapReduceProgramRunner method run.

@Override
public ProgramController run(final Program program, ProgramOptions options) {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
    MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
    Arguments arguments = options.getArguments();
    RunId runId = ProgramRunners.getRunId(options);
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    MapReduce mapReduce;
    try {
        mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
    } catch (Exception e) {
        LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
        throw Throwables.propagate(e);
    }
    // List of all Closeable resources that needs to be cleanup
    List<Closeable> closeables = new ArrayList<>();
    try {
        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            closeables.add(pluginInstantiator);
        }
        final BasicMapReduceContext context = new BasicMapReduceContext(program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txSystemClient, programDatasetFramework, getPluginArchive(options), pluginInstantiator, secureStore, secureStoreManager, messagingService, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory);
        closeables.add(context);
        Reflections.visit(mapReduce, mapReduce.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new MetricsFieldSetter(context.getMetrics()), new DataSetFieldSetter(context));
        // note: this sets logging context on the thread level
        LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
        // Set the job queue to hConf if it is provided
        Configuration hConf = new Configuration(this.hConf);
        String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
        if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
            hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
        }
        ClusterMode clusterMode = ProgramRunners.getClusterMode(options);
        Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(), locationFactory, clusterMode, fieldLineageWriter);
        mapReduceRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
        LOG.debug("Starting MapReduce Job: {}", context);
        // be running the job, but the data directory will be owned by cdap.
        if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
            mapReduceRuntimeService.start();
        } else {
            ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
        }
        return controller;
    } catch (Exception e) {
        closeAllQuietly(closeables);
        throw Throwables.propagate(e);
    }
}
Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) Configuration(org.apache.hadoop.conf.Configuration) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) ClusterMode(io.cdap.cdap.app.guice.ClusterMode) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) MapReduce(io.cdap.cdap.api.mapreduce.MapReduce) DatasetFramework(io.cdap.cdap.data2.dataset2.DatasetFramework) NameMappedDatasetFramework(io.cdap.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) InstantiatorFactory(io.cdap.cdap.common.lang.InstantiatorFactory) MetricsFieldSetter(io.cdap.cdap.internal.app.runtime.MetricsFieldSetter) ProgramType(io.cdap.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramController(io.cdap.cdap.app.runtime.ProgramController) MapReduceSpecification(io.cdap.cdap.api.mapreduce.MapReduceSpecification) Arguments(io.cdap.cdap.app.runtime.Arguments) MessagingService(io.cdap.cdap.messaging.MessagingService) Service(com.google.common.util.concurrent.Service) MetricsCollectionService(io.cdap.cdap.api.metrics.MetricsCollectionService) ProgramId(io.cdap.cdap.proto.id.ProgramId) BasicProgramContext(io.cdap.cdap.internal.app.runtime.BasicProgramContext) DataSetFieldSetter(io.cdap.cdap.internal.app.runtime.DataSetFieldSetter) PropertyFieldSetter(io.cdap.cdap.common.lang.PropertyFieldSetter) WorkflowProgramInfo(io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) PluginInstantiator(io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramContextAware(io.cdap.cdap.data.ProgramContextAware)

Aggregations

MapReduceSpecification (io.cdap.cdap.api.mapreduce.MapReduceSpecification)22 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)10 JsonObject (com.google.gson.JsonObject)4 MetricsCollectionService (io.cdap.cdap.api.metrics.MetricsCollectionService)4 Plugin (io.cdap.cdap.api.plugin.Plugin)4 ServiceSpecification (io.cdap.cdap.api.service.ServiceSpecification)4 SparkSpecification (io.cdap.cdap.api.spark.SparkSpecification)4 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)4 ProgramContextAware (io.cdap.cdap.data.ProgramContextAware)4 DatasetFramework (io.cdap.cdap.data2.dataset2.DatasetFramework)4 BasicProgramContext (io.cdap.cdap.internal.app.runtime.BasicProgramContext)4 NameMappedDatasetFramework (io.cdap.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework)4 WorkflowProgramInfo (io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo)4 MessagingService (io.cdap.cdap.messaging.MessagingService)4 ProgramType (io.cdap.cdap.proto.ProgramType)4 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)3 ProgramId (io.cdap.cdap.proto.id.ProgramId)3 Test (org.junit.Test)3 CacheLoader (com.google.common.cache.CacheLoader)2 Service (com.google.common.util.concurrent.Service)2