use of co.cask.cdap.api.mapreduce.MapReduceContext in project cdap by caskdata.
the class StreamToDataset method initialize.
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setNumReduceTasks(0);
  WorkflowToken workflowToken = context.getWorkflowToken();
  Class<? extends Mapper> mapper = PageTitleToDatasetMapper.class;
  String inputStream = WikipediaPipelineApp.PAGE_TITLES_STREAM;
  String outputDataset = WikipediaPipelineApp.PAGE_TITLES_DATASET;
  if (workflowToken != null) {
    Value likesToDatasetResult = workflowToken.get("result", WikipediaPipelineApp.LIKES_TO_DATASET_MR_NAME);
    if (likesToDatasetResult != null && likesToDatasetResult.getAsBoolean()) {
      // The "likes" stream-to-dataset MapReduce has already run successfully in this workflow run,
      // so process the raw Wikipedia stream into its dataset instead.
      mapper = RawWikiDataToDatasetMapper.class;
      inputStream = WikipediaPipelineApp.RAW_WIKIPEDIA_STREAM;
      outputDataset = WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET;
    }
  }
  LOG.info("Using '{}' as the input stream and '{}' as the output dataset.", inputStream, outputDataset);
  job.setMapperClass(mapper);
  String dataNamespace = context.getRuntimeArguments().get(WikipediaPipelineApp.NAMESPACE_ARG);
  dataNamespace = dataNamespace == null ? getContext().getNamespace() : dataNamespace;
  context.addInput(Input.ofStream(inputStream).fromNamespace(dataNamespace));
  context.addOutput(Output.ofDataset(outputDataset).fromNamespace(dataNamespace));
}
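
The WorkflowToken lookup above only works if an earlier workflow node actually stored a boolean under the key "result". A minimal sketch of how the preceding "likes"-to-dataset MapReduce could record that from its destroy() method; the succeeded flag below is a placeholder, not the original program's logic:

@Override
public void destroy() {
  WorkflowToken workflowToken = getContext().getWorkflowToken();
  if (workflowToken != null) {
    // Placeholder: the real program would derive this from whether the run completed successfully.
    boolean succeeded = true;
    // "result" is the key that StreamToDataset.initialize() reads above.
    workflowToken.put("result", Value.of(succeeded));
  }
}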
use of co.cask.cdap.api.mapreduce.MapReduceContext in project cdap by caskdata.
the class TopNMapReduce method initialize.
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Map<String, String> runtimeArguments = context.getRuntimeArguments();
  Job job = context.getHadoopJob();
  WorkflowToken workflowToken = context.getWorkflowToken();
  int topNRank = 10;
  if (runtimeArguments.containsKey("topn.rank")) {
    topNRank = Integer.parseInt(runtimeArguments.get("topn.rank"));
  }
  if (workflowToken != null) {
    workflowToken.put("topn.rank", Value.of(topNRank));
  }
  int numReduceTasks = 1;
  if (runtimeArguments.containsKey("num.reduce.tasks")) {
    numReduceTasks = Integer.parseInt(runtimeArguments.get("num.reduce.tasks"));
  }
  job.setNumReduceTasks(numReduceTasks);
  job.setMapperClass(TokenizerMapper.class);
  job.setReducerClass(TopNReducer.class);
  String dataNamespace = runtimeArguments.get(WikipediaPipelineApp.NAMESPACE_ARG);
  dataNamespace = dataNamespace == null ? getContext().getNamespace() : dataNamespace;
  context.addInput(Input.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET).fromNamespace(dataNamespace));
  context.addOutput(Output.ofDataset(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT).fromNamespace(dataNamespace));
}
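
Values stored in the token this way can be read back by later nodes of the same workflow. A minimal sketch of such a lookup; the node name "TopNMapReduce" is an assumption about how the program appears in the workflow:

// In the initialize() of a later node in the same workflow:
WorkflowToken token = getContext().getWorkflowToken();
if (token != null) {
  Value rank = token.get("topn.rank", "TopNMapReduce"); // node name is an assumption
  if (rank != null) {
    int topNRank = (int) rank.getAsLong();
    // ... use the rank recorded by the TopN MapReduce ...
  }
}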
use of co.cask.cdap.api.mapreduce.MapReduceContext in project cdap by caskdata.
the class WikiContentValidatorAndNormalizer method initialize.
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(FilterNormalizerMapper.class);
  job.setNumReduceTasks(0);
  String dataNamespace = context.getRuntimeArguments().get(WikipediaPipelineApp.NAMESPACE_ARG);
  dataNamespace = dataNamespace == null ? getContext().getNamespace() : dataNamespace;
  context.addInput(Input.ofDataset(WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET).fromNamespace(dataNamespace));
  context.addOutput(Output.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET).fromNamespace(dataNamespace));
}
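
The three snippets above all resolve the namespace for their inputs and outputs from the same runtime argument, falling back to the program's own namespace when it is absent. A minimal sketch of supplying that argument when the program (or its workflow) is started; "wikidata" is a hypothetical namespace name:

// Runtime arguments passed at start time; initialize() picks this up via getRuntimeArguments()
// and uses it in Input.ofDataset(...).fromNamespace(...) / Output.ofDataset(...).fromNamespace(...).
Map<String, String> runtimeArgs = new HashMap<>();
runtimeArgs.put(WikipediaPipelineApp.NAMESPACE_ARG, "wikidata"); // hypothetical namespace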
use of co.cask.cdap.api.mapreduce.MapReduceContext in project cdap by caskdata.
the class PurchaseHistoryBuilder method initialize.
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setReducerClass(PerUserReducer.class);
  context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class);
  context.addOutput(Output.ofDataset("history"));
  // Override default memory usage if the corresponding runtime arguments are set.
  Map<String, String> runtimeArgs = context.getRuntimeArguments();
  String mapperMemoryMBStr = runtimeArgs.get(MAPPER_MEMORY_MB);
  if (mapperMemoryMBStr != null) {
    context.setMapperResources(new Resources(Integer.parseInt(mapperMemoryMBStr)));
  }
  String reducerMemoryMBStr = runtimeArgs.get(REDUCER_MEMORY_MB);
  if (reducerMemoryMBStr != null) {
    context.setReducerResources(new Resources(Integer.parseInt(reducerMemoryMBStr)));
  }
}
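
new Resources(int) specifies container memory in megabytes; Resources also has a two-argument constructor that additionally takes a number of virtual cores. A minimal sketch extending the pattern above; the "mapper.vcores" runtime-argument key is an assumption, not part of the original example:

String mapperVcoresStr = runtimeArgs.get("mapper.vcores"); // hypothetical runtime-argument key
if (mapperMemoryMBStr != null && mapperVcoresStr != null) {
  // Memory in MB plus the number of virtual cores for each mapper container.
  context.setMapperResources(new Resources(Integer.parseInt(mapperMemoryMBStr),
                                           Integer.parseInt(mapperVcoresStr)));
}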
use of co.cask.cdap.api.mapreduce.MapReduceContext in project cdap by caskdata.
the class WordCount method initialize.
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(Tokenizer.class);
  job.setReducerClass(Counter.class);
  job.setNumReduceTasks(1);
  String inputDataset = context.getRuntimeArguments().get("input");
  inputDataset = inputDataset != null ? inputDataset : "lines";
  String outputDataset = context.getRuntimeArguments().get("output");
  outputDataset = outputDataset != null ? outputDataset : "counts";
  context.addInput(Input.ofDataset(inputDataset));
  context.addOutput(Output.ofDataset(outputDataset));
}
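
Because the input and output dataset names fall back to "lines" and "counts" only when no runtime arguments are given, they can be overridden per run. A sketch of exercising this from the CDAP unit-test framework; WordCountApp.class and the dataset names "myLines"/"myCounts" are hypothetical, and the program name "WordCount" matches the class above:

ApplicationManager appManager = deployApplication(WordCountApp.class); // hypothetical application class
MapReduceManager mrManager = appManager.getMapReduceManager("WordCount");
Map<String, String> args = new HashMap<>();
args.put("input", "myLines");
args.put("output", "myCounts");
mrManager.start(args);
// ...wait for the run to finish using the test framework's wait methods, then verify "myCounts".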