Use of org.apache.tez.dag.api.DataSinkDescriptor in project tez by apache: class TestMROutputLegacy, method testOldAPI_MR.
// simulate the behavior of translating an MR job to a DAG using the MR old API
@Test(timeout = 5000)
public void testOldAPI_MR() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
  // the output is attached to the reducer
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  // serialize the JobConf into the payload carried by the OutputDescriptor
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);
  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();
  // initialize() should have resolved the old (mapred) API, not the new (mapreduce) one
  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
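The test hands the descriptor's payload to a mock OutputContext, but in a real MR-translated DAG the same DataSinkDescriptor would be attached to the vertex running the reduce work. A minimal sketch of that step, assuming the sink built above; the vertex name, output name, and use of ReduceProcessor here are illustrative, not taken from the test:

// Sketch (not part of the test): attach the DataSinkDescriptor to a vertex,
// which is what the MR-to-DAG translation layer does for a reduce stage.
// "reducer" and "MROutput" are illustrative names.
Vertex reducer = Vertex.create("reducer",
    ProcessorDescriptor.create(ReduceProcessor.class.getName()), 1);
reducer.addDataSink("MROutput", sink);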
Use of org.apache.tez.dag.api.DataSinkDescriptor in project tez by apache: class TestMROutputLegacy, method testOldAPI_MapperOnly.
// simulate the behavior of translating a Mapper-only job to a DAG using the MR old API
@Test(timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
  // the output is attached to the mapper
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);
  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();
  // same assertions as the reducer case: the old (mapred) API side is populated
  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
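The only difference from the previous test is the MRConfig.IS_MAP_PROCESSOR flag: the JobConf serialized into the UserPayload is what tells MROutputLegacy whether the sink hangs off the mapper or the reducer. A minimal sketch of that round trip through TezUtils, with illustrative variable names:

// Sketch: the JobConf travels through the descriptor as an opaque UserPayload
// and is deserialized again on the task side.
UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
Configuration recovered = TezUtils.createConfFromUserPayload(payload);
// true for the mapper-only case above, false when the output is on the reducer
boolean isMapOutput = recovered.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);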
Use of org.apache.tez.dag.api.DataSinkDescriptor in project tez by apache: class TestHistoryParser, method runWordCount.
private String runWordCount(String tokenizerProcessor, String summationProcessor,
    String dagName, boolean withTimeline) throws Exception {
  // HDFS output path
  Path outputLoc = new Path("/tmp/outPath_" + System.currentTimeMillis());
  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(conf,
      TextInputFormat.class, inputLoc.toString()).build();
  DataSinkDescriptor dataSink = MROutput.createConfigBuilder(conf,
      TextOutputFormat.class, outputLoc.toString()).build();
  Vertex tokenizerVertex = Vertex.create(TOKENIZER,
      ProcessorDescriptor.create(tokenizerProcessor)).addDataSource(INPUT, dataSource);
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();
  Vertex summationVertex = Vertex.create(SUMMATION,
      ProcessorDescriptor.create(summationProcessor), 1).addDataSink(OUTPUT, dataSink);
  // Create the DAG, add the vertices, and connect producer and consumer via the edge
  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex).addVertex(summationVertex)
      .addEdge(Edge.create(tokenizerVertex, summationVertex,
          edgeConf.createDefaultEdgeProperty()));
  TezClient tezClient = getTezClient(withTimeline);
  // Update the caller context
  CallerContext callerContext = CallerContext.create("TezExamples", "Tez WordCount Example Job");
  ApplicationId appId = tezClient.getAppMasterApplicationId();
  if (appId == null) {
    appId = ApplicationId.newInstance(1001L, 1);
  }
  callerContext.setCallerIdAndType(appId.toString(), "TezApplication");
  dag.setCallerContext(callerContext);
  DAGClient client = tezClient.submitDAG(dag);
  client.waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
  TezDAGID tezDAGID = TezDAGID.getInstance(tezClient.getAppMasterApplicationId(), 1);
  tezClient.stop();
  return tezDAGID.toString();
}
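For comparison with the legacy tests above, MROutput.createConfigBuilder assembles the same kind of DataSinkDescriptor (output descriptor plus committer) that testOldAPI_MR builds by hand from OutputDescriptor and OutputCommitterDescriptor. A minimal sketch of the builder route, assuming a plain Configuration; the output path and output name are illustrative, not from the test:

// Sketch: config-builder route to a DataSinkDescriptor, then attach it to a
// consumer vertex such as summationVertex above. Path and "OUT" are assumptions.
Configuration conf = new Configuration();
DataSinkDescriptor sink = MROutput
    .createConfigBuilder(conf, TextOutputFormat.class, "/tmp/wordcount_out")
    .build();
summationVertex.addDataSink("OUT", sink);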