Search in sources :

Example 56 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

In class TestMROutput, method testNewAPI_SequenceFileOutputFormat:

/**
 * Checks that an {@code MROutput} built through its config builder with the
 * {@code SequenceFileOutputFormat} class ends up, after {@code initialize()},
 * with only its new-API members populated and its old-API members left null.
 */
@Test(timeout = 5000)
public void testNewAPI_SequenceFileOutputFormat() throws Exception {
    final String sinkPath = "/tmp/output";

    // Declare the key/value classes on the job configuration before building the sink.
    final JobConf jobConf = new JobConf();
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(Text.class);

    final DataSinkDescriptor sinkDescriptor = MROutput
            .createConfigBuilder(jobConf, SequenceFileOutputFormat.class, sinkPath)
            .build();
    final OutputContext mockContext =
            createMockOutputContext(sinkDescriptor.getOutputDescriptor().getUserPayload());

    final MROutput mrOutput = new MROutput(mockContext, 2);
    mrOutput.initialize();

    // API selection and output format.
    assertEquals(true, mrOutput.useNewApi);
    assertEquals(SequenceFileOutputFormat.class, mrOutput.newOutputFormat.getClass());
    assertNull(mrOutput.oldOutputFormat);

    // Task attempt context carries the configured key/value classes.
    assertEquals(NullWritable.class, mrOutput.newApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, mrOutput.newApiTaskAttemptContext.getOutputValueClass());
    assertNull(mrOutput.oldApiTaskAttemptContext);

    // Record writer and committer.
    assertNotNull(mrOutput.newRecordWriter);
    assertNull(mrOutput.oldRecordWriter);
    assertEquals(FileOutputCommitter.class, mrOutput.committer.getClass());
}
Also used : SequenceFileOutputFormat(org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat) JobConf(org.apache.hadoop.mapred.JobConf) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 57 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

In class TestMROutput, method testOldAPI_SequenceFileOutputFormat:

/**
 * Checks that an {@code MROutput} built through its config builder with
 * {@code org.apache.hadoop.mapred.SequenceFileOutputFormat} ends up, after
 * {@code initialize()}, with only its old-API members populated and its
 * new-API members left null.
 */
@Test(timeout = 5000)
public void testOldAPI_SequenceFileOutputFormat() throws Exception {
    final String sinkPath = "/tmp/output";

    // Declare the key/value classes on the job configuration before building the sink.
    final JobConf jobConf = new JobConf();
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(Text.class);

    final DataSinkDescriptor sinkDescriptor = MROutput
            .createConfigBuilder(jobConf, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, sinkPath)
            .build();
    final OutputContext mockContext =
            createMockOutputContext(sinkDescriptor.getOutputDescriptor().getUserPayload());

    final MROutput mrOutput = new MROutput(mockContext, 2);
    mrOutput.initialize();

    // API selection and output format.
    assertEquals(false, mrOutput.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, mrOutput.oldOutputFormat.getClass());
    assertNull(mrOutput.newOutputFormat);

    // Task attempt context carries the configured key/value classes.
    assertEquals(NullWritable.class, mrOutput.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, mrOutput.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(mrOutput.newApiTaskAttemptContext);

    // Record writer and committer.
    assertNotNull(mrOutput.oldRecordWriter);
    assertNull(mrOutput.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
Also used : SequenceFileOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat) JobConf(org.apache.hadoop.mapred.JobConf) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 58 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

In class TestMROutput, method testNewAPI_TextOutputFormat:

/**
 * Checks that an {@code MROutput} built through its config builder with the
 * {@code TextOutputFormat} class, on a configuration where
 * {@code MRConfig.IS_MAP_PROCESSOR} is set to true, reports itself as mapper
 * output and populates only its new-API members after {@code initialize()}.
 */
@Test(timeout = 5000)
public void testNewAPI_TextOutputFormat() throws Exception {
    final String sinkPath = "/tmp/output";

    final Configuration configuration = new Configuration();
    configuration.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);

    final DataSinkDescriptor sinkDescriptor = MROutput
            .createConfigBuilder(configuration, TextOutputFormat.class, sinkPath)
            .build();
    final OutputContext mockContext =
            createMockOutputContext(sinkDescriptor.getOutputDescriptor().getUserPayload());

    final MROutput mrOutput = new MROutput(mockContext, 2);
    mrOutput.initialize();

    // The flag set on the configuration above is reflected on the output.
    assertEquals(true, mrOutput.isMapperOutput);

    // API selection and output format.
    assertEquals(true, mrOutput.useNewApi);
    assertEquals(TextOutputFormat.class, mrOutput.newOutputFormat.getClass());
    assertNull(mrOutput.oldOutputFormat);

    // Task attempt context, record writer and committer.
    assertNotNull(mrOutput.newApiTaskAttemptContext);
    assertNull(mrOutput.oldApiTaskAttemptContext);
    assertNotNull(mrOutput.newRecordWriter);
    assertNull(mrOutput.oldRecordWriter);
    assertEquals(FileOutputCommitter.class, mrOutput.committer.getClass());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 59 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

In class TestMROutputLegacy, method testOldAPI_MR:

/**
 * Simulates the behavior of translating MR to DAG using the MR old API.
 *
 * <p>The output format and output path are set directly on the {@code JobConf},
 * the configuration is turned into a user payload, and the sink is assembled by
 * hand from an {@code MROutputLegacy} output descriptor and an
 * {@code MROutputCommitter} committer descriptor. After {@code initialize()},
 * only the old-API members of the output are expected to be populated.
 */
@Test(timeout = 5000)
public void testOldAPI_MR() throws Exception {
    final String sinkPath = "/tmp/output";

    final JobConf jobConf = new JobConf();
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(jobConf, new Path(sinkPath));
    // the output is attached to reducer
    jobConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);

    // Hand-assemble the sink from the serialized configuration.
    final UserPayload serializedConf = TezUtils.createUserPayloadFromConf(jobConf);
    final OutputDescriptor outputDescriptor =
            OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(serializedConf);
    final OutputCommitterDescriptor committerDescriptor =
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    final DataSinkDescriptor sinkDescriptor =
            DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null);

    final OutputContext mockContext =
            createMockOutputContext(sinkDescriptor.getOutputDescriptor().getUserPayload());
    final MROutputLegacy legacyOutput = new MROutputLegacy(mockContext, 2);
    legacyOutput.initialize();

    // API selection and output format.
    assertEquals(false, legacyOutput.useNewApi);
    assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, legacyOutput.oldOutputFormat.getClass());
    assertNull(legacyOutput.newOutputFormat);

    // Task attempt context carries the configured key/value classes.
    assertEquals(NullWritable.class, legacyOutput.oldApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, legacyOutput.oldApiTaskAttemptContext.getOutputValueClass());
    assertNull(legacyOutput.newApiTaskAttemptContext);

    // Record writer and committer.
    assertNotNull(legacyOutput.oldRecordWriter);
    assertNull(legacyOutput.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, legacyOutput.committer.getClass());
}
Also used : Path(org.apache.hadoop.fs.Path) UserPayload(org.apache.tez.dag.api.UserPayload) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) JobConf(org.apache.hadoop.mapred.JobConf) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 60 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

In class TestMROutputLegacy, method createMockOutputContext:

/**
 * Creates a mocked {@link OutputContext} with the stubbed values the tests rely on.
 *
 * <p>The mock returns the supplied payload, an {@link ApplicationId} created from
 * the current time and id 1, vertex index 1, attempt number 1, and a fresh
 * {@link TezCounters} instance.
 *
 * @param payload the user payload the mock returns from {@code getUserPayload()}
 * @return the stubbed mock context
 */
private OutputContext createMockOutputContext(UserPayload payload) {
    final OutputContext mockedContext = mock(OutputContext.class);
    final ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);

    // Identity of the simulated task.
    when(mockedContext.getApplicationId()).thenReturn(applicationId);
    when(mockedContext.getTaskVertexIndex()).thenReturn(1);
    when(mockedContext.getTaskAttemptNumber()).thenReturn(1);

    // Configuration payload and counters handed to the output under test.
    when(mockedContext.getUserPayload()).thenReturn(payload);
    when(mockedContext.getCounters()).thenReturn(new TezCounters());

    return mockedContext;
}
Also used : ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) OutputContext(org.apache.tez.runtime.api.OutputContext) TezCounters(org.apache.tez.common.counters.TezCounters)

Aggregations

OutputContext (org.apache.tez.runtime.api.OutputContext) 61, Test (org.junit.Test) 38, Configuration (org.apache.hadoop.conf.Configuration) 19, TezConfiguration (org.apache.tez.dag.api.TezConfiguration) 15, MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) 14, TezCounters (org.apache.tez.common.counters.TezCounters) 13, OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor) 13, UserPayload (org.apache.tez.dag.api.UserPayload) 13, Path (org.apache.hadoop.fs.Path) 12, Event (org.apache.tez.runtime.api.Event) 12, ByteString (com.google.protobuf.ByteString) 11, DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor) 11, CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent) 11, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 9, InputContext (org.apache.tez.runtime.api.InputContext) 9, BitSet (java.util.BitSet) 8, InputDescriptor (org.apache.tez.dag.api.InputDescriptor) 8, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration) 8, ByteBuffer (java.nio.ByteBuffer) 6, Text (org.apache.hadoop.io.Text) 6