Search in sources :

Example 26 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutput method testNewAPI_WorkOutputPathOutputFormat.

// test to try and use the WorkOutputPathOutputFormat - this checks that the getDefaultWorkFile is
// set while creating recordWriters
@Test(timeout = 5000)
public void testNewAPI_WorkOutputPathOutputFormat() throws Exception {
    String outputPath = "/tmp/output";
    Configuration conf = new Configuration();
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(conf, NewAPI_WorkOutputPathReadingOutputFormat.class, outputPath).build();
    OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
    MROutput output = new MROutput(outputContext, 2);
    output.initialize();
    assertEquals(true, output.isMapperOutput);
    assertEquals(true, output.useNewApi);
    assertEquals(NewAPI_WorkOutputPathReadingOutputFormat.class, output.newOutputFormat.getClass());
    assertNull(output.oldOutputFormat);
    assertNotNull(output.newApiTaskAttemptContext);
    assertNull(output.oldApiTaskAttemptContext);
    assertNotNull(output.newRecordWriter);
    assertNull(output.oldRecordWriter);
    assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 27 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutputLegacy method testNewAPI_MR.

// simulate the behavior of translating MR to DAG using MR new API
@Test(timeout = 5000)
public void testNewAPI_MR() throws Exception {
    String outputPath = "/tmp/output";
    Job job = Job.getInstance();
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.getConfiguration().setBoolean("mapred.reducer.new-api", true);
    // the output is attached to reducer
    job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);
    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(true, output.useNewApi);
    assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
    assertNull(output.oldOutputFormat);
    assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.oldApiTaskAttemptContext);
    assertNotNull(output.newRecordWriter);
    assertNull(output.oldRecordWriter);
    assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Also used : Path(org.apache.hadoop.fs.Path) UserPayload(org.apache.tez.dag.api.UserPayload) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) Job(org.apache.hadoop.mapreduce.Job) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 28 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutputLegacy method testNewAPI_MapperOnly.

// simulate the behavior of translating mapper-only job to DAG using MR new API
@Test(timeout = 5000)
public void testNewAPI_MapperOnly() throws Exception {
    String outputPath = "/tmp/output";
    Job job = Job.getInstance();
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.getConfiguration().setBoolean("mapred.mapper.new-api", true);
    // the output is attached to mapper
    job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
    UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
    OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexPayload);
    DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);
    OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
    MROutputLegacy output = new MROutputLegacy(outputContext, 2);
    output.initialize();
    assertEquals(true, output.useNewApi);
    assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
    assertNull(output.oldOutputFormat);
    assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
    assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
    assertNull(output.oldApiTaskAttemptContext);
    assertNotNull(output.newRecordWriter);
    assertNull(output.oldRecordWriter);
    assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Also used : Path(org.apache.hadoop.fs.Path) UserPayload(org.apache.tez.dag.api.UserPayload) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) Job(org.apache.hadoop.mapreduce.Job) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 29 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMemoryDistributor method testScalingProcessor.

@Test(timeout = 5000)
public void testScalingProcessor() throws TezException {
    MemoryDistributor dist = new MemoryDistributor(2, 1, conf);
    dist.setJvmMemory(10000l);
    // First request
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor();
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor();
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - output
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - processor
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    ProcessorContext e4ProcessorContext1 = createTestProcessortContext();
    ProcessorDescriptor e4ProcessorDesc1 = createTestProcessorDescriptor();
    dist.requestMemory(5000, e4Callback, e4ProcessorContext1, e4ProcessorDesc1);
    dist.makeInitialAllocations();
    // Total available: 70% of 10K = 7000
    // 4 requests - 10K, 10K, 5K, 5K
    // Scale down to - 2333.33, 2333.33, 1166.66, 1166.66
    assertTrue(e1Callback.assigned >= 2333 && e1Callback.assigned <= 2334);
    assertTrue(e2Callback.assigned >= 2333 && e2Callback.assigned <= 2334);
    assertTrue(e3Callback.assigned >= 1166 && e3Callback.assigned <= 1167);
    assertTrue(e4Callback.assigned >= 1166 && e4Callback.assigned <= 1167);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Example 30 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMemoryDistributor method testReserveFractionConfigured.

@Test(timeout = 5000)
public void testReserveFractionConfigured() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.5d);
    MemoryDistributor dist = new MemoryDistributor(2, 1, conf);
    dist.setJvmMemory(10000l);
    // First request
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor();
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor();
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - output
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc2 = createTestOutputDescriptor();
    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 50% of 10K = 7000
    // 3 requests - 10K, 10K, 5K
    // Scale down to - 2000, 2000, 1000
    assertEquals(2000, e1Callback.assigned);
    assertEquals(2000, e2Callback.assigned);
    assertEquals(1000, e3Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Aggregations

OutputContext (org.apache.tez.runtime.api.OutputContext)61 Test (org.junit.Test)38 Configuration (org.apache.hadoop.conf.Configuration)19 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)15 MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)14 TezCounters (org.apache.tez.common.counters.TezCounters)13 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)13 UserPayload (org.apache.tez.dag.api.UserPayload)13 Path (org.apache.hadoop.fs.Path)12 Event (org.apache.tez.runtime.api.Event)12 ByteString (com.google.protobuf.ByteString)11 DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor)11 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)11 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 InputContext (org.apache.tez.runtime.api.InputContext)9 BitSet (java.util.BitSet)8 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)8 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)8 ByteBuffer (java.nio.ByteBuffer)6 Text (org.apache.hadoop.io.Text)6