Search in sources :

Example 21 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testSimpleWeightedScaling.

@Test(timeout = 5000)
public void testSimpleWeightedScaling() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",").join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000l);
    // First request - ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput)
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 70% of 10K = 7000
    // 4 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2)
    // Scale down to - 3000, 1000, 1000, 2000
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(1000, e3Callback.assigned);
    assertEquals(2000, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 22 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMapProcessor method testMapProcessor.

@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
    task.initialize();
    task.run();
    task.close();
    sharedExecutor.shutdownNow();
    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier(), outputContext.getDagIdentifier());
    // TODO NEWTEZ FIXME OutputCommitter verification
    // MRTask mrTask = (MRTask)t.getProcessor();
    // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    // .getCommitter().getClass().getName());
    // t.close();
    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
            prev = key.get();
        }
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) Text(org.apache.hadoop.io.Text) OutputContext(org.apache.tez.runtime.api.OutputContext) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) LongWritable(org.apache.hadoop.io.LongWritable) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput) Test(org.junit.Test)

Example 23 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutput method createMockOutputContext.

private OutputContext createMockOutputContext(UserPayload payload) {
    OutputContext outputContext = mock(OutputContext.class);
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    when(outputContext.getUserPayload()).thenReturn(payload);
    when(outputContext.getApplicationId()).thenReturn(appId);
    when(outputContext.getTaskVertexIndex()).thenReturn(1);
    when(outputContext.getTaskAttemptNumber()).thenReturn(1);
    when(outputContext.getCounters()).thenReturn(new TezCounters());
    return outputContext;
}
Also used : ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) OutputContext(org.apache.tez.runtime.api.OutputContext) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 24 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutput method testOldAPI_WorkOutputPathOutputFormat.

// test to try and use the WorkOutputPathOutputFormat - this checks that the workOutput path is
// set while creating recordWriters
@Test(timeout = 5000)
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception {
    String outputPath = "/tmp/output";
    Configuration conf = new Configuration();
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(conf, OldAPI_WorkOutputPathReadingOutputFormat.class, outputPath).build();
    OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
    MROutput output = new MROutput(outputContext, 2);
    output.initialize();
    assertEquals(false, output.isMapperOutput);
    assertEquals(false, output.useNewApi);
    assertEquals(OldAPI_WorkOutputPathReadingOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertNotNull(output.oldApiTaskAttemptContext);
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 25 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestMROutput method testOldAPI_TextOutputFormat.

@Test(timeout = 5000)
public void testOldAPI_TextOutputFormat() throws Exception {
    String outputPath = "/tmp/output";
    Configuration conf = new Configuration();
    conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    DataSinkDescriptor dataSink = MROutput.createConfigBuilder(conf, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath).build();
    OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
    MROutput output = new MROutput(outputContext, 2);
    output.initialize();
    assertEquals(false, output.isMapperOutput);
    assertEquals(false, output.useNewApi);
    assertEquals(org.apache.hadoop.mapred.TextOutputFormat.class, output.oldOutputFormat.getClass());
    assertNull(output.newOutputFormat);
    assertNotNull(output.oldApiTaskAttemptContext);
    assertNull(output.newApiTaskAttemptContext);
    assertNotNull(output.oldRecordWriter);
    assertNull(output.newRecordWriter);
    assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Aggregations

OutputContext (org.apache.tez.runtime.api.OutputContext)61 Test (org.junit.Test)38 Configuration (org.apache.hadoop.conf.Configuration)19 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)15 MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)14 TezCounters (org.apache.tez.common.counters.TezCounters)13 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)13 UserPayload (org.apache.tez.dag.api.UserPayload)13 Path (org.apache.hadoop.fs.Path)12 Event (org.apache.tez.runtime.api.Event)12 ByteString (com.google.protobuf.ByteString)11 DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor)11 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)11 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 InputContext (org.apache.tez.runtime.api.InputContext)9 BitSet (java.util.BitSet)8 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)8 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)8 ByteBuffer (java.nio.ByteBuffer)6 Text (org.apache.hadoop.io.Text)6