use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestWeightedScalingMemoryDistributor method testSimpleWeightedScaling.
/**
 * Issues four equally sized memory requests (two inputs, two outputs) against a
 * distributor configured with weighted ratios, and checks the amount each
 * requester is finally assigned.
 */
@Test(timeout = 5000)
public void testSimpleWeightedScaling() throws TezException {
  Configuration weightedConf = new Configuration(this.conf);
  String[] weightStrings = WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1);
  weightedConf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, weightStrings);
  // Dump the effective ratio configuration to stderr for debugging.
  System.err.println(Joiner.on(",").join(
      weightedConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

  MemoryDistributor distributor = new MemoryDistributor(2, 2, weightedConf);
  distributor.setJvmMemory(10000L);

  // Request 1: input described by OrderedGroupedKVInput
  MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
  InputContext orderedInputContext = createTestInputContext();
  InputDescriptor orderedInputDescriptor = createTestInputDescriptor(OrderedGroupedKVInput.class);
  distributor.requestMemory(10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

  // Request 2: input described by UnorderedKVInput
  MemoryUpdateCallbackForTest unorderedInputCallback = new MemoryUpdateCallbackForTest();
  InputContext unorderedInputContext = createTestInputContext();
  InputDescriptor unorderedInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
  distributor.requestMemory(10000, unorderedInputCallback, unorderedInputContext, unorderedInputDescriptor);

  // Request 3: output with the default test descriptor (simulates MROutput)
  MemoryUpdateCallbackForTest defaultOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext defaultOutputContext = createTestOutputContext();
  OutputDescriptor defaultOutputDescriptor = createTestOutputDescriptor();
  distributor.requestMemory(10000, defaultOutputCallback, defaultOutputContext, defaultOutputDescriptor);

  // Request 4: output described by OrderedPartitionedKVOutput
  MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext sortedOutputContext = createTestOutputContext();
  OutputDescriptor sortedOutputDescriptor = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
  distributor.requestMemory(10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

  distributor.makeInitialAllocations();

  // Total available: 70% of 10K = 7000
  // Four 10K requests carrying weights 3, 1, 1 and 2 respectively,
  // so the expected scaled-down grants are 3000, 1000, 1000 and 2000.
  assertEquals(3000, orderedInputCallback.assigned);
  assertEquals(1000, unorderedInputCallback.assigned);
  assertEquals(1000, defaultOutputCallback.assigned);
  assertEquals(2000, sortedOutputCallback.assigned);
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestMapProcessor method testMapProcessor.
/**
 * Runs a single map task end to end (MRInputLegacy in, OrderedPartitionedKVOutput out)
 * and then reads the produced map output file back, checking that the keys appear in
 * non-decreasing order.
 *
 * @throws Exception if task setup, execution or reading the output fails
 */
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);
  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");
  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  LogicalIOProcessorRuntimeTask task;
  try {
    task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
    task.initialize();
    task.run();
    task.close();
  } finally {
    // Always stop the executor, even when the task fails part-way through.
    sharedExecutor.shutdownNow();
  }

  OutputContext outputContext = task.getOutputContexts().iterator().next();
  // NOTE(review): mapOutputs is not read below; kept as in the original - confirm it can be dropped.
  TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier(), outputContext.getDagIdentifier());
  // TODO NEWTEZ FIXME OutputCommitter verification
  // MRTask mrTask = (MRTask)t.getProcessor();
  // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
  // .getCommitter().getClass().getName());
  // t.close();
  Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
  LOG.info("mapOutputFile = " + mapOutputFile);

  IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
  try {
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    // Long.MIN_VALUE is <= every possible key, so the first record always passes.
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
      reader.nextRawValue(valueBuf);
      key.readFields(keyBuf);
      value.readFields(valueBuf);
      long current = key.get();
      // Thrown explicitly so the ordering check does not depend on the JVM's -ea flag.
      if (prev > current) {
        throw new AssertionError("Map output keys out of order: " + prev + " precedes " + current);
      }
      // Track the last key on every record; otherwise the comparison above never sees real data.
      prev = current;
      LOG.info("key = " + current + "; value = " + value);
    }
  } finally {
    reader.close();
  }
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestMROutput method createMockOutputContext.
/**
 * Builds a Mockito-backed {@link OutputContext} whose stubbed getters return the given
 * payload, a freshly created application id, fixed vertex/attempt numbers and a new
 * counters object.
 *
 * @param payload the user payload the mocked context should report
 * @return the stubbed mock context
 */
private OutputContext createMockOutputContext(UserPayload payload) {
  final OutputContext mockedContext = mock(OutputContext.class);
  final ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);

  // Identity-related stubs.
  when(mockedContext.getApplicationId()).thenReturn(applicationId);
  when(mockedContext.getTaskVertexIndex()).thenReturn(1);
  when(mockedContext.getTaskAttemptNumber()).thenReturn(1);

  // Payload and counters.
  when(mockedContext.getUserPayload()).thenReturn(payload);
  when(mockedContext.getCounters()).thenReturn(new TezCounters());

  return mockedContext;
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestMROutput method testOldAPI_WorkOutputPathOutputFormat.
/**
 * Exercises MROutput with the WorkOutputPathOutputFormat (old API). This checks that
 * the work output path is set at the time record writers are created, and that only
 * the old-API members of the output are populated after initialization.
 */
@Test(timeout = 5000)
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception {
  final String targetDir = "/tmp/output";
  Configuration baseConf = new Configuration();
  baseConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);

  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(baseConf, OldAPI_WorkOutputPathReadingOutputFormat.class, targetDir)
      .build();
  UserPayload sinkPayload = sink.getOutputDescriptor().getUserPayload();
  OutputContext mockContext = createMockOutputContext(sinkPayload);

  MROutput mrOutput = new MROutput(mockContext, 2);
  mrOutput.initialize();

  // Mode flags: not a mapper output, and the old API is in use.
  assertEquals(false, mrOutput.isMapperOutput);
  assertEquals(false, mrOutput.useNewApi);

  // Only the old-API format, context and writer should exist.
  assertEquals(OldAPI_WorkOutputPathReadingOutputFormat.class, mrOutput.oldOutputFormat.getClass());
  assertNull(mrOutput.newOutputFormat);
  assertNotNull(mrOutput.oldApiTaskAttemptContext);
  assertNull(mrOutput.newApiTaskAttemptContext);
  assertNotNull(mrOutput.oldRecordWriter);
  assertNull(mrOutput.newRecordWriter);

  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestMROutput method testOldAPI_TextOutputFormat.
/**
 * Initializes MROutput with the old-API (mapred) TextOutputFormat and checks that only
 * the old-API format, task attempt context and record writer are populated, and that
 * the committer is the mapred FileOutputCommitter.
 */
@Test(timeout = 5000)
public void testOldAPI_TextOutputFormat() throws Exception {
  final String targetDir = "/tmp/output";
  Configuration baseConf = new Configuration();
  baseConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);

  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(baseConf, org.apache.hadoop.mapred.TextOutputFormat.class, targetDir)
      .build();
  UserPayload sinkPayload = sink.getOutputDescriptor().getUserPayload();
  OutputContext mockContext = createMockOutputContext(sinkPayload);

  MROutput mrOutput = new MROutput(mockContext, 2);
  mrOutput.initialize();

  // Mode flags: not a mapper output, and the old API is in use.
  assertEquals(false, mrOutput.isMapperOutput);
  assertEquals(false, mrOutput.useNewApi);

  // Only the old-API format, context and writer should exist.
  assertEquals(org.apache.hadoop.mapred.TextOutputFormat.class, mrOutput.oldOutputFormat.getClass());
  assertNull(mrOutput.newOutputFormat);
  assertNotNull(mrOutput.oldApiTaskAttemptContext);
  assertNull(mrOutput.newApiTaskAttemptContext);
  assertNotNull(mrOutput.oldRecordWriter);
  assertNull(mrOutput.newRecordWriter);

  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
Aggregations