Example use of org.apache.tez.runtime.api.InputContext in the Apache Tez project.
From class TestWeightedScalingMemoryDistributor, method testAdditionalReserveFractionWeightedScalingNonConcurrent.
/**
 * Registers two inputs and two outputs with a {@link MemoryDistributor} that is configured
 * with input/output concurrency switched off, non-concurrent inputs switched on, an
 * additional per-IO reservation fraction (0.025, capped at 0.2) and explicit weighted
 * ratios, then checks the amount handed to each requester after the initial allocation.
 */
@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScalingNonConcurrent() throws TezException {
  Configuration testConf = new Configuration(this.conf);
  testConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
  testConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
  testConf.setStrings(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
  testConf.setDouble(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
  testConf.setDouble(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);

  MemoryDistributor distributor = new MemoryDistributor(2, 2, testConf);
  distributor.setJvmMemory(10000L);

  // Request 1: ScatterGatherShuffleInput [weight 6]
  MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
  InputContext orderedInputContext = createTestInputContext();
  InputDescriptor orderedInputDescriptor =
      createTestInputDescriptor(OrderedGroupedKVInput.class);
  distributor.requestMemory(
      10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

  // Request 2: BroadcastInput [weight 2]
  MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
  InputContext broadcastInputContext = createTestInputContext();
  InputDescriptor broadcastInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
  distributor.requestMemory(
      10000, broadcastInputCallback, broadcastInputContext, broadcastInputDescriptor);

  // Request 3: randomOutput (simulates MROutput) [weight 1]
  MemoryUpdateCallbackForTest plainOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext plainOutputContext = createTestOutputContext();
  OutputDescriptor plainOutputDescriptor = createTestOutputDescriptor();
  distributor.requestMemory(
      10000, plainOutputCallback, plainOutputContext, plainOutputDescriptor);

  // Request 4: OnFileSortedOutput [weight 3]
  MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext sortedOutputContext = createTestOutputContext();
  OutputDescriptor sortedOutputDescriptor =
      createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
  distributor.requestMemory(
      10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

  distributor.makeInitialAllocations();

  // Total available: 60% of 10K = 6000
  // 4 requests (weight) - 10K (6), 10K(2), 10K(1), 10K(3)
  // Input and output memory overlap, so the inputs (6:2) and the outputs (1:3)
  // are each expected to add up to the full 6000.
  assertEquals(4500, orderedInputCallback.assigned);
  assertEquals(1500, broadcastInputCallback.assigned);
  assertEquals(1500, plainOutputCallback.assigned);
  assertEquals(4500, sortedOutputCallback.assigned);
}
Example use of org.apache.tez.runtime.api.InputContext in the Apache Tez project.
From class TestWeightedScalingMemoryDistributor, method testWeightedScalingNonConcurrentInputsDisabled.
/**
 * Registers two inputs, two outputs and one processor with a {@link MemoryDistributor}
 * configured with input/output concurrency switched off, non-concurrent inputs switched
 * off, a reserve fraction of 0.2 and explicit weighted ratios, then checks the amount
 * handed to each requester after the initial allocation.
 */
@Test(timeout = 5000)
public void testWeightedScalingNonConcurrentInputsDisabled() throws TezException {
  Configuration testConf = new Configuration(this.conf);
  testConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
  testConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, false);
  testConf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
  testConf.setStrings(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));

  // Dump the configured weight strings to stderr as a debugging aid.
  String configuredRatios = Joiner.on(",").join(
      testConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS));
  System.err.println(configuredRatios);

  MemoryDistributor distributor = new MemoryDistributor(2, 2, testConf);
  distributor.setJvmMemory(10000L);

  // Request 1: ScatterGatherShuffleInput
  MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
  InputContext orderedInputContext = createTestInputContext();
  InputDescriptor orderedInputDescriptor =
      createTestInputDescriptor(OrderedGroupedKVInput.class);
  distributor.requestMemory(
      10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

  // Request 2: BroadcastInput
  MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
  InputContext broadcastInputContext = createTestInputContext();
  InputDescriptor broadcastInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
  distributor.requestMemory(
      10000, broadcastInputCallback, broadcastInputContext, broadcastInputDescriptor);

  // Request 3: randomOutput (simulates MROutput)
  MemoryUpdateCallbackForTest plainOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext plainOutputContext = createTestOutputContext();
  OutputDescriptor plainOutputDescriptor = createTestOutputDescriptor();
  distributor.requestMemory(
      10000, plainOutputCallback, plainOutputContext, plainOutputDescriptor);

  // Request 4: OnFileSortedOutput
  MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  OutputContext sortedOutputContext = createTestOutputContext();
  OutputDescriptor sortedOutputDescriptor =
      createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
  distributor.requestMemory(
      10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

  // Request 5: Processor
  MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
  ProcessorContext processorContext = createTestProcessortContext();
  ProcessorDescriptor processorDescriptor = createTestProcessorDescriptor();
  distributor.requestMemory(10000, processorCallback, processorContext, processorDescriptor);

  distributor.makeInitialAllocations();

  // Total available: 80% of 10K = 8000
  // 5 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2), 10K(1)
  // Overlap input and output memory
  assertEquals(3000, orderedInputCallback.assigned);
  assertEquals(1000, broadcastInputCallback.assigned);
  assertEquals(2333, plainOutputCallback.assigned);
  assertEquals(4666, sortedOutputCallback.assigned);
  assertEquals(1000, processorCallback.assigned);
}
Example use of org.apache.tez.runtime.api.InputContext in the Apache Tez project.
From class TestMRInput, method testAttributesInJobConf.
/**
 * Drives an {@link MRInput} backed by a mocked {@link InputContext} whose identifying
 * attributes (DAG, vertex, task, attempt, input, application) are stubbed with the
 * TEST_ATTRIBUTES_* constants, feeds it a single split event, and then verifies that
 * the split-length counter was populated and that {@code TestInputFormat} was invoked.
 *
 * <p>NOTE(review): presumably {@code TestInputFormat} is where the stubbed attributes
 * are checked against the JobConf; that class is not visible here, so confirm there.
 */
@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
  // Stub every attribute accessor of the context with its test constant.
  InputContext inputContext = mock(InputContext.class);
  doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
  doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
  doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
  doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
  doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
  doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
  doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
  doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
  doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
  doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
  doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();

  // The user payload comes from an MRInput config built around TestInputFormat,
  // with split grouping turned off.
  DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false), TestInputFormat.class)
      .groupSplits(false)
      .build();
  doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
  doReturn(new TezCounters()).when(inputContext).getCounters();

  MRInput mrInput = new MRInput(inputContext, 1);
  mrInput.initialize();

  // Deliver one data-information event carrying a serialized split of type TestInputSplit.
  MRRuntimeProtos.MRSplitProto splitProto = MRRuntimeProtos.MRSplitProto.newBuilder()
      .setSplitClassName(TestInputSplit.class.getName())
      .build();
  InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
      0, splitProto.toByteString().asReadOnlyByteBuffer());
  List<Event> events = new LinkedList<>();
  events.add(diEvent);
  mrInput.handleEvents(events);

  TezCounter counter =
      mrInput.getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
  // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
  // failure message report the expected and actual values the right way round.
  assertEquals(TestInputSplit.length, counter.getValue());
  assertTrue(TestInputFormat.invoked.get());
}
Example use of org.apache.tez.runtime.api.InputContext in the Apache Tez project.
From class TestMultiMRInput, method testExtraEvents.
/**
 * Creates a {@link MultiMRInput} with a single physical input and hands it two
 * data-information events built from the same split; handling them is expected to
 * raise an exception whose message reports that all physical sources are already
 * initialized.
 */
@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testExtraEvents");
  JobConf conf = new JobConf(defaultConf);
  conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inputDir);

  // Only one physical input is declared.
  InputContext context = createTezInputContext(conf);
  MultiMRInput multiInput = new MultiMRInput(context, 1);
  multiInput.initialize();

  // Produce exactly one split and serialize it.
  createSplits(1, inputDir, conf, new AtomicLong());
  SequenceFileInputFormat<LongWritable, Text> inputFormat = new SequenceFileInputFormat<>();
  InputSplit[] generatedSplits = inputFormat.getSplits(conf, 1);
  assertEquals(1, generatedSplits.length);
  MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(generatedSplits[0]);

  // Two events (indices 0 and 1) for an input that declared a single physical source.
  List<Event> tooManyEvents = new ArrayList<>();
  tooManyEvents.add(InputDataInformationEvent.createWithSerializedPayload(
      0, serializedSplit.toByteString().asReadOnlyByteBuffer()));
  tooManyEvents.add(InputDataInformationEvent.createWithSerializedPayload(
      1, serializedSplit.toByteString().asReadOnlyByteBuffer()));

  try {
    multiInput.handleEvents(tooManyEvents);
    fail("Expecting Exception due to too many events");
  } catch (Exception e) {
    assertTrue(
        e.getMessage().contains("Unexpected event. All physical sources already initialized"));
  }
}
Example use of org.apache.tez.runtime.api.InputContext in the Apache Tez project.
From class TestMultiMRInput, method test0PhysicalInputs.
/**
 * Creates a {@link MultiMRInput} with zero physical inputs, checks that it exposes no
 * key-value readers after being started, and checks that handing it an (empty) event
 * list fails with an {@link IllegalStateException}.
 */
@Test(timeout = 5000)
public void test0PhysicalInputs() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testSingleSplit");
  JobConf conf = new JobConf(defaultConf);
  conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inputDir);

  // Zero physical inputs are declared.
  InputContext context = createTezInputContext(conf);
  MultiMRInput emptyInput = new MultiMRInput(context, 0);
  emptyInput.initialize();
  emptyInput.start();
  assertEquals(0, emptyInput.getKeyValueReaders().size());

  // Even an empty batch of events must be rejected.
  List<Event> noEvents = new LinkedList<>();
  try {
    emptyInput.handleEvents(noEvents);
    fail("HandleEvents should cause an input with 0 physical inputs to fail");
  } catch (Exception e) {
    assertTrue(e instanceof IllegalStateException);
  }
}
Aggregations