use of org.apache.tez.runtime.library.api.KeyValueWriter in project tez by apache.
the class TestOnFileUnorderedKVOutput method testWithPipelinedShuffle.
@Test(timeout = 30000)
@SuppressWarnings("unchecked")
public void testWithPipelinedShuffle() throws Exception {
Configuration conf = new Configuration();
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 1);
TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
OutputContext outputContext = createOutputContext(conf, sharedExecutor);
UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
List<Event> events = null;
events = kvOutput.initialize();
kvOutput.start();
assertTrue(events != null && events.size() == 0);
KeyValueWriter kvWriter = kvOutput.getWriter();
for (int i = 0; i < 500; i++) {
kvWriter.write(new Text(RandomStringUtils.randomAscii(10000)), new IntWritable(i));
}
events = kvOutput.close();
// When pipelining is on, all events are sent out within writer itself.
assertTrue(events != null && events.size() == 0);
// ensure that data is sent via outputContext.
ArgumentCaptor<List> eventsCaptor = ArgumentCaptor.forClass(List.class);
verify(outputContext, atLeast(1)).sendEvents(eventsCaptor.capture());
events = eventsCaptor.getValue();
CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
assertTrue(shufflePayload.hasLastEvent());
assertFalse(shufflePayload.hasEmptyPartitions());
assertEquals(shufflePort, shufflePayload.getPort());
assertEquals("localhost", shufflePayload.getHost());
sharedExecutor.shutdownNow();
}
use of org.apache.tez.runtime.library.api.KeyValueWriter in project tez by apache.
the class ReduceProcessor method run.
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
this.inputs = _inputs;
this.outputs = _outputs;
progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());
if (_outputs.size() <= 0 || _outputs.size() > 1) {
throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
}
if (_inputs.size() <= 0 || _inputs.size() > 1) {
throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
}
LogicalInput in = _inputs.values().iterator().next();
in.start();
List<Input> pendingInputs = new LinkedList<Input>();
pendingInputs.add(in);
processorContext.waitForAllInputsReady(pendingInputs);
LOG.info("Input is ready for consumption. Starting Output");
LogicalOutput out = _outputs.values().iterator().next();
out.start();
initTask(out);
progressHelper.scheduleProgressTaskService(0, 100);
this.statusUpdate();
Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
LOG.info("Using keyClass: " + keyClass);
LOG.info("Using valueClass: " + valueClass);
RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
LOG.info("Using comparator: " + comparator);
reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);
// Sanity check
if (!(in instanceof OrderedGroupedInputLegacy)) {
throw new IOException("Illegal input to reduce: " + in.getClass());
}
OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
KeyValuesReader kvReader = shuffleInput.getReader();
KeyValueWriter kvWriter = null;
if ((out instanceof MROutputLegacy)) {
kvWriter = ((MROutputLegacy) out).getWriter();
} else if ((out instanceof OrderedPartitionedKVOutput)) {
kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
} else {
throw new IOException("Illegal output to reduce: " + in.getClass());
}
if (useNewApi) {
try {
runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
} catch (ClassNotFoundException cnfe) {
throw new IOException(cnfe);
}
} else {
runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
}
done();
}
use of org.apache.tez.runtime.library.api.KeyValueWriter in project tez by apache.
the class FilterByWordInputProcessor method run.
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
this.inputs = _inputs;
this.outputs = _outputs;
this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
if (_inputs.size() != 1) {
throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
}
if (_outputs.size() != 1) {
throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
}
for (LogicalInput input : _inputs.values()) {
input.start();
}
for (LogicalOutput output : _outputs.values()) {
output.start();
}
LogicalInput li = _inputs.values().iterator().next();
if (!(li instanceof MRInput)) {
throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
}
LogicalOutput lo = _outputs.values().iterator().next();
if (!(lo instanceof UnorderedKVOutput)) {
throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
}
progressHelper.scheduleProgressTaskService(0, 100);
MRInputLegacy mrInput = (MRInputLegacy) li;
mrInput.init();
UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;
Configuration updatedConf = mrInput.getConfigUpdates();
Text srcFile = new Text();
srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
if (updatedConf != null) {
String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
if (fileName != null) {
LOG.info("Processing file: " + fileName);
srcFile.set(fileName);
}
}
KeyValueReader kvReader = mrInput.getReader();
KeyValueWriter kvWriter = kvOutput.getWriter();
while (kvReader.next()) {
Object key = kvReader.getCurrentKey();
Object val = kvReader.getCurrentValue();
Text valText = (Text) val;
String readVal = valText.toString();
if (readVal.contains(filterWord)) {
LongWritable lineNum = (LongWritable) key;
TextLongPair outVal = new TextLongPair(srcFile, lineNum);
kvWriter.write(valText, outVal);
}
}
}
use of org.apache.tez.runtime.library.api.KeyValueWriter in project tez by apache.
the class FilterByWordOutputProcessor method run.
@Override
public void run() throws Exception {
if (inputs.size() != 1) {
throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
}
if (outputs.size() != 1) {
throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
}
for (LogicalInput input : inputs.values()) {
input.start();
}
for (LogicalOutput output : outputs.values()) {
output.start();
}
LogicalInput li = inputs.values().iterator().next();
if (!(li instanceof UnorderedKVInput)) {
throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with ShuffledUnorderedKVInput");
}
LogicalOutput lo = outputs.values().iterator().next();
if (!(lo instanceof MROutput)) {
throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
}
UnorderedKVInput kvInput = (UnorderedKVInput) li;
MROutput mrOutput = (MROutput) lo;
KeyValueReader kvReader = kvInput.getReader();
KeyValueWriter kvWriter = mrOutput.getWriter();
while (kvReader.next()) {
Object key = kvReader.getCurrentKey();
Object value = kvReader.getCurrentValue();
kvWriter.write(key, value);
}
}
use of org.apache.tez.runtime.library.api.KeyValueWriter in project tez by apache.
the class TestOnFileUnorderedKVOutput method testGeneratedDataMovementEvent.
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
Configuration conf = new Configuration();
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
OutputContext outputContext = createOutputContext(conf, sharedExecutor);
UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
List<Event> events = null;
events = kvOutput.initialize();
kvOutput.start();
assertTrue(events != null && events.size() == 0);
KeyValueWriter kvWriter = kvOutput.getWriter();
List<KVPair> data = KVDataGen.generateTestData(true, 0);
for (KVPair kvp : data) {
kvWriter.write(kvp.getKey(), kvp.getvalue());
}
events = kvOutput.close();
assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());
assertTrue(events != null && events.size() == 2);
CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
assertFalse(shufflePayload.hasEmptyPartitions());
assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
assertEquals(shufflePort, shufflePayload.getPort());
assertEquals("localhost", shufflePayload.getHost());
sharedExecutor.shutdownNow();
}
Aggregations