Use of org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair in project tez by apache.
The class FilterByWordInputProcessor, method run.
/**
 * Filters the records of the single input and forwards the matching ones to the single output.
 *
 * <p>Every record whose value (a {@code Text}) contains {@code filterWord} is written to the
 * output with the value text as the key and a {@code TextLongPair} of (source file name,
 * record key) as the value. The source file name is taken from
 * {@code MRJobConfig.MAP_INPUT_FILE} in the input's config updates when available; otherwise
 * the placeholder {@code "UNKNOWN_FILENAME_IN_PROCESSOR"} is used.
 *
 * @param _inputs  the logical inputs of this processor; must contain exactly one
 *                 {@code MRInputLegacy}
 * @param _outputs the logical outputs of this processor; must contain exactly one
 *                 {@code UnorderedKVOutput}
 * @throws IllegalStateException if the number or the type of the inputs/outputs is not as
 *                               described above
 * @throws Exception             if starting, reading from, or writing to the inputs/outputs fails
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());

    if (_inputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
    }
    if (_outputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
    }

    for (LogicalInput input : _inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : _outputs.values()) {
        output.start();
    }

    // Validate against the type that is actually required below: the input is cast to
    // MRInputLegacy, so checking only for MRInput would let an incompatible input through
    // and fail later with a ClassCastException instead of this descriptive error.
    LogicalInput li = _inputs.values().iterator().next();
    if (!(li instanceof MRInputLegacy)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInputLegacy");
    }
    LogicalOutput lo = _outputs.values().iterator().next();
    if (!(lo instanceof UnorderedKVOutput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with UnorderedKVOutput");
    }

    progressHelper.scheduleProgressTaskService(0, 100);

    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

    // Resolve the name of the file being processed; fall back to a placeholder when the
    // input does not report one.
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    Configuration updatedConf = mrInput.getConfigUpdates();
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Text valText = (Text) kvReader.getCurrentValue();
        if (valText.toString().contains(filterWord)) {
            // Emit (matching line text) -> (source file, record key).
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}
Aggregations