Search in sources :

Example 1 with TextLongPair

Use of org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair in project tez by apache.

The class FilterByWordInputProcessor, method run.

/**
 * Reads every record from the single input and forwards the lines that contain
 * {@code filterWord} to the single output.
 *
 * <p>For each matching record the line text is written as the key and a
 * {@link TextLongPair} of (source file name, reader key) is written as the value.
 * The source file name is taken from {@code MRJobConfig.MAP_INPUT_FILE} in the
 * input's config updates when present; otherwise the placeholder
 * {@code "UNKNOWN_FILENAME_IN_PROCESSOR"} is used.
 *
 * @param _inputs  logical inputs for this task; must contain exactly one entry,
 *                 which must be an {@link MRInputLegacy}
 * @param _outputs logical outputs for this task; must contain exactly one entry,
 *                 which must be an {@link UnorderedKVOutput}
 * @throws IllegalStateException if the number or type of inputs/outputs is not as required
 * @throws Exception if starting, reading from, or writing to the inputs/outputs fails
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
    if (_inputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
    }
    if (_outputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
    }
    for (LogicalInput input : _inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : _outputs.values()) {
        output.start();
    }
    LogicalInput li = _inputs.values().iterator().next();
    if (!(li instanceof MRInput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }
    // The input is cast to MRInputLegacy below (init() is called on it), so verify that
    // explicitly instead of letting a plain MRInput surface as a ClassCastException.
    if (!(li instanceof MRInputLegacy)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor requires the MRInput to be an MRInputLegacy");
    }
    LogicalOutput lo = _outputs.values().iterator().next();
    if (!(lo instanceof UnorderedKVOutput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with UnorderedKVOutput");
    }
    progressHelper.scheduleProgressTaskService(0, 100);
    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

    // Resolve the name of the file being processed; fall back to a placeholder when
    // the input does not report one.
    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();
    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();
        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            // NOTE(review): the reader key is treated as a line number here; whether it is a
            // line index or a byte offset depends on the configured input format - confirm.
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}
Also used : MRInput(org.apache.tez.mapreduce.input.MRInput) ProgressHelper(org.apache.tez.common.ProgressHelper) Configuration(org.apache.hadoop.conf.Configuration) TextLongPair(org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) Text(org.apache.hadoop.io.Text) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) UnorderedKVOutput(org.apache.tez.runtime.library.output.UnorderedKVOutput) LogicalInput(org.apache.tez.runtime.api.LogicalInput) LongWritable(org.apache.hadoop.io.LongWritable) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy)

Aggregations

Configuration (org.apache.hadoop.conf.Configuration)1 LongWritable (org.apache.hadoop.io.LongWritable)1 Text (org.apache.hadoop.io.Text)1 ProgressHelper (org.apache.tez.common.ProgressHelper)1 TextLongPair (org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair)1 MRInput (org.apache.tez.mapreduce.input.MRInput)1 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)1 LogicalInput (org.apache.tez.runtime.api.LogicalInput)1 LogicalOutput (org.apache.tez.runtime.api.LogicalOutput)1 KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader)1 KeyValueWriter (org.apache.tez.runtime.library.api.KeyValueWriter)1 UnorderedKVOutput (org.apache.tez.runtime.library.output.UnorderedKVOutput)1