use of org.apache.tez.common.ProgressHelper in project tez by apache.
the class SimpleProcessor method run.
/**
 * Template entry point for the processor.
 *
 * <p>Records the supplied inputs and outputs on this instance, creates a
 * {@code ProgressHelper} for them, and then invokes {@code preOp()}, the
 * no-argument {@code run()} and {@code postOp()} in that order.
 *
 * @param _inputs the logical inputs handed to this processor, keyed by string
 * @param _outputs the logical outputs handed to this processor, keyed by string
 * @throws Exception whatever any of the invoked steps throws
 */
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  inputs = _inputs;
  outputs = _outputs;

  // The helper is labelled with the simple name of the concrete processor class.
  final String processorName = getClass().getSimpleName();
  progressHelper = new ProgressHelper(inputs, getContext(), processorName);

  preOp();
  run();
  postOp();
}
use of org.apache.tez.common.ProgressHelper in project tez by apache.
the class ReduceProcessor method run.
/**
 * Runs the reduce side of the task against exactly one input and one output.
 *
 * <p>The single input is started and awaited via
 * {@code processorContext.waitForAllInputsReady} before the single output is
 * started. The input must be an {@code OrderedGroupedInputLegacy}; the output
 * must be either an {@code MROutputLegacy} or an
 * {@code OrderedPartitionedKVOutput}. Depending on {@code useNewApi} the work
 * is delegated to {@code runNewReducer} or {@code runOldReducer}, followed by
 * {@code done()}.
 *
 * @param _inputs the logical inputs; must contain exactly one entry
 * @param _outputs the logical outputs; must contain exactly one entry
 * @throws IOException if the input/output count is not one, if the input or
 *     output is of an unsupported type, or if the new-API reducer raises a
 *     {@code ClassNotFoundException} (wrapped as the cause)
 * @throws Exception anything else propagated from the inputs, outputs or reducer
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
  LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());

  if (_outputs.size() != 1) {
    throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
  }
  if (_inputs.size() != 1) {
    throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
  }

  // Start the input first and block until the context reports it ready.
  LogicalInput in = _inputs.values().iterator().next();
  in.start();
  List<Input> pendingInputs = new LinkedList<Input>();
  pendingInputs.add(in);
  processorContext.waitForAllInputsReady(pendingInputs);
  LOG.info("Input is ready for consumption. Starting Output");

  LogicalOutput out = _outputs.values().iterator().next();
  out.start();
  initTask(out);
  progressHelper.scheduleProgressTaskService(0, 100);
  this.statusUpdate();

  Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
  Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
  LOG.info("Using keyClass: " + keyClass);
  LOG.info("Using valueClass: " + valueClass);
  RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
  LOG.info("Using comparator: " + comparator);

  reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);

  // Sanity check
  if (!(in instanceof OrderedGroupedInputLegacy)) {
    throw new IOException("Illegal input to reduce: " + in.getClass());
  }
  OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
  KeyValuesReader kvReader = shuffleInput.getReader();

  KeyValueWriter kvWriter = null;
  if ((out instanceof MROutputLegacy)) {
    kvWriter = ((MROutputLegacy) out).getWriter();
  } else if ((out instanceof OrderedPartitionedKVOutput)) {
    kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
  } else {
    // Report the offending OUTPUT class (previously this printed the input's class).
    throw new IOException("Illegal output to reduce: " + out.getClass());
  }

  if (useNewApi) {
    try {
      runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }
  } else {
    runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
  }
  done();
}
use of org.apache.tez.common.ProgressHelper in project tez by apache.
the class FilterByWordInputProcessor method run.
/**
 * Reads key/value records from a single {@code MRInputLegacy} and writes every
 * record whose value text contains {@code filterWord} to a single
 * {@code UnorderedKVOutput}.
 *
 * <p>For each matching record the output key is the value text and the output
 * value is a {@code TextLongPair} built from the source file name and the
 * record key (cast to {@code LongWritable}). The file name is taken from
 * {@code MRJobConfig.MAP_INPUT_FILE} in the input's config updates when
 * present, and otherwise stays {@code "UNKNOWN_FILENAME_IN_PROCESSOR"}.
 *
 * @param _inputs the logical inputs; must contain exactly one {@code MRInputLegacy}
 * @param _outputs the logical outputs; must contain exactly one {@code UnorderedKVOutput}
 * @throws IllegalStateException if the input/output count or type is not as required
 * @throws Exception anything propagated from the inputs, outputs, reader or writer
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());

  if (_inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }
  if (_outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : _inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : _outputs.values()) {
    output.start();
  }

  LogicalInput li = _inputs.values().iterator().next();
  // Guard on the exact type that is cast below; checking only the broader
  // MRInput type would let a non-legacy input through to a ClassCastException.
  if (!(li instanceof MRInputLegacy)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInputLegacy");
  }
  LogicalOutput lo = _outputs.values().iterator().next();
  if (!(lo instanceof UnorderedKVOutput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with UnorderedKVOutput");
  }

  progressHelper.scheduleProgressTaskService(0, 100);

  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

  // Resolve the source file name, falling back to a placeholder when unknown.
  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Text valText = (Text) kvReader.getCurrentValue();
    if (valText.toString().contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
use of org.apache.tez.common.ProgressHelper in project tez by apache.
the class SleepProcessor method run.
/**
 * Starts every input and output, then sleeps for {@code timeToSleepMS}
 * milliseconds.
 *
 * <p>If the sleep is interrupted the method returns early without throwing,
 * but the thread's interrupt status is restored so that the caller can still
 * observe the interruption.
 *
 * @param _inputs the logical inputs to start
 * @param _outputs the logical outputs to start
 * @throws Exception anything propagated from starting the inputs or outputs
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  inputs = _inputs;
  outputs = _outputs;
  progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
  LOG.info("Running the Sleep Processor, sleeping for " + timeToSleepMS + " ms");

  for (LogicalInput input : _inputs.values()) {
    input.start();
  }
  progressHelper.scheduleProgressTaskService(0, 100);
  for (LogicalOutput output : _outputs.values()) {
    output.start();
  }

  try {
    Thread.sleep(timeToSleepMS);
  } catch (InterruptedException ie) {
    // Ending the sleep early is fine, but catching InterruptedException clears
    // the interrupt flag; restore it rather than hiding it from the caller.
    Thread.currentThread().interrupt();
  }
}
use of org.apache.tez.common.ProgressHelper in project tez by apache.
the class MapProcessor method run.
/**
 * Runs the map side of the task against exactly one input and one output.
 *
 * <p>All inputs and outputs are started, the task is initialised with the
 * output, and progress reporting is scheduled. The input must be an
 * {@code MRInputLegacy}; any configuration updates it exposes are copied into
 * {@code jobConf}. The output must be an {@code MROutputLegacy} or an
 * {@code OrderedPartitionedKVOutput}. The work is then delegated to
 * {@code runNewMapper} or {@code runOldMapper} according to {@code useNewApi},
 * followed by {@code done()}.
 *
 * @param _inputs the logical inputs; must contain exactly one entry
 * @param _outputs the logical outputs; must contain exactly one entry
 * @throws IOException if the input/output count is not one, or the input or
 *     output is of an unsupported type
 * @throws Exception anything else propagated from the inputs, outputs or mapper
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  inputs = _inputs;
  outputs = _outputs;
  progressHelper = new ProgressHelper(inputs, getContext(), getClass().getSimpleName());
  LOG.info("Running map: " + processorContext.getUniqueIdentifier());

  final int inputCount = _inputs.size();
  final int outputCount = _outputs.size();
  if (!(inputCount == 1 && outputCount == 1)) {
    throw new IOException("Cannot handle multiple _inputs or _outputs" + ", inputCount=" + inputCount + ", outputCount=" + outputCount);
  }

  for (LogicalInput logicalInput : _inputs.values()) {
    logicalInput.start();
  }
  for (LogicalOutput logicalOutput : _outputs.values()) {
    logicalOutput.start();
  }

  final LogicalInput soleInput = _inputs.values().iterator().next();
  final LogicalOutput soleOutput = _outputs.values().iterator().next();
  initTask(soleOutput);
  progressHelper.scheduleProgressTaskService(0, 100);

  // Only the legacy MR input type is accepted here.
  if (!(soleInput instanceof MRInputLegacy)) {
    throw new IOException(new TezException("Only MRInputLegacy supported. Input: " + soleInput.getClass()));
  }
  final MRInputLegacy mrInput = (MRInputLegacy) soleInput;
  mrInput.init();

  // Fold any configuration updates exposed by the input into the job configuration.
  final Configuration confUpdates = mrInput.getConfigUpdates();
  if (confUpdates != null) {
    for (Entry<String, String> update : confUpdates) {
      jobConf.set(update.getKey(), update.getValue());
    }
  }

  final KeyValueWriter writer;
  if (soleOutput instanceof MROutputLegacy) {
    writer = ((MROutputLegacy) soleOutput).getWriter();
  } else if (soleOutput instanceof OrderedPartitionedKVOutput) {
    writer = ((OrderedPartitionedKVOutput) soleOutput).getWriter();
  } else {
    throw new IOException("Illegal output to map, outputClass=" + soleOutput.getClass());
  }

  if (!useNewApi) {
    runOldMapper(jobConf, mrReporter, mrInput, writer);
  } else {
    runNewMapper(jobConf, mrReporter, mrInput, writer);
  }
  done();
}
Aggregations