use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.
the class ReduceProcessor method run.
/**
 * Runs the reduce task against exactly one logical input and one logical output.
 *
 * <p>The single input is started and waited on via the processor context before the single
 * output is started. The work is then delegated to the new-API or old-API reducer depending on
 * {@code useNewApi}, and {@code done()} is called once the reducer returns.
 *
 * @param _inputs logical inputs keyed by name; must contain exactly one entry
 * @param _outputs logical outputs keyed by name; must contain exactly one entry
 * @throws IOException if the number of inputs or outputs is not one, if the input is not an
 *     {@code OrderedGroupedInputLegacy}, if the output is neither an {@code MROutputLegacy} nor
 *     an {@code OrderedPartitionedKVOutput}, or if the new-API reducer raises
 *     {@code ClassNotFoundException}
 * @throws Exception if starting the input/output or running the reducer fails
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
  LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());

  if (_outputs.size() != 1) {
    throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
  }
  if (_inputs.size() != 1) {
    throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
  }

  // Start the input and block until the context reports it ready before touching the output.
  LogicalInput in = _inputs.values().iterator().next();
  in.start();
  List<Input> pendingInputs = new LinkedList<Input>();
  pendingInputs.add(in);
  processorContext.waitForAllInputsReady(pendingInputs);
  LOG.info("Input is ready for consumption. Starting Output");

  LogicalOutput out = _outputs.values().iterator().next();
  out.start();
  initTask(out);
  progressHelper.scheduleProgressTaskService(0, 100);
  this.statusUpdate();

  Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
  Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
  LOG.info("Using keyClass: " + keyClass);
  LOG.info("Using valueClass: " + valueClass);
  RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
  LOG.info("Using comparator: " + comparator);

  reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);

  // Sanity check
  if (!(in instanceof OrderedGroupedInputLegacy)) {
    throw new IOException("Illegal input to reduce: " + in.getClass());
  }
  OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
  KeyValuesReader kvReader = shuffleInput.getReader();

  KeyValueWriter kvWriter = null;
  if (out instanceof MROutputLegacy) {
    kvWriter = ((MROutputLegacy) out).getWriter();
  } else if (out instanceof OrderedPartitionedKVOutput) {
    kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
  } else {
    // Report the class of the rejected output (previously this printed the input's class).
    throw new IOException("Illegal output to reduce: " + out.getClass());
  }

  if (useNewApi) {
    try {
      runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }
  } else {
    runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
  }
  done();
}
use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.
the class LogicalIOProcessorRuntimeTask method initialize.
/**
 * Initializes the processor and every input/output of this task, then auto-starts the inputs.
 *
 * <p>Steps, in the order the code performs them:
 * <ol>
 * <li>Verifies the state is {@code NEW} and moves it to {@code INITED}.</li>
 * <li>Creates the processor context and the processor.</li>
 * <li>Submits one {@code InitializeInputCallable} per input spec and one
 * {@code InitializeOutputCallable} per output spec to {@code initializerCompletionService}.
 * The processor is initialized on the calling thread, either before these submissions (when
 * {@code initializeProcessorFirst} or {@code initializeProcessorIOSerially} is set) or right
 * after them. The method then waits for all submitted initializers.</li>
 * <li>Initializes the group inputs, registers them with {@code inputReadyTracker}, and makes
 * the initial memory allocations.</li>
 * <li>Submits a {@code StartInputCallable} for each non-grouped input and each input group that
 * {@code inputAlreadyStarted} does not report as started, calls {@code shutdown()} on
 * {@code initializerExecutor}, and waits for those starts.</li>
 * <li>Populates {@code runInputMap} and {@code runOutputMap} and starts the router thread.</li>
 * </ol>
 *
 * @throws Exception if the state check fails, if waiting on the completion service fails, or if
 *     any initializer/start callable fails; in the latter case the callable's cause is rethrown
 *     directly when it is an {@code Exception} and wrapped in a new {@code Exception} otherwise
 */
public void initialize() throws Exception {
Preconditions.checkState(this.state.get() == State.NEW, "Already initialized");
this.state.set(State.INITED);
this.processorContext = createProcessorContext();
this.processor = createProcessor(processorDescriptor.getClassName(), processorContext);
if (initializeProcessorFirst || initializeProcessorIOSerially) {
// Initialize processor in the current thread.
initializeLogicalIOProcessor();
}
// numTasks counts every initializer submitted so the wait loop below knows how many
// completions to collect.
int numTasks = 0;
int inputIndex = 0;
for (InputSpec inputSpec : taskSpec.getInputs()) {
this.initializerCompletionService.submit(new InitializeInputCallable(inputSpec, inputIndex++));
numTasks++;
}
int outputIndex = 0;
for (OutputSpec outputSpec : taskSpec.getOutputs()) {
this.initializerCompletionService.submit(new InitializeOutputCallable(outputSpec, outputIndex++));
numTasks++;
}
if (!(initializeProcessorFirst || initializeProcessorIOSerially)) {
// Initialize processor in the current thread.
// This is the complement of the check above: the processor is initialized here, after the
// I/O initializers have been submitted.
initializeLogicalIOProcessor();
}
// Collect one completion per submitted initializer; the first failure aborts the wait.
int completedTasks = 0;
while (completedTasks < numTasks) {
LOG.info("Waiting for " + (numTasks - completedTasks) + " initializers to finish");
Future<Void> future = initializerCompletionService.take();
try {
future.get();
completedTasks++;
} catch (ExecutionException e) {
// Surface the callable's own failure rather than the ExecutionException wrapper.
if (e.getCause() instanceof Exception) {
throw (Exception) e.getCause();
} else {
throw new Exception(e);
}
}
}
LOG.info("All initializers finished");
// Group inputs depend on inputs being initialized, so this must be done after.
initializeGroupInputs();
// Register the groups so that appropriate calls can be made.
this.inputReadyTracker.setGroupedInputs(groupInputsMap == null ? null : groupInputsMap.values());
// Grouped input start will be controlled by the start of the GroupedInput
// Construct the set of groupedInputs up front so that start is not invoked on them.
Set<String> groupInputs = Sets.newHashSet();
// first add the group inputs
if (groupInputSpecs != null && !groupInputSpecs.isEmpty()) {
for (GroupInputSpec groupInputSpec : groupInputSpecs) {
runInputMap.put(groupInputSpec.getGroupName(), groupInputsMap.get(groupInputSpec.getGroupName()));
groupInputs.addAll(groupInputSpec.getGroupVertices());
}
}
initialMemoryDistributor.makeInitialAllocations();
LOG.info("Starting Inputs/Outputs");
// numAutoStarts counts the start callables submitted below, for the second wait loop.
int numAutoStarts = 0;
for (InputSpec inputSpec : inputSpecs) {
if (groupInputs.contains(inputSpec.getSourceVertexName())) {
LOG.info("Ignoring " + inputSpec.getSourceVertexName() + " for start, since it will be controlled via it's Group");
continue;
}
if (!inputAlreadyStarted(taskSpec.getVertexName(), inputSpec.getSourceVertexName())) {
// Record the start before submitting it.
startedInputsMap.put(taskSpec.getVertexName(), inputSpec.getSourceVertexName());
numAutoStarts++;
this.initializerCompletionService.submit(new StartInputCallable(inputsMap.get(inputSpec.getSourceVertexName()), inputSpec.getSourceVertexName()));
LOG.info("Input: " + inputSpec.getSourceVertexName() + " being auto started by the framework. Subsequent instances will not be auto-started");
}
}
if (groupInputSpecs != null) {
for (GroupInputSpec group : groupInputSpecs) {
// NOTE(review): unlike the plain-input loop above, nothing is recorded in startedInputsMap
// for groups here — confirm whether that is intentional.
if (!inputAlreadyStarted(taskSpec.getVertexName(), group.getGroupName())) {
numAutoStarts++;
this.initializerCompletionService.submit(new StartInputCallable(groupInputsMap.get(group.getGroupName()), group.getGroupName()));
LOG.info("InputGroup: " + group.getGroupName() + " being auto started by the framework. Subsequent instance will not be auto-started");
}
}
}
// Shutdown after all tasks complete.
// Nothing further is submitted to the completion service after this point in the method.
this.initializerExecutor.shutdown();
completedTasks = 0;
LOG.info("Num IOs determined for AutoStart: " + numAutoStarts);
// Same wait-and-unwrap pattern as the initializer loop, this time for the start callables.
while (completedTasks < numAutoStarts) {
LOG.info("Waiting for " + (numAutoStarts - completedTasks) + " IOs to start");
Future<Void> future = initializerCompletionService.take();
try {
future.get();
completedTasks++;
} catch (ExecutionException e) {
if (e.getCause() instanceof Exception) {
throw (Exception) e.getCause();
} else {
throw new Exception(e);
}
}
}
LOG.info("AutoStartComplete");
// then add the non-grouped inputs
for (InputSpec inputSpec : inputSpecs) {
if (!groupInputs.contains(inputSpec.getSourceVertexName())) {
LogicalInput input = inputsMap.get(inputSpec.getSourceVertexName());
runInputMap.put(inputSpec.getSourceVertexName(), input);
}
}
// Every output is added to the run map, keyed by its destination vertex name.
for (OutputSpec outputSpec : outputSpecs) {
LogicalOutput output = outputsMap.get(outputSpec.getDestinationVertexName());
String outputName = outputSpec.getDestinationVertexName();
runOutputMap.put(outputName, output);
}
// TODO Maybe close initialized inputs / outputs in case of failure to
// initialize.
startRouterThread();
}
use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.
the class FilterByWordInputProcessor method run.
/**
 * Reads records from a single MR input and forwards those whose value contains
 * {@code filterWord}.
 *
 * <p>For every matching record the value text is written as the output key, paired with a
 * {@code TextLongPair} built from the source file name and the record's key. The file name is
 * taken from {@code MRJobConfig.MAP_INPUT_FILE} in the input's config updates when available,
 * and is {@code "UNKNOWN_FILENAME_IN_PROCESSOR"} otherwise.
 *
 * @param _inputs logical inputs keyed by name; must contain exactly one {@code MRInputLegacy}
 * @param _outputs logical outputs keyed by name; must contain exactly one
 *     {@code UnorderedKVOutput}
 * @throws IllegalStateException if the input/output count or types do not match the above
 * @throws Exception if starting the input/output, reading, or writing fails
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());

  if (_inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }
  if (_outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : _inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : _outputs.values()) {
    output.start();
  }

  LogicalInput li = _inputs.values().iterator().next();
  if (!(li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }
  // The code below casts to MRInputLegacy, so check that type explicitly. Without this guard an
  // MRInput that is not an MRInputLegacy would fail with a ClassCastException after the progress
  // service had already been scheduled.
  if (!(li instanceof MRInputLegacy)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInputLegacy");
  }
  LogicalOutput lo = _outputs.values().iterator().next();
  if (!(lo instanceof UnorderedKVOutput)) {
    // The message now names the class that is actually checked.
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with UnorderedKVOutput");
  }

  progressHelper.scheduleProgressTaskService(0, 100);

  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

  // Resolve the source file name, falling back to a placeholder when it is not available.
  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Text valText = (Text) kvReader.getCurrentValue();
    if (valText.toString().contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      kvWriter.write(valText, new TextLongPair(srcFile, lineNum));
    }
  }
}
use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.
the class FilterByWordOutputProcessor method run.
/**
 * Copies every key/value pair from the single {@code UnorderedKVInput} to the single
 * {@code MROutput}.
 *
 * <p>All inputs and outputs held in the {@code inputs}/{@code outputs} fields are started
 * before their types are validated.
 *
 * @throws IllegalStateException if there is not exactly one input and one output, if the input
 *     is not an {@code UnorderedKVInput}, or if the output is not an {@code MROutput}
 * @throws Exception if starting the input/output, reading, or writing fails
 */
@Override
public void run() throws Exception {
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
  }
  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (!(li instanceof UnorderedKVInput)) {
    // The message now names the class that is actually checked.
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with UnorderedKVInput");
  }
  LogicalOutput lo = outputs.values().iterator().next();
  if (!(lo instanceof MROutput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
  }

  KeyValueReader kvReader = ((UnorderedKVInput) li).getReader();
  KeyValueWriter kvWriter = ((MROutput) lo).getWriter();
  // Pass-through: each record is written unchanged.
  while (kvReader.next()) {
    kvWriter.write(kvReader.getCurrentKey(), kvReader.getCurrentValue());
  }
}
use of org.apache.tez.runtime.api.LogicalOutput in project hive by apache.
the class TezProcessor method initializeAndRunProcessor.
/**
 * Initializes and runs the record processor, then closes it and commits or aborts the
 * {@code MROutput}s.
 *
 * <p>Outcome handling:
 * <ul>
 * <li>If init/run fails with an {@code Error} (directly or as root cause), the failure is
 * reported to the context as {@code TaskFailureType.FATAL} and rethrown wrapped in a
 * {@code RuntimeException}; no close/commit/abort is attempted.</li>
 * <li>Otherwise the processor is closed. If nothing has failed so far, every {@code MROutput}
 * that requires a commit is committed.</li>
 * <li>If anything failed (run, close, or commit), every {@code MROutput} that requires a commit
 * is aborted and the first recorded failure is rethrown: as-is when it is an
 * {@code InterruptedException}, wrapped in a {@code RuntimeException} otherwise.</li>
 * </ul>
 *
 * @param inputs logical inputs handed to the record processor
 * @param outputs logical outputs handed to the record processor and committed/aborted here
 * @throws InterruptedException if the first recorded failure was an interruption
 * @throws RuntimeException wrapping any other failure
 * @throws Exception declared for subclasses/callers; see the specific types above
 */
protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
  Throwable originalThrowable = null;
  try {
    MRTaskReporter mrReporter = new MRTaskReporter(getContext());
    // Init and run are both potentially long, and blocking operations. Synchronization
    // with the 'abort' operation will not work since if they end up blocking on a monitor
    // which does not belong to the lock, the abort will end up getting blocked.
    // Both of these method invocations need to handle the abort call on their own.
    rproc.init(mrReporter, inputs, outputs);
    rproc.run();
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
  } catch (Throwable t) {
    // Record the failure first so it cannot be lost if marking the processor aborted throws,
    // and guard against a null processor the same way the close path below does.
    originalThrowable = t;
    if (rproc != null) {
      rproc.setAborted(true);
    }
  } finally {
    if (originalThrowable != null && (originalThrowable instanceof Error || Throwables.getRootCause(originalThrowable) instanceof Error)) {
      LOG.error("Cannot recover from this FATAL error", originalThrowable);
      getContext().reportFailure(TaskFailureType.FATAL, originalThrowable, "Cannot recover from this error");
      throw new RuntimeException(originalThrowable);
    }

    try {
      if (rproc != null) {
        rproc.close();
      }
    } catch (Throwable t) {
      if (originalThrowable == null) {
        originalThrowable = t;
      }
    }

    // commit the output tasks
    // Only commit when run and close both succeeded; a failed task is aborted below, so
    // committing its output first would be contradictory.
    if (originalThrowable == null) {
      try {
        for (LogicalOutput output : outputs.values()) {
          if (output instanceof MROutput) {
            MROutput mrOutput = (MROutput) output;
            if (mrOutput.isCommitRequired()) {
              mrOutput.commit();
            }
          }
        }
      } catch (Throwable t) {
        originalThrowable = t;
      }
    }

    if (originalThrowable != null) {
      LOG.error("Failed initializeAndRunProcessor", originalThrowable);
      // abort the output tasks
      for (LogicalOutput output : outputs.values()) {
        if (output instanceof MROutput) {
          MROutput mrOutput = (MROutput) output;
          if (mrOutput.isCommitRequired()) {
            mrOutput.abort();
          }
        }
      }
      if (originalThrowable instanceof InterruptedException) {
        throw (InterruptedException) originalThrowable;
      } else {
        throw new RuntimeException(originalThrowable);
      }
    }
  }
}
Aggregations