Usage of org.apache.tez.mapreduce.lib.MRReader in the Apache Hive project.
Class MapRecordSource, method closeReader.
/**
 * Best-effort close of the current {@code reader}.
 *
 * <p>Only readers that are {@link MRReader} instances are closed; for anything else
 * (including {@code null}) a warning is logged and nothing further happens. An
 * {@link IOException} raised while closing is logged and deliberately not rethrown.
 */
private void closeReader() {
  if (reader instanceof MRReader) {
    // NOTE(review): this cleanup is only reached when the reader is both an MRReader and a
    // KeyValueInputMerger -- confirm the type hierarchy actually allows that combination.
    if (reader instanceof KeyValueInputMerger) {
      ((KeyValueInputMerger) reader).clean();
    }
    LOG.info("Closing MRReader on error");
    final MRReader closable = (MRReader) reader;
    try {
      closable.close();
    } catch (IOException closeFailure) {
      // Closing is best-effort here, so the failure is recorded and swallowed.
      LOG.error("Failed to close the reader; ignoring", closeFailure);
    }
    return;
  }
  // Not an MRReader (or no reader at all): nothing we know how to close.
  LOG.warn("Cannot close " + (reader == null ? null : reader.getClass()));
}
Usage of org.apache.tez.mapreduce.lib.MRReader in the Apache Tez project.
Class MultiMRInput, method initFromEvent.
/**
 * Builds an {@link MRReader} for the split carried by a single input data event.
 *
 * <p>The event's user payload is parsed as an {@code MRSplitProto}. Depending on
 * {@code useNewApi}, the split is decoded and wrapped in either an
 * {@code MRReaderMapReduce} or an {@code MRReaderMapred}, each created with its own
 * copy of the job configuration. When the split length is known (not {@code -1}) it is
 * added to the {@code INPUT_SPLIT_LENGTH_BYTES} counter.
 *
 * @param event the event describing the split; must not be {@code null}
 * @return the reader created for the split described by {@code event}
 * @throws IOException if one of the underlying split-decoding or reader-construction
 *     calls fails with one
 * @throws IllegalStateException if {@code event} is {@code null}
 */
private MRReader initFromEvent(InputDataInformationEvent event) throws IOException {
  Preconditions.checkState(event != null, "Event must be specified");
  if (LOG.isDebugEnabled()) {
    LOG.debug(getContext().getSourceVertexName() + " initializing Reader: " + eventCount.get());
  }
  MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(event.getUserPayload()));
  MRReader reader;
  // Each reader gets a private copy of the job configuration.
  JobConf localJobConf = new JobConf(jobConf);
  // -1 means "length unknown"; the split-length counter is left untouched in that case.
  long splitLength = -1;
  if (useNewApi) {
    InputSplit split = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, localJobConf);
    try {
      splitLength = split.getLength();
    } catch (InterruptedException e) {
      LOG.warn("Got interrupted while reading split length: ", e);
      // Restore the interrupt status so code further up the stack can still observe
      // that this thread was interrupted; the length simply stays unknown.
      Thread.currentThread().interrupt();
    }
    reader = new MRReaderMapReduce(localJobConf, split, getContext().getCounters(), inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength);
    }
  } else {
    org.apache.hadoop.mapred.InputSplit split = MRInputUtils.getOldSplitDetailsFromEvent(splitProto, localJobConf);
    splitLength = split.getLength();
    reader = new MRReaderMapred(localJobConf, split, getContext().getCounters(), inputRecordCounter, getContext());
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength);
    }
  }
  if (splitLength != -1) {
    getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES).increment(splitLength);
  }
  LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event");
  return reader;
}
Usage of org.apache.tez.mapreduce.lib.MRReader in the Apache Tez project.
Class MultiMRInput, method close.
/**
 * Closes every reader held by this input and then reports the number of processed
 * input records to the statistics reporter.
 *
 * <p>A close is attempted on every reader even if an earlier one fails, so a single bad
 * reader does not leave the rest open. If any close fails, the first failure is rethrown
 * once all readers have been attempted, with later failures attached as suppressed
 * exceptions; the statistics report is skipped in that case.
 *
 * @return {@code null}; this method produces no events
 * @throws Exception the first exception raised while closing a reader, if any
 */
@Override
public List<Event> close() throws Exception {
  Exception firstFailure = null;
  for (MRReader reader : readers) {
    try {
      reader.close();
    } catch (Exception e) {
      if (firstFailure == null) {
        firstFailure = e;
      } else if (e != firstFailure) {
        // Guard against self-suppression, which addSuppressed rejects.
        firstFailure.addSuppressed(e);
      }
    }
  }
  if (firstFailure != null) {
    throw firstFailure;
  }
  long inputRecords = getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(inputRecords);
  return null;
}
Usage of org.apache.tez.mapreduce.lib.MRReader in the Apache Tez project.
Class MultiMRInput, method handleEvents.
/**
 * Creates one reader per incoming event and, once the number of handled events equals
 * the number of physical inputs, marks the input as ready and signals the condition.
 *
 * <p>All work is done while holding {@code lock}.
 *
 * @param inputEvents the events to process; each is cast to {@code InputDataInformationEvent}
 * @throws IllegalStateException if this input expects zero events, or if this batch would
 *     push the event count past the number of physical inputs
 * @throws Exception if creating a reader from an event fails
 */
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
  lock.lock();
  try {
    if (getNumPhysicalInputs() == 0) {
      throw new IllegalStateException("Unexpected event. MultiMRInput has been setup to receive 0 events");
    }
    // Reject the whole batch up front if it would overshoot the expected event count.
    final int countAfterBatch = eventCount.get() + inputEvents.size();
    Preconditions.checkState(countAfterBatch <= getNumPhysicalInputs(), "Unexpected event. All physical sources already initialized");
    for (Event incoming : inputEvents) {
      final MRReader created = initFromEvent((InputDataInformationEvent) incoming);
      readers.add(created);
      final int handledSoFar = eventCount.incrementAndGet();
      if (handledSoFar != getNumPhysicalInputs()) {
        continue;
      }
      // Last expected event has arrived: publish readiness and wake a waiter.
      getContext().inputIsReady();
      condition.signal();
    }
  } finally {
    lock.unlock();
  }
}
Aggregations