Use of org.apache.hadoop.io.RawComparator in the Apache Hadoop project: the ReduceTask.run method.
/**
 * Runs the reduce task: registers the copy/sort/reduce progress phases,
 * handles the job-setup / job-cleanup / task-cleanup shortcut tasks, shuffles
 * and merges map outputs through the configured {@code ShuffleConsumerPlugin},
 * and finally invokes the user's reducer via the old or new MapReduce API.
 *
 * @param job       the job configuration for this task
 * @param umbilical RPC channel back to the parent (for status/progress updates)
 * @throws IOException            on shuffle, merge, or output errors
 * @throws InterruptedException   if the task is interrupted
 * @throws ClassNotFoundException if a configured class (e.g. combiner, plugin) cannot be loaded
 */
@Override
@SuppressWarnings("unchecked")
public void run(JobConf job, final TaskUmbilicalProtocol umbilical) throws IOException, InterruptedException, ClassNotFoundException {
// Propagate the skip-records mode into the job so downstream code can see it.
job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
// Only a real map/reduce task gets the three progress phases; the
// setup/cleanup shortcut tasks below report progress differently.
if (isMapOrReduce()) {
copyPhase = getProgress().addPhase("copy");
sortPhase = getProgress().addPhase("sort");
reducePhase = getProgress().addPhase("reduce");
}
// start thread that will handle communication with parent
TaskReporter reporter = startReporter(umbilical);
boolean useNewApi = job.getUseNewReducer();
initialize(job, getJobID(), reporter, useNewApi);
// check if it is a cleanupJobTask
// Each of the three shortcut task types runs its dedicated routine and
// returns immediately — no shuffle or reduce happens for them.
if (jobCleanup) {
runJobCleanupTask(umbilical, reporter);
return;
}
if (jobSetup) {
runJobSetupTask(umbilical, reporter);
return;
}
if (taskCleanup) {
runTaskCleanupTask(umbilical, reporter);
return;
}
// Initialize the codec
codec = initCodec();
RawKeyValueIterator rIter = null;
ShuffleConsumerPlugin shuffleConsumerPlugin = null;
// A combiner is optional; only build the combine collector when one is set.
// NOTE(review): this reads from the 'conf' field while the rest of the
// method reads from the 'job' parameter — presumably they are the same
// configuration; confirm against the enclosing class.
Class combinerClass = conf.getCombinerClass();
CombineOutputCollector combineCollector = (null != combinerClass) ? new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
// The shuffle implementation is pluggable; Shuffle is the default.
Class<? extends ShuffleConsumerPlugin> clazz = job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
// Bundle everything the plugin needs (counters, phases, local dirs, codec,
// optional combiner) into a single context object.
ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical, super.lDirAlloc, reporter, codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter, shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter, mergedMapOutputsCounter, taskStatus, copyPhase, sortPhase, this, mapOutputFile, localMapFiles);
shuffleConsumerPlugin.init(shuffleContext);
// run() performs the copy + merge and hands back an iterator over the
// sorted, merged map outputs.
rIter = shuffleConsumerPlugin.run();
// free up the data structures
mapOutputFilesOnDisk.clear();
// sort is complete
sortPhase.complete();
// Tell the parent we are moving into the reduce phase before starting it.
setPhase(TaskStatus.Phase.REDUCE);
statusUpdate(umbilical);
// Reduce input key/value types are the map output types.
Class keyClass = job.getMapOutputKeyClass();
Class valueClass = job.getMapOutputValueClass();
// The grouping comparator decides which keys share one reduce() call.
RawComparator comparator = job.getOutputValueGroupingComparator();
// Dispatch to the new (mapreduce) or old (mapred) reducer API.
if (useNewApi) {
runNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
} else {
runOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
}
// Release plugin resources, then report final completion to the parent.
shuffleConsumerPlugin.close();
done(umbilical, reporter);
}
Use of org.apache.hadoop.io.RawComparator in the Apache Tez project: the TestTezMerger.testWithCustomComparator_WithEmptyStrings method.
@Test(timeout = 5000)
public void testWithCustomComparator_WithEmptyStrings() throws Exception {
  // Merge four IFiles under a custom grouping comparator, where one of the
  // inputs contributes an empty-string key.
  RawComparator comparator = new CustomComparator();
  LOG.info("Test with custom comparator with empty strings in middle");

  List<Path> inputFiles = new LinkedList<Path>();
  List<String> rows = Lists.newLinkedList();

  // File 1: two identical non-empty keys.
  rows.add("0");
  rows.add("0");
  inputFiles.add(createIFileWithTextData(rows));

  // File 2: a single empty key.
  rows.clear();
  rows.add("");
  inputFiles.add(createIFileWithTextData(rows));

  // File 3: more duplicates of the key from file 1.
  rows.clear();
  rows.add("0");
  rows.add("0");
  inputFiles.add(createIFileWithTextData(rows));

  // File 4: two distinct keys.
  rows.clear();
  rows.add("1");
  rows.add("2");
  inputFiles.add(createIFileWithTextData(rows));

  TezRawKeyValueIterator merged = merge(inputFiles, comparator);

  // Expected order: the empty key first, then the four "0" records grouped
  // together, then "1" and "2" each opening a new key group.
  String[][] expected = {
      { "", DIFF_KEY },
      { "0", DIFF_KEY }, { "0", SAME_KEY }, { "0", SAME_KEY }, { "0", SAME_KEY },
      { "1", DIFF_KEY }, { "2", DIFF_KEY }
  };
  verify(merged, expected);

  inputFiles.clear();
  rows.clear();
}
Aggregations