use of org.apache.hadoop.mapred.RawKeyValueIterator in project hadoop by apache.
the class TestGridMixClasses method testSleepReducer.
/*
 * test SleepReducer
 */
@Test(timeout = 3000)
public void testSleepReducer() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reduceContext =
      new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(
          conf, taskId, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, NullWritable.class);
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context =
      new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>()
          .getReducerContext(reduceContext);

  SleepReducer test = new SleepReducer();
  long start = System.currentTimeMillis();
  test.setup(context);
  long sleeper = context.getCurrentKey().getReduceOutputBytes();
  // status has been changed
  assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
  // should sleep 0.9 sec
  assertTrue(System.currentTimeMillis() >= (start + sleeper));
  test.cleanup(context);
  // status has been changed again
  assertEquals("Slept for " + sleeper, context.getStatus());
}
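The FakeRawKeyValueReducerIterator used above is a test stub defined elsewhere in TestGridMixClasses. As a rough, hypothetical sketch (not that actual stub), a minimal RawKeyValueIterator that yields no records only has to cover the interface's five methods:

  // Hypothetical minimal stub for illustration; the real FakeRawKeyValueReducerIterator differs.
  class EmptyRawKeyValueIterator implements RawKeyValueIterator {

    @Override
    public DataInputBuffer getKey() throws IOException {
      return new DataInputBuffer();
    }

    @Override
    public DataInputBuffer getValue() throws IOException {
      return new DataInputBuffer();
    }

    @Override
    public boolean next() throws IOException {
      // no records to deliver
      return false;
    }

    @Override
    public void close() throws IOException {
      // nothing to release
    }

    @Override
    public Progress getProgress() {
      return new Progress();
    }
  }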
use of org.apache.hadoop.mapred.RawKeyValueIterator in project hadoop by apache.
the class TestGridMixClasses method testLoadJobLoadReducer.
/*
 * test LoadReducer
 */
@Test(timeout = 3000)
public void testLoadJobLoadReducer() throws Exception {
  LoadJob.LoadReducer test = new LoadJob.LoadReducer();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskid = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  LoadRecordWriter output = new LoadRecordWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext =
      new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(
          conf, taskid, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, GridmixRecord.class);
  // read one key/value pair up front (the "previous" data)
  reduceContext.nextKeyValue();
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context =
      new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>()
          .getReducerContext(reduceContext);

  // test.setup(context);
  test.run(context);
  // 9 records have been read (-1 for the previous one)
  assertEquals(9, counter.getValue());
  assertEquals(10, inputValueCounter.getValue());
  assertEquals(1, output.getData().size());
  GridmixRecord record = output.getData().values().iterator().next();
  assertEquals(1593, record.getSize());
}
use of org.apache.hadoop.mapred.RawKeyValueIterator in project hadoop by apache.
the class MergeManagerImpl method finalMerge.
private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
    List<InMemoryMapOutput<K, V>> inMemoryMapOutputs,
    List<CompressAwarePath> onDiskMapOutputs) throws IOException {
  LOG.info("finalMerge called with " + inMemoryMapOutputs.size()
      + " in-memory map-outputs and " + onDiskMapOutputs.size()
      + " on-disk map-outputs");
  final long maxInMemReduce = getMaxInMemReduceLimit();
  // merge config params
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valueClass = (Class<V>) job.getMapOutputValueClass();
  boolean keepInputs = job.getKeepFailedTaskFiles();
  final Path tmpDir = new Path(reduceId.toString());
  final RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
  // segments required to vacate memory
  List<Segment<K, V>> memDiskSegments = new ArrayList<Segment<K, V>>();
  long inMemToDiskBytes = 0;
  boolean mergePhaseFinished = false;
  if (inMemoryMapOutputs.size() > 0) {
    TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
    inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs,
        memDiskSegments, maxInMemReduce);
    final int numMemDiskSegments = memDiskSegments.size();
    if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {
      // If we reach here, it implies that we have less than io.sort.factor
      // disk segments and this will be incremented by 1 (result of the
      // memory segments merge). Since this total would still be
      // <= io.sort.factor, we will not do any more intermediate merges,
      // the merge of all these disk segments would be directly fed to the
      // reduce method
      mergePhaseFinished = true;
      // must spill to disk, but can't retain in-mem for intermediate merge
      final Path outputPath = mapOutputFile.getInputFileForWrite(mapId,
          inMemToDiskBytes).suffix(Task.MERGED_OUTPUT_PREFIX);
      final RawKeyValueIterator rIter = Merger.merge(job, fs, keyClass,
          valueClass, memDiskSegments, numMemDiskSegments, tmpDir, comparator,
          reporter, spilledRecordsCounter, null, mergePhase);
      FSDataOutputStream out = CryptoUtils.wrapIfNecessary(job, fs.create(outputPath));
      Writer<K, V> writer = new Writer<K, V>(job, out, keyClass, valueClass,
          codec, null, true);
      try {
        Merger.writeFile(rIter, writer, reporter, job);
        writer.close();
        onDiskMapOutputs.add(new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength()));
        writer = null;
        // add to list of final disk outputs.
      } catch (IOException e) {
        if (null != outputPath) {
          try {
            fs.delete(outputPath, true);
          } catch (IOException ie) {
            // NOTHING
          }
        }
        throw e;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
      LOG.info("Merged " + numMemDiskSegments + " segments, "
          + inMemToDiskBytes + " bytes to disk to satisfy "
          + "reduce memory limit");
      inMemToDiskBytes = 0;
      memDiskSegments.clear();
    } else if (inMemToDiskBytes != 0) {
      LOG.info("Keeping " + numMemDiskSegments + " segments, "
          + inMemToDiskBytes + " bytes in memory for "
          + "intermediate, on-disk merge");
    }
  }
  // segments on disk
  List<Segment<K, V>> diskSegments = new ArrayList<Segment<K, V>>();
  long onDiskBytes = inMemToDiskBytes;
  long rawBytes = inMemToDiskBytes;
  CompressAwarePath[] onDisk = onDiskMapOutputs.toArray(
      new CompressAwarePath[onDiskMapOutputs.size()]);
  for (CompressAwarePath file : onDisk) {
    long fileLength = fs.getFileStatus(file).getLen();
    onDiskBytes += fileLength;
    rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;
    LOG.debug("Disk file: " + file + " Length is " + fileLength);
    diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
        (file.toString().endsWith(Task.MERGED_OUTPUT_PREFIX)
            ? null : mergedMapOutputsCounter), file.getRawDataLength()));
  }
  LOG.info("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
  Collections.sort(diskSegments, new Comparator<Segment<K, V>>() {
    public int compare(Segment<K, V> o1, Segment<K, V> o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });
  // build final list of segments from merged backed by disk + in-mem segments
  List<Segment<K, V>> finalSegments = new ArrayList<Segment<K, V>>();
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
  LOG.info("Merging " + finalSegments.size() + " segments, " + inMemBytes
      + " bytes from memory into reduce");
  if (0 != onDiskBytes) {
    final int numInMemSegments = memDiskSegments.size();
    diskSegments.addAll(0, memDiskSegments);
    memDiskSegments.clear();
    // Pass mergePhase only if there is going to be intermediate
    // merges. See comment where mergePhaseFinished is being set
    Progress thisPhase = (mergePhaseFinished) ? null : mergePhase;
    RawKeyValueIterator diskMerge = Merger.merge(job, fs, keyClass, valueClass,
        codec, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator,
        reporter, false, spilledRecordsCounter, null, thisPhase);
    diskSegments.clear();
    if (0 == finalSegments.size()) {
      return diskMerge;
    }
    finalSegments.add(new Segment<K, V>(
        new RawKVIteratorReader(diskMerge, onDiskBytes), true, rawBytes));
  }
  return Merger.merge(job, fs, keyClass, valueClass, finalSegments,
      finalSegments.size(), tmpDir, comparator, reporter, spilledRecordsCounter,
      null, null);
}
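Merger.writeFile above drains the merged RawKeyValueIterator into the IFile-style Writer. Conceptually it boils down to the loop below (a rough sketch only, assuming the "rIter" and "writer" from the snippet; the real method additionally does periodic progress reporting):

  // Rough sketch of draining a RawKeyValueIterator into a Writer; not the actual Merger.writeFile.
  while (rIter.next()) {
    writer.append(rIter.getKey(), rIter.getValue());
  }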
use of org.apache.hadoop.mapred.RawKeyValueIterator in project tez by apache.
the class MRCombiner method createReduceContext.
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT> org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
    Configuration conf, TaskAttemptID mrTaskAttemptID,
    final TezRawKeyValueIterator rawIter, Counter combineInputRecordsCounter,
    Counter combineOutputRecordsCounter, RecordWriter<KEYOUT, VALUEOUT> recordWriter,
    MRTaskReporter reporter, RawComparator<KEYIN> comparator,
    Class<KEYIN> keyClass, Class<VALUEIN> valClass) throws InterruptedException, IOException {
  // adapt the Tez iterator to the MapReduce RawKeyValueIterator interface
  RawKeyValueIterator r = new RawKeyValueIterator() {

    @Override
    public boolean next() throws IOException {
      return rawIter.next();
    }

    @Override
    public DataInputBuffer getValue() throws IOException {
      return rawIter.getValue();
    }

    @Override
    public Progress getProgress() {
      return rawIter.getProgress();
    }

    @Override
    public DataInputBuffer getKey() throws IOException {
      return rawIter.getKey();
    }

    @Override
    public void close() throws IOException {
      rawIter.close();
    }
  };
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext =
      new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(conf, mrTaskAttemptID,
          r, null, combineInputRecordsCounter, recordWriter, null, reporter,
          comparator, keyClass, valClass);
  org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>().getReducerContext(rContext);
  return reducerContext;
}
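A hypothetical sketch of how a caller might drive the returned context (the actual call site lives elsewhere in MRCombiner): the user's combiner is instantiated as a new-API Reducer and run against the wrapped iterator, which calls reduce() once per key group. Here "combinerClass" and "conf" are assumed to be available.

  // Hypothetical usage sketch; raw Reducer type used for brevity.
  org.apache.hadoop.mapreduce.Reducer combiner =
      ReflectionUtils.newInstance(combinerClass, conf);
  combiner.run(reducerContext);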
use of org.apache.hadoop.mapred.RawKeyValueIterator in project hadoop by apache.
the class Shuffle method run.
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K, V> eventFetcher = new EventFetcher<K, V>(reduceId,
      umbilical, scheduler, this, maxEventsToFetch);
  eventFetcher.start();

  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1
      : jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K, V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler, merger,
        reporter, metrics, this, reduceTask.getShuffleSecret(), localMapFiles);
    fetchers[0].start();
  } else {
    for (int i = 0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K, V>(jobConf, reduceId, scheduler, merger,
          reporter, metrics, this, reduceTask.getShuffleSecret());
      fetchers[i].start();
    }
  }

  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();
    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName, throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();

  // Stop the map-output fetcher threads
  for (Fetcher<K, V> fetcher : fetchers) {
    fetcher.shutDown();
  }

  // stop the scheduler
  scheduler.close();

  // copy is already complete
  copyPhase.complete();
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close();
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge ", e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName, throwable);
    }
  }
  return kvIter;
}