Example use of org.apache.hadoop.mapred.IFileInputStream in the Apache Hadoop project.
Class InMemoryMapOutput, method doShuffle.
/**
 * Pulls one map output off the shuffle stream and into the {@code memory} buffer.
 *
 * <p>When a codec is configured, the incoming stream is wrapped in a decompressing
 * stream backed by this object's decompressor; otherwise it is read as-is. Once
 * reading has started, the decompressor is handed back to {@link CodecPool} on
 * every exit path.
 *
 * @param host not used by this implementation
 * @param iFin stream supplying this map output's data
 * @param compressedLength not used by this implementation
 * @param decompressedLength not used by this implementation
 * @param metrics receives the number of bytes read into the buffer
 * @param reporter pinged once after the buffer has been filled
 * @throws IOException if reading fails, or if the stream still yields data after
 *     the buffer has been filled
 */
@Override
protected void doShuffle(MapHost host, IFileInputStream iFin, long compressedLength, long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  final InputStream raw = iFin;
  final InputStream source;
  if (codec == null) {
    // Map outputs are not compressed: read the IFile stream directly.
    source = raw;
  } else {
    // Map outputs are compressed: reuse the decompressor from a clean state.
    decompressor.reset();
    source = codec.createInputStream(raw, decompressor);
  }

  try {
    // The buffer length is the amount of data expected for this map output.
    IOUtils.readFully(source, memory, 0, memory.length);
    metrics.inputBytes(memory.length);
    reporter.progress();
    LOG.info("Read " + memory.length + " bytes from map-output for " + getMapId());

    // All expected data has arrived, so the stream must now be exhausted.
    // Probing it also makes the decompressor consume any trailing bytes that
    // were not needed for decompression, which keeps the underlying stream
    // in sync.
    final int trailing = source.read();
    if (trailing >= 0) {
      throw new IOException("Unexpected extra bytes from input stream for " + getMapId());
    }
  } finally {
    CodecPool.returnDecompressor(decompressor);
  }
}
Example use of org.apache.hadoop.mapred.IFileInputStream in the Apache Hadoop project.
Class TestFetcher, method testCorruptedIFile.
/**
 * Checks that shuffling a map output to disk accepts intact IFile data and
 * rejects corrupted data with a {@link ChecksumException}.
 *
 * <p>The test writes a shuffle header followed by an IFile-encoded payload into
 * an in-memory buffer, shuffles the intact bytes, then shuffles a copy with one
 * byte zeroed and expects the checksum failure. Finally it reads the temporary
 * shuffle file back through an {@code IFileInputStream}.
 *
 * @throws Exception if any step fails unexpectedly
 */
@Test
public void testCorruptedIFile() throws Exception {
  final int fetcherId = 7;
  Path mapOutputPath = new Path(name.getMethodName() + "/foo");
  Path tempShufflePath = OnDiskMapOutput.getTempPath(mapOutputPath, fetcherId);
  fs = FileSystem.getLocal(job).getRaw();
  IFileWrappedMapOutput<Text, Text> diskOutput = new OnDiskMapOutput<Text, Text>(map1ID, mm, 100L, job, fetcherId, true, fs, mapOutputPath);

  // Build the wire image: shuffle header first, then the IFile-encoded payload.
  final byte[] payload = "MAPDATA12345678901234567890".getBytes();
  ShuffleHeader shuffleHeader = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream headerOut = new DataOutputStream(buffer);
  IFileOutputStream ifileOut = new IFileOutputStream(headerOut);
  shuffleHeader.write(headerOut);
  final int headerLength = headerOut.size();
  try {
    ifileOut.write(payload);
  } finally {
    ifileOut.close();
  }

  // Everything written after the header counts as the IFile body.
  final byte[] intact = buffer.toByteArray();
  final int bodyLength = intact.length - headerLength;

  // Pass 1: the shuffler must read the untouched data successfully.
  MapHost sourceHost = new MapHost("TestHost", "http://test/url");
  ByteArrayInputStream intactIn = new ByteArrayInputStream(intact);
  try {
    // Consume the shuffle header so the stream starts at the IFile body.
    intactIn.read(new byte[headerLength], 0, headerLength);
    diskOutput.shuffle(sourceHost, intactIn, bodyLength, bodyLength, metrics, Reporter.NULL);
  } finally {
    intactIn.close();
  }

  // Pass 2: zero a byte in the middle of the body and expect a checksum failure.
  final byte[] corrupted = intact.clone();
  final int corruptIndex = headerLength + bodyLength / 2;
  corrupted[corruptIndex] = 0;
  ByteArrayInputStream corruptedIn = new ByteArrayInputStream(corrupted);
  try {
    // Consume the shuffle header so the stream starts at the IFile body.
    corruptedIn.read(new byte[headerLength], 0, headerLength);
    diskOutput.shuffle(sourceHost, corruptedIn, bodyLength, bodyLength, metrics, Reporter.NULL);
    fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file");
  } catch (ChecksumException expected) {
    LOG.info("The expected checksum exception was thrown.", expected);
  } finally {
    corruptedIn.close();
  }

  // The temporary shuffle file must still be readable as an IFile stream.
  IFileInputStream shuffled = new IFileInputStream(fs.open(tempShufflePath), bodyLength, job);
  try {
    shuffled.read(new byte[bodyLength], 0, bodyLength);
  } finally {
    shuffled.close();
  }
}
Aggregations