Example 1 with IFileOutputStream

Use of org.apache.hadoop.mapred.IFileOutputStream in project hadoop by apache.

From the class TestFetcher, method testCorruptedIFile. The test writes map data through an IFileOutputStream, verifies that OnDiskMapOutput can shuffle the intact bytes, then corrupts one byte of the checksummed payload and checks that the shuffle fails with a ChecksumException.

@Test
public void testCorruptedIFile() throws Exception {
    final int fetcher = 7;
    Path onDiskMapOutputPath = new Path(name.getMethodName() + "/foo");
    Path shuffledToDisk = OnDiskMapOutput.getTempPath(onDiskMapOutputPath, fetcher);
    fs = FileSystem.getLocal(job).getRaw();
    IFileWrappedMapOutput<Text, Text> odmo = new OnDiskMapOutput<Text, Text>(map1ID, mm, 100L, job, fetcher, true, fs, onDiskMapOutputPath);
    String mapData = "MAPDATA12345678901234567890";
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bout);
    IFileOutputStream ios = new IFileOutputStream(dos);
    header.write(dos);
    int headerSize = dos.size();
    try {
        ios.write(mapData.getBytes());
    } finally {
        ios.close();
    }
    int dataSize = bout.size() - headerSize;
    // Ensure that the OnDiskMapOutput shuffler can successfully read the data.
    MapHost host = new MapHost("TestHost", "http://test/url");
    ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
    try {
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
    } finally {
        bin.close();
    }
    // Now corrupt the IFile data.
    byte[] corrupted = bout.toByteArray();
    corrupted[headerSize + (dataSize / 2)] = 0x0;
    try {
        bin = new ByteArrayInputStream(corrupted);
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
        fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file");
    } catch (ChecksumException e) {
        LOG.info("The expected checksum exception was thrown.", e);
    } finally {
        bin.close();
    }
    // Ensure that the shuffled file can be read.
    IFileInputStream iFin = new IFileInputStream(fs.open(shuffledToDisk), dataSize, job);
    try {
        iFin.read(new byte[dataSize], 0, dataSize);
    } finally {
        iFin.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DataOutputStream (java.io.DataOutputStream), ChecksumException (org.apache.hadoop.fs.ChecksumException), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), IFileOutputStream (org.apache.hadoop.mapred.IFileOutputStream), IFileInputStream (org.apache.hadoop.mapred.IFileInputStream), Test (org.junit.Test)
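
Setting the shuffle plumbing aside, the behavior this test exercises is the IFileOutputStream/IFileInputStream checksum round trip: close() (or finish()) appends a CRC trailer to whatever was written, and IFileInputStream verifies that trailer as the payload is consumed. A minimal standalone sketch of the round trip, assuming only the APIs used above; the class name and the single flipped byte are illustrative, not from the test:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.mapred.IFileInputStream;
import org.apache.hadoop.mapred.IFileOutputStream;

public class IFileChecksumRoundTrip {
    public static void main(String[] args) throws Exception {
        byte[] payload = "MAPDATA123".getBytes();

        // Write through IFileOutputStream; close() appends the CRC
        // trailer to the wrapped stream.
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        IFileOutputStream ios = new IFileOutputStream(bout);
        try {
            ios.write(payload);
        } finally {
            ios.close();
        }

        // Flip one payload byte to simulate corruption in transit.
        byte[] corrupted = bout.toByteArray();
        corrupted[payload.length / 2] ^= 0xFF;

        // IFileInputStream is told the total length (payload plus
        // trailer) and verifies the checksum once the payload is read.
        IFileInputStream iin = new IFileInputStream(
            new ByteArrayInputStream(corrupted), corrupted.length,
            new Configuration());
        try {
            iin.read(new byte[payload.length], 0, payload.length);
        } catch (ChecksumException e) {
            System.out.println("corruption detected: " + e.getMessage());
        } finally {
            iin.close();
        }
    }
}

This is the same failure path the test drives through OnDiskMapOutput.shuffle, which reads its input through an IFileInputStream internally.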

Example 2 with IFileOutputStream

Use of org.apache.hadoop.mapred.IFileOutputStream in project hadoop by apache.

From the class TestFetcher, method testCopyFromHostExtraBytes. The test mocks a shuffle HTTP reply carrying two IFile segments, reserves too little space for the first map output, and verifies that the fetcher fails only that copy while putting both map outputs back for retry.

@Test
public void testCopyFromHostExtraBytes() throws Exception {
    Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm, r, metrics, except, key, connection);
    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
    when(connection.getResponseCode()).thenReturn(200);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bout);
    IFileOutputStream ios = new IFileOutputStream(dos);
    header.write(dos);
    ios.write("MAPDATA123".getBytes());
    ios.finish();
    ShuffleHeader header2 = new ShuffleHeader(map2ID.toString(), 14, 10, 1);
    IFileOutputStream ios2 = new IFileOutputStream(dos);
    header2.write(dos);
    ios2.write("MAPDATA456".getBytes());
    ios2.finish();
    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
    when(connection.getInputStream()).thenReturn(in);
    // The first output is reserved at only 8 bytes but the segment's
    // uncompressed length is 10, so the copy sees extra bytes in the
    // IFileInputStream and fails.
    IFileWrappedMapOutput<Text, Text> mapOut = new InMemoryMapOutput<Text, Text>(job, map1ID, mm, 8, null, true);
    IFileWrappedMapOutput<Text, Text> mapOut2 = new InMemoryMapOutput<Text, Text>(job, map2ID, mm, 10, null, true);
    when(mm.reserve(eq(map1ID), anyLong(), anyInt())).thenReturn(mapOut);
    when(mm.reserve(eq(map2ID), anyLong(), anyInt())).thenReturn(mapOut2);
    underTest.copyFromHost(host);
    verify(allErrs).increment(1);
    verify(ss).copyFailed(map1ID, host, true, false);
    verify(ss, never()).copyFailed(map2ID, host, true, false);
    verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
    verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), DataOutputStream (java.io.DataOutputStream), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), IFileOutputStream (org.apache.hadoop.mapred.IFileOutputStream), Test (org.junit.Test)
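
The header fields line up with the payload arithmetic: "MAPDATA123" is 10 bytes and the CRC trailer adds 4 more, which is why each ShuffleHeader advertises a compressed length of 14 and an uncompressed length of 10, and why the 8-byte reservation for map1ID is too small. Note also that the test calls finish() rather than close(): finish() flushes the trailer while leaving the shared DataOutputStream open for the second segment. A hypothetical sketch of building such a two-segment reply; the class name, helper method, and map ID strings are illustrative, not from the test:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.mapred.IFileOutputStream;
import org.apache.hadoop.mapreduce.task.reduce.ShuffleHeader;

public class TwoSegmentReply {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bout);

        appendSegment(dos, "attempt_123_0001_m_000001_0", "MAPDATA123");
        appendSegment(dos, "attempt_123_0001_m_000002_0", "MAPDATA456");

        // Two segments, each: header + 10 data bytes + 4-byte trailer.
        System.out.println("reply is " + bout.size() + " bytes");
    }

    static void appendSegment(DataOutputStream dos, String mapId,
            String data) throws IOException {
        // 10 payload bytes plus the 4-byte CRC trailer give the on-wire
        // (compressed) length of 14 advertised in the header.
        new ShuffleHeader(mapId, 14, 10, 1).write(dos);
        IFileOutputStream ios = new IFileOutputStream(dos);
        ios.write(data.getBytes());
        // finish() writes the trailer but leaves dos open, so the next
        // segment can be appended; close() would also close dos.
        ios.finish();
    }
}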

Aggregations

ByteArrayInputStream (java.io.ByteArrayInputStream): 2
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2
DataOutputStream (java.io.DataOutputStream): 2
Text (org.apache.hadoop.io.Text): 2
IFileOutputStream (org.apache.hadoop.mapred.IFileOutputStream): 2
Test (org.junit.Test): 2
ChecksumException (org.apache.hadoop.fs.ChecksumException): 1
Path (org.apache.hadoop.fs.Path): 1
IFileInputStream (org.apache.hadoop.mapred.IFileInputStream): 1