
Example 6 with ChecksumException

use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

the class DFSInputStream method readBuffer.

/* This is used by the regular read() and handles ChecksumExceptions.
 * The name readBuffer() is chosen to imply similarity to readBuffer() in
 * ChecksumFileSystem.
 */
private synchronized int readBuffer(ReaderStrategy reader, int len, CorruptedBlocks corruptedBlocks) throws IOException {
    IOException ioe;
    /* We retry the current node only once, so this is set to true only here.
     * The intention is to handle one common case of an error that is not a
     * failure of the datanode or client: the DataNode closing the connection
     * because the client is idle. If there are other such "non-error" cases,
     * a datanode might be retried by setting this to true again.
     */
    boolean retryCurrentNode = true;
    while (true) {
        // retry as many times as seekToNewSource allows.
        try {
            return reader.readFromBlock(blockReader, len);
        } catch (ChecksumException ce) {
            DFSClient.LOG.warn("Found Checksum error for " + getCurrentBlock() + " from " + currentNode + " at " + ce.getPos());
            ioe = ce;
            retryCurrentNode = false;
            // we want to remember which block replicas we have tried
            corruptedBlocks.addCorruptedBlock(getCurrentBlock(), currentNode);
        } catch (IOException e) {
            if (!retryCurrentNode) {
                DFSClient.LOG.warn("Exception while reading from " + getCurrentBlock() + " of " + src + " from " + currentNode, e);
            }
            ioe = e;
        }
        boolean sourceFound;
        if (retryCurrentNode) {
            /* Possibly retry the same node so that transient errors don't
             * result in application-level failures (e.g. the Datanode could
             * have closed the connection because the client was idle too long).
             */
            sourceFound = seekToBlockSource(pos);
        } else {
            addToDeadNodes(currentNode);
            sourceFound = seekToNewSource(pos);
        }
        if (!sourceFound) {
            throw ioe;
        }
        retryCurrentNode = false;
    }
}
Also used : ChecksumException(org.apache.hadoop.fs.ChecksumException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)
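
For contrast with the internals above, here is a minimal sketch of the caller's view of this retry machinery; the file path is hypothetical, but the API calls are the standard Hadoop client surface. Because readBuffer() records corrupt replicas and fails over internally, a ChecksumException only reaches application code once every available replica has failed verification.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ChecksumReadExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical HDFS path; any readable file will do.
        Path file = new Path("/user/example/data.bin");
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(file)) {
            byte[] buf = new byte[(int) fs.getFileStatus(file).getLen()];
            // readBuffer() runs beneath this call: on a ChecksumException it
            // records the corrupt replica and transparently tries another.
            IOUtils.readFully(in, buf, 0, buf.length);
        } catch (ChecksumException ce) {
            // Reached only when no replica passes checksum verification.
            System.err.println("All replicas corrupt at offset " + ce.getPos());
        }
    }
}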

Example 7 with ChecksumException

use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

the class TestFSInputChecker method checkFileCorruption.

private void checkFileCorruption(LocalFileSystem fileSys, Path file, Path fileToCorrupt) throws IOException {
    // corrupt the file 
    RandomAccessFile out = new RandomAccessFile(new File(fileToCorrupt.toString()), "rw");
    byte[] buf = new byte[(int) fileSys.getFileStatus(file).getLen()];
    int corruptFileLen = (int) fileSys.getFileStatus(fileToCorrupt).getLen();
    assertTrue(buf.length >= corruptFileLen);
    rand.nextBytes(buf);
    out.seek(corruptFileLen / 2);
    out.write(buf, 0, corruptFileLen / 4);
    out.close();
    boolean gotException = false;
    InputStream in = fileSys.open(file);
    try {
        IOUtils.readFully(in, buf, 0, buf.length);
    } catch (ChecksumException e) {
        gotException = true;
    }
    assertTrue(gotException);
    in.close();
}
Also used : RandomAccessFile(java.io.RandomAccessFile) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) ChecksumException(org.apache.hadoop.fs.ChecksumException) File(java.io.File)
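
The verification this test exercises comes from LocalFileSystem's hidden checksum sidecar (a .crc file next to the data file); the test corrupts the data file behind the filesystem's back, so the sidecar no longer matches. A minimal sketch of the two read paths, assuming a local file corrupted that way: the checksummed read throws, while the raw read returns the corrupt bytes silently.

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class RawVsChecksummedRead {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        // Hypothetical local file, assumed corrupted as in the test above.
        Path file = new Path("/tmp/example/data.bin");
        byte[] buf = new byte[(int) localFs.getFileStatus(file).getLen()];

        // Checksummed path: bytes are verified against the .crc sidecar,
        // so the corruption surfaces as a ChecksumException.
        try (InputStream in = localFs.open(file)) {
            IOUtils.readFully(in, buf, 0, buf.length);
        } catch (ChecksumException e) {
            System.err.println("Corruption detected at offset " + e.getPos());
        }

        // Raw path: bypasses the sidecar entirely, so the same corrupt
        // bytes come back without complaint.
        try (InputStream in = localFs.getRaw().open(file)) {
            IOUtils.readFully(in, buf, 0, buf.length);
        }
    }
}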

Example 8 with ChecksumException

use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

the class TestIndexCache method testBadIndex.

public void testBadIndex() throws Exception {
    final int parts = 30;
    fs.delete(p, true);
    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
    IndexCache cache = new IndexCache(conf);
    Path f = new Path(p, "badindex");
    FSDataOutputStream out = fs.create(f, false);
    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
    DataOutputStream dout = new DataOutputStream(iout);
    for (int i = 0; i < parts; ++i) {
        for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
            if (0 == (i % 3)) {
                dout.writeLong(i);
            } else {
                out.writeLong(i);
            }
        }
    }
    out.writeLong(iout.getChecksum().getValue());
    dout.close();
    try {
        cache.getIndexInformation("badindex", 7, f, UserGroupInformation.getCurrentUser().getShortUserName());
        fail("Did not detect bad checksum");
    } catch (IOException e) {
        if (!(e.getCause() instanceof ChecksumException)) {
            throw e;
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CRC32(java.util.zip.CRC32) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) ChecksumException(org.apache.hadoop.fs.ChecksumException) CheckedOutputStream(java.util.zip.CheckedOutputStream) IOException(java.io.IOException)
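
Note what makes the index bad: records written through out land in the file but bypass iout, so the CRC32 never sees them and the stored trailing checksum cannot match what a reader recomputes. A self-contained sketch of the underlying trailing-checksum pattern, using only java.util.zip (the record count and in-memory streams are placeholders):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;
import java.util.zip.CheckedOutputStream;

public class TrailingChecksumExample {
    public static void main(String[] args) throws IOException {
        // Write: every record goes through the CheckedOutputStream so the
        // CRC32 accumulates over all of them; the checksum itself is then
        // written through the underlying stream, as the index file does.
        ByteArrayOutputStream store = new ByteArrayOutputStream();
        CheckedOutputStream cout = new CheckedOutputStream(store, new CRC32());
        DataOutputStream dout = new DataOutputStream(cout);
        for (long i = 0; i < 10; ++i) {
            dout.writeLong(i);
        }
        new DataOutputStream(store).writeLong(cout.getChecksum().getValue());
        dout.close();

        // Read: re-accumulate the CRC32 over the records, snapshot it before
        // the trailer, then compare; a mismatch means the data is corrupt.
        CheckedInputStream cin = new CheckedInputStream(
            new ByteArrayInputStream(store.toByteArray()), new CRC32());
        DataInputStream din = new DataInputStream(cin);
        for (int i = 0; i < 10; ++i) {
            din.readLong();
        }
        long expected = cin.getChecksum().getValue();  // snapshot before trailer
        long stored = din.readLong();
        if (expected != stored) {
            throw new IOException("bad index checksum");
        }
    }
}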

Example 9 with ChecksumException

use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

the class TestFetcher method testCorruptedIFile.

@Test
public void testCorruptedIFile() throws Exception {
    final int fetcher = 7;
    Path onDiskMapOutputPath = new Path(name.getMethodName() + "/foo");
    Path shuffledToDisk = OnDiskMapOutput.getTempPath(onDiskMapOutputPath, fetcher);
    fs = FileSystem.getLocal(job).getRaw();
    IFileWrappedMapOutput<Text, Text> odmo = new OnDiskMapOutput<Text, Text>(map1ID, mm, 100L, job, fetcher, true, fs, onDiskMapOutputPath);
    String mapData = "MAPDATA12345678901234567890";
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bout);
    IFileOutputStream ios = new IFileOutputStream(dos);
    header.write(dos);
    int headerSize = dos.size();
    try {
        ios.write(mapData.getBytes());
    } finally {
        ios.close();
    }
    int dataSize = bout.size() - headerSize;
    // Ensure that the OnDiskMapOutput shuffler can successfully read the data.
    MapHost host = new MapHost("TestHost", "http://test/url");
    ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
    try {
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
    } finally {
        bin.close();
    }
    // Now corrupt the IFile data.
    byte[] corrupted = bout.toByteArray();
    corrupted[headerSize + (dataSize / 2)] = 0x0;
    try {
        bin = new ByteArrayInputStream(corrupted);
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
        fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file");
    } catch (ChecksumException e) {
        LOG.info("The expected checksum exception was thrown.", e);
    } finally {
        bin.close();
    }
    // Ensure that the shuffled file can be read.
    IFileInputStream iFin = new IFileInputStream(fs.open(shuffledToDisk), dataSize, job);
    try {
        iFin.read(new byte[dataSize], 0, dataSize);
    } finally {
        iFin.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) ChecksumException(org.apache.hadoop.fs.ChecksumException) Text(org.apache.hadoop.io.Text) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) IFileOutputStream(org.apache.hadoop.mapred.IFileOutputStream) IFileInputStream(org.apache.hadoop.mapred.IFileInputStream) Test(org.junit.Test)
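
The detection here comes from the IFileOutputStream/IFileInputStream pair, which append and then verify a trailing checksum over the IFile payload. A minimal round-trip sketch under the same classes; note these are internal MapReduce APIs, and the in-memory streams stand in for the real shuffle files.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.mapred.IFileInputStream;
import org.apache.hadoop.mapred.IFileOutputStream;

public class IFileRoundTrip {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        byte[] payload = "MAPDATA12345678901234567890".getBytes();

        // IFileOutputStream appends a checksum of everything written to it.
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        IFileOutputStream ios = new IFileOutputStream(bout);
        try {
            ios.write(payload);
        } finally {
            ios.close();  // flushes the trailing checksum
        }
        byte[] onWire = bout.toByteArray();  // payload + checksum bytes

        // Flip one payload byte; IFileInputStream verifies the trailer once
        // the payload has been consumed and throws on the mismatch.
        onWire[payload.length / 2] ^= 0x1;
        IFileInputStream iin = new IFileInputStream(
            new ByteArrayInputStream(onWire), onWire.length, conf);
        try {
            iin.read(new byte[payload.length], 0, payload.length);
            System.err.println("corruption went undetected");
        } catch (ChecksumException e) {
            System.out.println("corruption detected, as expected");
        } finally {
            iin.close();
        }
    }
}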

Example 10 with ChecksumException

use of org.apache.hadoop.fs.ChecksumException in project hbase by apache.

the class ChecksumUtil method validateChecksum.

/**
   * Validates that the data in the specified HFileBlock matches the checksum. Generates the
   * checksums for the data and then validates that they match those stored at the end of the
   * data.
   * @param buffer Contains the data in the following order: HFileBlock header, data, checksums.
   * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
   * @param offset offset of the data being validated. Only used for logging.
   * @param hdrSize Size of the block header in {@code data}. Only used for logging.
   * @return True if the checksum matches, else false.
   */
static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize) throws IOException {
    // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
    // so we always return true.
    ChecksumType cktype = ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
    if (cktype == ChecksumType.NULL) {
        // No checksum validations needed for this block.
        return true;
    }
    // read in the stored value of the checksum size from the header.
    int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
    DataChecksum dataChecksum = DataChecksum.newDataChecksum(cktype.getDataChecksumType(), bytesPerChecksum);
    assert dataChecksum != null;
    int onDiskDataSizeWithHeader = buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
    if (LOG.isTraceEnabled()) {
        LOG.info("dataLength=" + buffer.capacity() + ", sizeWithHeader=" + onDiskDataSizeWithHeader + ", checksumType=" + cktype.getName() + ", file=" + pathName + ", offset=" + offset + ", headerSize=" + hdrSize + ", bytesPerChecksum=" + bytesPerChecksum);
    }
    try {
        ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
        ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader).limit(buffer.capacity());
        dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
    } catch (ChecksumException e) {
        return false;
    }
    // checksum is valid
    return true;
}
Also used : ChecksumException(org.apache.hadoop.fs.ChecksumException) ChecksumType(org.apache.hadoop.hbase.util.ChecksumType) ByteBuffer(java.nio.ByteBuffer) DataChecksum(org.apache.hadoop.util.DataChecksum)
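
The verifyChunkedSums() call above is the entire verification step: HFileBlock stores one checksum per bytesPerChecksum-sized chunk after the data, which is why the method splits a single buffer into data and checksums views. A standalone sketch of the same DataChecksum API, with a hypothetical chunk size and synthetic data, showing the generate-then-verify cycle:

import java.nio.ByteBuffer;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.util.DataChecksum;

public class ChunkedSumsExample {
    public static void main(String[] args) {
        int bytesPerChecksum = 512;  // hypothetical chunk size
        DataChecksum sum = DataChecksum.newDataChecksum(
            DataChecksum.Type.CRC32C, bytesPerChecksum);

        // Three full chunks of synthetic data.
        ByteBuffer data = ByteBuffer.allocate(3 * bytesPerChecksum);
        for (int i = 0; i < data.capacity(); ++i) {
            data.put(i, (byte) i);
        }
        ByteBuffer checksums = ByteBuffer.allocate(3 * sum.getChecksumSize());

        // One checksum per bytesPerChecksum-sized chunk of data.
        sum.calculateChunkedSums(data, checksums);

        // Corrupt a byte in the second chunk; the exception's position
        // identifies the offending chunk.
        data.put(bytesPerChecksum + 7, (byte) 0xFF);
        try {
            sum.verifyChunkedSums(data, checksums, "example-file", 0);
        } catch (ChecksumException e) {
            System.out.println("corrupt chunk at pos " + e.getPos());
        }
    }
}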

Aggregations

ChecksumException (org.apache.hadoop.fs.ChecksumException) 20
Path (org.apache.hadoop.fs.Path) 12
Test (org.junit.Test) 12
IOException (java.io.IOException) 9
Configuration (org.apache.hadoop.conf.Configuration) 7
FileSystem (org.apache.hadoop.fs.FileSystem) 7
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock) 6
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 5
File (java.io.File) 4
RandomAccessFile (java.io.RandomAccessFile) 4
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 4
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 4
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 4
DataOutputStream (java.io.DataOutputStream) 3
InterruptedIOException (java.io.InterruptedIOException) 3
InetSocketAddress (java.net.InetSocketAddress) 3
ByteBuffer (java.nio.ByteBuffer) 3
DFSClient (org.apache.hadoop.hdfs.DFSClient) 3
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration) 3
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks) 3