Example 11 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestBlockReaderLocalLegacy, method testStablePositionAfterCorruptRead.

/**
   * Test that, in the case of an error, the position and limit of a ByteBuffer
   * are left unchanged. This is not mandated by ByteBufferReadable, but clients
   * of this class might immediately issue a retry on failure, so it's polite.
   */
@Test
public void testStablePositionAfterCorruptRead() throws Exception {
    final short REPL_FACTOR = 1;
    final long FILE_LENGTH = 512L;
    HdfsConfiguration conf = getConfiguration(null);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    Path path = new Path("/corrupted");
    DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L);
    DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path);
    int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block);
    assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted);
    FSDataInputStream dis = cluster.getFileSystem().open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect((int) FILE_LENGTH);
    boolean sawException = false;
    try {
        dis.read(buf);
    } catch (ChecksumException ex) {
        sawException = true;
    }
    assertTrue(sawException);
    // The failed read must leave position and limit exactly as they were.
    assertEquals(0, buf.position());
    assertEquals(buf.capacity(), buf.limit());
    // Repeat with a non-trivial position and limit on a fresh stream.
    dis = cluster.getFileSystem().open(path);
    buf.position(3);
    buf.limit(25);
    sawException = false;
    try {
        dis.read(buf);
    } catch (ChecksumException ex) {
        sawException = true;
    }
    assertTrue(sawException);
    assertEquals(3, buf.position());
    assertEquals(25, buf.limit());
    cluster.shutdown();
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) ChecksumException(org.apache.hadoop.fs.ChecksumException) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) ByteBuffer(java.nio.ByteBuffer) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
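The contract verified above is what makes a blind retry safe for callers. Below is a minimal sketch of such a retry helper (hypothetical, not part of the Hadoop sources); note that in this test the retry would fail again, since the only replica is corrupt.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;

class RetryingByteBufferRead {
    /** Read into buf, retrying once if the first attempt hits a checksum error. */
    static int readWithOneRetry(FSDataInputStream in, ByteBuffer buf)
            throws IOException {
        try {
            return in.read(buf);
        } catch (ChecksumException first) {
            // The failed read left buf.position() and buf.limit() untouched,
            // so the retry targets exactly the same destination window.
            return in.read(buf);
        }
    }
}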

Example 12 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestEditLog, method testEditChecksum.

@Test
public void testEditChecksum() throws Exception {
    // start a cluster 
    Configuration conf = getConf();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();
    fileSys.mkdirs(new Path("/tmp"));
    Iterator<StorageDirectory> iter = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS);
    LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
    while (iter.hasNext()) {
        sds.add(iter.next());
    }
    editLog.close();
    cluster.shutdown();
    for (StorageDirectory sd : sds) {
        File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
        assertTrue(editFile.exists());
        long fileLen = editFile.length();
        LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
        RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
        // Seek to the trailing 4 checksum bytes and corrupt the stored value.
        rwf.seek(fileLen - 4);
        int b = rwf.readInt();
        rwf.seek(fileLen - 4);
        rwf.writeInt(b + 1);
        rwf.close();
    }
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
        fail("should not be able to start");
    } catch (IOException e) {
        // expected
        assertNotNull("Cause of exception should be ChecksumException", e.getCause());
        assertEquals("Cause of exception should be ChecksumException", ChecksumException.class, e.getCause().getClass());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) ChecksumException(org.apache.hadoop.fs.ChecksumException) StorageDirectory(org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory) IOException(java.io.IOException) LinkedList(java.util.LinkedList) RandomAccessFile(java.io.RandomAccessFile) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Test(org.junit.Test)
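The test flips 4 bytes at the very end of a finalized edits file; since each edit log op carries a trailing checksum, the corruption surfaces as a ChecksumException when the log is replayed at startup. A minimal sketch of the kind of check that trips here, assuming a record followed by a plain CRC32 of its payload (the real FSEditLog framing is more involved and uses PureJavaCrc32):

import java.util.zip.CRC32;

import org.apache.hadoop.fs.ChecksumException;

class RecordChecksumCheck {
    /** Verify a record whose payload is followed by a 4-byte CRC32 of that payload. */
    static void verify(byte[] payload, int storedChecksum, long offset)
            throws ChecksumException {
        CRC32 crc = new CRC32();
        crc.update(payload, 0, payload.length);
        if ((int) crc.getValue() != storedChecksum) {
            // Mirrors what a log reader reports when the stored and recomputed
            // checksums disagree.
            throw new ChecksumException("Mismatched CRC at offset " + offset, offset);
        }
    }
}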

Example 13 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestFsck, method testUnderMinReplicatedBlock.

@Test
public void testUnderMinReplicatedBlock() throws Exception {
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
    // Set short retry timeouts so this test runs faster
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
    // Set minReplication to 2
    short minReplication = 2;
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, minReplication);
    FileSystem fs = null;
    DFSClient dfsClient = null;
    LocatedBlocks blocks = null;
    int replicaCount = 0;
    Random random = new Random();
    String outStr = null;
    short factor = 1;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    Path file1 = new Path("/testUnderMinReplicatedBlock");
    DFSTestUtil.createFile(fs, file1, 1024, minReplication, 0);
    // Wait until file replication has completed
    DFSTestUtil.waitReplication(fs, file1, minReplication);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
    // Make sure filesystem is in healthy state
    outStr = runFsck(conf, 0, true, "/");
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
    // corrupt the first replica
    File blockFile = cluster.getBlockFile(0, block);
    if (blockFile != null && blockFile.exists()) {
        RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
        FileChannel channel = raFile.getChannel();
        String badString = "BADBAD";
        int rand = random.nextInt((int) channel.size() / 2);
        raFile.seek(rand);
        raFile.write(badString.getBytes());
        raFile.close();
    }
    dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
    blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
    replicaCount = blocks.get(0).getLocations().length;
    while (replicaCount != factor) {
        try {
            Thread.sleep(100);
            // Read the file to trigger reportBadBlocks
            try {
                IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true);
            } catch (IOException ie) {
                assertTrue(ie instanceof ChecksumException);
            }
            System.out.println("sleep in try: replicaCount=" + replicaCount + "  factor=" + factor);
        } catch (InterruptedException ignore) {
        }
        blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
        replicaCount = blocks.get(0).getLocations().length;
    }
    // Check if fsck reports the same
    outStr = runFsck(conf, 0, true, "/");
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
    assertTrue(outStr.contains("UNDER MIN REPL'D BLOCKS:\t1 (100.0 %)"));
    assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
}
Also used : DFSClient(org.apache.hadoop.hdfs.DFSClient) Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileChannel(java.nio.channels.FileChannel) InetSocketAddress(java.net.InetSocketAddress) ChecksumException(org.apache.hadoop.fs.ChecksumException) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) Matchers.anyString(org.mockito.Matchers.anyString) IOException(java.io.IOException) IOUtils(org.apache.hadoop.io.IOUtils) Random(java.util.Random) RandomAccessFile(java.io.RandomAccessFile) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Test(org.junit.Test)
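The wait loop above polls the NameNode until the corrupt replica has been dropped, relying on the ChecksumException raised by the read to trigger reportBadBlocks; it has no upper bound. A minimal sketch of the same poll with a deadline (hypothetical helper, names are illustrative):

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

class ReplicaCountWaiter {
    /** Poll the first block's replica count until it reaches expected or the deadline passes. */
    static void waitForReplicaCount(DFSClient client, String path, int expected,
            long timeoutMs) throws IOException, InterruptedException {
        long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
        while (true) {
            LocatedBlocks blocks =
                    client.getNamenode().getBlockLocations(path, 0, Long.MAX_VALUE);
            int count = blocks.get(0).getLocations().length;
            if (count == expected) {
                return;
            }
            if (System.nanoTime() > deadline) {
                throw new IOException("Timed out waiting for replica count " + expected
                        + ", last seen " + count);
            }
            Thread.sleep(100);
        }
    }
}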

Example 14 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class DFSInputStream, method readWithStrategy.

protected synchronized int readWithStrategy(ReaderStrategy strategy) throws IOException {
    dfsClient.checkOpen();
    if (closed.get()) {
        throw new IOException("Stream closed");
    }
    int len = strategy.getTargetLength();
    CorruptedBlocks corruptedBlocks = new CorruptedBlocks();
    failures = 0;
    if (pos < getFileLength()) {
        int retries = 2;
        while (retries > 0) {
            try {
                // currentNode can be left as null if a previous read hit a checksum
                // error on the same block. See HDFS-3067
                if (pos > blockEnd || currentNode == null) {
                    currentNode = blockSeekTo(pos);
                }
                int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
                synchronized (infoLock) {
                    if (locatedBlocks.isLastBlockComplete()) {
                        realLen = (int) Math.min(realLen, locatedBlocks.getFileLength() - pos);
                    }
                }
                int result = readBuffer(strategy, realLen, corruptedBlocks);
                if (result >= 0) {
                    pos += result;
                } else {
                    // got an EOS from the reader though we expected more data on it.
                    throw new IOException("Unexpected EOS from the reader");
                }
                return result;
            } catch (ChecksumException ce) {
                // A checksum failure is not retried here; it is rethrown to the caller.
                throw ce;
            } catch (IOException e) {
                checkInterrupted(e);
                if (retries == 1) {
                    DFSClient.LOG.warn("DFS Read", e);
                }
                blockEnd = -1;
                if (currentNode != null) {
                    addToDeadNodes(currentNode);
                }
                if (--retries == 0) {
                    throw e;
                }
            } finally {
                // Report corrupt block replicas whether the read was successful
                // or a ChecksumException occurred.
                reportCheckSumFailure(corruptedBlocks, currentLocatedBlock.getLocations().length, false);
            }
        }
    }
    return -1;
}
Also used : CorruptedBlocks(org.apache.hadoop.hdfs.DFSUtilClient.CorruptedBlocks) ChecksumException(org.apache.hadoop.fs.ChecksumException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)
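The retry policy above treats a ChecksumException as fatal for the current read (it is rethrown immediately), while other IOExceptions mark the datanode dead and are retried a bounded number of times. A minimal sketch of that policy in isolation (hypothetical helper, not DFSInputStream itself):

import java.io.IOException;

import org.apache.hadoop.fs.ChecksumException;

class BoundedRetryRead {
    interface ReadAttempt {
        int read() throws IOException;
    }

    /** Rethrow checksum failures immediately; retry other IOExceptions up to retries times. */
    static int readWithRetries(ReadAttempt attempt, int retries) throws IOException {
        while (true) {
            try {
                return attempt.read();
            } catch (ChecksumException ce) {
                // Corrupt data will not get better by re-reading the same replica.
                throw ce;
            } catch (IOException e) {
                if (--retries == 0) {
                    throw e;  // out of attempts, surface the last failure
                }
                // Otherwise loop and try again; DFSInputStream additionally marks
                // the failing datanode dead and re-seeks the block at this point.
            }
        }
    }
}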

Example 15 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class DFSInputStream, method actualGetFromOneDataNode.

/**
   * Read data from one DataNode.
   *
   * @param datanode          the datanode from which to read data
   * @param block             the located block containing the requested data
   * @param startInBlk        the startInBlk offset of the block
   * @param endInBlk          the endInBlk offset of the block
   * @param buf               the given byte buffer into which the data is read
   * @param corruptedBlocks   map recording list of datanodes with corrupted
   *                          block replica
   */
void actualGetFromOneDataNode(final DNAddrPair datanode, LocatedBlock block,
    final long startInBlk, final long endInBlk, ByteBuffer buf,
    CorruptedBlocks corruptedBlocks) throws IOException {
    DFSClientFaultInjector.get().startFetchFromDatanode();
    // only need to get a new access token once
    int refetchToken = 1;
    // only need to get a new encryption key once
    int refetchEncryptionKey = 1;
    final int len = (int) (endInBlk - startInBlk + 1);
    while (true) {
        // cached block locations may have been updated by chooseDataNode()
        // or fetchBlockAt(). Always get the latest list of locations at the
        // start of the loop.
        block = refreshLocatedBlock(block);
        BlockReader reader = null;
        try {
            DFSClientFaultInjector.get().fetchFromDatanodeException();
            reader = getBlockReader(block, startInBlk, len, datanode.addr, datanode.storageType, datanode.info);
            // Behave exactly as the readAll() call
            ByteBuffer tmp = buf.duplicate();
            tmp.limit(tmp.position() + len);
            tmp = tmp.slice();
            int nread = 0;
            int ret;
            while (true) {
                ret = reader.read(tmp);
                if (ret <= 0) {
                    break;
                }
                nread += ret;
            }
            buf.position(buf.position() + nread);
            IOUtilsClient.updateReadStatistics(readStatistics, nread, reader);
            dfsClient.updateFileSystemReadStats(reader.getNetworkDistance(), nread);
            if (nread != len) {
                throw new IOException("truncated return from reader.read(): " + "excpected " + len + ", got " + nread);
            }
            DFSClientFaultInjector.get().readFromDatanodeDelay();
            return;
        } catch (ChecksumException e) {
            String msg = "fetchBlockByteRange(). Got a checksum exception for " + src + " at " + block.getBlock() + ":" + e.getPos() + " from " + datanode.info;
            DFSClient.LOG.warn(msg);
            // we want to remember what we have tried
            corruptedBlocks.addCorruptedBlock(block.getBlock(), datanode.info);
            addToDeadNodes(datanode.info);
            throw new IOException(msg);
        } catch (IOException e) {
            checkInterrupted(e);
            if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
                DFSClient.LOG.info("Will fetch a new encryption key and retry, " + "encryption key was invalid when connecting to " + datanode.addr + " : " + e);
                // The encryption key used is invalid.
                refetchEncryptionKey--;
                dfsClient.clearDataEncryptionKey();
            } else if (refetchToken > 0 && tokenRefetchNeeded(e, datanode.addr)) {
                refetchToken--;
                try {
                    fetchBlockAt(block.getStartOffset());
                } catch (IOException fbae) {
                // ignore IOE, since we can retry it later in a loop
                }
            } else {
                String msg = "Failed to connect to " + datanode.addr + " for file " + src + " for block " + block.getBlock() + ":" + e;
                DFSClient.LOG.warn("Connection failure: " + msg, e);
                addToDeadNodes(datanode.info);
                throw new IOException(msg);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}
Also used : InvalidEncryptionKeyException(org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException) ChecksumException(org.apache.hadoop.fs.ChecksumException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer)
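The duplicate/limit/slice sequence above gives the block reader a bounded view of the caller's buffer, so the caller's position only advances once the full range has been read, mirroring readAll() semantics. A minimal sketch of the same pattern against a generic reader (hypothetical interface, illustrative only):

import java.io.IOException;
import java.nio.ByteBuffer;

class SlicedReadInto {
    interface ByteBufferReader {
        int read(ByteBuffer dst) throws IOException;  // returns <= 0 at end of stream
    }

    /** Read exactly len bytes into buf via a sliced view, advancing buf only on success. */
    static void readFully(ByteBufferReader reader, ByteBuffer buf, int len)
            throws IOException {
        ByteBuffer tmp = buf.duplicate();   // independent position/limit, shared contents
        tmp.limit(tmp.position() + len);    // cap the view at exactly len bytes
        tmp = tmp.slice();
        int nread = 0;
        int ret;
        while ((ret = reader.read(tmp)) > 0) {
            nread += ret;
        }
        if (nread != len) {
            throw new IOException("truncated read: expected " + len + ", got " + nread);
        }
        buf.position(buf.position() + nread);  // commit the caller's position only now
    }
}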

Aggregations

ChecksumException (org.apache.hadoop.fs.ChecksumException): 20 usages
Path (org.apache.hadoop.fs.Path): 12 usages
Test (org.junit.Test): 12 usages
IOException (java.io.IOException): 9 usages
Configuration (org.apache.hadoop.conf.Configuration): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 7 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 5 usages
File (java.io.File): 4 usages
RandomAccessFile (java.io.RandomAccessFile): 4 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 4 usages
DataOutputStream (java.io.DataOutputStream): 3 usages
InterruptedIOException (java.io.InterruptedIOException): 3 usages
InetSocketAddress (java.net.InetSocketAddress): 3 usages
ByteBuffer (java.nio.ByteBuffer): 3 usages
DFSClient (org.apache.hadoop.hdfs.DFSClient): 3 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 3 usages
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 3 usages