Example 6 with DataNodeFaultInjector

Use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by Apache.

The class TestClientProtocolForPipelineRecovery, method testPipelineRecoveryWithTransferBlock:
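
The test below hinges on one pattern: DataNodeFaultInjector.get() and DataNodeFaultInjector.set(...) swap a process-wide injector whose hooks (here, failPipeline) the datanode calls at its fault points, and the original injector is restored in a finally block so the fault does not leak into later tests. A minimal sketch of that pattern, reduced from the test itself:

DataNodeFaultInjector previous = DataNodeFaultInjector.get();
try {
    DataNodeFaultInjector.set(new DataNodeFaultInjector() {

        @Override
        public void failPipeline(ReplicaInPipeline replicaInfo, String mirror) throws IOException {
            // Throw an IOException here to simulate a pipeline failure
            // at the datanode that invoked this hook.
        }
    });
    // ... exercise the write pipeline under the injected fault ...
} finally {
    // Always restore the previous injector.
    DataNodeFaultInjector.set(previous);
}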

// Test to verify that blocks are no longer corrupted after HDFS-4660.
// If HDFS-4660 and the related fixes (HDFS-9220, HDFS-8722) were reverted,
// this test would fail.
// Scenario: prior to the fix, a block got corrupted when transferBlock
// happened during pipeline recovery and extra bytes were needed to make up
// the end of a chunk.
// For verification, we need to fail the pipeline for the last datanode
// while the second datanode has more bytes on disk than have been acked.
// This lets the extra bytes be transferred to the newNode to make up the
// end-of-chunk during pipeline recovery, and is achieved by the customized
// DataNodeFaultInjector class in this test.
// For details, refer to HDFS-4660 and HDFS-10587. HDFS-9220 fixes an issue
// in the HDFS-4660 patch, and HDFS-8722 is an optimization.
@Test
public void testPipelineRecoveryWithTransferBlock() throws Exception {
    final int chunkSize = 512;
    final int oneWriteSize = 5000;
    final int totalSize = 1024 * 1024;
    final int errorInjectionPos = 512;
    Configuration conf = new HdfsConfiguration();
    // Need 4 datanodes so a replacement datanode is available during pipeline recovery.
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    DataNodeFaultInjector old = DataNodeFaultInjector.get();
    try {
        DistributedFileSystem fs = cluster.getFileSystem();
        Path fileName = new Path("/f");
        FSDataOutputStream o = fs.create(fileName);
        int count = 0;
        // Flush to get the pipeline created.
        o.writeBytes("hello");
        o.hflush();
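        // Unwrap the DFSOutputStream so the test can inspect the live write pipeline.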
        DFSOutputStream dfsO = (DFSOutputStream) o.getWrappedStream();
        final DatanodeInfo[] pipeline = dfsO.getStreamer().getNodes();
        final String lastDn = pipeline[2].getXferAddr(false);
        final AtomicBoolean failed = new AtomicBoolean(false);
        DataNodeFaultInjector.set(new DataNodeFaultInjector() {

            @Override
            public void failPipeline(ReplicaInPipeline replicaInfo, String mirror) throws IOException {
                if (!lastDn.equals(mirror)) {
                    // Only fail on the second DN, whose mirror is the last DN.
                    return;
                }
                if (!failed.get() && (replicaInfo.getBytesAcked() > errorInjectionPos) && (replicaInfo.getBytesAcked() % chunkSize != 0)) {
                    int tries = 0;
                    while (tries < 10) {
                        // Wait until bytesOnDisk is at least one full chunk
                        // ahead of bytesAcked, so that the partial-chunk
                        // transfer described in HDFS-4660 would occur.
                        if ((replicaInfo.getBytesOnDisk() / chunkSize) - (replicaInfo.getBytesAcked() / chunkSize) >= 1) {
                            failed.set(true);
                            throw new IOException("Failing Pipeline " + replicaInfo.getBytesAcked() + " : " + replicaInfo.getBytesOnDisk());
                        }
                        try {
                            Thread.sleep(200);
                        } catch (InterruptedException e) {
                            // Ignore the interrupt and keep polling.
                        }
                        tries++;
                    }
                }
            }
        });
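        // Write ~1 MB of random data, flushing every 5000 bytes so the
        // on-disk and acked byte counts diverge and the injector can fire.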
        Random r = new Random();
        byte[] b = new byte[oneWriteSize];
        while (count < totalSize) {
            r.nextBytes(b);
            o.write(b);
            count += oneWriteSize;
            o.hflush();
        }
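        // The injected failure must have occurred during the writes above.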
        assertTrue("Expected a failure in the pipeline", failed.get());
        DatanodeInfo[] newNodes = dfsO.getStreamer().getNodes();
        o.close();
        // Trigger block report to NN
        for (DataNode d : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(d);
        }
        // Read from the replaced datanode to check for corruption. Shut down
        // all the other pipeline nodes so the read must be served by newNode.
        List<DatanodeInfo> pipelineList = Arrays.asList(pipeline);
        DatanodeInfo newNode = null;
        for (DatanodeInfo node : newNodes) {
            if (!pipelineList.contains(node)) {
                newNode = node;
                break;
            }
        }
        LOG.info("Number of nodes in pipeline: {} newNode {}", newNodes.length, newNode.getName());
        // Shut down the two original pipeline nodes, keeping only newNode.
        for (int i = 0; i < newNodes.length; i++) {
            if (newNodes[i].getName().equals(newNode.getName())) {
                continue;
            }
            LOG.info("shutdown {}", newNodes[i].getName());
            cluster.stopDataNode(newNodes[i].getName());
        }
        // The read should succeed from the newNode alone, with no corruption
        // reported.
        DFSTestUtil.readFile(fs, fileName);
    } finally {
        DataNodeFaultInjector.set(old);
        cluster.shutdown();
    }
}
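
The trigger condition inside the injector is the crux: fail only once the second datanode has a full chunk on disk beyond what has been acked, so that the recovery's transferBlock must carry a partial chunk's extra bytes to the new node. A hypothetical helper (onDiskAheadByAChunk is not part of Hadoop, just an illustration of the same arithmetic):

static boolean onDiskAheadByAChunk(long bytesAcked, long bytesOnDisk, int chunkSize) {
    // Compare chunk indices rather than raw byte counts: the bug needs
    // on-disk data to have crossed into a later chunk than the acked data.
    return (bytesOnDisk / chunkSize) - (bytesAcked / chunkSize) >= 1;
}

// With chunkSize = 512: bytesAcked = 700 (chunk 1) and bytesOnDisk = 1300
// (chunk 2) -> true; the new node must receive the unacked partial chunk.
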
Also used: Path (org.apache.hadoop.fs.Path), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), Configuration (org.apache.hadoop.conf.Configuration), DataNodeFaultInjector (org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector), IOException (java.io.IOException), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), Random (java.util.Random), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), ReplicaInPipeline (org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline), Test (org.junit.Test)

Aggregations

IOException (java.io.IOException): 6
Path (org.apache.hadoop.fs.Path): 6
DataNodeFaultInjector (org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector): 6
Test (org.junit.Test): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 3
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 2
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 2
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 2
Random (java.util.Random): 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 1
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 1
DatanodeInfoBuilder (org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder): 1
ReplicaInPipeline (org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline): 1
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi): 1
TemporarySocketDirectory (org.apache.hadoop.net.unix.TemporarySocketDirectory): 1
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 1