
Example 1 with DataNodeFaultInjector

use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by apache.

the class TestClientProtocolForPipelineRecovery method testPacketTransmissionDelay.

@Test
public void testPacketTransmissionDelay() throws Exception {
    // Make the first datanode not relay the heartbeat packet.
    DataNodeFaultInjector dnFaultInjector = new DataNodeFaultInjector() {

        @Override
        public boolean dropHeartbeatPacket() {
            return true;
        }
    };
    DataNodeFaultInjector oldDnInjector = DataNodeFaultInjector.get();
    DataNodeFaultInjector.set(dnFaultInjector);
    // Set the timeout to 3 seconds. Normally a heartbeat packet
    // would be sent every 1.5 seconds if there is no data traffic.
    Configuration conf = new HdfsConfiguration();
    conf.set(HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, "3000");
    MiniDFSCluster cluster = null;
    try {
        int numDataNodes = 2;
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        FSDataOutputStream out = fs.create(new Path("noheartbeat.dat"), (short) 2);
        out.write(0x31);
        out.hflush();
        DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
        // original pipeline
        DatanodeInfo[] orgNodes = dfsOut.getPipeline();
        // Cause the second datanode to time out while reading the packet.
        Thread.sleep(3500);
        out.write(0x32);
        out.hflush();
        // new pipeline
        DatanodeInfo[] newNodes = dfsOut.getPipeline();
        out.close();
        boolean contains = false;
        for (int i = 0; i < newNodes.length; i++) {
            if (orgNodes[0].getXferAddr().equals(newNodes[i].getXferAddr())) {
                throw new IOException("The first datanode should have been replaced.");
            }
            if (orgNodes[1].getXferAddr().equals(newNodes[i].getXferAddr())) {
                contains = true;
            }
        }
        Assert.assertTrue(contains);
    } finally {
        DataNodeFaultInjector.set(oldDnInjector);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) Configuration(org.apache.hadoop.conf.Configuration) DataNodeFaultInjector(org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
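
All of these tests share the same setup/teardown pattern: install a custom injector for the duration of the test and restore the previous one in a finally block. A minimal sketch of that pattern follows (the test method name is hypothetical; DataNodeFaultInjector.get()/set() and the dropHeartbeatPacket hook are the ones used in Example 1, and the imports are the same as listed above):

@Test
public void testWithInjectedFault() throws Exception {
    // Injector that drops heartbeat packets, as in Example 1.
    DataNodeFaultInjector faultInjector = new DataNodeFaultInjector() {

        @Override
        public boolean dropHeartbeatPacket() {
            return true;
        }
    };
    // Remember the previous injector so it can be restored later.
    DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
    DataNodeFaultInjector.set(faultInjector);
    try {
        // ... exercise the MiniDFSCluster while the fault is active ...
    } finally {
        // Always restore the original injector, even if the test fails.
        DataNodeFaultInjector.set(oldInjector);
    }
}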

Example 2 with DataNodeFaultInjector

use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by apache.

the class TestClientProtocolForPipelineRecovery method testZeroByteBlockRecovery.

/**
   * Test to make sure the checksum is set correctly after pipeline
   * recovery transfers a 0-byte partial block. If it fails, the test case
   * will report "java.io.IOException: Failed to replace a bad datanode
   * on the existing pipeline due to no more good datanodes being
   * available to try." This indicates there was a real failure
   * after the staged failure.
   */
@Test
public void testZeroByteBlockRecovery() throws Exception {
    // Make the first datanode fail once. With 3 nodes and a block being
    // created with 2 replicas, anything more than this planned failure
    // will cause a test failure.
    DataNodeFaultInjector dnFaultInjector = new DataNodeFaultInjector() {

        int tries = 1;

        @Override
        public void stopSendingPacketDownstream(final String mirrAddr) throws IOException {
            if (tries > 0) {
                tries--;
                try {
                    Thread.sleep(60000);
                } catch (InterruptedException ie) {
                    throw new IOException("Interrupted while sleeping. Bailing out.");
                }
            }
        }
    };
    DataNodeFaultInjector oldDnInjector = DataNodeFaultInjector.get();
    DataNodeFaultInjector.set(dnFaultInjector);
    Configuration conf = new HdfsConfiguration();
    conf.set(HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, "1000");
    conf.set(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.POLICY_KEY, "ALWAYS");
    MiniDFSCluster cluster = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        FSDataOutputStream out = fs.create(new Path("noheartbeat.dat"), (short) 2);
        out.write(0x31);
        out.hflush();
        out.close();
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
        DataNodeFaultInjector.set(oldDnInjector);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DataNodeFaultInjector(org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
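
The one-shot failure in Example 2 relies on a mutable tries counter inside the anonymous injector. A hypothetical reusable variant (the class name and constructor are illustrative, not part of Hadoop) could bound the number of injected stalls with a java.util.concurrent.atomic.AtomicInteger:

// Hypothetical helper: stalls the downstream packet path a fixed number of times.
class LimitedStallInjector extends DataNodeFaultInjector {

    private final AtomicInteger remaining;

    private final long stallMillis;

    LimitedStallInjector(int times, long stallMillis) {
        this.remaining = new AtomicInteger(times);
        this.stallMillis = stallMillis;
    }

    @Override
    public void stopSendingPacketDownstream(final String mirrAddr) throws IOException {
        // Stall only while the budget of injected failures lasts.
        if (remaining.getAndDecrement() > 0) {
            try {
                Thread.sleep(stallMillis);
            } catch (InterruptedException ie) {
                throw new IOException("Interrupted while sleeping. Bailing out.");
            }
        }
    }
}

A test could then install it with DataNodeFaultInjector.set(new LimitedStallInjector(1, 60000)) and restore the old injector in the finally block, exactly as Example 2 does.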

Example 3 with DataNodeFaultInjector

use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by apache.

the class TestSpaceReservation method testReservedSpaceForPipelineRecovery.

@Test(timeout = 30000)
public void testReservedSpaceForPipelineRecovery() throws Exception {
    final short replication = 3;
    startCluster(BLOCK_SIZE, replication, -1);
    final String methodName = GenericTestUtils.getMethodName();
    final Path file = new Path("/" + methodName + ".01.dat");
    old = DataNodeFaultInjector.get();
    // Fault injector to fail the connection to the mirror on the first attempt.
    DataNodeFaultInjector.set(new DataNodeFaultInjector() {

        private int tries = 0;

        @Override
        public void failMirrorConnection() throws IOException {
            if (tries++ == 0) {
                throw new IOException("Failing Mirror for space reservation");
            }
        }
    });
    // Write 1 byte to the file and close the stream.
    FSDataOutputStream os = fs.create(file, replication);
    os.write(new byte[1]);
    os.close();
    // Ensure all space reserved for the replica was released on each
    // DataNode.
    cluster.triggerBlockReports();
    for (final DataNode dn : cluster.getDataNodes()) {
        try (FsDatasetSpi.FsVolumeReferences volumes = dn.getFSDataset().getFsVolumeReferences()) {
            final FsVolumeImpl volume = (FsVolumeImpl) volumes.get(0);
            GenericTestUtils.waitFor(new Supplier<Boolean>() {

                @Override
                public Boolean get() {
                    LOG.info("dn " + dn.getDisplayName() + " space : " + volume.getReservedForReplicas());
                    return (volume.getReservedForReplicas() == 0);
                }
            }, 100, // Wait until the test times out.
            Integer.MAX_VALUE);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataNodeFaultInjector(org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) IOException(java.io.IOException) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
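
On Java 8 and later, the anonymous Supplier passed to GenericTestUtils.waitFor in Example 3 can be written as a lambda. A sketch of the equivalent call, assuming the same volume variable captured above (same 100 ms poll interval, waiting until the test itself times out):

// Lambda form of the wait in Example 3 (Java 8+); `volume` is the
// FsVolumeImpl obtained from the DataNode's volume references.
GenericTestUtils.waitFor(
    () -> volume.getReservedForReplicas() == 0,
    100,                // poll every 100 ms
    Integer.MAX_VALUE); // wait until the test times out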

Example 4 with DataNodeFaultInjector

use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by apache.

the class TestShortCircuitCache method testDataXceiverHandlesRequestShortCircuitShmFailure.

// Regression test for HADOOP-11802
@Test(timeout = 60000)
public void testDataXceiverHandlesRequestShortCircuitShmFailure() throws Exception {
    BlockReaderTestUtil.enableShortCircuitShmTracing();
    TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
    Configuration conf = createShortCircuitConf("testDataXceiverHandlesRequestShortCircuitShmFailure", sockDir);
    conf.setLong(HdfsClientConfigKeys.Read.ShortCircuit.STREAMS_CACHE_EXPIRY_MS_KEY, 1000000000L);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    final Path TEST_PATH1 = new Path("/test_file1");
    DFSTestUtil.createFile(fs, TEST_PATH1, 4096, (short) 1, 0xFADE1);
    LOG.info("Setting failure injector and performing a read which " + "should fail...");
    DataNodeFaultInjector failureInjector = Mockito.mock(DataNodeFaultInjector.class);
    Mockito.doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            throw new IOException("injected error into sendShmResponse");
        }
    }).when(failureInjector).sendShortCircuitShmResponse();
    DataNodeFaultInjector prevInjector = DataNodeFaultInjector.instance;
    DataNodeFaultInjector.instance = failureInjector;
    try {
        // The first read will try to allocate a shared memory segment and slot.
        // The shared memory segment allocation will fail because of the failure
        // injector.
        DFSTestUtil.readFileBuffer(fs, TEST_PATH1);
        Assert.fail("expected readFileBuffer to fail, but it succeeded.");
    } catch (Throwable t) {
        GenericTestUtils.assertExceptionContains("TCP reads were disabled for " + "testing, but we failed to do a non-TCP read.", t);
    }
    checkNumberOfSegmentsAndSlots(0, 0, cluster.getDataNodes().get(0).getShortCircuitRegistry());
    LOG.info("Clearing failure injector and performing another read...");
    DataNodeFaultInjector.instance = prevInjector;
    fs.getClient().getClientContext().getDomainSocketFactory().clearPathMap();
    // The second read should succeed.
    DFSTestUtil.readFileBuffer(fs, TEST_PATH1);
    // We should have added a new short-circuit shared memory segment and slot.
    checkNumberOfSegmentsAndSlots(1, 1, cluster.getDataNodes().get(0).getShortCircuitRegistry());
    cluster.shutdown();
    sockDir.close();
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) DataNodeFaultInjector(org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) DatanodeInfoBuilder(org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder) IOException(java.io.IOException) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) TemporarySocketDirectory(org.apache.hadoop.net.unix.TemporarySocketDirectory) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)
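
Example 4 stubs the void sendShortCircuitShmResponse hook with Mockito.doAnswer; Mockito's doThrow is a shorter equivalent for void methods. A sketch, assuming the same failureInjector mock created inside the test above:

// Shorter equivalent of the doAnswer(...) stubbing in Example 4:
// doThrow also works for void methods such as sendShortCircuitShmResponse().
Mockito.doThrow(new IOException("injected error into sendShmResponse"))
    .when(failureInjector).sendShortCircuitShmResponse();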

Example 5 with DataNodeFaultInjector

use of org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector in project hadoop by apache.

the class TestClientProtocolForPipelineRecovery method testPipelineRecoveryWithTransferBlock.

// Test to verify that blocks are no longer corrupted after HDFS-4660.
// If HDFS-4660 and the related fixes (HDFS-9220, HDFS-8722) were reverted,
// this test would fail.
// Scenario: Prior to the fix, a block got corrupted when transferBlock
// happened during pipeline recovery with extra bytes to make up the end of
// the chunk.
// For verification, the pipeline must fail for the last datanode while the
// second datanode has more bytes on disk than have been acked.
// This allows extra bytes to be transferred to the newNode to make up the
// end-of-chunk during pipeline recovery, which is achieved by the customized
// DataNodeFaultInjector class in this test.
// For detailed info, please refer to HDFS-4660 and HDFS-10587. HDFS-9220
// fixes an issue in the HDFS-4660 patch, and HDFS-8722 is an optimization.
@Test
public void testPipelineRecoveryWithTransferBlock() throws Exception {
    final int chunkSize = 512;
    final int oneWriteSize = 5000;
    final int totalSize = 1024 * 1024;
    final int errorInjectionPos = 512;
    Configuration conf = new HdfsConfiguration();
    // Need 4 datanodes to verify the replaceDatanode during pipeline recovery
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    DataNodeFaultInjector old = DataNodeFaultInjector.get();
    try {
        DistributedFileSystem fs = cluster.getFileSystem();
        Path fileName = new Path("/f");
        FSDataOutputStream o = fs.create(fileName);
        int count = 0;
        // Flush to get the pipeline created.
        o.writeBytes("hello");
        o.hflush();
        DFSOutputStream dfsO = (DFSOutputStream) o.getWrappedStream();
        final DatanodeInfo[] pipeline = dfsO.getStreamer().getNodes();
        final String lastDn = pipeline[2].getXferAddr(false);
        final AtomicBoolean failed = new AtomicBoolean(false);
        DataNodeFaultInjector.set(new DataNodeFaultInjector() {

            @Override
            public void failPipeline(ReplicaInPipeline replicaInfo, String mirror) throws IOException {
                if (!lastDn.equals(mirror)) {
                    // Only fail for second DN
                    return;
                }
                if (!failed.get() && (replicaInfo.getBytesAcked() > errorInjectionPos) && (replicaInfo.getBytesAcked() % chunkSize != 0)) {
                    int count = 0;
                    while (count < 10) {
                        // Fail the pipeline once the bytes on disk are at
                        // least one full chunk ahead of the acked bytes, so
                        // that the corruption described in HDFS-4660 would
                        // occur.
                        if ((replicaInfo.getBytesOnDisk() / chunkSize) - (replicaInfo.getBytesAcked() / chunkSize) >= 1) {
                            failed.set(true);
                            throw new IOException("Failing Pipeline " + replicaInfo.getBytesAcked() + " : " + replicaInfo.getBytesOnDisk());
                        }
                        try {
                            Thread.sleep(200);
                        } catch (InterruptedException e) {
                            // Ignore the interruption and keep polling.
                        }
                        count++;
                    }
                }
            }
        });
        Random r = new Random();
        byte[] b = new byte[oneWriteSize];
        while (count < totalSize) {
            r.nextBytes(b);
            o.write(b);
            count += oneWriteSize;
            o.hflush();
        }
        assertTrue("Expected a failure in the pipeline", failed.get());
        DatanodeInfo[] newNodes = dfsO.getStreamer().getNodes();
        o.close();
        // Trigger block report to NN
        for (DataNode d : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(d);
        }
        // Read from the replaced datanode to check for corruption, so shut
        // down all other nodes in the pipeline.
        List<DatanodeInfo> pipelineList = Arrays.asList(pipeline);
        DatanodeInfo newNode = null;
        for (DatanodeInfo node : newNodes) {
            if (!pipelineList.contains(node)) {
                newNode = node;
                break;
            }
        }
        LOG.info("Number of nodes in pipeline: {} newNode {}", newNodes.length, newNode.getName());
        // Shut down the 2 old nodes.
        for (int i = 0; i < newNodes.length; i++) {
            if (newNodes[i].getName().equals(newNode.getName())) {
                continue;
            }
            LOG.info("shutdown {}", newNodes[i].getName());
            cluster.stopDataNode(newNodes[i].getName());
        }
        // The read should be successful from only the newNode, and no
        // corruption should be reported.
        DFSTestUtil.readFile(fs, fileName);
    } finally {
        DataNodeFaultInjector.set(old);
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) Configuration(org.apache.hadoop.conf.Configuration) DataNodeFaultInjector(org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Random(java.util.Random) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ReplicaInPipeline(org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline) Test(org.junit.Test)
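
The injection condition in Example 5 compares whole chunks on disk with whole chunks acked. A small worked check of that arithmetic (the values are illustrative only; chunkSize is 512 as in the test):

// Illustrative check of the HDFS-4660 injection condition with chunkSize = 512.
long chunkSize = 512;
long bytesAcked = 700;    // past errorInjectionPos (512) and not chunk-aligned
long bytesOnDisk = 1300;  // one full chunk ahead of the acked bytes
// (1300 / 512) - (700 / 512) = 2 - 1 = 1, which is >= 1, so the injector
// throws and fails the pipeline at the point HDFS-4660 describes.
boolean inject = (bytesOnDisk / chunkSize) - (bytesAcked / chunkSize) >= 1;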

Aggregations

IOException (java.io.IOException) 6
Path (org.apache.hadoop.fs.Path) 6
DataNodeFaultInjector (org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector) 6
Test (org.junit.Test) 6
Configuration (org.apache.hadoop.conf.Configuration) 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 4
FileSystem (org.apache.hadoop.fs.FileSystem) 3
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 2
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo) 2
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode) 2
Random (java.util.Random) 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 1
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 1
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration) 1
DatanodeInfoBuilder (org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder) 1
ReplicaInPipeline (org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline) 1
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) 1
TemporarySocketDirectory (org.apache.hadoop.net.unix.TemporarySocketDirectory) 1
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 1