Search in sources:
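
All five examples below use the same handshake: DelayAnswer is a Mockito Answer that parks a stubbed call until the test releases it, giving the test a window to assert on state while an RPC, checkpoint, or block report is mid-flight. The test calls waitForCall() to block until the stubbed method has been entered, proceed() to release the parked call, and waitForResult() to block until it has returned; getFireCount() and getResultCount() report how many calls have started and finished. As a rough single-shot approximation of that handshake (an illustrative sketch only, not Hadoop's actual implementation, which also records the thrown exception for getThrown() and supports repeated calls):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

// Illustrative, single-use stand-in for GenericTestUtils.DelayAnswer.
class BlockingAnswer implements Answer<Object> {
    private final CountDownLatch fired = new CountDownLatch(1);       // stubbed call entered
    private final CountDownLatch allowed = new CountDownLatch(1);     // test said proceed()
    private final CountDownLatch resultReady = new CountDownLatch(1); // stubbed call returned
    private final AtomicInteger fireCount = new AtomicInteger();
    private final AtomicInteger resultCount = new AtomicInteger();

    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
        fireCount.incrementAndGet();
        fired.countDown();   // tell the test the call has started
        allowed.await();     // park here until the test calls proceed()
        try {
            return invocation.callRealMethod(); // delegate to the real (spied) method
        } finally {
            resultCount.incrementAndGet();
            resultReady.countDown();
        }
    }

    void waitForCall() throws InterruptedException { fired.await(); }
    void proceed() { allowed.countDown(); }
    void waitForResult() throws InterruptedException { resultReady.await(); }
    int getFireCount() { return fireCount.get(); }
    int getResultCount() { return resultCount.get(); }
}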

Example 1 with DelayAnswer

Use of org.apache.hadoop.test.GenericTestUtils.DelayAnswer in project hadoop by apache.

From the class TestStandbyCheckpoints, method testStandbyExceptionThrownDuringCheckpoint.

/**
   * Make sure that clients will receive StandbyExceptions even when a
   * checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
   * thread will hold the FSNS lock. Regression test for HDFS-4591.
   */
@Test(timeout = 300000)
public void testStandbyExceptionThrownDuringCheckpoint() throws Exception {
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
    DelayAnswer answerer = new DelayAnswer(LOG);
    Mockito.doAnswer(answerer).when(spyImage1).saveNamespace(Mockito.any(FSNamesystem.class), Mockito.eq(NameNodeFile.IMAGE), Mockito.any(Canceler.class));
    // Perform some edits and wait for a checkpoint to start on the SBN.
    doEdits(0, 1000);
    nns[0].getRpcServer().rollEditLog();
    answerer.waitForCall();
    assertTrue("SBN is not performing checkpoint but it should be.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    try {
        // Perform an RPC to the SBN and make sure it throws a StandbyException.
        nns[1].getRpcServer().getFileInfo("/");
        fail("Should have thrown StandbyException, but instead succeeded.");
    } catch (StandbyException se) {
        GenericTestUtils.assertExceptionContains("is not supported", se);
    }
    // Make sure new incremental block reports are processed during
    // checkpointing on the SBN.
    assertEquals(0, cluster.getNamesystem(1).getPendingDataNodeMessageCount());
    doCreate();
    Thread.sleep(1000);
    assertTrue(cluster.getNamesystem(1).getPendingDataNodeMessageCount() > 0);
    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    assertTrue("SBN should have still been checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
    answerer.proceed();
    answerer.waitForResult();
    assertTrue("SBN should have finished checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 1);
}
Also used: StandbyException (org.apache.hadoop.ipc.StandbyException), Canceler (org.apache.hadoop.hdfs.util.Canceler), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), Test (org.junit.Test)
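
Stripped of the HDFS machinery, the choreography of Example 1 reduces to the following self-contained JUnit 4 test. Checkpointer is a hypothetical stand-in for the spied FSImage, and BlockingAnswer is the sketch from the top of this page, not a Hadoop class:

import static org.junit.Assert.assertEquals;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.junit.Test;
import org.mockito.Mockito;

public class DelayAnswerPatternTest {

    // Hypothetical collaborator standing in for FSImage; not a Hadoop class.
    static class Checkpointer {
        public String saveImage() { return "saved"; }
    }

    @Test
    public void assertsWhileCallIsInFlight() throws Exception {
        Checkpointer spy = Mockito.spy(new Checkpointer());
        BlockingAnswer delayer = new BlockingAnswer();
        Mockito.doAnswer(delayer).when(spy).saveImage();

        // Trigger the "checkpoint" on another thread, as the SBN would.
        ExecutorService pool = Executors.newSingleThreadExecutor();
        Future<String> result = pool.submit(spy::saveImage);

        delayer.waitForCall(); // the call has started but not returned
        assertEquals(1, delayer.getFireCount());
        assertEquals(0, delayer.getResultCount());
        // This is the window in which Example 1 probes NameNode state.

        delayer.proceed(); // release the parked call
        delayer.waitForResult();
        assertEquals("saved", result.get());
        pool.shutdown();
    }
}

The window between waitForCall() and proceed() is exactly where the Hadoop test above probes NameNode state: the lock held by the checkpointer, the StandbyException surfaced to clients, and the pending DataNode message count.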

Example 2 with DelayAnswer

Use of org.apache.hadoop.test.GenericTestUtils.DelayAnswer in project hadoop by apache.

From the class TestCheckpoint, method testMultipleSecondaryNNsAgainstSameNN.

/**
   * Test case where two secondary namenodes are checkpointing the same
   * NameNode. This differs from {@link #testMultipleSecondaryNamenodes()}
   * since that test runs against two distinct NNs.
   * 
   * This case tests the following interleaving:
   * - 2NN A downloads image (up to txid 2)
   * - 2NN A about to save its own checkpoint
   * - 2NN B downloads image (up to txid 4)
   * - 2NN B uploads checkpoint (txid 4)
   * - 2NN A uploads checkpoint (txid 2)
   * 
   * It verifies that this works even though the earlier-txid checkpoint gets
   * uploaded after the later-txid checkpoint.
   */
@Test
public void testMultipleSecondaryNNsAgainstSameNN() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    SecondaryNameNode secondary1 = null, secondary2 = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(true).build();
        // Start 2NNs
        secondary1 = startSecondaryNameNode(conf, 1);
        secondary2 = startSecondaryNameNode(conf, 2);
        // Make the first 2NN's checkpoint process delayable - we can pause it
        // right before it saves its checkpoint image.
        CheckpointStorage spyImage1 = spyOnSecondaryImage(secondary1);
        DelayAnswer delayer = new DelayAnswer(LOG);
        Mockito.doAnswer(delayer).when(spyImage1).saveFSImageInAllDirs(Mockito.<FSNamesystem>any(), Mockito.anyLong());
        // Set up a thread to do a checkpoint from the first 2NN
        DoCheckpointThread checkpointThread = new DoCheckpointThread(secondary1);
        checkpointThread.start();
        // Wait for the first checkpointer to get to where it should save its image.
        delayer.waitForCall();
        // Now make the second checkpointer run an entire checkpoint
        secondary2.doCheckpoint();
        // Let the first one finish
        delayer.proceed();
        // It should have succeeded even though another checkpoint raced with it.
        checkpointThread.join();
        checkpointThread.propagateExceptions();
        // primary should record "last checkpoint" as the higher txid (even though
        // a checkpoint with a lower txid finished most recently)
        NNStorage storage = cluster.getNameNode().getFSImage().getStorage();
        assertEquals(4, storage.getMostRecentCheckpointTxId());
        // Should have accepted both checkpoints
        assertNNHasCheckpoints(cluster, ImmutableList.of(2, 4));
        // Now have second one checkpoint one more time just to make sure that
        // the NN isn't left in a broken state
        secondary2.doCheckpoint();
        // NN should have received new checkpoint
        assertEquals(6, storage.getMostRecentCheckpointTxId());
        // Validate invariant that files named the same are the same.
        assertParallelFilesInvariant(cluster, ImmutableList.of(secondary1, secondary2));
        // NN should have removed the checkpoint at txid 2 at this point, but has
        // one at txid 6
        assertNNHasCheckpoints(cluster, ImmutableList.of(4, 6));
    } finally {
        cleanup(secondary1);
        secondary1 = null;
        cleanup(secondary2);
        secondary2 = null;
        if (cluster != null) {
            cluster.shutdown();
            cluster = null;
        }
    }
}
Also used: MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder), CheckpointStorage (org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode.CheckpointStorage), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), Test (org.junit.Test)
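
DoCheckpointThread is a helper defined elsewhere in TestCheckpoint and not shown in this excerpt. Judging only from how it is used above (start(), join(), propagateExceptions()), a plausible minimal version looks like the sketch below; this is an assumption, not the verbatim Hadoop helper:

import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode;

// Runs doCheckpoint() on its own thread and stores any failure so the
// test thread can rethrow it after join().
class DoCheckpointThread extends Thread {
    private final SecondaryNameNode snn;
    private volatile Throwable thrown;

    DoCheckpointThread(SecondaryNameNode snn) {
        this.snn = snn;
    }

    @Override
    public void run() {
        try {
            snn.doCheckpoint();
        } catch (Throwable t) {
            thrown = t; // remember the failure; a thread can't throw to join()
        }
    }

    void propagateExceptions() {
        if (thrown != null) {
            throw new RuntimeException(thrown);
        }
    }
}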

Example 3 with DelayAnswer

Use of org.apache.hadoop.test.GenericTestUtils.DelayAnswer in project hadoop by apache.

From the class TestReplication, method testNoExtraReplicationWhenBlockReceivedIsLate.

/**
   * This test makes sure that, when a file is closed before all
   * of the datanodes in the pipeline have reported their replicas,
   * the NameNode doesn't consider the block under-replicated too
   * aggressively. It is a regression test for HDFS-1172.
   */
@Test(timeout = 60000)
public void testNoExtraReplicationWhenBlockReceivedIsLate() throws Exception {
    LOG.info("Test block replication when blockReceived is late");
    final short numDataNodes = 3;
    final short replication = 3;
    final Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    final String testFile = "/replication-test-file";
    final Path testPath = new Path(testFile);
    final BlockManager bm = cluster.getNameNode().getNamesystem().getBlockManager();
    try {
        cluster.waitActive();
        // Artificially delay IBR from 1 DataNode.
        // This ensures that the client's completeFile() RPC will get to the
        // NN before some of the replicas are reported.
        NameNode nn = cluster.getNameNode();
        DataNode dn = cluster.getDataNodes().get(0);
        DatanodeProtocolClientSideTranslatorPB spy = InternalDataNodeTestUtils.spyOnBposToNN(dn, nn);
        DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG);
        Mockito.doAnswer(delayer).when(spy).blockReceivedAndDeleted(Mockito.<DatanodeRegistration>anyObject(), Mockito.anyString(), Mockito.<StorageReceivedDeletedBlocks[]>anyObject());
        FileSystem fs = cluster.getFileSystem();
        // Create and close a small file with two blocks
        DFSTestUtil.createFile(fs, testPath, 1500, replication, 0);
        // schedule replication via BlockManager#computeReplicationWork
        BlockManagerTestUtil.computeAllPendingWork(bm);
        // Initially, should have some pending replication since the close()
        // is earlier than at least one of the reportReceivedDeletedBlocks calls
        assertTrue(pendingReplicationCount(bm) > 0);
        // release pending IBR.
        delayer.waitForCall();
        delayer.proceed();
        delayer.waitForResult();
        // make sure the DataNodes perform any pending replication work
        for (DataNode d : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(d);
        }
        // Wait until there is nothing pending
        try {
            GenericTestUtils.waitFor(new Supplier<Boolean>() {

                @Override
                public Boolean get() {
                    return pendingReplicationCount(bm) == 0;
                }
            }, 100, 3000);
        } catch (TimeoutException e) {
            fail("timed out while waiting for no pending replication.");
        }
        // Check that none of the datanodes have serviced a replication request.
        // i.e. that the NameNode didn't schedule any spurious replication.
        assertNoReplicationWasPerformed(cluster);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), NameNode (org.apache.hadoop.hdfs.server.namenode.NameNode), Configuration (org.apache.hadoop.conf.Configuration), MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB), BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), FileSystem (org.apache.hadoop.fs.FileSystem), StorageReceivedDeletedBlocks (org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks), TimeoutException (java.util.concurrent.TimeoutException), Test (org.junit.Test)

Example 4 with DelayAnswer

Use of org.apache.hadoop.test.GenericTestUtils.DelayAnswer in project hadoop by apache.

From the class TestPipelinesFailover, method testFailoverRightBeforeCommitSynchronization.

/**
   * Test the scenario where the NN fails over after issuing a block
   * synchronization request, but before it is committed. The
   * DN running the recovery should then fail to commit the synchronization
   * and a later retry will succeed.
   */
@Test(timeout = 30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
    final Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    FSDataOutputStream stm = null;
    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
    try {
        cluster.waitActive();
        cluster.transitionToActive(0);
        Thread.sleep(500);
        LOG.info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        stm = fs.create(TEST_PATH);
        // write a half block
        AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
        stm.hflush();
        // Look into the block manager on the active node for the block
        // under construction.
        NameNode nn0 = cluster.getNameNode(0);
        ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
        LOG.info("Expecting block recovery to be triggered on DN " + expectedPrimary);
        // Find the corresponding DN daemon, and spy on its connection to the
        // active.
        DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy = InternalDataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
        // Delay the commitBlockSynchronization call
        DelayAnswer delayer = new DelayAnswer(LOG);
        Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
                Mockito.eq(blk),
                // new genstamp
                Mockito.anyInt(),
                // new length
                Mockito.anyLong(),
                // close file
                Mockito.eq(true),
                // delete block
                Mockito.eq(false),
                // new targets
                (DatanodeID[]) Mockito.anyObject(),
                // new target storages
                (String[]) Mockito.anyObject());
        DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
        assertFalse(fsOtherUser.recoverLease(TEST_PATH));
        LOG.info("Waiting for commitBlockSynchronization call from primary");
        delayer.waitForCall();
        LOG.info("Failing over to NN 1");
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        // Let the commitBlockSynchronization call go through, and check that
        // it failed with the correct exception.
        delayer.proceed();
        delayer.waitForResult();
        Throwable t = delayer.getThrown();
        if (t == null) {
            fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.assertExceptionContains("Operation category WRITE is not supported", t);
        // Now, if we try again to recover the block, it should succeed on the new
        // active.
        loopRecoverLease(fsOtherUser, TEST_PATH);
        AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
    } finally {
        IOUtils.closeStream(stm);
        cluster.shutdown();
    }
}
Also used: NameNode (org.apache.hadoop.hdfs.server.namenode.NameNode), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB), DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Test (org.junit.Test)
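
Example 4 additionally relies on delayer.getThrown() to inspect the exception raised inside the parked call. The BlockingAnswer sketch from the top of this page can support that by capturing the throwable before rethrowing it; the following replaces its answer() body (again an illustrative approximation, not the real DelayAnswer):

    private volatile Throwable thrown; // read by the test after waitForResult()

    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
        fireCount.incrementAndGet();
        fired.countDown();
        allowed.await();
        try {
            return invocation.callRealMethod();
        } catch (Throwable t) {
            thrown = t; // record it so the test can assert on it
            throw t;    // still propagate to the caller, as the real DelayAnswer does
        } finally {
            resultCount.incrementAndGet();
            resultReady.countDown();
        }
    }

    Throwable getThrown() { return thrown; }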

Example 5 with DelayAnswer

Use of org.apache.hadoop.test.GenericTestUtils.DelayAnswer in project hadoop by apache.

From the class TestDNFencing, method testRBWReportArrivesAfterEdits.

/**
   * Another regression test for HDFS-2742. This tests the following sequence:
   * - DN does a block report while file is open. This BR contains
   *   the block in RBW state.
   * - The block report is delayed in reaching the standby.
   * - The file is closed.
   * - The standby processes the OP_ADD and OP_CLOSE operations before
   *   the RBW block report arrives.
   * - The standby should not mark the block as corrupt.
   */
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
    final CountDownLatch brFinished = new CountDownLatch(1);
    DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {

        @Override
        protected Object passThrough(InvocationOnMock invocation) throws Throwable {
            try {
                return super.passThrough(invocation);
            } finally {
                // inform the test that our block report went through.
                brFinished.countDown();
            }
        }
    };
    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
        AppendTestUtil.write(out, 0, 10);
        out.hflush();
        DataNode dn = cluster.getDataNodes().get(0);
        DatanodeProtocolClientSideTranslatorPB spy = InternalDataNodeTestUtils.spyOnBposToNN(dn, nn2);
        Mockito.doAnswer(delayer).when(spy).blockReport(Mockito.<DatanodeRegistration>anyObject(), Mockito.anyString(), Mockito.<StorageBlockReport[]>anyObject(), Mockito.<BlockReportContext>anyObject());
        dn.scheduleAllBlockReport(0);
        delayer.waitForCall();
    } finally {
        IOUtils.closeStream(out);
    }
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    delayer.proceed();
    brFinished.await();
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
    DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
Also used: DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB), InvocationOnMock (org.mockito.invocation.InvocationOnMock), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), StorageBlockReport (org.apache.hadoop.hdfs.server.protocol.StorageBlockReport), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), CountDownLatch (java.util.concurrent.CountDownLatch), Test (org.junit.Test)

Aggregations

DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer): 11 uses
Test (org.junit.Test): 10 uses
Configuration (org.apache.hadoop.conf.Configuration): 6 uses
DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB): 5 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4 uses
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 4 uses
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 4 uses
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 4 uses
Path (org.apache.hadoop.fs.Path): 3 uses
NameNode (org.apache.hadoop.hdfs.server.namenode.NameNode): 3 uses
Canceler (org.apache.hadoop.hdfs.util.Canceler): 3 uses
MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder): 3 uses
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 3 uses
IOException (java.io.IOException): 2 uses
CountDownLatch (java.util.concurrent.CountDownLatch): 2 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 2 uses
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 2 uses
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 2 uses
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 2 uses
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 2 uses