
Example 1 with ReconfigurationException

Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.

Class TestNameNodeReconfigure, method testReconfigureHearbeatCheck.

/**
   * Test reconfiguring the heartbeat interval and the heartbeat re-check
   * interval.
   */
@Test
public void testReconfigureHearbeatCheck() throws ReconfigurationException {
    final NameNode nameNode = cluster.getNameNode();
    final DatanodeManager datanodeManager = nameNode.namesystem.getBlockManager().getDatanodeManager();
    // change properties
    nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "" + 6);
    nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, "" + (10 * 60 * 1000));
    // try invalid values
    try {
        nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "text");
        fail("ReconfigurationException expected");
    } catch (ReconfigurationException expected) {
        assertTrue(expected.getCause() instanceof NumberFormatException);
    }
    try {
        nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, "text");
        fail("ReconfigurationException expected");
    } catch (ReconfigurationException expected) {
        assertTrue(expected.getCause() instanceof NumberFormatException);
    }
    // verify change
    assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6, nameNode.getConf().getLong(DFS_HEARTBEAT_INTERVAL_KEY, DFS_HEARTBEAT_INTERVAL_DEFAULT));
    assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6, datanodeManager.getHeartbeatInterval());
    assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000, nameNode.getConf().getInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT));
    assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000, datanodeManager.getHeartbeatRecheckInterval());
    // revert to defaults
    nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, null);
    nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, null);
    // verify defaults
    assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", null, nameNode.getConf().get(DFS_HEARTBEAT_INTERVAL_KEY));
    assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", DFS_HEARTBEAT_INTERVAL_DEFAULT, datanodeManager.getHeartbeatInterval());
    assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", null, nameNode.getConf().get(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY));
    assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT, datanodeManager.getHeartbeatRecheckInterval());
}
Also used : DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) Test(org.junit.Test)
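
The pattern these assertions exercise is worth spelling out: a reconfigurable component parses the new value and, if parsing fails, wraps the NumberFormatException as the cause of a ReconfigurationException. Below is a minimal sketch of that validate-and-wrap pattern; the HeartbeatSettings class and its field are illustrative stand-ins rather than NameNode code, and only the ReconfigurationException(property, newVal, oldVal, cause) constructor is taken from org.apache.hadoop.conf.

import org.apache.hadoop.conf.ReconfigurationException;

// Illustrative sketch only: a hypothetical component validating a numeric
// property the way the NameNode is exercised above. Only ReconfigurationException
// comes from org.apache.hadoop.conf; the class and field are made up.
class HeartbeatSettings {

    private long heartbeatIntervalSeconds = 3;

    void reconfigureHeartbeatInterval(String property, String newVal) throws ReconfigurationException {
        try {
            // A non-numeric value (such as "text" in the test) fails here ...
            heartbeatIntervalSeconds = Long.parseLong(newVal);
        } catch (NumberFormatException nfe) {
            // ... and is surfaced to the caller as the cause, which is what
            // assertTrue(expected.getCause() instanceof NumberFormatException) checks.
            throw new ReconfigurationException(property, newVal, String.valueOf(heartbeatIntervalSeconds), nfe);
        }
    }
}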

Example 2 with ReconfigurationException

Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.

Class TestDataNodeHotSwapVolumes, method testAddVolumeFailures.

@Test
public void testAddVolumeFailures() throws IOException {
    startDFSCluster(1, 1);
    final String dataDir = cluster.getDataDirectory();
    DataNode dn = cluster.getDataNodes().get(0);
    List<String> newDirs = Lists.newArrayList();
    final int NUM_NEW_DIRS = 4;
    for (int i = 0; i < NUM_NEW_DIRS; i++) {
        File newVolume = new File(dataDir, "new_vol" + i);
        newDirs.add(newVolume.toString());
        if (i % 2 == 0) {
            // Make addVolume() fail.
            newVolume.createNewFile();
        }
    }
    String newValue = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY) + "," + Joiner.on(",").join(newDirs);
    try {
        dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newValue);
        fail("Expect to throw IOException.");
    } catch (ReconfigurationException e) {
        String errorMessage = e.getCause().getMessage();
        String[] messages = errorMessage.split("\\r?\\n");
        assertEquals(2, messages.length);
        assertThat(messages[0], containsString("new_vol0"));
        assertThat(messages[1], containsString("new_vol2"));
    }
    // Make sure that vol0 and vol2's metadata are not left in memory.
    FsDatasetSpi<?> dataset = dn.getFSDataset();
    try (FsDatasetSpi.FsVolumeReferences volumes = dataset.getFsVolumeReferences()) {
        for (FsVolumeSpi volume : volumes) {
            assertThat(new File(volume.getStorageLocation().getUri()).toString(), is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
        }
    }
    DataStorage storage = dn.getStorage();
    for (int i = 0; i < storage.getNumStorageDirs(); i++) {
        Storage.StorageDirectory sd = storage.getStorageDir(i);
        assertThat(sd.getRoot().toString(), is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }
    // The newly effective conf does not have vol0 and vol2.
    String[] effectiveVolumes = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY).split(",");
    assertEquals(4, effectiveVolumes.length);
    for (String ev : effectiveVolumes) {
        assertThat(new File(StorageLocation.parse(ev).getUri()).getCanonicalPath(), is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }
}
Also used : FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Storage(org.apache.hadoop.hdfs.server.common.Storage) DatanodeStorage(org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) File(java.io.File) Test(org.junit.Test)
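
For callers outside the test, the useful detail here is that the DataNode reports every rejected directory in a single exception, with one line per failed volume in the cause message. Below is a hedged sketch of surfacing those lines; the HotSwapHelper class is illustrative, and the newline-separated message format is what this test relies on rather than a documented contract.

import org.apache.hadoop.conf.ReconfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Sketch only: report the per-directory failures from a hot-swap attempt.
// The newline-separated cause message mirrors what the test above asserts;
// treat that format as an implementation detail rather than a contract.
class HotSwapHelper {

    private static final Logger LOG = LoggerFactory.getLogger(HotSwapHelper.class);

    static void logFailedVolumes(ReconfigurationException e) {
        Throwable cause = e.getCause();
        if (cause == null || cause.getMessage() == null) {
            LOG.error("Volume reconfiguration failed", e);
            return;
        }
        // One line per directory that could not be added, e.g. ".../new_vol0: ...".
        for (String line : cause.getMessage().split("\\r?\\n")) {
            LOG.error("Failed to add volume: {}", line);
        }
    }
}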

Example 3 with ReconfigurationException

Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.

Class TestDataNodeHotSwapVolumes, method testRemoveVolumeBeingWrittenForDatanode.

/**
   * Test removing a data volume from a particular DataNode while the volume
   * is actively being written to.
   * @param dataNodeIdx the index of the DataNode from which to remove a volume.
   */
private void testRemoveVolumeBeingWrittenForDatanode(int dataNodeIdx) throws IOException, ReconfigurationException, TimeoutException, InterruptedException, BrokenBarrierException {
    // Starts DFS cluster with 3 DataNodes to form a pipeline.
    startDFSCluster(1, 3);
    final short REPLICATION = 3;
    final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
    final FileSystem fs = cluster.getFileSystem();
    final Path testFile = new Path("/test");
    FSDataOutputStream out = fs.create(testFile, REPLICATION);
    Random rb = new Random(0);
    // half of the block.
    byte[] writeBuf = new byte[BLOCK_SIZE / 2];
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();
    // Make FsDatasetSpi#finalizeBlock a time-consuming operation, so that if
    // BlockReceiver released its volume reference before finalizeBlock(), the
    // blocks on that volume would already be removed and finalizeBlock() would
    // throw an IOException.
    final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
    dn.data = Mockito.spy(data);
    doAnswer(new Answer<Object>() {

        public Object answer(InvocationOnMock invocation) throws IOException, InterruptedException {
            Thread.sleep(1000);
            // Pass the argument through to the real FsDatasetImpl#finalizeBlock
            // to verify that the block is not removed, since the volume
            // reference should not have been released at this point.
            data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
            return null;
        }
    }).when(dn.data).finalizeBlock(any(ExtendedBlock.class));
    final CyclicBarrier barrier = new CyclicBarrier(2);
    List<String> oldDirs = getDataDirs(dn);
    // Remove the first volume.
    final String newDirs = oldDirs.get(1);
    final List<Exception> exceptions = new ArrayList<>();
    Thread reconfigThread = new Thread() {

        public void run() {
            try {
                barrier.await();
                assertThat("DN did not update its own config", dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDirs), is(dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY)));
            } catch (ReconfigurationException | InterruptedException | BrokenBarrierException e) {
                exceptions.add(e);
            }
        }
    };
    reconfigThread.start();
    barrier.await();
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();
    out.close();
    reconfigThread.join();
    // Verify that the data directory reconfiguration was successful
    FsDatasetSpi<? extends FsVolumeSpi> fsDatasetSpi = dn.getFSDataset();
    try (FsDatasetSpi.FsVolumeReferences fsVolumeReferences = fsDatasetSpi.getFsVolumeReferences()) {
        for (int i = 0; i < fsVolumeReferences.size(); i++) {
            System.out.println("Vol: " + fsVolumeReferences.get(i).getBaseURI().toString());
        }
        assertEquals("Volume remove wasn't successful.", 1, fsVolumeReferences.size());
    }
    // Verify the file has a sufficient number of replicas.
    DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
    // Read the content back
    byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
    assertEquals(BLOCK_SIZE, content.length);
    if (!exceptions.isEmpty()) {
        throw new IOException(exceptions.get(0).getCause());
    }
}
Also used : BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ArrayList(java.util.ArrayList) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Path(org.apache.hadoop.fs.Path) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) CyclicBarrier(java.util.concurrent.CyclicBarrier) InvocationOnMock(org.mockito.invocation.InvocationOnMock)
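
The timing of this test hinges on the two-party CyclicBarrier: both the reconfiguration thread and the writer block on barrier.await(), so the volume removal is guaranteed to race with a write that is already in flight. Below is a stripped-down sketch of that coordination, with placeholder work standing in for the DataNode calls.

import java.util.concurrent.CyclicBarrier;

// Stripped-down sketch of the synchronization above: a two-party barrier
// releases the reconfiguring thread and the writing thread at the same
// moment. The printed messages stand in for the real DataNode work.
public class BarrierSketch {

    public static void main(String[] args) throws Exception {
        final CyclicBarrier barrier = new CyclicBarrier(2);

        Thread reconfigThread = new Thread(() -> {
            try {
                // Wait until the writer is ready before removing the volume.
                barrier.await();
                System.out.println("reconfigure data directories here");
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });

        reconfigThread.start();
        // The main thread plays the writer: release both parties together.
        barrier.await();
        System.out.println("write the second half of the block here");
        reconfigThread.join();
    }
}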

Example 4 with ReconfigurationException

Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.

Class TestDataNodeHotSwapVolumes, method testAddVolumesConcurrently.

@Test(timeout = 180000)
public void testAddVolumesConcurrently() throws IOException, InterruptedException, TimeoutException, ReconfigurationException {
    startDFSCluster(1, 1, 10);
    int numVolumes = cluster.getStoragesPerDatanode();
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    Path testFile = new Path("/test");
    // Each volume has 2 blocks
    int initialBlockCount = numVolumes * 2;
    createFile(testFile, initialBlockCount);
    DataNode dn = cluster.getDataNodes().get(0);
    final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
    dn.data = Mockito.spy(data);
    final int newVolumeCount = 40;
    List<Thread> addVolumeDelayedThreads = new ArrayList<>();
    AtomicBoolean addVolumeError = new AtomicBoolean(false);
    AtomicBoolean listStorageError = new AtomicBoolean(false);
    CountDownLatch addVolumeCompletionLatch = new CountDownLatch(newVolumeCount);
    // Thread that lists all storage directories on the DataNode while the
    // volumes are being added in parallel.
    final Thread listStorageThread = new Thread(new Runnable() {

        @Override
        public void run() {
            // Keep listing until every pending addVolume has counted the latch down.
            while (addVolumeCompletionLatch.getCount() > 0) {
                int i = 0;
                while (i++ < 1000) {
                    try {
                        dn.getStorage().listStorageDirectories();
                    } catch (Exception e) {
                        listStorageError.set(true);
                        LOG.error("Error listing storage: " + e);
                    }
                }
            }
        }
    });
    listStorageThread.start();
    // FsDatasetImpl addVolume mocked to perform the operation asynchronously
    doAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            final Random r = new Random();
            Thread addVolThread = new Thread(new Runnable() {

                @Override
                public void run() {
                    try {
                        r.setSeed(Time.now());
                        // start after an initial delay.
                        if (r.nextInt(10) > 4) {
                            int s = r.nextInt(10) + 1;
                            Thread.sleep(s * 100);
                        }
                        invocationOnMock.callRealMethod();
                    } catch (Throwable throwable) {
                        addVolumeError.set(true);
                        LOG.error("Error adding volume: " + throwable);
                    } finally {
                        addVolumeCompletionLatch.countDown();
                    }
                }
            });
            addVolumeDelayedThreads.add(addVolThread);
            addVolThread.start();
            return null;
        }
    }).when(dn.data).addVolume(any(StorageLocation.class), any(List.class));
    addVolumes(newVolumeCount, addVolumeCompletionLatch);
    numVolumes += newVolumeCount;
    // Wait for all addVolume and listStorage Threads to complete
    for (Thread t : addVolumeDelayedThreads) {
        t.join();
    }
    listStorageThread.join();
    // Verify errors while adding volumes and listing storage directories
    Assert.assertEquals("Error adding volumes!", false, addVolumeError.get());
    Assert.assertEquals("Error listing storage!", false, listStorageError.get());
    int additionalBlockCount = 9;
    int totalBlockCount = initialBlockCount + additionalBlockCount;
    // Continue to write the same file, thus the new volumes will have blocks.
    DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * additionalBlockCount);
    verifyFileLength(cluster.getFileSystem(), testFile, totalBlockCount);
    List<Map<DatanodeStorage, BlockListAsLongs>> blockReports = cluster.getAllBlockReports(blockPoolId);
    assertEquals(1, blockReports.size());
    assertEquals(numVolumes, blockReports.get(0).size());
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Random(java.util.Random) InvocationOnMock(org.mockito.invocation.InvocationOnMock) List(java.util.List) Map(java.util.Map) Test(org.junit.Test)
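
The bookkeeping in this test is a standard latch pattern: each asynchronous addVolume counts down a CountDownLatch sized to the number of new volumes, and the countDown() sits in a finally block so a failed addition still releases the waiter. Below is a minimal standalone sketch of that pattern with placeholder work.

import java.util.concurrent.CountDownLatch;

// Minimal sketch of the completion tracking above: every asynchronous task
// counts the latch down in a finally block, so the waiter is released even
// when a task fails. The printed message stands in for the real addVolume.
public class LatchSketch {

    public static void main(String[] args) throws InterruptedException {
        final int newVolumeCount = 4;
        final CountDownLatch addVolumeCompletionLatch = new CountDownLatch(newVolumeCount);

        for (int i = 0; i < newVolumeCount; i++) {
            new Thread(() -> {
                try {
                    System.out.println("add one volume here");
                } finally {
                    // Always counted, even if the addition throws.
                    addVolumeCompletionLatch.countDown();
                }
            }).start();
        }

        // Block until every addVolume thread has finished.
        addVolumeCompletionLatch.await();
        System.out.println("all volume additions completed");
    }
}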

Example 5 with ReconfigurationException

Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.

Class TestDataNodeVolumeFailureReporting, method testHotSwapOutFailedVolumeAndReporting.

/**
   * Verify the DataNode's NumFailedVolumes and FailedStorageLocations
   * after hot-swapping out a failed volume.
   */
@Test
public void testHotSwapOutFailedVolumeAndReporting() throws Exception {
    final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
    final DataNode dn0 = cluster.getDataNodes().get(0);
    final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName mxbeanName = new ObjectName("Hadoop:service=DataNode,name=FSDatasetState-" + dn0.getDatanodeUuid());
    int numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 0, false, new String[] {});
    // Fail dn0Vol1 first.
    // Verify NumFailedVolumes and FailedStorageLocations are updated.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
    DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol1));
    numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(1, numFailedVolumes);
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol1.getAbsolutePath() });
    // Verify NumFailedVolumes and FailedStorageLocations haven't changed.
    try {
        dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);
        fail("Reconfigure with failed disk should throw exception.");
    } catch (ReconfigurationException e) {
        Assert.assertTrue("Reconfigure exception doesn't have expected path!", e.getCause().getMessage().contains(dn0Vol1.getAbsolutePath()));
    }
    numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(1, numFailedVolumes);
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol1.getAbsolutePath() });
    // Hot swap out the failed volume.
    // Verify NumFailedVolumes and FailedStorageLocations are reset.
    String dataDirs = dn0Vol2.getPath();
    dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirs);
    numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(0, numFailedVolumes);
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 0, true, new String[] {});
    // Fix the failed volume dn0Vol1 and add it back.
    // Verify NumFailedVolumes and FailedStorageLocations are empty.
    DataNodeTestUtils.restoreDataDirFromFailure(dn0Vol1);
    dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);
    numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(0, numFailedVolumes);
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 0, true, new String[] {});
    // Fail dn0Vol2.
    // Verify NumFailedVolumes and FailedStorageLocations are updated.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
    DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol2));
    numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    Assert.assertEquals(1, numFailedVolumes);
    Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
    checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol2.getAbsolutePath() });
    // Verify the DataNode keeps running while tolerating one disk failure.
    assertTrue(dn0.shouldRun());
}
Also used : ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) File(java.io.File) MBeanServer(javax.management.MBeanServer) ObjectName(javax.management.ObjectName) Test(org.junit.Test)
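
The JMX lookup used above is easy to reuse outside the test: the failed-volume count is exposed as the NumFailedVolumes attribute of the DataNode's FSDatasetState MBean. Below is a short sketch of that read; the datanodeUuid parameter is a placeholder for the value the test obtains from dn0.getDatanodeUuid().

import java.lang.management.ManagementFactory;
import javax.management.MBeanServer;
import javax.management.ObjectName;

// Sketch of the JMX read performed above. The datanodeUuid argument is a
// placeholder; in the test it comes from dn0.getDatanodeUuid().
public class FailedVolumeProbe {

    static int readNumFailedVolumes(String datanodeUuid) throws Exception {
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        ObjectName mxbeanName = new ObjectName("Hadoop:service=DataNode,name=FSDatasetState-" + datanodeUuid);
        // The attribute is an Integer; casting to int unboxes it.
        return (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
    }
}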

Aggregations

ReconfigurationException (org.apache.hadoop.conf.ReconfigurationException) 7
Test (org.junit.Test) 4
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString) 3
Matchers.anyString (org.mockito.Matchers.anyString) 3
File (java.io.File) 2
IOException (java.io.IOException) 2
ArrayList (java.util.ArrayList) 2
Random (java.util.Random) 2
BrokenBarrierException (java.util.concurrent.BrokenBarrierException) 2
TimeoutException (java.util.concurrent.TimeoutException) 2
Path (org.apache.hadoop.fs.Path) 2
BlockMissingException (org.apache.hadoop.hdfs.BlockMissingException) 2
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) 2
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 2
List (java.util.List) 1
Map (java.util.Map) 1
CountDownLatch (java.util.concurrent.CountDownLatch) 1
CyclicBarrier (java.util.concurrent.CyclicBarrier) 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 1
MBeanServer (javax.management.MBeanServer) 1
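
When handling the exception outside of a test, the failing property and values can also be read back from the exception itself. The sketch below assumes the getProperty(), getNewValue(), and getOldValue() accessors on org.apache.hadoop.conf.ReconfigurationException; the logging wrapper around them is illustrative only.

import org.apache.hadoop.conf.ReconfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hedged sketch: diagnostic logging for a failed reconfiguration, assuming
// the getProperty()/getNewValue()/getOldValue() accessors on
// ReconfigurationException. The logger usage is illustrative only.
class ReconfigurationDiagnostics {

    private static final Logger LOG = LoggerFactory.getLogger(ReconfigurationDiagnostics.class);

    static void report(ReconfigurationException e) {
        LOG.error("Could not change {} from '{}' to '{}'",
            e.getProperty(), e.getOldValue(), e.getNewValue(), e.getCause());
    }
}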