
Example 6 with FsDatasetSpi

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.

The class TestDataNodeHotSwapVolumes, method testRemoveVolumeBeingWrittenForDatanode.

/**
   * Test removing a data volume from a particular DataNode while the volume
   * is actively being written to.
   * @param dataNodeIdx the index of the DataNode from which to remove a volume.
   */
private void testRemoveVolumeBeingWrittenForDatanode(int dataNodeIdx)
        throws IOException, ReconfigurationException, TimeoutException,
        InterruptedException, BrokenBarrierException {
    // Starts DFS cluster with 3 DataNodes to form a pipeline.
    startDFSCluster(1, 3);
    final short REPLICATION = 3;
    final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
    final FileSystem fs = cluster.getFileSystem();
    final Path testFile = new Path("/test");
    FSDataOutputStream out = fs.create(testFile, REPLICATION);
    Random rb = new Random(0);
    // Write half of the block first.
    byte[] writeBuf = new byte[BLOCK_SIZE / 2];
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();
    // Make FsDatasetSpi#finalizeBlock a time-consuming operation, so that if
    // the BlockReceiver released the volume reference before finalizeBlock(),
    // the blocks on the volume would be removed and finalizeBlock() would
    // throw an IOException.
    final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
    dn.data = Mockito.spy(data);
    doAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws IOException, InterruptedException {
            Thread.sleep(1000);
            // Pass the argument through to the real FsDatasetImpl#finalizeBlock
            // to verify that the block is not removed, since the volume
            // reference should not have been released at this point.
            data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
            return null;
        }
    }).when(dn.data).finalizeBlock(any(ExtendedBlock.class));
    final CyclicBarrier barrier = new CyclicBarrier(2);
    List<String> oldDirs = getDataDirs(dn);
    // Remove the first volume by reconfiguring the DataNode to use only the
    // second data directory.
    final String newDirs = oldDirs.get(1);
    final List<Exception> exceptions = new ArrayList<>();
    Thread reconfigThread = new Thread() {

        @Override
        public void run() {
            try {
                barrier.await();
                assertThat("DN did not update its own config", dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDirs), is(dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY)));
            } catch (ReconfigurationException | InterruptedException | BrokenBarrierException e) {
                exceptions.add(e);
            }
        }
    };
    reconfigThread.start();
    barrier.await();
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();
    out.close();
    reconfigThread.join();
    // Verify that the data directory reconfiguration was successful.
    FsDatasetSpi<? extends FsVolumeSpi> fsDatasetSpi = dn.getFSDataset();
    try (FsDatasetSpi.FsVolumeReferences fsVolumeReferences = fsDatasetSpi.getFsVolumeReferences()) {
        for (int i = 0; i < fsVolumeReferences.size(); i++) {
            System.out.println("Vol: " + fsVolumeReferences.get(i).getBaseURI().toString());
        }
        assertEquals("Volume remove wasn't successful.", 1, fsVolumeReferences.size());
    }
    // Verify that the file has sufficient replication.
    DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
    // Read the content back
    byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
    assertEquals(BLOCK_SIZE, content.length);
    if (!exceptions.isEmpty()) {
        throw new IOException(exceptions.get(0).getCause());
    }
}
Also used: BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ArrayList(java.util.ArrayList) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ReconfigurationException(org.apache.hadoop.conf.ReconfigurationException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Path(org.apache.hadoop.fs.Path) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) CyclicBarrier(java.util.concurrent.CyclicBarrier) InvocationOnMock(org.mockito.invocation.InvocationOnMock)
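
The core trick in this test is a Mockito spy whose doAnswer stub sleeps before delegating to the real finalizeBlock(), widening the race window between the writer and the reconfiguration thread. The sketch below isolates that spy/doAnswer delay pattern on a plain ArrayList; the class name SpyDelayDemo is illustrative, and invocation.callRealMethod() stands in for the test's explicit call on the unwrapped data reference.

import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.doAnswer;

import java.util.ArrayList;
import java.util.List;

import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

public class SpyDelayDemo {
    public static void main(String[] args) {
        final List<String> real = new ArrayList<>();
        // A spy wraps a real object; unstubbed calls fall through to it.
        final List<String> spy = Mockito.spy(real);
        // Stub add() to sleep first and then run the real method, mirroring
        // how the test turns finalizeBlock() into a slow operation.
        doAnswer(new Answer<Object>() {
            @Override
            public Object answer(InvocationOnMock invocation) throws Throwable {
                Thread.sleep(1000);                 // widen the race window
                return invocation.callRealMethod(); // then do the real work
            }
        }).when(spy).add(anyString());
        long start = System.currentTimeMillis();
        spy.add("x");
        // Prints "1 element(s) after ~1000 ms": the real behavior still ran.
        System.out.println(spy.size() + " element(s) after "
                + (System.currentTimeMillis() - start) + " ms");
    }
}

Note that Mockito.spy() copies the state of the original object into the proxy, so the delayed add() lands in the spy's own backing store; the test sidesteps this by keeping a reference to the original, unwrapped dataset and calling it directly.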

Example 7 with FsDatasetSpi

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.

The class TestDataNodeVolumeFailure, method testFailedVolumeBeingRemovedFromDataNode.

/**
   * Test that DataStorage and BlockPoolSliceStorage remove the failed volume
   * after failure.
   */
@Test(timeout = 150000)
public void testFailedVolumeBeingRemovedFromDataNode() throws Exception {
    // The test uses DataNodeTestUtils#injectDataDirFailure() to simulate
    // volume failures, which is currently not supported on Windows.
    assumeNotWindows();
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short) 2);
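    // dn0Vol1 is DataNode 0's first data directory; MiniDFSCluster names data
    // directories data1, data2, ..., giving DataNode n the pair data(2n+1)
    // and data(2n+2), hence the (2 * 0 + 1) below.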
    File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
    DataNode dn0 = cluster.getDataNodes().get(0);
    DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol1));
    // Verify dn0Vol1 has been completely removed from DN0.
    // 1. dn0Vol1 is removed from DataStorage.
    DataStorage storage = dn0.getStorage();
    assertEquals(1, storage.getNumStorageDirs());
    for (int i = 0; i < storage.getNumStorageDirs(); i++) {
        Storage.StorageDirectory sd = storage.getStorageDir(i);
        assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
    }
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    BlockPoolSliceStorage bpsStorage = storage.getBPStorage(bpid);
    assertEquals(1, bpsStorage.getNumStorageDirs());
    for (int i = 0; i < bpsStorage.getNumStorageDirs(); i++) {
        Storage.StorageDirectory sd = bpsStorage.getStorageDir(i);
        assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
    }
    // 2. dn0Vol1 is removed from FsDataset
    FsDatasetSpi<? extends FsVolumeSpi> data = dn0.getFSDataset();
    try (FsDatasetSpi.FsVolumeReferences vols = data.getFsVolumeReferences()) {
        for (FsVolumeSpi volume : vols) {
            assertFalse(new File(volume.getStorageLocation().getUri()).getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
        }
    }
    // 3. all blocks on dn0Vol1 have been removed.
    for (ReplicaInfo replica : FsDatasetTestUtil.getReplicas(data, bpid)) {
        assertNotNull(replica.getVolume());
        assertFalse(new File(replica.getVolume().getStorageLocation().getUri()).getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
    }
    // 4. dn0Vol1 is not in DN0's configuration and dataDirs anymore.
    String[] dataDirStrs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
    assertEquals(1, dataDirStrs.length);
    assertFalse(dataDirStrs[0].contains(dn0Vol1.getAbsolutePath()));
}
Also used: Path(org.apache.hadoop.fs.Path) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) Storage(org.apache.hadoop.hdfs.server.common.Storage) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) File(java.io.File) Test(org.junit.Test)
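
Checks 2 and 3 above share one idiom: iterate the dataset's volumes inside try-with-resources so the FsVolumeReferences handle, and the per-volume reference counts it holds, are released even if an assertion fails mid-iteration. Below is a hedged sketch of that idiom as a reusable helper; the names VolumeChecks and volumeFullyRemoved are hypothetical, not Hadoop APIs.

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;

final class VolumeChecks {
    /** Returns true if no volume in the dataset lives under the failed directory. */
    static boolean volumeFullyRemoved(FsDatasetSpi<? extends FsVolumeSpi> data,
                                      File failedVol) throws IOException {
        final String failedPath = failedVol.getAbsolutePath();
        // FsVolumeReferences is closeable; try-with-resources releases the
        // volume references even on an early return.
        try (FsDatasetSpi.FsVolumeReferences vols = data.getFsVolumeReferences()) {
            for (FsVolumeSpi volume : vols) {
                String volPath =
                        new File(volume.getStorageLocation().getUri()).getAbsolutePath();
                if (volPath.startsWith(failedPath)) {
                    return false; // a volume still lives under the failed dir
                }
            }
        }
        return true;
    }
}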

Example 8 with FsDatasetSpi

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.

The class DatasetVolumeChecker, method checkAllVolumes.

/**
   * Run checks against all volumes of a dataset.
   *
   * This check may be performed at service startup and subsequently at
   * regular intervals to detect and handle failed volumes.
   *
   * @param dataset - FsDatasetSpi to be checked.
   * @return set of failed volumes.
   */
public Set<FsVolumeSpi> checkAllVolumes(final FsDatasetSpi<? extends FsVolumeSpi> dataset) throws InterruptedException {
    final long gap = timer.monotonicNow() - lastAllVolumesCheck;
    if (gap < minDiskCheckGapMs) {
        numSkippedChecks.incrementAndGet();
        LOG.trace("Skipped checking all volumes, time since last check {} is less " + "than the minimum gap between checks ({} ms).", gap, minDiskCheckGapMs);
        return Collections.emptySet();
    }
    final FsDatasetSpi.FsVolumeReferences references = dataset.getFsVolumeReferences();
    if (references.size() == 0) {
        LOG.warn("checkAllVolumesAsync - no volumes can be referenced");
        return Collections.emptySet();
    }
    lastAllVolumesCheck = timer.monotonicNow();
    final Set<FsVolumeSpi> healthyVolumes = new HashSet<>();
    final Set<FsVolumeSpi> failedVolumes = new HashSet<>();
    final Set<FsVolumeSpi> allVolumes = new HashSet<>();
    final AtomicLong numVolumes = new AtomicLong(references.size());
    final CountDownLatch latch = new CountDownLatch(1);
    for (int i = 0; i < references.size(); ++i) {
        final FsVolumeReference reference = references.getReference(i);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(reference.getVolume(), IGNORED_CONTEXT);
        LOG.info("Scheduled health check for volume {}", reference.getVolume());
        if (olf.isPresent()) {
            allVolumes.add(reference.getVolume());
            Futures.addCallback(olf.get(), new ResultHandler(reference, healthyVolumes, failedVolumes, numVolumes, new Callback() {

                @Override
                public void call(Set<FsVolumeSpi> ignored1, Set<FsVolumeSpi> ignored2) {
                    latch.countDown();
                }
            }));
        } else {
            IOUtils.cleanup(null, reference);
            if (numVolumes.decrementAndGet() == 0) {
                latch.countDown();
            }
        }
    }
    // Wait until the timeout elapses, after which we give up on the
    // remaining volumes.
    if (!latch.await(maxAllowedTimeForCheckMs, TimeUnit.MILLISECONDS)) {
        LOG.warn("checkAllVolumes timed out after {} ms" + maxAllowedTimeForCheckMs);
    }
    numSyncDatasetChecks.incrementAndGet();
    synchronized (this) {
        // Any volume not proven healthy by now is treated as failed. Make a
        // copy under the mutex, since Sets.difference() returns a live view
        // of a potentially changing set.
        return new HashSet<>(Sets.difference(allVolumes, healthyVolumes));
    }
}
Also used: HashSet(java.util.HashSet) Set(java.util.Set) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) FsVolumeReference(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference) FutureCallback(com.google.common.util.concurrent.FutureCallback) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) ListenableFuture(com.google.common.util.concurrent.ListenableFuture)
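
The control flow of checkAllVolumes (schedule one asynchronous check per volume, count completions, wait a bounded time, then report everything not proven healthy as failed) is a general fan-out/deadline pattern. Below is a minimal, dependency-free sketch of it; all names (ItemCheckDemo, the "vol" strings) are illustrative, and it uses one CountDownLatch count per item instead of Hadoop's single-count latch gated by an AtomicLong, which is the simpler equivalent.

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ItemCheckDemo {
    public static void main(String[] args) throws InterruptedException {
        final ExecutorService pool = Executors.newFixedThreadPool(4);
        final Set<String> volumes = Set.of("vol1", "vol2", "vol3");
        final Set<String> healthy = ConcurrentHashMap.newKeySet();
        final CountDownLatch latch = new CountDownLatch(volumes.size());
        for (String vol : volumes) {
            pool.submit(() -> {
                try {
                    // A real checker would probe the disk here; the sketch
                    // simply marks every volume healthy.
                    healthy.add(vol);
                } finally {
                    latch.countDown(); // always count down, even on failure
                }
            });
        }
        // Like checkAllVolumes, wait a bounded time rather than forever.
        if (!latch.await(2, TimeUnit.SECONDS)) {
            System.out.println("timed out waiting for volume checks");
        }
        // Anything not proven healthy by the deadline counts as failed,
        // mirroring Sets.difference(allVolumes, healthyVolumes) above.
        final Set<String> failed = ConcurrentHashMap.newKeySet();
        failed.addAll(volumes);
        failed.removeAll(healthy);
        System.out.println("failed volumes: " + failed);
        pool.shutdown();
    }
}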

Aggregations

FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi): 8 uses
FsVolumeSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi): 4 uses
File (java.io.File): 3 uses
Path (org.apache.hadoop.fs.Path): 3 uses
Test (org.junit.Test): 3 uses
IOException (java.io.IOException): 2 uses
ArrayList (java.util.ArrayList): 2 uses
ReconfigurationException (org.apache.hadoop.conf.ReconfigurationException): 2 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 2 uses
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 2 uses
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 2 uses
Block (org.apache.hadoop.hdfs.protocol.Block): 2 uses
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 2 uses
Storage (org.apache.hadoop.hdfs.server.common.Storage): 2 uses
DatanodeStorage (org.apache.hadoop.hdfs.server.protocol.DatanodeStorage): 2 uses
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 2 uses
Matchers.anyString (org.mockito.Matchers.anyString): 2 uses
FutureCallback (com.google.common.util.concurrent.FutureCallback): 1 use
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1 use
HashSet (java.util.HashSet): 1 use