
Example 21 with FsVolumeSpi

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi in project hadoop by apache.

the class FsDatasetImpl method getBlockReports.

@Override
public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid) {
    Map<DatanodeStorage, BlockListAsLongs> blockReportsMap = new HashMap<DatanodeStorage, BlockListAsLongs>();
    Map<String, BlockListAsLongs.Builder> builders = new HashMap<String, BlockListAsLongs.Builder>();
    List<FsVolumeImpl> curVolumes = null;
    try (AutoCloseableLock lock = datasetLock.acquire()) {
        curVolumes = volumes.getVolumes();
        for (FsVolumeSpi v : curVolumes) {
            builders.put(v.getStorageID(), BlockListAsLongs.builder(maxDataLength));
        }
        Set<String> missingVolumesReported = new HashSet<>();
        for (ReplicaInfo b : volumeMap.replicas(bpid)) {
            String volStorageID = b.getVolume().getStorageID();
            if (!builders.containsKey(volStorageID)) {
                if (!missingVolumesReported.contains(volStorageID)) {
                    LOG.warn("Storage volume: " + volStorageID + " missing for the" + " replica block: " + b + ". Probably being removed!");
                    missingVolumesReported.add(volStorageID);
                }
                continue;
            }
            switch(b.getState()) {
                case FINALIZED:
                case RBW:
                case RWR:
                    builders.get(b.getVolume().getStorageID()).add(b);
                    break;
                case RUR:
                    ReplicaInfo orig = b.getOriginalReplica();
                    builders.get(b.getVolume().getStorageID()).add(orig);
                    break;
                case TEMPORARY:
                    break;
                default:
                    assert false : "Illegal ReplicaInfo state.";
            }
        }
    }
    for (FsVolumeImpl v : curVolumes) {
        blockReportsMap.put(v.toDatanodeStorage(), builders.get(v.getStorageID()).build());
    }
    return blockReportsMap;
}
Also used : ReplicaInfo(org.apache.hadoop.hdfs.server.datanode.ReplicaInfo) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ReplicaBuilder(org.apache.hadoop.hdfs.server.datanode.ReplicaBuilder) DatanodeStorage(org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) AutoCloseableLock(org.apache.hadoop.util.AutoCloseableLock) BlockListAsLongs(org.apache.hadoop.hdfs.protocol.BlockListAsLongs) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) HashSet(java.util.HashSet)
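
Not part of the Hadoop source above: a minimal sketch of how a caller might consume the per-storage map returned by getBlockReports(). The helper name and the dataset/bpid variables are assumptions for illustration only.

/** Illustrative only: print how many replicas each storage reported. */
static void printBlockReportSizes(FsDatasetSpi<?> dataset, String bpid) {
    Map<DatanodeStorage, BlockListAsLongs> reports = dataset.getBlockReports(bpid);
    for (Map.Entry<DatanodeStorage, BlockListAsLongs> e : reports.entrySet()) {
        // Each entry pairs one storage (volume) with the encoded replica list.
        System.out.println(e.getKey().getStorageID() + ": "
            + e.getValue().getNumberOfBlocks() + " replicas");
    }
}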

Example 22 with FsVolumeSpi

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi in project hadoop by apache.

the class FsVolumeList method handleVolumeFailures.

/**
   * Removes the given set of failed volumes from the active volume list.
   *
   * Uses {@link #checkDirsLock} so that only one removal pass runs at a time.
   *
   * @param failedVolumes volumes that have been detected as failed and are to
   *                      be removed from this list.
   */
void handleVolumeFailures(Set<FsVolumeSpi> failedVolumes) {
    try (AutoCloseableLock lock = checkDirsLock.acquire()) {
        for (FsVolumeSpi vol : failedVolumes) {
            FsVolumeImpl fsv = (FsVolumeImpl) vol;
            try (FsVolumeReference ref = fsv.obtainReference()) {
                addVolumeFailureInfo(fsv);
                removeVolume(fsv);
            } catch (ClosedChannelException e) {
                FsDatasetImpl.LOG.debug("Caught exception when obtaining " + "reference count on closed volume", e);
            } catch (IOException e) {
                FsDatasetImpl.LOG.error("Unexpected IOException", e);
            }
        }
        waitVolumeRemoved(5000, checkDirsLockCondition);
    }
}
Also used : ClosedChannelException(java.nio.channels.ClosedChannelException) FsVolumeReference(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference) AutoCloseableLock(org.apache.hadoop.util.AutoCloseableLock) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) IOException(java.io.IOException)
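
A hedged sketch of the calling side, assuming code in the same fsdataset.impl package (FsVolumeList is not a public class) and an already-computed set of failed volumes; the helper name is an assumption. FsDatasetImpl delegates its own volume-failure handling to this method in a similar fashion.

/** Illustrative only (assumed names): remove volumes already detected as failed. */
static void removeFailed(FsVolumeList volumeList, Set<FsVolumeSpi> failedVolumes) {
    if (!failedVolumes.isEmpty()) {
        // handleVolumeFailures records the failure info and detaches each volume.
        volumeList.handleVolumeFailures(failedVolumes);
    }
}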

Example 23 with FsVolumeSpi

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi in project hadoop by apache.

the class DatasetVolumeChecker method checkAllVolumes.

/**
   * Run checks against all volumes of a dataset.
   *
   * This check may be performed at service startup and subsequently at
   * regular intervals to detect and handle failed volumes.
   *
   * @param dataset - FsDatasetSpi to be checked.
   * @return set of failed volumes.
   */
public Set<FsVolumeSpi> checkAllVolumes(final FsDatasetSpi<? extends FsVolumeSpi> dataset) throws InterruptedException {
    final long gap = timer.monotonicNow() - lastAllVolumesCheck;
    if (gap < minDiskCheckGapMs) {
        numSkippedChecks.incrementAndGet();
        LOG.trace("Skipped checking all volumes, time since last check {} is less " + "than the minimum gap between checks ({} ms).", gap, minDiskCheckGapMs);
        return Collections.emptySet();
    }
    final FsDatasetSpi.FsVolumeReferences references = dataset.getFsVolumeReferences();
    if (references.size() == 0) {
        LOG.warn("checkAllVolumesAsync - no volumes can be referenced");
        return Collections.emptySet();
    }
    lastAllVolumesCheck = timer.monotonicNow();
    final Set<FsVolumeSpi> healthyVolumes = new HashSet<>();
    final Set<FsVolumeSpi> failedVolumes = new HashSet<>();
    final Set<FsVolumeSpi> allVolumes = new HashSet<>();
    final AtomicLong numVolumes = new AtomicLong(references.size());
    final CountDownLatch latch = new CountDownLatch(1);
    for (int i = 0; i < references.size(); ++i) {
        final FsVolumeReference reference = references.getReference(i);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(reference.getVolume(), IGNORED_CONTEXT);
        LOG.info("Scheduled health check for volume {}", reference.getVolume());
        if (olf.isPresent()) {
            allVolumes.add(reference.getVolume());
            Futures.addCallback(olf.get(), new ResultHandler(reference, healthyVolumes, failedVolumes, numVolumes, new Callback() {

                @Override
                public void call(Set<FsVolumeSpi> ignored1, Set<FsVolumeSpi> ignored2) {
                    latch.countDown();
                }
            }));
        } else {
            IOUtils.cleanup(null, reference);
            if (numVolumes.decrementAndGet() == 0) {
                latch.countDown();
            }
        }
    }
    // Wait for the checks to complete or the timeout to expire, after which
    // we give up on the remaining volumes.
    if (!latch.await(maxAllowedTimeForCheckMs, TimeUnit.MILLISECONDS)) {
        LOG.warn("checkAllVolumes timed out after {} ms" + maxAllowedTimeForCheckMs);
    }
    numSyncDatasetChecks.incrementAndGet();
    synchronized (this) {
        // Make a copy under the lock, since Sets.difference() returns a view
        // of a potentially changing set.
        return new HashSet<>(Sets.difference(allVolumes, healthyVolumes));
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) FsVolumeReference(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference) FutureCallback(com.google.common.util.concurrent.FutureCallback) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) HashSet(java.util.HashSet)
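
For orientation, a sketch (assumed names, not Hadoop code) of a synchronous call site that runs the check and reports whatever comes back as unhealthy.

/** Illustrative only: check every volume and report any that failed. */
static void checkVolumes(DatasetVolumeChecker checker,
        FsDatasetSpi<? extends FsVolumeSpi> dataset) throws InterruptedException {
    Set<FsVolumeSpi> unhealthy = checker.checkAllVolumes(dataset);
    if (!unhealthy.isEmpty()) {
        // In the DataNode the unhealthy set is forwarded to the dataset's
        // volume-failure handling; here it is only printed.
        System.err.println("Volumes failing the disk check: " + unhealthy);
    }
}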

Example 24 with FsVolumeSpi

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi in project hadoop by apache.

the class TestStorageMover method setVolumeFull.

private void setVolumeFull(DataNode dn, StorageType type) {
    try (FsDatasetSpi.FsVolumeReferences refs = dn.getFSDataset().getFsVolumeReferences()) {
        for (FsVolumeSpi fvs : refs) {
            FsVolumeImpl volume = (FsVolumeImpl) fvs;
            if (volume.getStorageType() == type) {
                LOG.info("setCapacity to 0 for [" + volume.getStorageType() + "]" + volume.getStorageID());
                volume.setCapacityForTesting(0);
            }
        }
    } catch (IOException e) {
        LOG.error("Unexpected exception by closing FsVolumeReference", e);
    }
}
Also used : FsVolumeImpl(org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) IOException(java.io.IOException)
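
The same try-with-resources pattern works for any per-volume inspection. A hedged sketch follows; the helper name and the choice of fields printed are assumptions, not part of the test.

/** Illustrative only: list each volume's storage id, type and free space. */
static void logVolumeSpace(DataNode dn) throws IOException {
    try (FsDatasetSpi.FsVolumeReferences refs =
             dn.getFSDataset().getFsVolumeReferences()) {
        for (FsVolumeSpi vol : refs) {
            // FsVolumeSpi exposes the storage id, storage type and free space.
            System.out.println(vol.getStorageID() + " [" + vol.getStorageType()
                + "] " + vol.getAvailable() + " bytes available");
        }
    }
}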

Example 25 with FsVolumeSpi

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi in project hadoop by apache.

the class TestNameNodePrunesMissingStorages method testRemovingStorageDoesNotProduceZombies.

/**
   * Regression test for HDFS-7960.<p/>
   *
   * Shutting down a datanode, removing a storage directory, and restarting
   * the DataNode should not produce zombie storages.
   */
@Test(timeout = 300000)
public void testRemovingStorageDoesNotProduceZombies() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000);
    final int NUM_STORAGES_PER_DN = 2;
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).storagesPerDatanode(NUM_STORAGES_PER_DN).build();
    try {
        cluster.waitActive();
        for (DataNode dn : cluster.getDataNodes()) {
            assertEquals(NUM_STORAGES_PER_DN, cluster.getNamesystem().getBlockManager().getDatanodeManager().getDatanode(dn.getDatanodeId()).getStorageInfos().length);
        }
        // Create a file which will end up on all 3 datanodes.
        final Path TEST_PATH = new Path("/foo1");
        DistributedFileSystem fs = cluster.getFileSystem();
        DFSTestUtil.createFile(fs, TEST_PATH, 1024, (short) 3, 0xcafecafe);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(dn);
        }
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, new Path("/foo1"));
        cluster.getNamesystem().writeLock();
        final String storageIdToRemove;
        String datanodeUuid;
        // Find the first storage which this block is in.
        try {
            BlockInfo storedBlock = cluster.getNamesystem().getBlockManager().getStoredBlock(block.getLocalBlock());
            Iterator<DatanodeStorageInfo> storageInfoIter = cluster.getNamesystem().getBlockManager().blocksMap.getStorages(storedBlock).iterator();
            assertTrue(storageInfoIter.hasNext());
            DatanodeStorageInfo info = storageInfoIter.next();
            storageIdToRemove = info.getStorageID();
            datanodeUuid = info.getDatanodeDescriptor().getDatanodeUuid();
        } finally {
            cluster.getNamesystem().writeUnlock();
        }
        // Find the DataNode which holds that first storage.
        final DataNode datanodeToRemoveStorageFrom;
        int datanodeToRemoveStorageFromIdx = 0;
        while (true) {
            if (datanodeToRemoveStorageFromIdx >= cluster.getDataNodes().size()) {
                Assert.fail("failed to find datanode with uuid " + datanodeUuid);
                datanodeToRemoveStorageFrom = null;
                break;
            }
            DataNode dn = cluster.getDataNodes().get(datanodeToRemoveStorageFromIdx);
            if (dn.getDatanodeUuid().equals(datanodeUuid)) {
                datanodeToRemoveStorageFrom = dn;
                break;
            }
            datanodeToRemoveStorageFromIdx++;
        }
        // Find the volume within the datanode which holds that first storage.
        StorageLocation volumeLocationToRemove = null;
        try (FsVolumeReferences volumes = datanodeToRemoveStorageFrom.getFSDataset().getFsVolumeReferences()) {
            assertEquals(NUM_STORAGES_PER_DN, volumes.size());
            for (FsVolumeSpi volume : volumes) {
                if (volume.getStorageID().equals(storageIdToRemove)) {
                    volumeLocationToRemove = volume.getStorageLocation();
                }
            }
        }
        // Shut down the datanode and remove the volume.
        // Replace the volume directory with a regular file, which will
        // cause a volume failure.  (If we merely removed the directory,
        // it would be re-initialized with a new storage ID.)
        assertNotNull(volumeLocationToRemove);
        datanodeToRemoveStorageFrom.shutdown();
        FileUtil.fullyDelete(new File(volumeLocationToRemove.getUri()));
        FileOutputStream fos = new FileOutputStream(new File(volumeLocationToRemove.getUri()));
        try {
            fos.write(1);
        } finally {
            fos.close();
        }
        cluster.restartDataNode(datanodeToRemoveStorageFromIdx);
        // Wait for the NameNode to remove the storage.
        LOG.info("waiting for the datanode to remove " + storageIdToRemove);
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                final DatanodeDescriptor dnDescriptor = cluster.getNamesystem().getBlockManager().getDatanodeManager().getDatanode(datanodeToRemoveStorageFrom.getDatanodeUuid());
                assertNotNull(dnDescriptor);
                DatanodeStorageInfo[] infos = dnDescriptor.getStorageInfos();
                for (DatanodeStorageInfo info : infos) {
                    if (info.getStorageID().equals(storageIdToRemove)) {
                        LOG.info("Still found storage " + storageIdToRemove + " on " + info + ".");
                        return false;
                    }
                }
                assertEquals(NUM_STORAGES_PER_DN - 1, infos.length);
                return true;
            }
        }, 1000, 30000);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) FsVolumeReferences(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi.FsVolumeReferences) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileOutputStream(java.io.FileOutputStream) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) File(java.io.File) Test(org.junit.Test)
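
The volume lookup buried in the middle of that test reads more easily in isolation; here is a hedged sketch of it as a standalone helper (the helper name is an assumption).

/** Illustrative only: find the StorageLocation backing a given storage id. */
static StorageLocation findVolumeLocation(DataNode dn, String storageId)
        throws IOException {
    try (FsDatasetSpi.FsVolumeReferences volumes =
             dn.getFSDataset().getFsVolumeReferences()) {
        for (FsVolumeSpi volume : volumes) {
            if (volume.getStorageID().equals(storageId)) {
                // Matches the lookup the test performs before removing the volume.
                return volume.getStorageLocation();
            }
        }
    }
    return null;
}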

Aggregations

FsVolumeSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi): 33 usages
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi): 15 usages
Test (org.junit.Test): 10 usages
IOException (java.io.IOException): 8 usages
File (java.io.File): 7 usages
HashSet (java.util.HashSet): 7 usages
Path (org.apache.hadoop.fs.Path): 6 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6 usages
FsVolumeReferences (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi.FsVolumeReferences): 6 usages
Configuration (org.apache.hadoop.conf.Configuration): 5 usages
AutoCloseableLock (org.apache.hadoop.util.AutoCloseableLock): 5 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 4 usages
ArrayList (java.util.ArrayList): 3 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 3 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 3 usages
StorageLocation (org.apache.hadoop.hdfs.server.datanode.StorageLocation): 3 usages
FsVolumeReference (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference): 3 usages
FsVolumeImpl (org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl): 3 usages
DatanodeStorage (org.apache.hadoop.hdfs.server.protocol.DatanodeStorage): 3 usages
HashMap (java.util.HashMap): 2 usages