
Example 1 with StorageLocation

use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.

the class StorageLocationChecker method check.

/**
   * Initiate a check of the supplied storage volumes and return
   * the list of volumes that passed the check.
   *
   * StorageLocations are returned in the same order as the input
   * for compatibility with existing unit tests.
   *
   * @param conf HDFS configuration.
   * @param dataDirs list of volumes to check.
   * @return the list of healthy volumes. Never empty: if no volumes pass
   *         the check, an IOException is thrown instead.
   *
   * @throws InterruptedException if the check was interrupted.
   * @throws IOException if the number of failed volumes exceeds the
   *                     maximum allowed or if there are no good
   *                     volumes.
   */
public List<StorageLocation> check(final Configuration conf, final Collection<StorageLocation> dataDirs) throws InterruptedException, IOException {
    final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
    final Set<StorageLocation> failedLocations = new HashSet<>();
    final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures = Maps.newHashMap();
    final LocalFileSystem localFS = FileSystem.getLocal(conf);
    final CheckContext context = new CheckContext(localFS, expectedPermission);
    // Start parallel disk check operations on all StorageLocations.
    for (StorageLocation location : dataDirs) {
        goodLocations.put(location, true);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(location, context);
        if (olf.isPresent()) {
            futures.put(location, olf.get());
        }
    }
    if (maxVolumeFailuresTolerated >= dataDirs.size()) {
        throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " + maxVolumeFailuresTolerated + ". Value configured is >= " + "to the number of configured volumes (" + dataDirs.size() + ").");
    }
    final long checkStartTimeMs = timer.monotonicNow();
    // Retrieve the results of the disk checks.
    for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry : futures.entrySet()) {
        // Determine how much time we can allow for this check to complete.
        // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
        final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
        final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
        final StorageLocation location = entry.getKey();
        try {
            final VolumeCheckResult result = entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
            switch(result) {
                case HEALTHY:
                    break;
                case DEGRADED:
                    LOG.warn("StorageLocation {} appears to be degraded.", location);
                    break;
                case FAILED:
                    LOG.warn("StorageLocation {} detected as failed.", location);
                    failedLocations.add(location);
                    goodLocations.remove(location);
                    break;
                default:
                    LOG.error("Unexpected health check result {} for StorageLocation {}", result, location);
            }
        } catch (ExecutionException | TimeoutException e) {
            LOG.warn("Exception checking StorageLocation " + location, e.getCause());
            failedLocations.add(location);
            goodLocations.remove(location);
        }
    }
    if (failedLocations.size() > maxVolumeFailuresTolerated) {
        throw new DiskErrorException("Too many failed volumes - " + "current valid volumes: " + goodLocations.size() + ", volumes configured: " + dataDirs.size() + ", volumes failed: " + failedLocations.size() + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
    }
    if (goodLocations.size() == 0) {
        throw new DiskErrorException("All directories in " + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
    }
    return new ArrayList<>(goodLocations.keySet());
}
Also used : CheckContext(org.apache.hadoop.hdfs.server.datanode.StorageLocation.CheckContext) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) ExecutionException(java.util.concurrent.ExecutionException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)
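A minimal usage sketch, assuming a StorageLocationChecker(Configuration, Timer) constructor and building the input collection with StorageLocation.parse; the wiring below is illustrative, not lifted from DataNode startup code.

// Hedged sketch; the checker constructor and overall wiring are assumptions.
Configuration conf = new HdfsConfiguration();
List<StorageLocation> dataDirs = new ArrayList<>();
for (String dir : conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY)) {
    dataDirs.add(StorageLocation.parse(dir));
}
StorageLocationChecker checker =
    new StorageLocationChecker(conf, new Timer());   // assumed (conf, timer) constructor
// Returns the healthy locations, or throws DiskErrorException if too many failed.
List<StorageLocation> healthyVolumes = checker.check(conf, dataDirs);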

Example 2 with StorageLocation

use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.

the class FsDatasetTestUtil method assertFileLockReleased.

/**
   * Asserts that the storage lock file in the given directory has been
   * released. This method works by trying to acquire the lock itself. If
   * locking fails here, then the main code must have failed to release it.
   *
   * @param dir the storage directory to check
   * @throws IOException if there is an unexpected I/O error
   */
public static void assertFileLockReleased(String dir) throws IOException {
    StorageLocation sl = StorageLocation.parse(dir);
    File lockFile = new File(new File(sl.getUri()), Storage.STORAGE_FILE_LOCK);
    try (RandomAccessFile raf = new RandomAccessFile(lockFile, "rws");
        FileChannel channel = raf.getChannel()) {
        FileLock lock = channel.tryLock();
        assertNotNull(String.format("Lock file at %s appears to be held by a different process.", lockFile.getAbsolutePath()), lock);
        if (lock != null) {
            try {
                lock.release();
            } catch (IOException e) {
                FsDatasetImpl.LOG.warn(String.format("I/O error releasing file lock %s.", lockFile.getAbsolutePath()), e);
                throw e;
            }
        }
    } catch (OverlappingFileLockException e) {
        fail(String.format("Must release lock file at %s.", lockFile.getAbsolutePath()));
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) FileChannel(java.nio.channels.FileChannel) FileLock(java.nio.channels.FileLock) IOException(java.io.IOException) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) OverlappingFileLockException(java.nio.channels.OverlappingFileLockException)
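A short, hedged example of how this assertion might be called from a test; the directory path is a placeholder and the DataNode lifecycle around it is elided.

// Hypothetical usage; dataDir is illustrative, cluster setup/teardown omitted.
String dataDir = "/tmp/dfs/data/data1";
// ... start a DataNode (or FsDataset) backed by dataDir, then shut it down ...
FsDatasetTestUtil.assertFileLockReleased(dataDir);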

Example 3 with StorageLocation

use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.

the class TestFsDatasetImpl method testRemoveVolumes.

@Test(timeout = 30000)
public void testRemoveVolumes() throws IOException {
    // Feed FsDataset with block metadata.
    final int NUM_BLOCKS = 100;
    for (int i = 0; i < NUM_BLOCKS; i++) {
        String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
        ExtendedBlock eb = new ExtendedBlock(bpid, i);
        try (ReplicaHandler replica = dataset.createRbw(StorageType.DEFAULT, eb, false)) {
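            // The handler is closed immediately; only the replica registration
            // in the volumeMap matters for this test.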
        }
    }
    final String[] dataDirs = conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
    final String volumePathToRemove = dataDirs[0];
    Set<StorageLocation> volumesToRemove = new HashSet<>();
    volumesToRemove.add(StorageLocation.parse(volumePathToRemove));
    FsVolumeReferences volReferences = dataset.getFsVolumeReferences();
    FsVolumeImpl volumeToRemove = null;
    for (FsVolumeSpi vol : volReferences) {
        if (vol.getStorageLocation().equals(volumesToRemove.iterator().next())) {
            volumeToRemove = (FsVolumeImpl) vol;
        }
    }
    assertTrue(volumeToRemove != null);
    volReferences.close();
    dataset.removeVolumes(volumesToRemove, true);
    int expectedNumVolumes = dataDirs.length - 1;
    assertEquals("The volume has been removed from the volumeList.", expectedNumVolumes, getNumVolumes());
    assertEquals("The volume has been removed from the storageMap.", expectedNumVolumes, dataset.storageMap.size());
    try {
        dataset.asyncDiskService.execute(volumeToRemove, new Runnable() {

            @Override
            public void run() {
            }
        });
        fail("Expect RuntimeException: the volume has been removed from the " + "AsyncDiskService.");
    } catch (RuntimeException e) {
        GenericTestUtils.assertExceptionContains("Cannot find volume", e);
    }
    int totalNumReplicas = 0;
    for (String bpid : dataset.volumeMap.getBlockPoolList()) {
        totalNumReplicas += dataset.volumeMap.size(bpid);
    }
    assertEquals("The replica infos on this volume has been removed from the " + "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES, totalNumReplicas);
}
Also used : ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) FsVolumeReferences(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi.FsVolumeReferences) Matchers.anyString(org.mockito.Matchers.anyString) ReplicaHandler(org.apache.hadoop.hdfs.server.datanode.ReplicaHandler) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with StorageLocation

use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.

the class TestDatasetVolumeCheckerFailures method makeHungVolume.

/**
   * Create a mock FsVolumeSpi whose {@link FsVolumeSpi#check} routine
   * hangs forever.
   *
   * @return volume
   * @throws Exception
   */
private static FsVolumeSpi makeHungVolume() throws Exception {
    final FsVolumeSpi volume = mock(FsVolumeSpi.class);
    final FsVolumeReference reference = mock(FsVolumeReference.class);
    final StorageLocation location = mock(StorageLocation.class);
    when(reference.getVolume()).thenReturn(volume);
    when(volume.obtainReference()).thenReturn(reference);
    when(volume.getStorageLocation()).thenReturn(location);
    when(volume.check(anyObject())).thenAnswer(new Answer<VolumeCheckResult>() {

        @Override
        public VolumeCheckResult answer(InvocationOnMock invocation) throws Throwable {
            // Sleep forever.
            Thread.sleep(Long.MAX_VALUE);
            // unreachable.
            return VolumeCheckResult.HEALTHY;
        }
    });
    return volume;
}
Also used : InvocationOnMock(org.mockito.invocation.InvocationOnMock) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation)
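A hedged sketch of one way to confirm that the mock really hangs: run its check() on a worker thread and expect a timeout. The executor, the one-second limit, and the null check context are illustrative choices, not part of the original test class.

// Illustrative only; timeout value and threading choices are assumptions.
ExecutorService executor = Executors.newSingleThreadExecutor();
try {
    final FsVolumeSpi hungVolume = makeHungVolume();
    Callable<VolumeCheckResult> task = () -> hungVolume.check(null);
    Future<VolumeCheckResult> result = executor.submit(task);
    try {
        result.get(1, TimeUnit.SECONDS);
        fail("Expected the hung volume check to time out.");
    } catch (TimeoutException expected) {
        // The mock sleeps instead of returning, which is the behavior under test.
    }
} finally {
    executor.shutdownNow();
}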

Example 5 with StorageLocation

use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.

the class TestFsDatasetImpl method getDfsUsedValueOfNewVolume.

private long getDfsUsedValueOfNewVolume(long cacheDfsUsed, long waitIntervalTime) throws IOException, InterruptedException {
    List<NamespaceInfo> nsInfos = Lists.newArrayList();
    nsInfos.add(new NamespaceInfo(0, CLUSTER_ID, BLOCK_POOL_IDS[0], 1));
    String CURRENT_DIR = "current";
    String DU_CACHE_FILE = BlockPoolSlice.DU_CACHE_FILE;
    String path = BASE_DIR + "/newData0";
    String pathUri = new Path(path).toUri().toString();
    StorageLocation loc = StorageLocation.parse(pathUri);
    Storage.StorageDirectory sd = createStorageDirectory(new File(path));
    DataStorage.VolumeBuilder builder = new DataStorage.VolumeBuilder(storage, sd);
    when(storage.prepareVolume(eq(datanode), eq(loc), anyListOf(NamespaceInfo.class))).thenReturn(builder);
    String cacheFilePath = String.format("%s/%s/%s/%s/%s", path, CURRENT_DIR, BLOCK_POOL_IDS[0], CURRENT_DIR, DU_CACHE_FILE);
    File outFile = new File(cacheFilePath);
    if (!outFile.getParentFile().exists()) {
        outFile.getParentFile().mkdirs();
    }
    if (outFile.exists()) {
        outFile.delete();
    }
    FakeTimer timer = new FakeTimer();
    try {
        try (Writer out = new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8)) {
            // Write the dfsUsed value and the time to cache file
            out.write(Long.toString(cacheDfsUsed) + " " + Long.toString(timer.now()));
            out.flush();
        }
    } catch (IOException ioe) {
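        // Ignored: if the cache file cannot be written, the newly added volume
        // will simply recompute its dfsUsed value instead of loading it.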
    }
    dataset.setTimer(timer);
    timer.advance(waitIntervalTime);
    dataset.addVolume(loc, nsInfos);
    // Get the last volume which was just added before
    FsVolumeImpl newVolume;
    try (FsDatasetSpi.FsVolumeReferences volumes = dataset.getFsVolumeReferences()) {
        newVolume = (FsVolumeImpl) volumes.get(volumes.size() - 1);
    }
    long dfsUsed = newVolume.getDfsUsed();
    return dfsUsed;
}
Also used : Path(org.apache.hadoop.fs.Path) DataStorage(org.apache.hadoop.hdfs.server.datanode.DataStorage) StorageDirectory(org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) Matchers.anyString(org.mockito.Matchers.anyString) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) FsVolumeReferences(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi.FsVolumeReferences) DataStorage(org.apache.hadoop.hdfs.server.datanode.DataStorage) Storage(org.apache.hadoop.hdfs.server.common.Storage) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) NamespaceInfo(org.apache.hadoop.hdfs.server.protocol.NamespaceInfo) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) File(java.io.File) FakeTimer(org.apache.hadoop.util.FakeTimer) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter)
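A hedged sketch of how a test might exercise this helper; the test name, the interval value, and the assumption that a short wait keeps the cached value fresh are illustrative, not taken from TestFsDatasetImpl.

// Hypothetical caller; values and the freshness assumption are illustrative.
@Test(timeout = 30000)
public void testDfsUsedLoadedFromFreshCache() throws Exception {
    final long cachedDfsUsed = 1024L * 1024L;   // 1 MB written to the cache file
    final long shortWaitMs = 1000L;             // assumed to be within the cache validity window
    long reported = getDfsUsedValueOfNewVolume(cachedDfsUsed, shortWaitMs);
    // With a fresh cache entry, the new volume should report the cached value.
    assertEquals(cachedDfsUsed, reported);
}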

Aggregations

StorageLocation (org.apache.hadoop.hdfs.server.datanode.StorageLocation): 24 usages
Test (org.junit.Test): 11 usages
File (java.io.File): 7 usages
ArrayList (java.util.ArrayList): 7 usages
Configuration (org.apache.hadoop.conf.Configuration): 7 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 7 usages
Storage (org.apache.hadoop.hdfs.server.common.Storage): 6 usages
DataStorage (org.apache.hadoop.hdfs.server.datanode.DataStorage): 6 usages
FakeTimer (org.apache.hadoop.util.FakeTimer): 6 usages
HashSet (java.util.HashSet): 5 usages
Matchers.anyString (org.mockito.Matchers.anyString): 5 usages
IOException (java.io.IOException): 4 usages
StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory): 4 usages
FsVolumeReferences (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi.FsVolumeReferences): 4 usages
NamespaceInfo (org.apache.hadoop.hdfs.server.protocol.NamespaceInfo): 4 usages
Path (org.apache.hadoop.fs.Path): 3 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 3 usages
FsVolumeSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi): 3 usages
MultipleIOException (org.apache.hadoop.io.MultipleIOException): 3 usages
FileOutputStream (java.io.FileOutputStream): 2 usages