Use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.
Class StorageLocationChecker, method check().
/**
 * Initiate a check of the supplied storage volumes and return
 * the list of healthy volumes.
 *
 * StorageLocations are returned in the same order as the input
 * for compatibility with existing unit tests.
 *
 * @param conf HDFS configuration.
 * @param dataDirs list of volumes to check.
 * @return the list of healthy volumes, in input order. Never empty:
 *         if no volume passes the check, an IOException is thrown
 *         instead.
 *
 * @throws InterruptedException if the check was interrupted.
 * @throws IOException if the number of failed volumes exceeds the
 *                     maximum allowed or if there are no good
 *                     volumes.
 */
public List<StorageLocation> check(final Configuration conf,
    final Collection<StorageLocation> dataDirs)
    throws InterruptedException, IOException {
  final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
  final Set<StorageLocation> failedLocations = new HashSet<>();
  final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures =
      Maps.newHashMap();
  final LocalFileSystem localFS = FileSystem.getLocal(conf);
  final CheckContext context = new CheckContext(localFS, expectedPermission);

  // Start parallel disk check operations on all StorageLocations.
  for (StorageLocation location : dataDirs) {
    goodLocations.put(location, true);
    Optional<ListenableFuture<VolumeCheckResult>> olf =
        delegateChecker.schedule(location, context);
    if (olf.isPresent()) {
      futures.put(location, olf.get());
    }
  }

  if (maxVolumeFailuresTolerated >= dataDirs.size()) {
    throw new DiskErrorException("Invalid value configured for "
        + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - "
        + maxVolumeFailuresTolerated + ". Value configured is >= "
        + "to the number of configured volumes (" + dataDirs.size() + ").");
  }

  final long checkStartTimeMs = timer.monotonicNow();

  // Retrieve the results of the disk checks.
  for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry
      : futures.entrySet()) {
    // Determine how much time we can allow for this check to complete.
    // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
    final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
    final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
    final StorageLocation location = entry.getKey();

    try {
      final VolumeCheckResult result =
          entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
      switch (result) {
      case HEALTHY:
        break;
      case DEGRADED:
        LOG.warn("StorageLocation {} appears to be degraded.", location);
        break;
      case FAILED:
        LOG.warn("StorageLocation {} detected as failed.", location);
        failedLocations.add(location);
        goodLocations.remove(location);
        break;
      default:
        LOG.error("Unexpected health check result {} for StorageLocation {}",
            result, location);
      }
    } catch (ExecutionException | TimeoutException e) {
      LOG.warn("Exception checking StorageLocation " + location, e.getCause());
      failedLocations.add(location);
      goodLocations.remove(location);
    }
  }

  if (failedLocations.size() > maxVolumeFailuresTolerated) {
    throw new DiskErrorException("Too many failed volumes - "
        + "current valid volumes: " + goodLocations.size()
        + ", volumes configured: " + dataDirs.size()
        + ", volumes failed: " + failedLocations.size()
        + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
  }

  if (goodLocations.size() == 0) {
    throw new DiskErrorException("All directories in "
        + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
  }

  return new ArrayList<>(goodLocations.keySet());
}
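For context, a minimal usage sketch, not taken from the Hadoop sources: it assumes the StorageLocationChecker(Configuration, Timer) constructor and builds the input collection with StorageLocation.parse(), roughly the way DataNode startup consumes this method.

// Hedged sketch only. check() returns the healthy locations in input order
// and throws DiskErrorException when too many volumes fail or none are usable.
Configuration conf = new HdfsConfiguration();
List<StorageLocation> dataDirs = new ArrayList<>();
for (String dir : conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY)) {
  dataDirs.add(StorageLocation.parse(dir));
}
StorageLocationChecker checker = new StorageLocationChecker(conf, new Timer());
List<StorageLocation> healthyDirs = checker.check(conf, dataDirs);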
Use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.
Class FsDatasetTestUtil, method assertFileLockReleased().
/**
* Asserts that the storage lock file in the given directory has been
* released. This method works by trying to acquire the lock file itself. If
* locking fails here, then the main code must have failed to release it.
*
* @param dir the storage directory to check
* @throws IOException if there is an unexpected I/O error
*/
public static void assertFileLockReleased(String dir) throws IOException {
  StorageLocation sl = StorageLocation.parse(dir);
  File lockFile = new File(new File(sl.getUri()), Storage.STORAGE_FILE_LOCK);
  try (RandomAccessFile raf = new RandomAccessFile(lockFile, "rws");
      FileChannel channel = raf.getChannel()) {
    FileLock lock = channel.tryLock();
    assertNotNull(String.format(
        "Lock file at %s appears to be held by a different process.",
        lockFile.getAbsolutePath()), lock);
    if (lock != null) {
      try {
        lock.release();
      } catch (IOException e) {
        FsDatasetImpl.LOG.warn(String.format(
            "I/O error releasing file lock %s.",
            lockFile.getAbsolutePath()), e);
        throw e;
      }
    }
  } catch (OverlappingFileLockException e) {
    fail(String.format("Must release lock file at %s.",
        lockFile.getAbsolutePath()));
  }
}
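A hypothetical calling pattern (the directory name and cluster handle are assumed, not taken from a specific Hadoop test): stop the DataNode that owns a storage directory, then verify that its in_use.lock was released.

// Sketch only; "data1" is the conventional name of the first storage
// directory under a MiniDFSCluster's data directory.
String dataDir = cluster.getDataDirectory() + "/data1";
cluster.stopDataNode(0);
FsDatasetTestUtil.assertFileLockReleased(dataDir);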
Use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.
Class TestFsDatasetImpl, method testRemoveVolumes().
@Test(timeout = 30000)
public void testRemoveVolumes() throws IOException {
  // Feed FsDataset with block metadata.
  final int NUM_BLOCKS = 100;
  for (int i = 0; i < NUM_BLOCKS; i++) {
    String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
    ExtendedBlock eb = new ExtendedBlock(bpid, i);
    // The handle is closed immediately; only the replica created by
    // createRbw() is needed for this test.
    try (ReplicaHandler replica =
        dataset.createRbw(StorageType.DEFAULT, eb, false)) {
    }
  }

  final String[] dataDirs =
      conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
  final String volumePathToRemove = dataDirs[0];
  Set<StorageLocation> volumesToRemove = new HashSet<>();
  volumesToRemove.add(StorageLocation.parse(volumePathToRemove));

  FsVolumeReferences volReferences = dataset.getFsVolumeReferences();
  FsVolumeImpl volumeToRemove = null;
  for (FsVolumeSpi vol : volReferences) {
    if (vol.getStorageLocation().equals(volumesToRemove.iterator().next())) {
      volumeToRemove = (FsVolumeImpl) vol;
    }
  }
  assertTrue(volumeToRemove != null);
  volReferences.close();

  dataset.removeVolumes(volumesToRemove, true);
  int expectedNumVolumes = dataDirs.length - 1;
  assertEquals("The volume has been removed from the volumeList.",
      expectedNumVolumes, getNumVolumes());
  assertEquals("The volume has been removed from the storageMap.",
      expectedNumVolumes, dataset.storageMap.size());

  try {
    dataset.asyncDiskService.execute(volumeToRemove, new Runnable() {
      @Override
      public void run() {
      }
    });
    fail("Expect RuntimeException: the volume has been removed from the "
        + "AsyncDiskService.");
  } catch (RuntimeException e) {
    GenericTestUtils.assertExceptionContains("Cannot find volume", e);
  }

  int totalNumReplicas = 0;
  for (String bpid : dataset.volumeMap.getBlockPoolList()) {
    totalNumReplicas += dataset.volumeMap.size(bpid);
  }
  assertEquals("The replica infos on this volume has been removed from the "
      + "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES, totalNumReplicas);
}
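The equality check above relies on how StorageLocation normalizes configured directory strings. A small illustrative sketch, not from the Hadoop tests (the [SSD] prefix and paths are made-up examples), of the parse()/getStorageType() behavior the comparison depends on:

// Illustrative only: parse() accepts an optional [StorageType] prefix and
// normalizes the remainder to a URI, so two locations parsed from the same
// configured directory compare equal.
StorageLocation plain = StorageLocation.parse("/data/dn1");
StorageLocation typed = StorageLocation.parse("[SSD]file:///data/dn2");
assertEquals(StorageType.DISK, plain.getStorageType()); // DISK is the default
assertEquals(StorageType.SSD, typed.getStorageType());
assertEquals(StorageLocation.parse("/data/dn1"), plain);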
Use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.
Class TestDatasetVolumeCheckerFailures, method makeHungVolume().
/**
 * Create a mock FsVolumeSpi whose {@link FsVolumeSpi#check} routine
 * hangs forever.
 *
 * @return a mock volume whose check() never completes.
 * @throws Exception if setting up the mock fails.
 */
private static FsVolumeSpi makeHungVolume() throws Exception {
  final FsVolumeSpi volume = mock(FsVolumeSpi.class);
  final FsVolumeReference reference = mock(FsVolumeReference.class);
  final StorageLocation location = mock(StorageLocation.class);

  when(reference.getVolume()).thenReturn(volume);
  when(volume.obtainReference()).thenReturn(reference);
  when(volume.getStorageLocation()).thenReturn(location);
  when(volume.check(anyObject())).thenAnswer(new Answer<VolumeCheckResult>() {
    @Override
    public VolumeCheckResult answer(InvocationOnMock invocation)
        throws Throwable {
      // Sleep forever.
      Thread.sleep(Long.MAX_VALUE);
      // unreachable.
      return VolumeCheckResult.HEALTHY;
    }
  });
  return volume;
}
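A minimal sketch, not part of the Hadoop test and using only plain JDK executor APIs, of how the hung mock can be exercised: the stubbed check() never returns, so a bounded Future.get() times out.

// Sketch only: run the hung check on a throwaway executor and verify that it
// does not complete within a short bound.
FsVolumeSpi volume = makeHungVolume();
ExecutorService executor = Executors.newSingleThreadExecutor();
Future<VolumeCheckResult> future = executor.submit(() -> volume.check(null));
try {
  future.get(1, TimeUnit.SECONDS);
  fail("Expected the hung volume check to time out.");
} catch (TimeoutException expected) {
  // The check is stuck in Thread.sleep(); a real checker would treat this
  // volume as unresponsive rather than wait indefinitely.
} finally {
  future.cancel(true);
  executor.shutdownNow();
}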
Use of org.apache.hadoop.hdfs.server.datanode.StorageLocation in project hadoop by apache.
Class TestFsDatasetImpl, method getDfsUsedValueOfNewVolume().
private long getDfsUsedValueOfNewVolume(long cacheDfsUsed,
    long waitIntervalTime) throws IOException, InterruptedException {
  List<NamespaceInfo> nsInfos = Lists.newArrayList();
  nsInfos.add(new NamespaceInfo(0, CLUSTER_ID, BLOCK_POOL_IDS[0], 1));

  String CURRENT_DIR = "current";
  String DU_CACHE_FILE = BlockPoolSlice.DU_CACHE_FILE;
  String path = BASE_DIR + "/newData0";
  String pathUri = new Path(path).toUri().toString();
  StorageLocation loc = StorageLocation.parse(pathUri);
  Storage.StorageDirectory sd = createStorageDirectory(new File(path));
  DataStorage.VolumeBuilder builder =
      new DataStorage.VolumeBuilder(storage, sd);
  when(storage.prepareVolume(eq(datanode), eq(loc),
      anyListOf(NamespaceInfo.class))).thenReturn(builder);

  String cacheFilePath = String.format("%s/%s/%s/%s/%s", path, CURRENT_DIR,
      BLOCK_POOL_IDS[0], CURRENT_DIR, DU_CACHE_FILE);
  File outFile = new File(cacheFilePath);
  if (!outFile.getParentFile().exists()) {
    outFile.getParentFile().mkdirs();
  }
  if (outFile.exists()) {
    outFile.delete();
  }

  FakeTimer timer = new FakeTimer();
  try {
    try (Writer out = new OutputStreamWriter(new FileOutputStream(outFile),
        StandardCharsets.UTF_8)) {
      // Write the dfsUsed value and the time to cache file
      out.write(Long.toString(cacheDfsUsed) + " " + Long.toString(timer.now()));
      out.flush();
    }
  } catch (IOException ioe) {
    // Ignore; a missing or unwritable cache file simply means the new volume
    // will recompute dfsUsed from disk instead of using the cached value.
  }

  dataset.setTimer(timer);
  timer.advance(waitIntervalTime);
  dataset.addVolume(loc, nsInfos);

  // Get the last volume which was just added before
  FsVolumeImpl newVolume;
  try (FsDatasetSpi.FsVolumeReferences volumes =
      dataset.getFsVolumeReferences()) {
    newVolume = (FsVolumeImpl) volumes.get(volumes.size() - 1);
  }
  long dfsUsed = newVolume.getDfsUsed();

  return dfsUsed;
}