Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.
From the class TestFetcher, method testReduceOutOfDiskSpace:
@Test
public void testReduceOutOfDiskSpace() throws Throwable {
  LOG.info("testReduceOutOfDiskSpace");
  Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm,
      r, metrics, except, key, connection);
  String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
  ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
  ByteArrayOutputStream bout = new ByteArrayOutputStream();
  header.write(new DataOutputStream(bout));
  ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
  when(connection.getResponseCode()).thenReturn(200);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
  when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
      .thenReturn(replyHash);
  when(connection.getInputStream()).thenReturn(in);
  when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
      .thenThrow(new DiskErrorException("No disk space available"));
  underTest.copyFromHost(host);
  verify(ss).reportLocalError(any(IOException.class));
}
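The final verification works because DiskErrorException is a nested subclass of IOException, so the error thrown by the stubbed reserve() call can be reported and matched as a plain IOException. A minimal, self-contained illustration of that relationship (the class name below is hypothetical and not part of the Hadoop test):

import java.io.IOException;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

public class DiskErrorIsIOException {
  public static void main(String[] args) {
    try {
      // the same exception the mocked MergeManager throws above
      throw new DiskErrorException("No disk space available");
    } catch (IOException e) {
      // caught here because DiskErrorException extends IOException
      System.out.println("Caught as IOException: " + e.getMessage());
    }
  }
}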
Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.
From the class TestBasicDiskValidator, method checkDirs:
@Override
protected void checkDirs(boolean isDir, String perm, boolean success) throws Throwable {
  File localDir = isDir ? createTempDir() : createTempFile();
  try {
    Shell.execCommand(Shell.getSetPermissionCommand(perm, false,
        localDir.getAbsolutePath()));
    DiskValidatorFactory.getInstance(BasicDiskValidator.NAME).checkStatus(localDir);
    assertTrue("call to checkDir() succeeded.", success);
  } catch (DiskErrorException e) {
    // expected when success is false; otherwise rethrow the exception
    if (success) {
      throw e;
    }
  } finally {
    localDir.delete();
  }
}
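Outside of a test, the same validator can be obtained from DiskValidatorFactory and used to vet a directory before writing to it. A minimal sketch, assuming hadoop-common is on the classpath; the helper name is hypothetical and the imports from java.io and org.apache.hadoop.util are omitted:

private static boolean isUsableDir(File dir) {
  try {
    DiskValidator validator = DiskValidatorFactory.getInstance(BasicDiskValidator.NAME);
    // fails if the path is missing, not a directory, or lacks rwx permissions
    validator.checkStatus(dir);
    return true;
  } catch (DiskErrorException e) {
    return false;
  }
}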
Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.
From the class TestDiskChecker, method _mkdirs:
private void _mkdirs(boolean exists, FsPermission before, FsPermission after) throws Throwable {
  File localDir = make(stub(File.class).returning(exists).from.exists());
  when(localDir.mkdir()).thenReturn(true);
  // use default stubs
  Path dir = mock(Path.class);
  LocalFileSystem fs = make(stub(LocalFileSystem.class)
      .returning(localDir).from.pathToFile(dir));
  FileStatus stat = make(stub(FileStatus.class)
      .returning(after).from.getPermission());
  when(fs.getFileStatus(dir)).thenReturn(stat);
  try {
    DiskChecker.mkdirsWithExistsAndPermissionCheck(fs, dir, before);
    if (!exists)
      verify(fs).setPermission(dir, before);
    else {
      verify(fs).getFileStatus(dir);
      verify(stat).getPermission();
    }
  } catch (DiskErrorException e) {
    if (before != after)
      assertTrue(e.getMessage().startsWith("Incorrect permission"));
  }
}
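The make(stub(...)) calls above appear to come from a small Mockito helper DSL used by the Hadoop tests. For readers more familiar with plain Mockito, here is a hedged sketch of roughly equivalent stubbing, reusing the exists, dir, and after values from _mkdirs; exact equivalence of the helper's default behavior is an assumption:

// equivalent stubbing in plain Mockito (a sketch, not the Hadoop test code)
File localDir = mock(File.class);
when(localDir.exists()).thenReturn(exists);
when(localDir.mkdir()).thenReturn(true);

LocalFileSystem fs = mock(LocalFileSystem.class);
when(fs.pathToFile(dir)).thenReturn(localDir);

FileStatus stat = mock(FileStatus.class);
when(stat.getPermission()).thenReturn(after);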
Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.
From the class TestReadWriteDiskValidator, method testCheckFailures:
@Test
public void testCheckFailures() throws Throwable {
  ReadWriteDiskValidator readWriteDiskValidator =
      (ReadWriteDiskValidator) DiskValidatorFactory.getInstance(ReadWriteDiskValidator.NAME);
  // create a temporary test directory under the system test directory
  File testDir = Files.createTempDirectory(
      Paths.get(System.getProperty("test.build.data")), "test").toFile();
  try {
    Shell.execCommand(Shell.getSetPermissionCommand("000", false,
        testDir.getAbsolutePath()));
  } catch (Exception e) {
    testDir.delete();
    throw e;
  }
  try {
    readWriteDiskValidator.checkStatus(testDir);
    fail("Disk check should fail.");
  } catch (DiskErrorException e) {
    assertTrue(e.getMessage().equals("Disk Check failed!"));
  }
  MetricsSource source = ms.getSource(
      ReadWriteDiskValidatorMetrics.sourceName(testDir.toString()));
  MetricsCollectorImpl collector = new MetricsCollectorImpl();
  source.getMetrics(collector, true);
  try {
    readWriteDiskValidator.checkStatus(testDir);
    fail("Disk check should fail.");
  } catch (DiskErrorException e) {
    assertTrue(e.getMessage().equals("Disk Check failed!"));
  }
  source.getMetrics(collector, true);
  // verify the first metrics record
  MetricsRecords.assertMetric(collector.getRecords().get(0), "FailureCount", 1);
  Long lastFailureTime1 = (Long) MetricsRecords.getMetricValueByName(
      collector.getRecords().get(0), "LastFailureTime");
  // verify the second metrics record
  MetricsRecords.assertMetric(collector.getRecords().get(1), "FailureCount", 2);
  Long lastFailureTime2 = (Long) MetricsRecords.getMetricValueByName(
      collector.getRecords().get(1), "LastFailureTime");
  assertTrue("The first failure time should be less than the second one",
      lastFailureTime1 < lastFailureTime2);
  testDir.delete();
}
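The two try/fail/catch blocks above can be expressed more compactly with assertThrows, available from JUnit 4.13 onward. A hedged sketch, reusing readWriteDiskValidator and testDir from the test and assuming that JUnit version is on the classpath:

// assumes static imports of org.junit.Assert.assertEquals and assertThrows
DiskErrorException e = assertThrows(DiskErrorException.class,
    () -> readWriteDiskValidator.checkStatus(testDir));
assertEquals("Disk Check failed!", e.getMessage());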
Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.
From the class StorageLocationChecker, method check:
/**
 * Initiate a check of the supplied storage volumes and return
 * the list of healthy volumes.
 *
 * StorageLocations are returned in the same order as the input
 * for compatibility with existing unit tests.
 *
 * @param conf HDFS configuration.
 * @param dataDirs list of volumes to check.
 * @return the list of volumes that passed the check; failed volumes
 *         are omitted from the returned list.
 *
 * @throws InterruptedException if the check was interrupted.
 * @throws IOException if the number of failed volumes exceeds the
 *                     maximum allowed or if there are no good
 *                     volumes.
 */
public List<StorageLocation> check(final Configuration conf,
    final Collection<StorageLocation> dataDirs)
    throws InterruptedException, IOException {
  final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
  final Set<StorageLocation> failedLocations = new HashSet<>();
  final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures =
      Maps.newHashMap();
  final LocalFileSystem localFS = FileSystem.getLocal(conf);
  final CheckContext context = new CheckContext(localFS, expectedPermission);
  // Start parallel disk check operations on all StorageLocations.
  for (StorageLocation location : dataDirs) {
    goodLocations.put(location, true);
    Optional<ListenableFuture<VolumeCheckResult>> olf =
        delegateChecker.schedule(location, context);
    if (olf.isPresent()) {
      futures.put(location, olf.get());
    }
  }
  if (maxVolumeFailuresTolerated >= dataDirs.size()) {
    throw new DiskErrorException("Invalid value configured for "
        + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - "
        + maxVolumeFailuresTolerated + ". Value configured is >= "
        + "to the number of configured volumes (" + dataDirs.size() + ").");
  }
  final long checkStartTimeMs = timer.monotonicNow();
  // Retrieve the results of the disk checks.
  for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry :
      futures.entrySet()) {
    // Determine how much time we can allow for this check to complete.
    // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
    final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
    final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
    final StorageLocation location = entry.getKey();
    try {
      final VolumeCheckResult result =
          entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
      switch (result) {
      case HEALTHY:
        break;
      case DEGRADED:
        LOG.warn("StorageLocation {} appears to be degraded.", location);
        break;
      case FAILED:
        LOG.warn("StorageLocation {} detected as failed.", location);
        failedLocations.add(location);
        goodLocations.remove(location);
        break;
      default:
        LOG.error("Unexpected health check result {} for StorageLocation {}",
            result, location);
      }
    } catch (ExecutionException | TimeoutException e) {
      LOG.warn("Exception checking StorageLocation " + location, e.getCause());
      failedLocations.add(location);
      goodLocations.remove(location);
    }
  }
  if (failedLocations.size() > maxVolumeFailuresTolerated) {
    throw new DiskErrorException("Too many failed volumes - "
        + "current valid volumes: " + goodLocations.size()
        + ", volumes configured: " + dataDirs.size()
        + ", volumes failed: " + failedLocations.size()
        + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
  }
  if (goodLocations.size() == 0) {
    throw new DiskErrorException("All directories in " + DFS_DATANODE_DATA_DIR_KEY
        + " are invalid: " + failedLocations);
  }
  return new ArrayList<>(goodLocations.keySet());
}
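A hedged sketch of how a caller might consume check(): keep only the healthy locations and treat a DiskErrorException as fatal. The wrapper method name and the LOG field are hypothetical; the Hadoop DataNode performs a similar filtering of its configured data directories at startup.

// assumes the StorageLocationChecker was constructed elsewhere with the
// cluster Configuration; the method name below is hypothetical
static List<StorageLocation> filterHealthyLocations(
    StorageLocationChecker checker, Configuration conf,
    Collection<StorageLocation> dataDirs)
    throws IOException, InterruptedException {
  try {
    // returns only the locations that passed the parallel disk checks
    return checker.check(conf, dataDirs);
  } catch (DiskErrorException e) {
    // too many failed volumes, or none usable: surface as a fatal error
    LOG.error("Storage location check failed", e);
    throw e;
  }
}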