
Example 1 with DiskErrorException

Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.

From the class TestFetcher, method testReduceOutOfDiskSpace:

@Test
public void testReduceOutOfDiskSpace() throws Throwable {
    LOG.info("testReduceOutOfDiskSpace");
    Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm, r, metrics, except, key, connection);
    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    header.write(new DataOutputStream(bout));
    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
    when(connection.getResponseCode()).thenReturn(200);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
    when(connection.getInputStream()).thenReturn(in);
    // Simulate a full local disk: the MergeManager cannot reserve space for the map output.
    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt())).thenThrow(new DiskErrorException("No disk space available"));
    underTest.copyFromHost(host);
    // The fetcher should report the disk error to the shuffle scheduler as a local error.
    verify(ss).reportLocalError(any(IOException.class));
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), DataOutputStream (java.io.DataOutputStream), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), IOException (java.io.IOException), Test (org.junit.Test)
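For context, a minimal sketch (not taken from the Hadoop sources) of the kind of space-reservation path the test mocks out; the reserveOnDisk helper and its threshold logic are hypothetical, only the DiskErrorException constructor is real:

import java.io.File;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

// Hypothetical helper: refuse a reservation when the volume lacks the requested space.
static long reserveOnDisk(File volume, long requestedBytes) throws DiskErrorException {
    if (volume.getUsableSpace() < requestedBytes) {
        // Same failure mode the test injects into MergeManager.reserve().
        throw new DiskErrorException("No disk space available");
    }
    return requestedBytes;
}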

Example 2 with DiskErrorException

Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.

From the class TestBasicDiskValidator, method checkDirs:

@Override
protected void checkDirs(boolean isDir, String perm, boolean success) throws Throwable {
    File localDir = isDir ? createTempDir() : createTempFile();
    try {
        Shell.execCommand(Shell.getSetPermissionCommand(perm, false, localDir.getAbsolutePath()));
        DiskValidatorFactory.getInstance(BasicDiskValidator.NAME).checkStatus(localDir);
        assertTrue("call to checkDir() succeeded.", success);
    } catch (DiskErrorException e) {
        // the exception is expected when success is false; otherwise rethrow it
        if (success) {
            throw e;
        }
    } finally {
        localDir.delete();
    }
}
Also used: DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException), File (java.io.File)
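Outside of tests, the same validator is obtained from the factory and pointed at a real directory. A hedged usage sketch, assuming an illustrative local data directory:

import java.io.File;
import org.apache.hadoop.util.BasicDiskValidator;
import org.apache.hadoop.util.DiskValidator;
import org.apache.hadoop.util.DiskValidatorFactory;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

DiskValidator validator = DiskValidatorFactory.getInstance(BasicDiskValidator.NAME);
try {
    // Fails if the path is missing, is not a directory, or lacks the needed permissions.
    validator.checkStatus(new File("/data/1"));  // illustrative path
} catch (DiskErrorException e) {
    // Treat the directory as failed, as the test above expects when success == false.
}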

Example 3 with DiskErrorException

Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.

From the class TestDiskChecker, method _mkdirs:

private void _mkdirs(boolean exists, FsPermission before, FsPermission after) throws Throwable {
    File localDir = make(stub(File.class).returning(exists).from.exists());
    when(localDir.mkdir()).thenReturn(true);
    // use default stubs
    Path dir = mock(Path.class);
    LocalFileSystem fs = make(stub(LocalFileSystem.class).returning(localDir).from.pathToFile(dir));
    FileStatus stat = make(stub(FileStatus.class).returning(after).from.getPermission());
    when(fs.getFileStatus(dir)).thenReturn(stat);
    try {
        DiskChecker.mkdirsWithExistsAndPermissionCheck(fs, dir, before);
        if (!exists)
            verify(fs).setPermission(dir, before);
        else {
            verify(fs).getFileStatus(dir);
            verify(stat).getPermission();
        }
    } catch (DiskErrorException e) {
        // a permission mismatch should surface as an "Incorrect permission" DiskErrorException
        if (before != after)
            assertTrue(e.getMessage().startsWith("Incorrect permission"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException)
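The production call this test exercises can be sketched as follows; the path and permission value are illustrative, not taken from the source:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.DiskChecker;

LocalFileSystem localFS = FileSystem.getLocal(new Configuration());
// Creates the directory if needed and verifies it ends up with the expected permission;
// a mismatch is reported as a DiskErrorException starting with "Incorrect permission".
DiskChecker.mkdirsWithExistsAndPermissionCheck(
    localFS, new Path("/data/local-dirs"), new FsPermission("755"));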

Example 4 with DiskErrorException

Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.

From the class TestReadWriteDiskValidator, method testCheckFailures:

@Test
public void testCheckFailures() throws Throwable {
    ReadWriteDiskValidator readWriteDiskValidator = (ReadWriteDiskValidator) DiskValidatorFactory.getInstance(ReadWriteDiskValidator.NAME);
    // create a temporary test directory under the system test directory
    File testDir = Files.createTempDirectory(Paths.get(System.getProperty("test.build.data")), "test").toFile();
    try {
        Shell.execCommand(Shell.getSetPermissionCommand("000", false, testDir.getAbsolutePath()));
    } catch (Exception e) {
        testDir.delete();
        throw e;
    }
    try {
        readWriteDiskValidator.checkStatus(testDir);
        fail("Disk check should fail.");
    } catch (DiskErrorException e) {
        assertTrue(e.getMessage().equals("Disk Check failed!"));
    }
    MetricsSource source = ms.getSource(ReadWriteDiskValidatorMetrics.sourceName(testDir.toString()));
    MetricsCollectorImpl collector = new MetricsCollectorImpl();
    source.getMetrics(collector, true);
    try {
        readWriteDiskValidator.checkStatus(testDir);
        fail("Disk check should fail.");
    } catch (DiskErrorException e) {
        assertTrue(e.getMessage().equals("Disk Check failed!"));
    }
    source.getMetrics(collector, true);
    // verify the first metrics record
    MetricsRecords.assertMetric(collector.getRecords().get(0), "FailureCount", 1);
    Long lastFailureTime1 = (Long) MetricsRecords.getMetricValueByName(collector.getRecords().get(0), "LastFailureTime");
    // verify the second metrics record
    MetricsRecords.assertMetric(collector.getRecords().get(1), "FailureCount", 2);
    Long lastFailureTime2 = (Long) MetricsRecords.getMetricValueByName(collector.getRecords().get(1), "LastFailureTime");
    assertTrue("The first failure time should be less than the second one", lastFailureTime1 < lastFailureTime2);
    testDir.delete();
}
Also used: MetricsSource (org.apache.hadoop.metrics2.MetricsSource), DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException), File (java.io.File), MetricsCollectorImpl (org.apache.hadoop.metrics2.impl.MetricsCollectorImpl), Test (org.junit.Test)
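As a contrast with the failure path above, a hedged sketch of a normal read-write check; the target directory is illustrative:

import java.io.File;
import org.apache.hadoop.util.DiskValidator;
import org.apache.hadoop.util.DiskValidatorFactory;
import org.apache.hadoop.util.ReadWriteDiskValidator;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

DiskValidator rwValidator = DiskValidatorFactory.getInstance(ReadWriteDiskValidator.NAME);
try {
    // Probes the directory with a small write followed by a read-back.
    rwValidator.checkStatus(new File("/data/1"));  // illustrative path
} catch (DiskErrorException e) {
    // Each failure increments the FailureCount metric checked by the test above.
}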

Example 5 with DiskErrorException

Use of org.apache.hadoop.util.DiskChecker.DiskErrorException in project hadoop by apache.

From the class StorageLocationChecker, method check:

/**
   * Initiate a check of the supplied storage volumes and return
   * the list of volumes that passed the check.
   *
   * StorageLocations are returned in the same order as the input
   * for compatibility with existing unit tests.
   *
   * @param conf HDFS configuration.
   * @param dataDirs list of volumes to check.
   * @return the healthy volumes, in the same order as the input;
   *         volumes that failed the check are omitted.
   *
   * @throws InterruptedException if the check was interrupted.
   * @throws IOException if the number of failed volumes exceeds the
   *                     maximum allowed or if there are no good
   *                     volumes.
   */
public List<StorageLocation> check(final Configuration conf, final Collection<StorageLocation> dataDirs) throws InterruptedException, IOException {
    final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
    final Set<StorageLocation> failedLocations = new HashSet<>();
    final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures = Maps.newHashMap();
    final LocalFileSystem localFS = FileSystem.getLocal(conf);
    final CheckContext context = new CheckContext(localFS, expectedPermission);
    // Start parallel disk check operations on all StorageLocations.
    for (StorageLocation location : dataDirs) {
        goodLocations.put(location, true);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(location, context);
        if (olf.isPresent()) {
            futures.put(location, olf.get());
        }
    }
    if (maxVolumeFailuresTolerated >= dataDirs.size()) {
        throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " + maxVolumeFailuresTolerated + ". Value configured is >= " + "to the number of configured volumes (" + dataDirs.size() + ").");
    }
    final long checkStartTimeMs = timer.monotonicNow();
    // Retrieve the results of the disk checks.
    for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry : futures.entrySet()) {
        // Determine how much time we can allow for this check to complete.
        // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
        final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
        final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
        final StorageLocation location = entry.getKey();
        try {
            final VolumeCheckResult result = entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
            switch(result) {
                case HEALTHY:
                    break;
                case DEGRADED:
                    LOG.warn("StorageLocation {} appears to be degraded.", location);
                    break;
                case FAILED:
                    LOG.warn("StorageLocation {} detected as failed.", location);
                    failedLocations.add(location);
                    goodLocations.remove(location);
                    break;
                default:
                    LOG.error("Unexpected health check result {} for StorageLocation {}", result, location);
            }
        } catch (ExecutionException | TimeoutException e) {
            LOG.warn("Exception checking StorageLocation " + location, e.getCause());
            failedLocations.add(location);
            goodLocations.remove(location);
        }
    }
    if (failedLocations.size() > maxVolumeFailuresTolerated) {
        throw new DiskErrorException("Too many failed volumes - " + "current valid volumes: " + goodLocations.size() + ", volumes configured: " + dataDirs.size() + ", volumes failed: " + failedLocations.size() + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
    }
    if (goodLocations.size() == 0) {
        throw new DiskErrorException("All directories in " + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
    }
    return new ArrayList<>(goodLocations.keySet());
}
Also used: CheckContext (org.apache.hadoop.hdfs.server.datanode.StorageLocation.CheckContext), DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException), ArrayList (java.util.ArrayList), LinkedHashMap (java.util.LinkedHashMap), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), StorageLocation (org.apache.hadoop.hdfs.server.datanode.StorageLocation), ExecutionException (java.util.concurrent.ExecutionException), HashMap (java.util.HashMap), Map (java.util.Map), HashSet (java.util.HashSet), TimeoutException (java.util.concurrent.TimeoutException)
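The behaviour above is driven by two configuration keys. A minimal sketch, with illustrative data directories, of a configuration that tolerates at most one failed volume:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

Configuration conf = new Configuration();
conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, "/data/1/dn,/data/2/dn,/data/3/dn");
// Must be strictly less than the number of configured volumes; otherwise check()
// rejects the configuration with a DiskErrorException, per the validation shown above.
conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);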

Aggregations

DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException): 14 uses
IOException (java.io.IOException): 6 uses
Path (org.apache.hadoop.fs.Path): 5 uses
File (java.io.File): 3 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 3 uses
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 3 uses
Test (org.junit.Test): 3 uses
ArrayList (java.util.ArrayList): 2 uses
Configuration (org.apache.hadoop.conf.Configuration): 2 uses
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 2 uses
StorageLocation (org.apache.hadoop.hdfs.server.datanode.StorageLocation): 2 uses
FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException): 2 uses
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1 use
ByteArrayInputStream (java.io.ByteArrayInputStream): 1 use
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 use
DataOutputStream (java.io.DataOutputStream): 1 use
Path (java.nio.file.Path): 1 use
HashMap (java.util.HashMap): 1 use
HashSet (java.util.HashSet): 1 use
LinkedHashMap (java.util.LinkedHashMap): 1 use