
Example 81 with HashSet

use of java.util.HashSet in project hadoop by apache.

the class StorageLocationChecker method check.

/**
   * Initiate a check of the supplied storage volumes and return
   * the list of volumes that passed the check.
   *
   * StorageLocations are returned in the same order as the input
   * for compatibility with existing unit tests.
   *
   * @param conf HDFS configuration.
   * @param dataDirs list of volumes to check.
   * @return the list of healthy volumes. Returns all of the supplied
   *         volumes if none of them failed.
   *
   * @throws InterruptedException if the check was interrupted.
   * @throws IOException if the number of failed volumes exceeds the
   *                     maximum allowed or if there are no good
   *                     volumes.
   */
public List<StorageLocation> check(final Configuration conf, final Collection<StorageLocation> dataDirs) throws InterruptedException, IOException {
    final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
    final Set<StorageLocation> failedLocations = new HashSet<>();
    final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures = Maps.newHashMap();
    final LocalFileSystem localFS = FileSystem.getLocal(conf);
    final CheckContext context = new CheckContext(localFS, expectedPermission);
    // Start parallel disk check operations on all StorageLocations.
    for (StorageLocation location : dataDirs) {
        goodLocations.put(location, true);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(location, context);
        if (olf.isPresent()) {
            futures.put(location, olf.get());
        }
    }
    if (maxVolumeFailuresTolerated >= dataDirs.size()) {
        throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " + maxVolumeFailuresTolerated + ". Value configured is >= " + "to the number of configured volumes (" + dataDirs.size() + ").");
    }
    final long checkStartTimeMs = timer.monotonicNow();
    // Retrieve the results of the disk checks.
    for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry : futures.entrySet()) {
        // Determine how much time we can allow for this check to complete.
        // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
        final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
        final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
        final StorageLocation location = entry.getKey();
        try {
            final VolumeCheckResult result = entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
            switch(result) {
                case HEALTHY:
                    break;
                case DEGRADED:
                    LOG.warn("StorageLocation {} appears to be degraded.", location);
                    break;
                case FAILED:
                    LOG.warn("StorageLocation {} detected as failed.", location);
                    failedLocations.add(location);
                    goodLocations.remove(location);
                    break;
                default:
                    LOG.error("Unexpected health check result {} for StorageLocation {}", result, location);
            }
        } catch (ExecutionException | TimeoutException e) {
            LOG.warn("Exception checking StorageLocation " + location, e.getCause());
            failedLocations.add(location);
            goodLocations.remove(location);
        }
    }
    if (failedLocations.size() > maxVolumeFailuresTolerated) {
        throw new DiskErrorException("Too many failed volumes - " + "current valid volumes: " + goodLocations.size() + ", volumes configured: " + dataDirs.size() + ", volumes failed: " + failedLocations.size() + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
    }
    if (goodLocations.size() == 0) {
        throw new DiskErrorException("All directories in " + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
    }
    return new ArrayList<>(goodLocations.keySet());
}
Also used : CheckContext(org.apache.hadoop.hdfs.server.datanode.StorageLocation.CheckContext) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) ExecutionException(java.util.concurrent.ExecutionException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)
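
For reference, the bookkeeping above — a LinkedHashMap of healthy entries plus a HashSet of failures — can be reduced to a minimal, self-contained sketch. The class and helper names below (FailureTrackingSketch, filterHealthy, isHealthy) are illustrative stand-ins, not part of the Hadoop API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FailureTrackingSketch {
    /**
     * Returns the healthy resources in their original order, mirroring the
     * goodLocations/failedLocations bookkeeping above. isHealthy() is a
     * stand-in for the asynchronous disk probe.
     */
    static List<String> filterHealthy(List<String> resources, int failuresTolerated) {
        // LinkedHashMap preserves the input order of the surviving entries.
        Map<String, Boolean> good = new LinkedHashMap<>();
        // HashSet gives O(1) membership tracking for the failed entries.
        Set<String> failed = new HashSet<>();
        for (String r : resources) {
            good.put(r, true);
        }
        for (String r : resources) {
            if (!isHealthy(r)) {
                failed.add(r);
                good.remove(r);
            }
        }
        if (failed.size() > failuresTolerated) {
            throw new IllegalStateException("Too many failed resources: " + failed);
        }
        return new ArrayList<>(good.keySet());
    }

    // Hypothetical probe: treat names containing "bad" as failed.
    private static boolean isHealthy(String r) {
        return !r.contains("bad");
    }

    public static void main(String[] args) {
        System.out.println(filterHealthy(Arrays.asList("/data/1", "/data/bad", "/data/3"), 1));
        // prints [/data/1, /data/3]
    }
}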

Example 82 with HashSet

use of java.util.HashSet in project hadoop by apache.

the class DataNode method handleVolumeFailures.

private void handleVolumeFailures(Set<FsVolumeSpi> unhealthyVolumes) {
    if (unhealthyVolumes.isEmpty()) {
        LOG.debug("handleVolumeFailures done with empty " + "unhealthyVolumes");
        return;
    }
    data.handleVolumeFailures(unhealthyVolumes);
    Set<StorageLocation> unhealthyLocations = new HashSet<>(unhealthyVolumes.size());
    StringBuilder sb = new StringBuilder("DataNode failed volumes:");
    for (FsVolumeSpi vol : unhealthyVolumes) {
        unhealthyLocations.add(vol.getStorageLocation());
        sb.append(vol.getStorageLocation()).append(";");
    }
    try {
        // Remove all unhealthy volumes from DataNode.
        removeVolumes(unhealthyLocations, false);
    } catch (IOException e) {
        LOG.warn("Error occurred when removing unhealthy storage dirs: " + e.getMessage(), e);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(sb.toString());
    }
    // send blockreport regarding volume failure
    handleDiskError(sb.toString());
}
Also used : FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) IOException(java.io.IOException) HashSet(java.util.HashSet)
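
A minimal sketch of the same idea: copy the unhealthy items into a pre-sized HashSet and build a ';'-separated summary string for logging. The Volume interface and getLocation() accessor below are illustrative stand-ins for FsVolumeSpi, not Hadoop types:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class VolumeFailureSketch {
    /** Illustrative stand-in for FsVolumeSpi; only the location accessor matters here. */
    interface Volume {
        String getLocation();
    }

    static Set<String> collectFailedLocations(List<Volume> unhealthy) {
        // Pre-size the HashSet to the number of failures, as the DataNode code does.
        Set<String> locations = new HashSet<>(unhealthy.size());
        StringBuilder sb = new StringBuilder("Failed volumes:");
        for (Volume v : unhealthy) {
            locations.add(v.getLocation());
            sb.append(v.getLocation()).append(";");
        }
        System.out.println(sb); // stand-in for LOG.debug(sb.toString())
        return locations;
    }

    public static void main(String[] args) {
        Volume v1 = () -> "/data/1";
        Volume v2 = () -> "/data/2";
        System.out.println(collectFailedLocations(Arrays.asList(v1, v2)));
    }
}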

Example 83 with HashSet

use of java.util.HashSet in project hadoop by apache.

the class StripedFileTestUtil method verifyLocatedStripedBlocks.

/**
   * Verify that the blocks in a striped block group are on different nodes,
   * and that every internal block exists.
   */
public static void verifyLocatedStripedBlocks(LocatedBlocks lbs, int groupSize) {
    for (LocatedBlock lb : lbs.getLocatedBlocks()) {
        assert lb instanceof LocatedStripedBlock;
        HashSet<DatanodeInfo> locs = new HashSet<>();
        Collections.addAll(locs, lb.getLocations());
        assertEquals(groupSize, lb.getLocations().length);
        assertEquals(groupSize, locs.size());
        // verify that every internal block exists
        byte[] blockIndices = ((LocatedStripedBlock) lb).getBlockIndices();
        assertEquals(groupSize, blockIndices.length);
        HashSet<Integer> found = new HashSet<>();
        for (int index : blockIndices) {
            assert index >= 0;
            found.add(index);
        }
        assertEquals(groupSize, found.size());
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) HashSet(java.util.HashSet)
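
The HashSet property this test relies on is simple: adding N items and ending up with a set of size N proves they were all distinct. A minimal sketch of that check (allDistinct is an illustrative helper, not Hadoop code):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class UniquenessCheckSketch {
    /** True if every element of the array is distinct, judged by the resulting set size. */
    static <T> boolean allDistinct(T[] items) {
        Set<T> seen = new HashSet<>(Arrays.asList(items));
        return seen.size() == items.length;
    }

    public static void main(String[] args) {
        System.out.println(allDistinct(new Integer[] {0, 1, 2, 3}));  // true: indices cover the group
        System.out.println(allDistinct(new String[] {"dn1", "dn1"})); // false: a node appears twice
    }
}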

Example 84 with HashSet

use of java.util.HashSet in project hadoop by apache.

the class StripedFileTestUtil method checkData.

static void checkData(DistributedFileSystem dfs, Path srcPath, int length, List<DatanodeInfo> killedList, List<Long> oldGSList, int blkGroupSize) throws IOException {
    StripedFileTestUtil.verifyLength(dfs, srcPath, length);
    List<List<LocatedBlock>> blockGroupList = new ArrayList<>();
    LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(srcPath.toString(), 0L, Long.MAX_VALUE);
    int expectedNumGroup = 0;
    if (length > 0) {
        expectedNumGroup = (length - 1) / blkGroupSize + 1;
    }
    assertEquals(expectedNumGroup, lbs.getLocatedBlocks().size());
    final ErasureCodingPolicy ecPolicy = dfs.getErasureCodingPolicy(srcPath);
    final int cellSize = ecPolicy.getCellSize();
    final int dataBlkNum = ecPolicy.getNumDataUnits();
    final int parityBlkNum = ecPolicy.getNumParityUnits();
    int index = 0;
    for (LocatedBlock firstBlock : lbs.getLocatedBlocks()) {
        Assert.assertTrue(firstBlock instanceof LocatedStripedBlock);
        final long gs = firstBlock.getBlock().getGenerationStamp();
        final long oldGS = oldGSList != null ? oldGSList.get(index++) : -1L;
        final String s = "gs=" + gs + ", oldGS=" + oldGS;
        LOG.info(s);
        Assert.assertTrue(s, gs >= oldGS);
        LocatedBlock[] blocks = StripedBlockUtil.parseStripedBlockGroup((LocatedStripedBlock) firstBlock, cellSize, dataBlkNum, parityBlkNum);
        blockGroupList.add(Arrays.asList(blocks));
    }
    // test each block group
    for (int group = 0; group < blockGroupList.size(); group++) {
        final boolean isLastGroup = group == blockGroupList.size() - 1;
        final int groupSize = !isLastGroup ? blkGroupSize : length - (blockGroupList.size() - 1) * blkGroupSize;
        final int numCellInGroup = (groupSize - 1) / cellSize + 1;
        final int lastCellIndex = (numCellInGroup - 1) % dataBlkNum;
        final int lastCellSize = groupSize - (numCellInGroup - 1) * cellSize;
        //get the data of this block
        List<LocatedBlock> blockList = blockGroupList.get(group);
        byte[][] dataBlockBytes = new byte[dataBlkNum][];
        byte[][] parityBlockBytes = new byte[parityBlkNum][];
        Set<Integer> checkSet = new HashSet<>();
        // for each block, use BlockReader to read data
        for (int i = 0; i < blockList.size(); i++) {
            final int j = i >= dataBlkNum ? 0 : i;
            final int numCellInBlock = (numCellInGroup - 1) / dataBlkNum + (j <= lastCellIndex ? 1 : 0);
            final int blockSize = numCellInBlock * cellSize + (isLastGroup && j == lastCellIndex ? lastCellSize - cellSize : 0);
            final byte[] blockBytes = new byte[blockSize];
            if (i < dataBlkNum) {
                dataBlockBytes[i] = blockBytes;
            } else {
                parityBlockBytes[i - dataBlkNum] = blockBytes;
            }
            final LocatedBlock lb = blockList.get(i);
            LOG.info("i,j=" + i + ", " + j + ", numCellInBlock=" + numCellInBlock + ", blockSize=" + blockSize + ", lb=" + lb);
            if (lb == null) {
                continue;
            }
            final ExtendedBlock block = lb.getBlock();
            assertEquals(blockSize, block.getNumBytes());
            if (block.getNumBytes() == 0) {
                continue;
            }
            DatanodeInfo dn = blockList.get(i).getLocations()[0];
            if (!killedList.contains(dn)) {
                final BlockReader blockReader = BlockReaderTestUtil.getBlockReader(dfs, lb, 0, block.getNumBytes());
                blockReader.readAll(blockBytes, 0, (int) block.getNumBytes());
                blockReader.close();
                checkSet.add(i);
            }
        }
        LOG.info("Internal blocks to check: " + checkSet);
        // check data
        final int groupPosInFile = group * blkGroupSize;
        for (int i = 0; i < dataBlockBytes.length; i++) {
            boolean killed = false;
            if (!checkSet.contains(i)) {
                killed = true;
            }
            final byte[] actual = dataBlockBytes[i];
            for (int posInBlk = 0; posInBlk < actual.length; posInBlk++) {
                final long posInFile = StripedBlockUtil.offsetInBlkToOffsetInBG(cellSize, dataBlkNum, posInBlk, i) + groupPosInFile;
                Assert.assertTrue(posInFile < length);
                final byte expected = getByte(posInFile);
                if (killed) {
                    actual[posInBlk] = expected;
                } else {
                    if (expected != actual[posInBlk]) {
                        String s = "expected=" + expected + " but actual=" + actual[posInBlk] + ", posInFile=" + posInFile + ", posInBlk=" + posInBlk + ". group=" + group + ", i=" + i;
                        Assert.fail(s);
                    }
                }
            }
        }
        // check parity
        verifyParityBlocks(dfs.getConf(), lbs.getLocatedBlocks().get(group).getBlockSize(), cellSize, dataBlockBytes, parityBlockBytes, checkSet, ecPolicy.getCodecName());
    }
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) ErasureCodingPolicy(org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy) ArrayList(java.util.ArrayList) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet)
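
A compact sketch of the checkSet bookkeeping used above: indices are added as internal blocks are successfully read, and the later verification pass treats any index missing from the set as belonging to a killed node. readBlock below is a hypothetical stand-in for the BlockReader call:

import java.util.HashSet;
import java.util.Set;

public class CheckSetSketch {
    public static void main(String[] args) {
        int numBlocks = 6;
        Set<Integer> checkSet = new HashSet<>();
        for (int i = 0; i < numBlocks; i++) {
            if (readBlock(i)) {        // only blocks that could actually be read are recorded
                checkSet.add(i);
            }
        }
        for (int i = 0; i < numBlocks; i++) {
            boolean skipped = !checkSet.contains(i); // absent index => its node was "killed"
            System.out.println("block " + i + (skipped ? ": skipped" : ": verified"));
        }
    }

    // Hypothetical stand-in for the BlockReader call: pretend block 2 sits on a killed node.
    private static boolean readBlock(int index) {
        return index != 2;
    }
}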

Example 85 with HashSet

use of java.util.HashSet in project hadoop by apache.

the class LazyPersistTestCase method ensureLazyPersistBlocksAreSaved.

/**
   * Make sure at least one non-transient volume has a saved copy of the replica.
   * An infinite loop is used to ensure the asynchronous lazy-persist tasks are
   * completely done before verification. The caller of ensureLazyPersistBlocksAreSaved
   * expects either a successful pass or a timeout failure.
   */
protected final void ensureLazyPersistBlocksAreSaved(LocatedBlocks locatedBlocks) throws IOException, InterruptedException {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    final Set<Long> persistedBlockIds = new HashSet<Long>();
    try (FsDatasetSpi.FsVolumeReferences volumes = cluster.getDataNodes().get(0).getFSDataset().getFsVolumeReferences()) {
        while (persistedBlockIds.size() < locatedBlocks.getLocatedBlocks().size()) {
            // Sleep for one second before each verification iteration
            Thread.sleep(1000);
            for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
                for (FsVolumeSpi v : volumes) {
                    if (v.isTransientStorage()) {
                        continue;
                    }
                    FsVolumeImpl volume = (FsVolumeImpl) v;
                    File lazyPersistDir = volume.getBlockPoolSlice(bpid).getLazypersistDir();
                    long blockId = lb.getBlock().getBlockId();
                    File targetDir = DatanodeUtil.idToBlockDir(lazyPersistDir, blockId);
                    File blockFile = new File(targetDir, lb.getBlock().getBlockName());
                    if (blockFile.exists()) {
                        // Found a persisted copy of this block; record its ID in the set
                        persistedBlockIds.add(blockId);
                    }
                }
            }
        }
    }
    // We should have found a persisted copy for each located block.
    assertThat(persistedBlockIds.size(), is(locatedBlocks.getLocatedBlocks().size()));
}
Also used : FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) File(java.io.File) HashSet(java.util.HashSet)
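
A minimal sketch of the same polling pattern: keep adding discovered block IDs to a HashSet until the set covers every expected block. Because duplicate adds are ignored by a set, re-finding a block on a later sweep does not inflate the count. isPersisted below is a random stand-in for the blockFile.exists() check:

import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

public class PollUntilPersistedSketch {
    public static void main(String[] args) throws InterruptedException {
        long[] blockIds = {1001L, 1002L, 1003L};
        Set<Long> persisted = new HashSet<>();
        while (persisted.size() < blockIds.length) {
            Thread.sleep(100);                 // pause between verification sweeps
            for (long id : blockIds) {
                if (isPersisted(id)) {
                    persisted.add(id);         // duplicate adds are harmless in a set
                }
            }
        }
        System.out.println("All blocks persisted: " + persisted);
    }

    // Random stand-in for the blockFile.exists() check in the test above.
    private static boolean isPersisted(long id) {
        return ThreadLocalRandom.current().nextBoolean();
    }
}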

Aggregations

HashSet (java.util.HashSet): 12137
Set (java.util.Set): 2609
ArrayList (java.util.ArrayList): 2318
HashMap (java.util.HashMap): 2096
Test (org.junit.Test): 2060
Map (java.util.Map): 1198
Iterator (java.util.Iterator): 979
IOException (java.io.IOException): 934
List (java.util.List): 911
File (java.io.File): 607
LinkedHashSet (java.util.LinkedHashSet): 460
Test (org.testng.annotations.Test): 460
TreeSet (java.util.TreeSet): 271
Collection (java.util.Collection): 233
LinkedList (java.util.LinkedList): 224
Region (org.apache.geode.cache.Region): 202
SSOException (com.iplanet.sso.SSOException): 188
Date (java.util.Date): 180
LinkedHashMap (java.util.LinkedHashMap): 169
PartitionedRegion (org.apache.geode.internal.cache.PartitionedRegion): 166