Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class DateTieredCompactionPolicy, method shouldPerformMajorCompaction.
public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
    throws IOException {
  long mcTime = getNextMajorCompactTime(filesToCompact);
  if (filesToCompact == null || mcTime == 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("filesToCompact: " + filesToCompact + " mcTime: " + mcTime);
    }
    return false;
  }
  // TODO: Use better method for determining stamp of last major (HBASE-2990)
  long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
  long now = EnvironmentEdgeManager.currentTime();
  if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
    if (LOG.isDebugEnabled()) {
LOG.debug("lowTimestamp: " + lowTimestamp + " lowTimestamp: " + lowTimestamp + " now: " + now + " mcTime: " + mcTime);
    }
    return false;
  }
  long cfTTL = this.storeConfigInfo.getStoreFileTtl();
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
  boolean[] filesInWindow = new boolean[boundaries.size()];
  for (StoreFile file : filesToCompact) {
    Long minTimestamp = file.getMinimumTimestamp();
    long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
    if (cfTTL != Long.MAX_VALUE && oldest >= cfTTL) {
      LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
      return true;
    }
    if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
      LOG.debug("Major compaction triggered on store " + this
          + ", because there are new files and time since last major compaction "
          + (now - lowTimestamp) + "ms");
      return true;
    }
    int lowerWindowIndex = Collections.binarySearch(boundaries,
        minTimestamp == null ? (Long) Long.MAX_VALUE : minTimestamp);
    int upperWindowIndex = Collections.binarySearch(boundaries,
        file.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : file.getMaximumTimestamp());
    // Handle boundary conditions and negative values of binarySearch
    lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
    upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;
    if (lowerWindowIndex != upperWindowIndex) {
      LOG.debug("Major compaction triggered on store " + this + "; because file "
          + file.getPath() + " has data with timestamps cross window boundaries");
      return true;
    } else if (filesInWindow[upperWindowIndex]) {
      LOG.debug("Major compaction triggered on store " + this
          + "; because there are more than one file in some windows");
      return true;
    } else {
      filesInWindow[upperWindowIndex] = true;
    }
    hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
  }
  float blockLocalityIndex = hdfsBlocksDistribution
      .getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
  if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
    LOG.debug("Major compaction triggered on store " + this
        + "; to make hdfs blocks local, current blockLocalityIndex is " + blockLocalityIndex
        + " (min " + comConf.getMinLocalityToForceCompact() + ")");
    return true;
  }
  LOG.debug("Skipping major compaction of " + this
      + ", because the files are already major compacted");
  return false;
}
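
A note on the window-index arithmetic above: Collections.binarySearch returns (-(insertionPoint) - 1) when the key is not in the list, so the (index < 0) ? Math.abs(index + 2) : index step maps a timestamp that falls between two boundaries onto the window that starts at the boundary just below it. A minimal standalone sketch (the boundary values are invented for illustration, not taken from getCompactBoundariesForMajor):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class WindowIndexDemo {
  // Mirrors the boundary handling in shouldPerformMajorCompaction above.
  static int windowIndex(List<Long> boundaries, long timestamp) {
    int idx = Collections.binarySearch(boundaries, timestamp);
    // For a miss, binarySearch returns -(insertionPoint) - 1, so for
    // insertion points >= 1, Math.abs(idx + 2) == insertionPoint - 1:
    // the window whose lower boundary is the nearest one below the timestamp.
    return (idx < 0) ? Math.abs(idx + 2) : idx;
  }

  public static void main(String[] args) {
    // Hypothetical hourly window boundaries in milliseconds, oldest first.
    List<Long> boundaries = Arrays.asList(0L, 3600000L, 7200000L, 10800000L);
    System.out.println(windowIndex(boundaries, 3600000L)); // exact boundary -> 1
    System.out.println(windowIndex(boundaries, 5000000L)); // between 1 and 2 -> 1
    System.out.println(windowIndex(boundaries, 9999999L)); // between 2 and 3 -> 2
  }
}

A file whose minimum and maximum timestamps land in different windows therefore yields lowerWindowIndex != upperWindowIndex and triggers a major compaction.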
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class FSUtils, method computeHDFSBlocksDistribution.
/**
 * Compute HDFS blocks distribution of a given file, or a portion of the file
 * @param fs file system
 * @param status file status of the file
 * @param start start position of the portion
 * @param length length of the portion
 * @return The HDFS blocks distribution
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs,
    FileStatus status, long start, long length) throws IOException {
  HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
  BlockLocation[] blockLocations = fs.getFileBlockLocations(status, start, length);
  for (BlockLocation bl : blockLocations) {
    String[] hosts = bl.getHosts();
    long len = bl.getLength();
    blocksDistribution.addHostsAndBlockWeight(hosts, len);
  }
  return blocksDistribution;
}
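
Conceptually, HDFSBlocksDistribution is a per-host accumulator of block bytes: each BlockLocation above contributes its length once to a unique-blocks total and once per hosting datanode. The class below is not the real implementation, only a simplified model of what addHostsAndBlockWeight, getWeight, getUniqueBlocksTotalWeight and getBlockLocalityIndex compute, to make the later snippets easier to follow:

import java.util.HashMap;
import java.util.Map;

// Simplified stand-in for HDFSBlocksDistribution; method names mirror the
// real API, but the implementation here is illustrative only.
public class SimpleBlocksDistribution {
  private final Map<String, Long> hostWeights = new HashMap<>();
  private long uniqueBlocksTotalWeight = 0;

  // One block: its length counts once toward the unique total and once per host.
  public void addHostsAndBlockWeight(String[] hosts, long weight) {
    uniqueBlocksTotalWeight += weight;
    for (String host : hosts) {
      hostWeights.merge(host, weight, Long::sum);
    }
  }

  public long getUniqueBlocksTotalWeight() {
    return uniqueBlocksTotalWeight;
  }

  public long getWeight(String host) {
    return hostWeights.getOrDefault(host, 0L);
  }

  // Fraction of the file's bytes that have a replica on the given host.
  public float getBlockLocalityIndex(String host) {
    return uniqueBlocksTotalWeight == 0 ? 0.0f
        : (float) getWeight(host) / uniqueBlocksTotalWeight;
  }

  public static void main(String[] args) {
    SimpleBlocksDistribution d = new SimpleBlocksDistribution();
    // Two 1024-byte blocks, each replicated on three hosts.
    d.addHostsAndBlockWeight(new String[] { "host1", "host2", "host3" }, 1024);
    d.addHostsAndBlockWeight(new String[] { "host2", "host3", "host4" }, 1024);
    System.out.println(d.getBlockLocalityIndex("host2")); // 1.0
    System.out.println(d.getBlockLocalityIndex("host4")); // 0.5
  }
}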
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, method refreshAndWait.
public void refreshAndWait(Collection<HRegionInfo> hris) {
  ArrayList<ListenableFuture<HDFSBlocksDistribution>> regionLocationFutures =
      new ArrayList<>(hris.size());
  for (HRegionInfo hregionInfo : hris) {
    regionLocationFutures.add(asyncGetBlockDistribution(hregionInfo));
  }
  int index = 0;
  for (HRegionInfo hregionInfo : hris) {
    ListenableFuture<HDFSBlocksDistribution> future = regionLocationFutures.get(index);
    try {
      cache.put(hregionInfo, future.get());
    } catch (InterruptedException ite) {
      Thread.currentThread().interrupt();
    } catch (ExecutionException ee) {
LOG.debug("ExecutionException during HDFSBlocksDistribution computation. for region = " + hregionInfo.getEncodedName(), ee);
    }
    index++;
  }
}
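
The loop above submits all per-region computations first and only waits afterward, so the HDFS calls can overlap instead of running one by one. A minimal sketch of that submit-then-collect pattern with Guava's ListenableFuture (the executor, the task body and the region names are invented stand-ins; the real asyncGetBlockDistribution is not reproduced here):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

public class SubmitThenCollectDemo {
  public static void main(String[] args) throws InterruptedException {
    ListeningExecutorService pool =
        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
    List<String> regions = Arrays.asList("region-a", "region-b", "region-c");

    // Phase 1: submit everything so the computations run concurrently.
    List<ListenableFuture<Integer>> futures = new ArrayList<>(regions.size());
    for (String region : regions) {
      Callable<Integer> task = () -> region.length(); // stand-in for the HDFS work
      futures.add(pool.submit(task));
    }

    // Phase 2: block on each result, tolerating individual failures.
    for (int i = 0; i < regions.size(); i++) {
      try {
        System.out.println(regions.get(i) + " -> " + futures.get(i).get());
      } catch (ExecutionException ee) {
        System.err.println("computation failed for " + regions.get(i) + ": " + ee.getCause());
      }
    }
    pool.shutdown();
  }
}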
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class TestFSUtils, method testcomputeHDFSBlocksDistribution.
@Test
public void testcomputeHDFSBlocksDistribution() throws Exception {
  HBaseTestingUtility htu = new HBaseTestingUtility();
  final int DEFAULT_BLOCK_SIZE = 1024;
  htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
  MiniDFSCluster cluster = null;
  Path testFile = null;
  try {
    // set up a cluster with 3 nodes
    String[] hosts = new String[] { "host1", "host2", "host3" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with two blocks
    testFile = new Path("/test1.txt");
    WriteDataToHDFS(fs, testFile, 2 * DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, the same as the number of
    // datanodes; the locality index for each host should be 100%,
    // or getWeight for each host should be the same as getUniqueBlocksWeights
    final long maxTime = System.currentTimeMillis() + 2000;
    boolean ok;
    do {
      ok = true;
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      long uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      for (String host : hosts) {
        long weight = blocksDistribution.getWeight(host);
        ok = (ok && uniqueBlocksTotalWeight == weight);
      }
    } while (!ok && System.currentTimeMillis() < maxTime);
    assertTrue(ok);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with three blocks
    testFile = new Path("/test2.txt");
    WriteDataToHDFS(fs, testFile, 3 * DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, we will have total of 9
    // replica of blocks; thus the host with the highest weight should have
    // weight == 3 * DEFAULT_BLOCK_SIZE
    final long maxTime = System.currentTimeMillis() + 2000;
    long weight;
    long uniqueBlocksTotalWeight;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      String tophost = blocksDistribution.getTopHosts().get(0);
      weight = blocksDistribution.getWeight(tophost);
      // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (uniqueBlocksTotalWeight != weight && System.currentTimeMillis() < maxTime);
    assertTrue(uniqueBlocksTotalWeight == weight);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with one block
    testFile = new Path("/test3.txt");
    WriteDataToHDFS(fs, testFile, DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, we will have total of 3
    // replica of blocks; thus there is one host without weight
    final long maxTime = System.currentTimeMillis() + 2000;
    HDFSBlocksDistribution blocksDistribution;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (blocksDistribution.getTopHosts().size() != 3 && System.currentTimeMillis() < maxTime);
    assertEquals("Wrong number of hosts distributing blocks.", 3,
        blocksDistribution.getTopHosts().size());
  } finally {
    htu.shutdownMiniDFSCluster();
  }
}
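
All three cases above repeat the same poll-until-timeout loop, because block locations reach the NameNode asynchronously (HBASE-6175). A small refactoring sketch of that pattern as a reusable helper; the class and method names here are invented for illustration and are not part of the test:

import java.util.function.BooleanSupplier;

public final class WaitUtil {
  private WaitUtil() {
  }

  // Re-evaluates the condition until it holds or the timeout elapses,
  // mirroring the do/while loops in the test above.
  public static boolean waitFor(long timeoutMillis, long pollMillis, BooleanSupplier condition)
      throws InterruptedException {
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    while (true) {
      if (condition.getAsBoolean()) {
        return true;
      }
      if (System.currentTimeMillis() >= deadline) {
        return false;
      }
      Thread.sleep(pollMillis);
    }
  }
}

In the test, the supplied condition would recompute the HDFSBlocksDistribution and compare weights; since FSUtils.computeHDFSBlocksDistribution throws a checked IOException, the lambda would have to catch it (or the functional interface would need to declare it).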
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class HRegion, method computeHDFSBlocksDistribution.
/**
 * This is a helper function to compute the HDFS block distribution on demand
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region's HRegionInfo
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    List<LocatedFileStatus> locatedFileStatusList =
        HRegionFileSystem.getStoreFilesLocatedStatus(regionFs, family.getNameAsString(), true);
    if (locatedFileStatusList == null) {
      continue;
    }
    for (LocatedFileStatus status : locatedFileStatusList) {
      Path p = status.getPath();
      if (StoreFileInfo.isReference(p) || HFileLink.isHFileLink(p)) {
        // Only construct a StoreFileInfo object if it's not a plain HFile,
        // to save object creation
        StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
        hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
      } else if (StoreFileInfo.isHFile(p)) {
        // If it's an HFile, just add it to the block distribution;
        // don't create more objects here, not even another HDFSBlocksDistribution
        FSUtils.addToHDFSBlocksDistribution(hdfsBlocksDistribution, status.getBlockLocations());
      } else {
        throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
      }
    }
  }
  return hdfsBlocksDistribution;
}
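
A usage sketch for this helper, assuming the Configuration, HTableDescriptor, HRegionInfo and table directory are already available from elsewhere; the class name and the localHostname parameter are invented for illustration. It just shows reading locality back out of the returned HDFSBlocksDistribution with the same accessors used in the other snippets:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegion;

public class RegionLocalityProbe {
  // Prints the per-host weights of a region's store files and returns the
  // locality index for one host.
  public static float localityFor(Configuration conf, HTableDescriptor tableDescriptor,
      HRegionInfo regionInfo, Path tablePath, String localHostname) throws IOException {
    HDFSBlocksDistribution distribution =
        HRegion.computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo, tablePath);
    for (String host : distribution.getTopHosts()) {
      System.out.println("host=" + host + " weight=" + distribution.getWeight(host));
    }
    // Fraction of the region's store-file bytes with a replica on localHostname.
    return distribution.getBlockLocalityIndex(localHostname);
  }
}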