Example 1 with HDFSBlocksDistribution

Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in the hbase project by Apache.

From class TestHFileOutputFormat2, method doIncrementalLoadTest.

// Test helper: starts a mini cluster, creates a pre-split table, generates bulk-load files with
// runIncrementalPELoad, loads them with LoadIncrementalHFiles and asserts on the resulting rows.
//
// shouldChangeRegions - when true, the table is re-created with new split keys before the load.
// shouldKeepLocality  - written to HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY; when true the
//                       test also uses 3 hosts and 20 regions instead of 1 and 5.
// putSortReducer      - forwarded to runIncrementalPELoad; when true the test expects 0 rows.
// tableStr            - name of the table to create.
//
// NOTE(review): this listing appears to have lost lines during extraction (closing braces and
// several statements are absent), so it does not compile as shown. Comments below describe only
// what is visible.
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality, boolean putSortReducer, String tableStr) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    // Defaults for the non-locality case; overridden inside the if below.
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
        // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
        // explicit hostnames parameter just like MiniDFSCluster does.
        hostCount = 3;
        regionNum = 20;
    // regionNum regions require regionNum - 1 split points.
    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
        hostnames[i] = "datanode_" + i;
    util.startMiniCluster(1, hostCount, hostnames);
    TableName tableName = TableName.valueOf(tableStr);
    Table table = util.createTable(tableName, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    // NOTE(review): fs is not referenced again in the visible lines; later code calls
    // testDir.getFileSystem(conf) directly.
    FileSystem fs = testDir.getFileSystem(conf);
    try (RegionLocator r = util.getConnection().getRegionLocator(tableName);
        Admin admin = util.getConnection().getAdmin()) {
        assertEquals("Should start with empty table", 0, util.countRows(table));
        int numRegions = r.getStartKeys().length;
        assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
        // Generate the bulk load files
        runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir, putSortReducer);
        // This doesn't write into the table, just makes files
        assertEquals("HFOF should not touch actual table", 0, util.countRows(table));
        // Make sure that a directory was created for every CF
        int dir = 0;
        for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
            for (byte[] family : FAMILIES) {
                // NOTE(review): the body of this match is missing from the extract; presumably it
                // increments dir, since dir is compared with FAMILIES.length below - confirm.
                if (Bytes.toString(family).equals(f.getPath().getName())) {
        assertEquals("Column family not found in FS.", FAMILIES.length, dir);
        // handle the split case
        if (shouldChangeRegions) {
  // NOTE(review): bare string argument; the call it belonged to (presumably a log statement) is
  // missing from this extract.
  "Changing regions in table");
            // 14 split keys; the loop below waits for 15 region locations.
            byte[][] newSplitKeys = generateRandomSplitKeys(14);
            table = util.createTable(tableName, FAMILIES, newSplitKeys);
            // Loop until 15 region locations are reported and the table is available.
            while (util.getConnection().getRegionLocator(tableName).getAllRegionLocations().size() != 15 || !admin.isTableAvailable(table.getName())) {
      // NOTE(review): orphaned string argument, see note above; the loop body is otherwise not visible.
      "Waiting for new region assignment to happen");
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);
        // Ensure data shows up
        int expectedRows = 0;
        if (putSortReducer) {
            // no rows should be extracted
            assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table));
        } else {
            expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
            assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table));
            Scan scan = new Scan();
            ResultScanner results = table.getScanner(scan);
            for (Result res : results) {
                // Each row must have one cell per family, all sharing the first cell's row and value.
                assertEquals(FAMILIES.length, res.rawCells().length);
                Cell first = res.rawCells()[0];
                for (Cell kv : res.rawCells()) {
                    assertTrue(CellUtil.matchingRow(first, kv));
                    assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        // Checksum taken now so it can be compared after the regions are reopened.
        String tableDigestBefore = util.checksumRows(table);
        // Check region locality
        HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
        // NOTE(review): the body of this loop is missing from the extract; presumably each region's
        // block distribution is accumulated into hbd - confirm against the original test.
        for (HRegion region : util.getHBaseCluster().getRegions(tableName)) {
        for (String hostname : hostnames) {
            float locality = hbd.getBlockLocalityIndex(hostname);
  // NOTE(review): orphaned string argument of a missing call (presumably a log statement).
  "locality of [" + hostname + "]: " + locality);
            // Asserts that the locality index, truncated to whole percent, is exactly 100.
            assertEquals(100, (int) (locality * 100));
        // Cause regions to reopen
        // NOTE(review): the disable/enable calls themselves are not visible in this extract; only
        // the wait on admin.isTableDisabled remains.
        while (!admin.isTableDisabled(tableName)) {
  // NOTE(review): orphaned string argument of a missing call (presumably a log statement).
  "Waiting for table to disable");
        assertEquals("Data should remain after reopening of regions", tableDigestBefore, util.checksumRows(table));
    } finally {
        // Recursively delete the test output directory.
        testDir.getFileSystem(conf).delete(testDir, true);
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Admin(org.apache.hadoop.hbase.client.Admin) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution) Result(org.apache.hadoop.hbase.client.Result) TableName(org.apache.hadoop.hbase.TableName) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell)

Example 2 with HDFSBlocksDistribution

Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in the hbase project by Apache.

From class RegionLocationFinder, method getTopBlockLocations.

/**
 * Returns an ordered list of hosts which have better locality for this region
 * than the current host.
 */
protected List<ServerName> getTopBlockLocations(HRegionInfo region, String currentHost) {
    // Block distribution for the region, obtained via getBlockDistribution.
    HDFSBlocksDistribution blocksDistribution = getBlockDistribution(region);
    List<String> topHosts = new ArrayList<>();
    for (String host : blocksDistribution.getTopHosts()) {
        // NOTE(review): the rest of this loop is missing from the extract. Presumably iteration
        // stops at currentHost and earlier hosts are appended to topHosts - confirm against the
        // original source; as shown, nothing is ever added to topHosts.
        if (host.equals(currentHost)) {
    // Convert the collected host names to ServerName instances.
    return mapHostNameToServerName(topHosts);
Also used : ArrayList(java.util.ArrayList) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)

Example 3 with HDFSBlocksDistribution

Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in the hbase project by Apache.

From class RegionLocationFinder, method getBlockDistribution.

// Returns the HDFSBlocksDistribution for the given region, using the cache when it already holds
// an entry for hri and otherwise computing it with internalGetTopBlockLocation and caching it.
//
// hri - region whose block distribution is requested.
//
// NOTE(review): closing braces are missing from this extract, so it does not compile as shown.
public HDFSBlocksDistribution getBlockDistribution(HRegionInfo hri) {
    HDFSBlocksDistribution blockDistbn = null;
    try {
        // Cache hit: return the cached value.
        if (cache.asMap().containsKey(hri)) {
            blockDistbn = cache.get(hri);
            return blockDistbn;
        } else {
            // Cache miss: compute directly and store the result explicitly.
            LOG.debug("HDFSBlocksDistribution not found in cache for region " + hri.getRegionNameAsString());
            blockDistbn = internalGetTopBlockLocation(hri);
            cache.put(hri, blockDistbn);
            return blockDistbn;
    } catch (ExecutionException e) {
        // Cache lookup failed: log a warning, then compute and store the value the same way as
        // on a cache miss.
        LOG.warn("Error while fetching cache entry ", e);
        blockDistbn = internalGetTopBlockLocation(hri);
        cache.put(hri, blockDistbn);
        return blockDistbn;
Also used : ExecutionException(java.util.concurrent.ExecutionException) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)

Example 4 with HDFSBlocksDistribution

Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in the hbase project by Apache.

From class DateTieredCompactionPolicy, method shouldPerformMajorCompaction.

// Decides whether the given store files should be major-compacted.
//
// Returns false when filesToCompact is null, when the next major-compaction time is 0, or when
// the lowest file timestamp is non-positive or not older than (now - mcTime). Otherwise returns
// true as soon as one of the per-file conditions in the loop holds, or when the block locality
// index is below the configured minimum; returns false if none applies.
//
// filesToCompact - candidate store files.
//
// NOTE(review): closing braces (and possibly statements) are missing from this extract, so it
// does not compile as shown.
public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact) throws IOException {
    // NOTE(review): filesToCompact is passed to getNextMajorCompactTime before the null check
    // on the next line - confirm that helper tolerates null.
    long mcTime = getNextMajorCompactTime(filesToCompact);
    if (filesToCompact == null || mcTime == 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("filesToCompact: " + filesToCompact + " mcTime: " + mcTime);
        return false;
    // TODO: Use better method for determining stamp of last major (HBASE-2990)
    long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
    long now = EnvironmentEdgeManager.currentTime();
    // Not due yet: no usable lowest timestamp, or it is not older than now - mcTime.
    if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
        if (LOG.isDebugEnabled()) {
            // NOTE(review): this message prints lowTimestamp twice.
            LOG.debug("lowTimestamp: " + lowTimestamp + " lowTimestamp: " + lowTimestamp + " now: " + now + " mcTime: " + mcTime);
        return false;
    long cfTTL = this.storeConfigInfo.getStoreFileTtl();
    // NOTE(review): no visible line adds anything to hdfsBlocksDistribution before it is queried
    // after the loop; the statement that fills it was presumably lost in extraction - confirm.
    HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
    List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
    // One flag per boundary entry, set once a file has been seen at that window index.
    boolean[] filesInWindow = new boolean[boundaries.size()];
    for (StoreFile file : filesToCompact) {
        Long minTimestamp = file.getMinimumTimestamp();
        // Age of the file's oldest data; Long.MIN_VALUE when the file reports no minimum timestamp.
        long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
        // A cfTTL of Long.MAX_VALUE disables the TTL check.
        if (cfTTL != Long.MAX_VALUE && oldest >= cfTTL) {
            LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
            return true;
        if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
            LOG.debug("Major compaction triggered on store " + this + ", because there are new files and time since last major compaction " + (now - lowTimestamp) + "ms");
            return true;
        // Locate the file's min and max timestamps in boundaries; a missing timestamp is searched
        // as Long.MAX_VALUE.
        int lowerWindowIndex = Collections.binarySearch(boundaries, minTimestamp == null ? (Long) Long.MAX_VALUE : minTimestamp);
        int upperWindowIndex = Collections.binarySearch(boundaries, file.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : file.getMaximumTimestamp());
        // Handle boundary conditions and negative values of binarySearch
        // A not-found result is -(insertionPoint) - 1; abs(result + 2) turns it into
        // insertionPoint - 1 whenever insertionPoint >= 1.
        lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
        upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;
        if (lowerWindowIndex != upperWindowIndex) {
            LOG.debug("Major compaction triggered on store " + this + "; because file " + file.getPath() + " has data with timestamps cross window boundaries");
            return true;
        } else if (filesInWindow[upperWindowIndex]) {
            LOG.debug("Major compaction triggered on store " + this + "; because there are more than one file in some windows");
            return true;
        } else {
            // First file seen at this window index.
            filesInWindow[upperWindowIndex] = true;
    // Locality of this host according to hdfsBlocksDistribution; below the configured minimum
    // forces a major compaction.
    float blockLocalityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
    if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
        LOG.debug("Major compaction triggered on store " + this + "; to make hdfs blocks local, current blockLocalityIndex is " + blockLocalityIndex + " (min " + comConf.getMinLocalityToForceCompact() + ")");
        return true;
    LOG.debug("Skipping major compaction of " + this + ", because the files are already major compacted");
    return false;
Also used : StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)

Example 5 with HDFSBlocksDistribution

Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in the hbase project by Apache.

From class BalancerClusterState, method getLowestLocalityRegionOnServer.

// Finds the region hosted on the given server that has the lowest block locality index for that
// server's hostname.
//
// serverIndex - index into regionsPerServer and servers.
//
// Returns the value stored in regionsPerServer[serverIndex] for the chosen region, or -1 when
// regionFinder is null, the server has no regions, or no region's locality is below 1.0.
//
// NOTE(review): closing braces (and at least one statement) are missing from this extract, so it
// does not compile as shown.
int getLowestLocalityRegionOnServer(int serverIndex) {
    if (regionFinder != null) {
        // Starts at 1.0f and is compared with a strict '<', so a region whose locality is exactly
        // 1.0 is never selected.
        float lowestLocality = 1.0f;
        // Position within regionsPerServer[serverIndex]; -1 means nothing selected yet.
        int lowestLocalityRegionIndex = -1;
        if (regionsPerServer[serverIndex].length == 0) {
            // No regions on that region server
            return -1;
        for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
            int regionIndex = regionsPerServer[serverIndex][j];
            HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[regionIndex]);
            float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
            // skip empty region
            // NOTE(review): the body of this guard is missing from the extract (presumably a
            // 'continue') - confirm against the original source.
            if (distribution.getUniqueBlocksTotalWeight() == 0) {
            if (locality < lowestLocality) {
                lowestLocality = locality;
                // Stores the loop position j, not regionIndex; it is mapped back through
                // regionsPerServer[serverIndex] on return.
                lowestLocalityRegionIndex = j;
        if (lowestLocalityRegionIndex == -1) {
            return -1;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Lowest locality region is " + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]].getRegionNameAsString() + " with locality " + lowestLocality + " and its region server contains " + regionsPerServer[serverIndex].length + " regions");
        return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
    } else {
        return -1;
Also used : HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)


