Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class TestHFileOutputFormat2, the method doIncrementalLoadTest:
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
    boolean putSortReducer, String tableStr) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
    // explicit hostnames parameter just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }
  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  TableName tableName = TableName.valueOf(tableStr);
  Table table = util.createTable(tableName, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  FileSystem fs = testDir.getFileSystem(conf);
  try (RegionLocator r = util.getConnection().getRegionLocator(tableName);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir, putSortReducer);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      util.waitUntilNoRegionsInTransition();

      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(tableName, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(tableName).getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = 0;
    if (putSortReducer) {
      // no rows should be extracted
      assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
    } else {
      expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
    }
    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(tableName)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(tableName);
    while (!admin.isTableDisabled(tableName)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(tableName);
    util.waitTableAvailable(tableName);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
        util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(tableName);
    util.shutdownMiniCluster();
  }
}
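
The locality assertion at the end of this test is just HDFSBlocksDistribution arithmetic: per-region distributions are merged with add(), and getBlockLocalityIndex(host) reports the fraction of the total unique block weight that has a replica on that host. Below is a minimal, self-contained sketch of that arithmetic; it assumes only that hbase-common is on the classpath, and the host names and block weights are invented for illustration.

import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class LocalityIndexSketch {
  public static void main(String[] args) {
    // Stand-ins for the per-region distributions that
    // HRegion#getHDFSBlocksDistribution() would return for two regions.
    HDFSBlocksDistribution regionA = new HDFSBlocksDistribution();
    regionA.addHostsAndBlockWeight(new String[] { "datanode_0" }, 128L);

    HDFSBlocksDistribution regionB = new HDFSBlocksDistribution();
    regionB.addHostsAndBlockWeight(new String[] { "datanode_0", "datanode_1" }, 64L);

    // Aggregate them the same way the test does for the whole table.
    HDFSBlocksDistribution table = new HDFSBlocksDistribution();
    table.add(regionA);
    table.add(regionB);

    // locality = weight hosted on this host / unique total block weight
    // datanode_0 holds a replica of every block: 192 / 192 = 1.0
    // datanode_1 holds only region B's block:     64 / 192 ~ 0.33
    System.out.println("datanode_0: " + table.getBlockLocalityIndex("datanode_0"));
    System.out.println("datanode_1: " + table.getBlockLocalityIndex("datanode_1"));
  }
}

The test expects 1.0 for every mini-cluster host because, with locality-sensitive output enabled, each HFile is written with a favored replica on the region's host.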
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, the method getTopBlockLocations:
/**
* Returns an ordered list of hosts which have better locality for this region
* than the current host.
*/
protected List<ServerName> getTopBlockLocations(HRegionInfo region, String currentHost) {
  HDFSBlocksDistribution blocksDistribution = getBlockDistribution(region);
  List<String> topHosts = new ArrayList<>();
  for (String host : blocksDistribution.getTopHosts()) {
    if (host.equals(currentHost)) {
      break;
    }
    topHosts.add(host);
  }
  return mapHostNameToServerName(topHosts);
}
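
getTopHosts() returns hosts ordered by descending block weight, so the loop above stops as soon as it reaches the region's current host and only strictly better candidates are returned. A small sketch of that behavior, using invented host names and weights (hbase-common assumed on the classpath):

import java.util.List;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class TopHostsSketch {
  public static void main(String[] args) {
    HDFSBlocksDistribution dist = new HDFSBlocksDistribution();
    // host-c ends up with the most block weight (60), host-a the least (10).
    dist.addHostsAndBlockWeight(new String[] { "host-a", "host-b", "host-c" }, 10L);
    dist.addHostsAndBlockWeight(new String[] { "host-b", "host-c" }, 20L);
    dist.addHostsAndBlockWeight(new String[] { "host-c" }, 30L);

    // Ordered by descending weight: [host-c, host-b, host-a]
    List<String> topHosts = dist.getTopHosts();
    System.out.println(topHosts);

    // getTopBlockLocations() walks this list and stops at the region's
    // current host, so only hosts with better locality are collected.
    String currentHost = "host-b";
    for (String host : topHosts) {
      if (host.equals(currentHost)) {
        break;
      }
      System.out.println("better locality than current host: " + host);
    }
  }
}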
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, the method getBlockDistribution:
public HDFSBlocksDistribution getBlockDistribution(HRegionInfo hri) {
  HDFSBlocksDistribution blockDistbn = null;
  try {
    if (cache.asMap().containsKey(hri)) {
      blockDistbn = cache.get(hri);
      return blockDistbn;
    } else {
      LOG.debug("HDFSBlocksDistribution not found in cache for region "
          + hri.getRegionNameAsString());
      blockDistbn = internalGetTopBlockLocation(hri);
      cache.put(hri, blockDistbn);
      return blockDistbn;
    }
  } catch (ExecutionException e) {
    LOG.warn("Error while fetching cache entry ", e);
    blockDistbn = internalGetTopBlockLocation(hri);
    cache.put(hri, blockDistbn);
    return blockDistbn;
  }
}
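
The cache here is a Guava LoadingCache: a hit returns the cached distribution, a miss computes it via internalGetTopBlockLocation() and populates the cache, and a loader failure falls back to computing directly and repairing the entry. Below is a minimal sketch of that lookup-or-compute-and-repair pattern under stated assumptions: Guava is on the classpath, and the String key/value types plus computeExpensively() are made-up stand-ins for HRegionInfo, HDFSBlocksDistribution, and internalGetTopBlockLocation().

import java.util.concurrent.ExecutionException;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class CacheFallbackSketch {
  // Stand-in for internalGetTopBlockLocation(hri): any expensive computation.
  private static String computeExpensively(String key) {
    return "distribution-for-" + key;
  }

  private final LoadingCache<String, String> cache = CacheBuilder.newBuilder()
      .maximumSize(1000)
      .build(new CacheLoader<String, String>() {
        @Override
        public String load(String key) {
          return computeExpensively(key);
        }
      });

  public String get(String key) {
    try {
      // Hit: return the cached value. Miss: the loader computes and caches it.
      return cache.get(key);
    } catch (ExecutionException e) {
      // Loader failure: compute directly and repair the cache, mirroring the
      // catch block in getBlockDistribution().
      String value = computeExpensively(key);
      cache.put(key, value);
      return value;
    }
  }

  public static void main(String[] args) {
    CacheFallbackSketch sketch = new CacheFallbackSketch();
    System.out.println(sketch.get("region-1"));
    System.out.println(sketch.get("region-1")); // served from the cache
  }
}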
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class DateTieredCompactionPolicy, the method shouldPerformMajorCompaction:
public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
    throws IOException {
  long mcTime = getNextMajorCompactTime(filesToCompact);
  if (filesToCompact == null || mcTime == 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("filesToCompact: " + filesToCompact + " mcTime: " + mcTime);
    }
    return false;
  }

  // TODO: Use better method for determining stamp of last major (HBASE-2990)
  long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
  long now = EnvironmentEdgeManager.currentTime();
  if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("lowTimestamp: " + lowTimestamp + " now: " + now + " mcTime: " + mcTime);
    }
    return false;
  }

  long cfTTL = this.storeConfigInfo.getStoreFileTtl();
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
  boolean[] filesInWindow = new boolean[boundaries.size()];
  for (StoreFile file : filesToCompact) {
    Long minTimestamp = file.getMinimumTimestamp();
    long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
    if (cfTTL != Long.MAX_VALUE && oldest >= cfTTL) {
      LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
      return true;
    }
    if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
      LOG.debug("Major compaction triggered on store " + this
          + ", because there are new files and time since last major compaction "
          + (now - lowTimestamp) + "ms");
      return true;
    }

    int lowerWindowIndex = Collections.binarySearch(boundaries,
        minTimestamp == null ? (Long) Long.MAX_VALUE : minTimestamp);
    int upperWindowIndex = Collections.binarySearch(boundaries,
        file.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : file.getMaximumTimestamp());
    // Handle boundary conditions and negative values of binarySearch
    lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
    upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;
    if (lowerWindowIndex != upperWindowIndex) {
      LOG.debug("Major compaction triggered on store " + this + "; because file "
          + file.getPath() + " has data with timestamps cross window boundaries");
      return true;
    } else if (filesInWindow[upperWindowIndex]) {
      LOG.debug("Major compaction triggered on store " + this
          + "; because there are more than one file in some windows");
      return true;
    } else {
      filesInWindow[upperWindowIndex] = true;
    }
    hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
  }

  float blockLocalityIndex = hdfsBlocksDistribution
      .getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
  if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
    LOG.debug("Major compaction triggered on store " + this
        + "; to make hdfs blocks local, current blockLocalityIndex is " + blockLocalityIndex
        + " (min " + comConf.getMinLocalityToForceCompact() + ")");
    return true;
  }

  LOG.debug("Skipping major compaction of " + this
      + ", because the files are already major compacted");
  return false;
}
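
The window-index arithmetic above leans on Collections.binarySearch returning -(insertionPoint) - 1 when a timestamp is not itself a boundary, so Math.abs(result + 2) recovers insertionPoint - 1: the index of the closest lower boundary, i.e. the window containing the timestamp. A small self-contained sketch with invented boundary values (the real ones come from getCompactBoundariesForMajor(), whose first boundary is effectively negative infinity):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class WindowIndexSketch {
  // Maps a timestamp to the index of the tiering window that contains it,
  // using the same binarySearch trick as shouldPerformMajorCompaction().
  static int windowIndexOf(List<Long> boundaries, long timestamp) {
    int idx = Collections.binarySearch(boundaries, timestamp);
    // Exact hit: the timestamp is itself a lower boundary.
    // Miss: binarySearch returns -(insertionPoint) - 1, and abs(idx + 2)
    // recovers insertionPoint - 1, the index of the closest lower boundary.
    return (idx < 0) ? Math.abs(idx + 2) : idx;
  }

  public static void main(String[] args) {
    // Invented window boundaries, sorted ascending.
    List<Long> boundaries = Arrays.asList(Long.MIN_VALUE, 1000L, 2000L, 3000L);

    System.out.println(windowIndexOf(boundaries, 1500L)); // 1 -> window [1000, 2000)
    System.out.println(windowIndexOf(boundaries, 2000L)); // 2 -> exact boundary hit
    System.out.println(windowIndexOf(boundaries, 9999L)); // 3 -> last window
    // A file whose min and max timestamps land in different windows crosses a
    // boundary, which is one of the major-compaction triggers above.
  }
}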
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class BalancerClusterState, the method getLowestLocalityRegionOnServer:
int getLowestLocalityRegionOnServer(int serverIndex) {
  if (regionFinder != null) {
    float lowestLocality = 1.0f;
    int lowestLocalityRegionIndex = -1;
    if (regionsPerServer[serverIndex].length == 0) {
      // No regions on that region server
      return -1;
    }
    for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
      int regionIndex = regionsPerServer[serverIndex][j];
      HDFSBlocksDistribution distribution =
          regionFinder.getBlockDistribution(regions[regionIndex]);
      float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
      // skip empty region
      if (distribution.getUniqueBlocksTotalWeight() == 0) {
        continue;
      }
      if (locality < lowestLocality) {
        lowestLocality = locality;
        lowestLocalityRegionIndex = j;
      }
    }
    if (lowestLocalityRegionIndex == -1) {
      return -1;
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace("Lowest locality region is "
          + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]].getRegionNameAsString()
          + " with locality " + lowestLocality + " and its region server contains "
          + regionsPerServer[serverIndex].length + " regions");
    }
    return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
  } else {
    return -1;
  }
}
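
Two details are worth noting: regions whose distribution carries no block weight are skipped, so freshly created empty regions are not reported as locality 0, and because lowestLocality starts at 1.0f with a strict less-than comparison, a server whose regions are all fully local returns -1. A minimal sketch of that selection over plain HDFSBlocksDistribution objects, with invented hosts and weights (hbase-common assumed on the classpath):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class LowestLocalitySketch {
  public static void main(String[] args) {
    String serverHost = "host-1";

    // Invented stand-ins for the per-region distributions the balancer sees.
    HDFSBlocksDistribution emptyRegion = new HDFSBlocksDistribution(); // no blocks yet
    HDFSBlocksDistribution localRegion = new HDFSBlocksDistribution();
    localRegion.addHostsAndBlockWeight(new String[] { "host-1" }, 100L);
    HDFSBlocksDistribution remoteRegion = new HDFSBlocksDistribution();
    remoteRegion.addHostsAndBlockWeight(new String[] { "host-2" }, 100L);

    List<HDFSBlocksDistribution> regions =
        Arrays.asList(emptyRegion, localRegion, remoteRegion);

    float lowestLocality = 1.0f;
    int lowestIndex = -1;
    for (int i = 0; i < regions.size(); i++) {
      HDFSBlocksDistribution dist = regions.get(i);
      // Skip regions with no HDFS blocks, as the balancer does; their locality
      // index would read as 0 without the region actually being misplaced.
      if (dist.getUniqueBlocksTotalWeight() == 0) {
        continue;
      }
      float locality = dist.getBlockLocalityIndex(serverHost);
      if (locality < lowestLocality) {
        lowestLocality = locality;
        lowestIndex = i;
      }
    }
    System.out.println("lowest-locality region index: " + lowestIndex
        + ", locality: " + lowestLocality); // index 2, locality 0.0
  }
}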