Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class TestHFileOutputFormat2, the method doIncrementalLoadTest:
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
    boolean putSortReducer, String tableStr) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
    // explicit hostnames parameter just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }
  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  TableName tableName = TableName.valueOf(tableStr);
  Table table = util.createTable(tableName, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  FileSystem fs = testDir.getFileSystem(conf);
  try (RegionLocator r = util.getConnection().getRegionLocator(tableName);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir, putSortReducer);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      util.waitUntilNoRegionsInTransition();

      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(tableName, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(tableName).getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = 0;
    if (putSortReducer) {
      // no rows should be extracted
      assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
    } else {
      expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
    }
    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(tableName)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(tableName);
    while (!admin.isTableDisabled(tableName)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(tableName);
    util.waitTableAvailable(tableName);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
        util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(tableName);
    util.shutdownMiniCluster();
  }
}
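
The locality assertion at the end of this test is just HDFSBlocksDistribution arithmetic: per-region distributions are merged with add(), and getBlockLocalityIndex(host) reports the fraction of the total unique block weight that has a replica on that host. Below is a minimal, self-contained sketch of that arithmetic; it assumes only that hbase-common is on the classpath, and the host names and block weights are invented for illustration.

import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class LocalityIndexSketch {
  public static void main(String[] args) {
    // Stand-ins for the per-region distributions that
    // HRegion#getHDFSBlocksDistribution() would return for two regions.
    HDFSBlocksDistribution regionA = new HDFSBlocksDistribution();
    regionA.addHostsAndBlockWeight(new String[] { "datanode_0" }, 128L);

    HDFSBlocksDistribution regionB = new HDFSBlocksDistribution();
    regionB.addHostsAndBlockWeight(new String[] { "datanode_0", "datanode_1" }, 64L);

    // Aggregate them the same way the test does for the whole table.
    HDFSBlocksDistribution table = new HDFSBlocksDistribution();
    table.add(regionA);
    table.add(regionB);

    // locality = weight hosted on this host / unique total block weight
    // datanode_0 holds a replica of every block: 192 / 192 = 1.0
    // datanode_1 holds only region B's block:     64 / 192 ~ 0.33
    System.out.println("datanode_0: " + table.getBlockLocalityIndex("datanode_0"));
    System.out.println("datanode_1: " + table.getBlockLocalityIndex("datanode_1"));
  }
}

The test expects 1.0 for every mini-cluster host because, with locality-sensitive output enabled, each HFile is written with a favored replica on the region's host.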
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, the method getTopBlockLocations:
/**
* Returns an ordered list of hosts which have better locality for this region
* than the current host.
*/
protected List<ServerName> getTopBlockLocations(HRegionInfo region, String currentHost) {
  HDFSBlocksDistribution blocksDistribution = getBlockDistribution(region);
  List<String> topHosts = new ArrayList<>();
  for (String host : blocksDistribution.getTopHosts()) {
    if (host.equals(currentHost)) {
      break;
    }
    topHosts.add(host);
  }
  return mapHostNameToServerName(topHosts);
}
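
getTopHosts() returns hosts ordered by descending block weight, so the loop above stops as soon as it reaches the region's current host and only strictly better candidates are returned. A small sketch of that behavior, using invented host names and weights (hbase-common assumed on the classpath):

import java.util.List;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class TopHostsSketch {
  public static void main(String[] args) {
    HDFSBlocksDistribution dist = new HDFSBlocksDistribution();
    // host-c ends up with the most block weight (60), host-a the least (10).
    dist.addHostsAndBlockWeight(new String[] { "host-a", "host-b", "host-c" }, 10L);
    dist.addHostsAndBlockWeight(new String[] { "host-b", "host-c" }, 20L);
    dist.addHostsAndBlockWeight(new String[] { "host-c" }, 30L);

    // Ordered by descending weight: [host-c, host-b, host-a]
    List<String> topHosts = dist.getTopHosts();
    System.out.println(topHosts);

    // getTopBlockLocations() walks this list and stops at the region's
    // current host, so only hosts with better locality are collected.
    String currentHost = "host-b";
    for (String host : topHosts) {
      if (host.equals(currentHost)) {
        break;
      }
      System.out.println("better locality than current host: " + host);
    }
  }
}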
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, the method getBlockDistribution:
public HDFSBlocksDistribution getBlockDistribution(HRegionInfo hri) {
  HDFSBlocksDistribution blockDistbn = null;
  try {
    if (cache.asMap().containsKey(hri)) {
      blockDistbn = cache.get(hri);
      return blockDistbn;
    } else {
      LOG.debug("HDFSBlocksDistribution not found in cache for region "
          + hri.getRegionNameAsString());
      blockDistbn = internalGetTopBlockLocation(hri);
      cache.put(hri, blockDistbn);
      return blockDistbn;
    }
  } catch (ExecutionException e) {
    LOG.warn("Error while fetching cache entry ", e);
    blockDistbn = internalGetTopBlockLocation(hri);
    cache.put(hri, blockDistbn);
    return blockDistbn;
  }
}
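
The cache here is a Guava LoadingCache: a hit returns the cached distribution, a miss computes it via internalGetTopBlockLocation() and populates the cache, and a loader failure falls back to computing directly and repairing the entry. Below is a minimal sketch of that lookup-or-compute-and-repair pattern under stated assumptions: Guava is on the classpath, and the String key/value types plus computeExpensively() are made-up stand-ins for HRegionInfo, HDFSBlocksDistribution, and internalGetTopBlockLocation().

import java.util.concurrent.ExecutionException;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class CacheFallbackSketch {
  // Stand-in for internalGetTopBlockLocation(hri): any expensive computation.
  private static String computeExpensively(String key) {
    return "distribution-for-" + key;
  }

  private final LoadingCache<String, String> cache = CacheBuilder.newBuilder()
      .maximumSize(1000)
      .build(new CacheLoader<String, String>() {
        @Override
        public String load(String key) {
          return computeExpensively(key);
        }
      });

  public String get(String key) {
    try {
      // Hit: return the cached value. Miss: the loader computes and caches it.
      return cache.get(key);
    } catch (ExecutionException e) {
      // Loader failure: compute directly and repair the cache, mirroring the
      // catch block in getBlockDistribution().
      String value = computeExpensively(key);
      cache.put(key, value);
      return value;
    }
  }

  public static void main(String[] args) {
    CacheFallbackSketch sketch = new CacheFallbackSketch();
    System.out.println(sketch.get("region-1"));
    System.out.println(sketch.get("region-1")); // served from the cache
  }
}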
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class DateTieredCompactionPolicy, the method shouldPerformMajorCompaction:
public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
    throws IOException {
  long mcTime = getNextMajorCompactTime(filesToCompact);
  if (filesToCompact == null || mcTime == 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("filesToCompact: " + filesToCompact + " mcTime: " + mcTime);
    }
    return false;
  }

  // TODO: Use better method for determining stamp of last major (HBASE-2990)
  long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
  long now = EnvironmentEdgeManager.currentTime();
  if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("lowTimestamp: " + lowTimestamp + " now: " + now + " mcTime: " + mcTime);
    }
    return false;
  }

  long cfTTL = this.storeConfigInfo.getStoreFileTtl();
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
  boolean[] filesInWindow = new boolean[boundaries.size()];
  for (StoreFile file : filesToCompact) {
    Long minTimestamp = file.getMinimumTimestamp();
    long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
    if (cfTTL != Long.MAX_VALUE && oldest >= cfTTL) {
      LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
      return true;
    }
    if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
      LOG.debug("Major compaction triggered on store " + this
          + ", because there are new files and time since last major compaction "
          + (now - lowTimestamp) + "ms");
      return true;
    }

    int lowerWindowIndex = Collections.binarySearch(boundaries,
        minTimestamp == null ? (Long) Long.MAX_VALUE : minTimestamp);
    int upperWindowIndex = Collections.binarySearch(boundaries,
        file.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : file.getMaximumTimestamp());
    // Handle boundary conditions and negative values of binarySearch
    lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
    upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;
    if (lowerWindowIndex != upperWindowIndex) {
      LOG.debug("Major compaction triggered on store " + this + "; because file "
          + file.getPath() + " has data with timestamps cross window boundaries");
      return true;
    } else if (filesInWindow[upperWindowIndex]) {
      LOG.debug("Major compaction triggered on store " + this
          + "; because there are more than one file in some windows");
      return true;
    } else {
      filesInWindow[upperWindowIndex] = true;
    }
    hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
  }

  float blockLocalityIndex = hdfsBlocksDistribution
      .getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
  if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
    LOG.debug("Major compaction triggered on store " + this
        + "; to make hdfs blocks local, current blockLocalityIndex is " + blockLocalityIndex
        + " (min " + comConf.getMinLocalityToForceCompact() + ")");
    return true;
  }

  LOG.debug("Skipping major compaction of " + this
      + ", because the files are already major compacted");
  return false;
}
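
The window-index arithmetic above leans on Collections.binarySearch returning -(insertionPoint) - 1 when a timestamp is not itself a boundary, so Math.abs(result + 2) recovers insertionPoint - 1: the index of the closest lower boundary, i.e. the window containing the timestamp. A small self-contained sketch with invented boundary values (the real ones come from getCompactBoundariesForMajor(), whose first boundary is effectively negative infinity):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class WindowIndexSketch {
  // Maps a timestamp to the index of the tiering window that contains it,
  // using the same binarySearch trick as shouldPerformMajorCompaction().
  static int windowIndexOf(List<Long> boundaries, long timestamp) {
    int idx = Collections.binarySearch(boundaries, timestamp);
    // Exact hit: the timestamp is itself a lower boundary.
    // Miss: binarySearch returns -(insertionPoint) - 1, and abs(idx + 2)
    // recovers insertionPoint - 1, the index of the closest lower boundary.
    return (idx < 0) ? Math.abs(idx + 2) : idx;
  }

  public static void main(String[] args) {
    // Invented window boundaries, sorted ascending.
    List<Long> boundaries = Arrays.asList(Long.MIN_VALUE, 1000L, 2000L, 3000L);

    System.out.println(windowIndexOf(boundaries, 1500L)); // 1 -> window [1000, 2000)
    System.out.println(windowIndexOf(boundaries, 2000L)); // 2 -> exact boundary hit
    System.out.println(windowIndexOf(boundaries, 9999L)); // 3 -> last window
    // A file whose min and max timestamps land in different windows crosses a
    // boundary, which is one of the major-compaction triggers above.
  }
}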
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class BalancerClusterState, the method getLowestLocalityRegionOnServer:
int getLowestLocalityRegionOnServer(int serverIndex) {
  if (regionFinder != null) {
    float lowestLocality = 1.0f;
    int lowestLocalityRegionIndex = -1;
    if (regionsPerServer[serverIndex].length == 0) {
      // No regions on that region server
      return -1;
    }
    for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
      int regionIndex = regionsPerServer[serverIndex][j];
      HDFSBlocksDistribution distribution =
          regionFinder.getBlockDistribution(regions[regionIndex]);
      float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
      // skip empty region
      if (distribution.getUniqueBlocksTotalWeight() == 0) {
        continue;
      }
      if (locality < lowestLocality) {
        lowestLocality = locality;
        lowestLocalityRegionIndex = j;
      }
    }
    if (lowestLocalityRegionIndex == -1) {
      return -1;
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace("Lowest locality region is "
          + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]].getRegionNameAsString()
          + " with locality " + lowestLocality + " and its region server contains "
          + regionsPerServer[serverIndex].length + " regions");
    }
    return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
  } else {
    return -1;
  }
}
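
Two details are worth noting: regions whose distribution carries no block weight are skipped, so freshly created empty regions are not reported as locality 0, and because lowestLocality starts at 1.0f with a strict less-than comparison, a server whose regions are all fully local returns -1. A minimal sketch of that selection over plain HDFSBlocksDistribution objects, with invented hosts and weights (hbase-common assumed on the classpath):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class LowestLocalitySketch {
  public static void main(String[] args) {
    String serverHost = "host-1";

    // Invented stand-ins for the per-region distributions the balancer sees.
    HDFSBlocksDistribution emptyRegion = new HDFSBlocksDistribution(); // no blocks yet
    HDFSBlocksDistribution localRegion = new HDFSBlocksDistribution();
    localRegion.addHostsAndBlockWeight(new String[] { "host-1" }, 100L);
    HDFSBlocksDistribution remoteRegion = new HDFSBlocksDistribution();
    remoteRegion.addHostsAndBlockWeight(new String[] { "host-2" }, 100L);

    List<HDFSBlocksDistribution> regions =
        Arrays.asList(emptyRegion, localRegion, remoteRegion);

    float lowestLocality = 1.0f;
    int lowestIndex = -1;
    for (int i = 0; i < regions.size(); i++) {
      HDFSBlocksDistribution dist = regions.get(i);
      // Skip regions with no HDFS blocks, as the balancer does; their locality
      // index would read as 0 without the region actually being misplaced.
      if (dist.getUniqueBlocksTotalWeight() == 0) {
        continue;
      }
      float locality = dist.getBlockLocalityIndex(serverHost);
      if (locality < lowestLocality) {
        lowestLocality = locality;
        lowestIndex = i;
      }
    }
    System.out.println("lowest-locality region index: " + lowestIndex
        + ", locality: " + lowestLocality); // index 2, locality 0.0
  }
}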