Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class DateTieredCompactionPolicy, method shouldPerformMajorCompaction.
public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
    throws IOException {
  long mcTime = getNextMajorCompactTime(filesToCompact);
  if (filesToCompact == null || mcTime == 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("filesToCompact: " + filesToCompact + " mcTime: " + mcTime);
    }
    return false;
  }
  // TODO: Use better method for determining stamp of last major (HBASE-2990)
  long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
  long now = EnvironmentEdgeManager.currentTime();
  if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
    if (LOG.isDebugEnabled()) {
LOG.debug("lowTimestamp: " + lowTimestamp + " lowTimestamp: " + lowTimestamp + " now: " + now + " mcTime: " + mcTime);
    }
    return false;
  }
  long cfTTL = this.storeConfigInfo.getStoreFileTtl();
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
  boolean[] filesInWindow = new boolean[boundaries.size()];
  for (StoreFile file : filesToCompact) {
    Long minTimestamp = file.getMinimumTimestamp();
    long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
    if (cfTTL != Long.MAX_VALUE && oldest >= cfTTL) {
      LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
      return true;
    }
    if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
      LOG.debug("Major compaction triggered on store " + this
          + ", because there are new files and time since last major compaction "
          + (now - lowTimestamp) + "ms");
      return true;
    }
    int lowerWindowIndex = Collections.binarySearch(boundaries,
        minTimestamp == null ? (Long) Long.MAX_VALUE : minTimestamp);
    int upperWindowIndex = Collections.binarySearch(boundaries,
        file.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : file.getMaximumTimestamp());
    // Handle boundary conditions and negative values of binarySearch
    lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
    upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;
    if (lowerWindowIndex != upperWindowIndex) {
      LOG.debug("Major compaction triggered on store " + this + "; because file "
          + file.getPath() + " has data with timestamps cross window boundaries");
      return true;
    } else if (filesInWindow[upperWindowIndex]) {
      LOG.debug("Major compaction triggered on store " + this
          + "; because there are more than one file in some windows");
      return true;
    } else {
      filesInWindow[upperWindowIndex] = true;
    }
    hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
  }
  float blockLocalityIndex = hdfsBlocksDistribution
      .getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
  if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
    LOG.debug("Major compaction triggered on store " + this
        + "; to make hdfs blocks local, current blockLocalityIndex is " + blockLocalityIndex
        + " (min " + comConf.getMinLocalityToForceCompact() + ")");
    return true;
  }
  LOG.debug("Skipping major compaction of " + this
      + ", because the files are already major compacted");
  return false;
}
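
A note on the window-index arithmetic above: Collections.binarySearch returns (-(insertionPoint) - 1) when the key is not in the list, so the (index < 0) ? Math.abs(index + 2) : index step maps a timestamp that falls between two boundaries onto the window that starts at the boundary just below it. A minimal standalone sketch (the boundary values are invented for illustration, not taken from getCompactBoundariesForMajor):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class WindowIndexDemo {
  // Mirrors the boundary handling in shouldPerformMajorCompaction above.
  static int windowIndex(List<Long> boundaries, long timestamp) {
    int idx = Collections.binarySearch(boundaries, timestamp);
    // For a miss, binarySearch returns -(insertionPoint) - 1, so for
    // insertion points >= 1, Math.abs(idx + 2) == insertionPoint - 1:
    // the window whose lower boundary is the nearest one below the timestamp.
    return (idx < 0) ? Math.abs(idx + 2) : idx;
  }

  public static void main(String[] args) {
    // Hypothetical hourly window boundaries in milliseconds, oldest first.
    List<Long> boundaries = Arrays.asList(0L, 3600000L, 7200000L, 10800000L);
    System.out.println(windowIndex(boundaries, 3600000L)); // exact boundary -> 1
    System.out.println(windowIndex(boundaries, 5000000L)); // between 1 and 2 -> 1
    System.out.println(windowIndex(boundaries, 9999999L)); // between 2 and 3 -> 2
  }
}

A file whose minimum and maximum timestamps land in different windows therefore yields lowerWindowIndex != upperWindowIndex and triggers a major compaction.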
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class FSUtils, method computeHDFSBlocksDistribution.
/**
 * Compute HDFS blocks distribution of a given file, or a portion of the file
 * @param fs file system
 * @param status file status of the file
 * @param start start position of the portion
 * @param length length of the portion
 * @return The HDFS blocks distribution
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs,
    FileStatus status, long start, long length) throws IOException {
  HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
  BlockLocation[] blockLocations = fs.getFileBlockLocations(status, start, length);
  for (BlockLocation bl : blockLocations) {
    String[] hosts = bl.getHosts();
    long len = bl.getLength();
    blocksDistribution.addHostsAndBlockWeight(hosts, len);
  }
  return blocksDistribution;
}
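
Conceptually, HDFSBlocksDistribution is a per-host accumulator of block bytes: each BlockLocation above contributes its length once to a unique-blocks total and once per hosting datanode. The class below is not the real implementation, only a simplified model of what addHostsAndBlockWeight, getWeight, getUniqueBlocksTotalWeight and getBlockLocalityIndex compute, to make the later snippets easier to follow:

import java.util.HashMap;
import java.util.Map;

// Simplified stand-in for HDFSBlocksDistribution; method names mirror the
// real API, but the implementation here is illustrative only.
public class SimpleBlocksDistribution {
  private final Map<String, Long> hostWeights = new HashMap<>();
  private long uniqueBlocksTotalWeight = 0;

  // One block: its length counts once toward the unique total and once per host.
  public void addHostsAndBlockWeight(String[] hosts, long weight) {
    uniqueBlocksTotalWeight += weight;
    for (String host : hosts) {
      hostWeights.merge(host, weight, Long::sum);
    }
  }

  public long getUniqueBlocksTotalWeight() {
    return uniqueBlocksTotalWeight;
  }

  public long getWeight(String host) {
    return hostWeights.getOrDefault(host, 0L);
  }

  // Fraction of the file's bytes that have a replica on the given host.
  public float getBlockLocalityIndex(String host) {
    return uniqueBlocksTotalWeight == 0 ? 0.0f
        : (float) getWeight(host) / uniqueBlocksTotalWeight;
  }

  public static void main(String[] args) {
    SimpleBlocksDistribution d = new SimpleBlocksDistribution();
    // Two 1024-byte blocks, each replicated on three hosts.
    d.addHostsAndBlockWeight(new String[] { "host1", "host2", "host3" }, 1024);
    d.addHostsAndBlockWeight(new String[] { "host2", "host3", "host4" }, 1024);
    System.out.println(d.getBlockLocalityIndex("host2")); // 1.0
    System.out.println(d.getBlockLocalityIndex("host4")); // 0.5
  }
}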
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class RegionLocationFinder, method refreshAndWait.
public void refreshAndWait(Collection<HRegionInfo> hris) {
  ArrayList<ListenableFuture<HDFSBlocksDistribution>> regionLocationFutures =
      new ArrayList<>(hris.size());
  for (HRegionInfo hregionInfo : hris) {
    regionLocationFutures.add(asyncGetBlockDistribution(hregionInfo));
  }
  int index = 0;
  for (HRegionInfo hregionInfo : hris) {
    ListenableFuture<HDFSBlocksDistribution> future = regionLocationFutures.get(index);
    try {
      cache.put(hregionInfo, future.get());
    } catch (InterruptedException ite) {
      Thread.currentThread().interrupt();
    } catch (ExecutionException ee) {
LOG.debug("ExecutionException during HDFSBlocksDistribution computation. for region = " + hregionInfo.getEncodedName(), ee);
    }
    index++;
  }
}
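
The loop above submits all per-region computations first and only waits afterward, so the HDFS calls can overlap instead of running one by one. A minimal sketch of that submit-then-collect pattern with Guava's ListenableFuture (the executor, the task body and the region names are invented stand-ins; the real asyncGetBlockDistribution is not reproduced here):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

public class SubmitThenCollectDemo {
  public static void main(String[] args) throws InterruptedException {
    ListeningExecutorService pool =
        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
    List<String> regions = Arrays.asList("region-a", "region-b", "region-c");

    // Phase 1: submit everything so the computations run concurrently.
    List<ListenableFuture<Integer>> futures = new ArrayList<>(regions.size());
    for (String region : regions) {
      Callable<Integer> task = () -> region.length(); // stand-in for the HDFS work
      futures.add(pool.submit(task));
    }

    // Phase 2: block on each result, tolerating individual failures.
    for (int i = 0; i < regions.size(); i++) {
      try {
        System.out.println(regions.get(i) + " -> " + futures.get(i).get());
      } catch (ExecutionException ee) {
        System.err.println("computation failed for " + regions.get(i) + ": " + ee.getCause());
      }
    }
    pool.shutdown();
  }
}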
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class TestFSUtils, method testcomputeHDFSBlocksDistribution.
@Test
public void testcomputeHDFSBlocksDistribution() throws Exception {
  HBaseTestingUtility htu = new HBaseTestingUtility();
  final int DEFAULT_BLOCK_SIZE = 1024;
  htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
  MiniDFSCluster cluster = null;
  Path testFile = null;
  try {
    // set up a cluster with 3 nodes
    String[] hosts = new String[] { "host1", "host2", "host3" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with two blocks
    testFile = new Path("/test1.txt");
    WriteDataToHDFS(fs, testFile, 2 * DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, the same as the number of
    // datanodes; the locality index for each host should be 100%,
    // or getWeight for each host should be the same as getUniqueBlocksWeights
    final long maxTime = System.currentTimeMillis() + 2000;
    boolean ok;
    do {
      ok = true;
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      long uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      for (String host : hosts) {
        long weight = blocksDistribution.getWeight(host);
        ok = (ok && uniqueBlocksTotalWeight == weight);
      }
    } while (!ok && System.currentTimeMillis() < maxTime);
    assertTrue(ok);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with three blocks
    testFile = new Path("/test2.txt");
    WriteDataToHDFS(fs, testFile, 3 * DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, we will have total of 9
    // replica of blocks; thus the host with the highest weight should have
    // weight == 3 * DEFAULT_BLOCK_SIZE
    final long maxTime = System.currentTimeMillis() + 2000;
    long weight;
    long uniqueBlocksTotalWeight;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      String tophost = blocksDistribution.getTopHosts().get(0);
      weight = blocksDistribution.getWeight(tophost);
      // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (uniqueBlocksTotalWeight != weight && System.currentTimeMillis() < maxTime);
    assertTrue(uniqueBlocksTotalWeight == weight);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with one block
    testFile = new Path("/test3.txt");
    WriteDataToHDFS(fs, testFile, DEFAULT_BLOCK_SIZE);
    // given the default replication factor is 3, we will have total of 3
    // replica of blocks; thus there is one host without weight
    final long maxTime = System.currentTimeMillis() + 2000;
    HDFSBlocksDistribution blocksDistribution;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (blocksDistribution.getTopHosts().size() != 3 && System.currentTimeMillis() < maxTime);
    assertEquals("Wrong number of hosts distributing blocks.", 3,
        blocksDistribution.getTopHosts().size());
  } finally {
    htu.shutdownMiniDFSCluster();
  }
}
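
All three cases above repeat the same poll-until-timeout loop, because block locations reach the NameNode asynchronously (HBASE-6175). A small refactoring sketch of that pattern as a reusable helper; the class and method names here are invented for illustration and are not part of the test:

import java.util.function.BooleanSupplier;

public final class WaitUtil {
  private WaitUtil() {
  }

  // Re-evaluates the condition until it holds or the timeout elapses,
  // mirroring the do/while loops in the test above.
  public static boolean waitFor(long timeoutMillis, long pollMillis, BooleanSupplier condition)
      throws InterruptedException {
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    while (true) {
      if (condition.getAsBoolean()) {
        return true;
      }
      if (System.currentTimeMillis() >= deadline) {
        return false;
      }
      Thread.sleep(pollMillis);
    }
  }
}

In the test, the supplied condition would recompute the HDFSBlocksDistribution and compare weights; since FSUtils.computeHDFSBlocksDistribution throws a checked IOException, the lambda would have to catch it (or the functional interface would need to declare it).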
Use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
In the class HRegion, method computeHDFSBlocksDistribution.
/**
 * This is a helper function to compute the HDFS block distribution on demand
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region's HRegionInfo
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    List<LocatedFileStatus> locatedFileStatusList =
        HRegionFileSystem.getStoreFilesLocatedStatus(regionFs, family.getNameAsString(), true);
    if (locatedFileStatusList == null) {
      continue;
    }
    for (LocatedFileStatus status : locatedFileStatusList) {
      Path p = status.getPath();
      if (StoreFileInfo.isReference(p) || HFileLink.isHFileLink(p)) {
        // Only construct a StoreFileInfo object if it's not a plain HFile,
        // to save object creation
        StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
        hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
      } else if (StoreFileInfo.isHFile(p)) {
        // If it's an HFile, just add it to the block distribution;
        // don't create more objects here, not even another HDFSBlocksDistribution
        FSUtils.addToHDFSBlocksDistribution(hdfsBlocksDistribution, status.getBlockLocations());
      } else {
        throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
      }
    }
  }
  return hdfsBlocksDistribution;
}
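
A usage sketch for this helper, assuming the Configuration, HTableDescriptor, HRegionInfo and table directory are already available from elsewhere; the class name and the localHostname parameter are invented for illustration. It just shows reading locality back out of the returned HDFSBlocksDistribution with the same accessors used in the other snippets:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegion;

public class RegionLocalityProbe {
  // Prints the per-host weights of a region's store files and returns the
  // locality index for one host.
  public static float localityFor(Configuration conf, HTableDescriptor tableDescriptor,
      HRegionInfo regionInfo, Path tablePath, String localHostname) throws IOException {
    HDFSBlocksDistribution distribution =
        HRegion.computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo, tablePath);
    for (String host : distribution.getTopHosts()) {
      System.out.println("host=" + host + " weight=" + distribution.getWeight(host));
    }
    // Fraction of the region's store-file bytes with a replica on localHostname.
    return distribution.getBlockLocalityIndex(localHostname);
  }
}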