use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
the class RegionLocationFinder method refreshAndWait.
public void refreshAndWait(Collection<HRegionInfo> hris) {
  ArrayList<ListenableFuture<HDFSBlocksDistribution>> regionLocationFutures = new ArrayList<>(hris.size());
  for (HRegionInfo hregionInfo : hris) {
    regionLocationFutures.add(asyncGetBlockDistribution(hregionInfo));
  }
  int index = 0;
  for (HRegionInfo hregionInfo : hris) {
    ListenableFuture<HDFSBlocksDistribution> future = regionLocationFutures.get(index);
    try {
      cache.put(hregionInfo, future.get());
    } catch (InterruptedException ite) {
      Thread.currentThread().interrupt();
    } catch (ExecutionException ee) {
      LOG.debug("ExecutionException during HDFSBlocksDistribution computation for region = " + hregionInfo.getEncodedName(), ee);
    }
    index++;
  }
}
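The values cached above are plain HDFSBlocksDistribution objects. As a point of reference, here is a minimal, self-contained sketch of the accessors they expose; the host names and block sizes are made up for illustration and are not part of the HBase code above.

import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class BlocksDistributionSketch {
  public static void main(String[] args) {
    HDFSBlocksDistribution dist = new HDFSBlocksDistribution();
    // One 128 MB block replicated on host1..host3, plus one block whose replicas skip host3.
    dist.addHostsAndBlockWeight(new String[] { "host1", "host2", "host3" }, 128L * 1024 * 1024);
    dist.addHostsAndBlockWeight(new String[] { "host1", "host2" }, 128L * 1024 * 1024);
    System.out.println("unique weight  = " + dist.getUniqueBlocksTotalWeight());   // sum over unique blocks
    System.out.println("host1 weight   = " + dist.getWeight("host1"));             // bytes with a replica on host1
    System.out.println("host3 locality = " + dist.getBlockLocalityIndex("host3")); // weight / unique weight
    System.out.println("top hosts      = " + dist.getTopHosts());                  // ordered by weight, highest first
  }
}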
use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
the class TestFSUtils method testcomputeHDFSBlocksDistribution.
@Test
public void testcomputeHDFSBlocksDistribution() throws Exception {
  HBaseTestingUtility htu = new HBaseTestingUtility();
  final int DEFAULT_BLOCK_SIZE = 1024;
  htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
  MiniDFSCluster cluster = null;
  Path testFile = null;
  try {
    // set up a cluster with 3 nodes
    String[] hosts = new String[] { "host1", "host2", "host3" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with two blocks
    testFile = new Path("/test1.txt");
    WriteDataToHDFS(fs, testFile, 2 * DEFAULT_BLOCK_SIZE);
    // Given the default replication factor is 3, the same as the number of
    // datanodes, the locality index for each host should be 100%; that is,
    // getWeight for each host should equal getUniqueBlocksTotalWeight.
    final long maxTime = System.currentTimeMillis() + 2000;
    boolean ok;
    do {
      ok = true;
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      long uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      for (String host : hosts) {
        long weight = blocksDistribution.getWeight(host);
        ok = (ok && uniqueBlocksTotalWeight == weight);
      }
    } while (!ok && System.currentTimeMillis() < maxTime);
    assertTrue(ok);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with three blocks
    testFile = new Path("/test2.txt");
    WriteDataToHDFS(fs, testFile, 3 * DEFAULT_BLOCK_SIZE);
    // Given the default replication factor is 3, we will have a total of 9
    // block replicas; thus the host with the highest weight should have
    // weight == 3 * DEFAULT_BLOCK_SIZE
    final long maxTime = System.currentTimeMillis() + 2000;
    long weight;
    long uniqueBlocksTotalWeight;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      HDFSBlocksDistribution blocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
      String tophost = blocksDistribution.getTopHosts().get(0);
      weight = blocksDistribution.getWeight(tophost);
      // The NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (uniqueBlocksTotalWeight != weight && System.currentTimeMillis() < maxTime);
    assertTrue(uniqueBlocksTotalWeight == weight);
  } finally {
    htu.shutdownMiniDFSCluster();
  }
  try {
    // set up a cluster with 4 nodes
    String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
    cluster = htu.startMiniDFSCluster(hosts);
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create a file with one block
    testFile = new Path("/test3.txt");
    WriteDataToHDFS(fs, testFile, DEFAULT_BLOCK_SIZE);
    // Given the default replication factor is 3, we will have a total of 3
    // block replicas; thus one of the four hosts carries no weight.
    final long maxTime = System.currentTimeMillis() + 2000;
    HDFSBlocksDistribution blocksDistribution;
    do {
      FileStatus status = fs.getFileStatus(testFile);
      blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
      // The NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
    } while (blocksDistribution.getTopHosts().size() != 3 && System.currentTimeMillis() < maxTime);
    assertEquals("Wrong number of hosts distributing blocks.", 3, blocksDistribution.getTopHosts().size());
  } finally {
    htu.shutdownMiniDFSCluster();
  }
}
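The test above never asserts immediately: because DataNodes report block locations to the NameNode asynchronously (HBASE-6175), every check is wrapped in a poll-until-deadline loop. Below is a hedged, self-contained sketch of that pattern as a reusable helper; the class and method names are illustrative and not part of HBase.

import java.util.function.BooleanSupplier;

final class WaitUtil {
  private WaitUtil() {}

  /** Re-evaluates the condition until it holds or the deadline passes; returns the last result. */
  static boolean waitFor(long timeoutMs, long pollMs, BooleanSupplier condition)
      throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    boolean ok = condition.getAsBoolean();
    while (!ok && System.currentTimeMillis() < deadline) {
      Thread.sleep(pollMs);
      ok = condition.getAsBoolean();
    }
    return ok;
  }
}

The loops in the test differ only in that they busy-poll without sleeping and recompute the block distribution on each pass.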
use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
the class HRegion method computeHDFSBlocksDistribution.
/**
 * This is a helper function to compute the HDFS block distribution on demand
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region for which to compute the distribution
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    List<LocatedFileStatus> locatedFileStatusList =
        HRegionFileSystem.getStoreFilesLocatedStatus(regionFs, family.getNameAsString(), true);
    if (locatedFileStatusList == null) {
      continue;
    }
    for (LocatedFileStatus status : locatedFileStatusList) {
      Path p = status.getPath();
      if (StoreFileInfo.isReference(p) || HFileLink.isHFileLink(p)) {
        // Only construct a StoreFileInfo object when the path is a reference or a link,
        // to save object creation
        StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
        hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
      } else if (StoreFileInfo.isHFile(p)) {
        // If it's an HFile, just add its block locations to the distribution;
        // do not create more objects here, not even another HDFSBlocksDistribution
        FSUtils.addToHDFSBlocksDistribution(hdfsBlocksDistribution, status.getBlockLocations());
      } else {
        throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
      }
    }
  }
  return hdfsBlocksDistribution;
}
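A hedged usage sketch of the helper above, using the same HTableDescriptor/HRegionInfo-era client API as the snippet: the table name "t1" and the host "rs1.example.com" are assumptions for illustration, not values from the HBase source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.FSUtils;

public class RegionLocalitySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("t1"); // assumed table name
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin()) {
      HTableDescriptor htd = admin.getTableDescriptor(tableName);
      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
      for (HRegionInfo hri : admin.getTableRegions(tableName)) {
        HDFSBlocksDistribution dist =
            HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir);
        // Locality of the region's store files on an assumed region server host.
        System.out.println(hri.getEncodedName() + " locality on rs1 = "
            + dist.getBlockLocalityIndex("rs1.example.com"));
      }
    }
  }
}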
use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
the class TestRegionLocationFinder method testInternalGetTopBlockLocation.
@Test
public void testInternalGetTopBlockLocation() throws Exception {
  for (int i = 0; i < ServerNum; i++) {
    HRegionServer server = cluster.getRegionServer(i);
    for (Region region : server.getOnlineRegions(tableName)) {
      // Get the region's HDFS block distribution both from the region itself and from
      // RegionLocationFinder; they should yield the same result.
      HDFSBlocksDistribution blocksDistribution1 = region.getHDFSBlocksDistribution();
      HDFSBlocksDistribution blocksDistribution2 = finder.getBlockDistribution(region.getRegionInfo());
      assertEquals(blocksDistribution1.getUniqueBlocksTotalWeight(),
          blocksDistribution2.getUniqueBlocksTotalWeight());
      if (blocksDistribution1.getUniqueBlocksTotalWeight() != 0) {
        assertEquals(blocksDistribution1.getTopHosts().get(0), blocksDistribution2.getTopHosts().get(0));
      }
    }
  }
}
use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.
the class TestHFileOutputFormat2 method doIncrementalLoadTest.
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
    boolean putSortReducer, List<String> tableStr) throws Exception {
  util = new HBaseTestingUtil();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should raise the host count above the HDFS replica count once MiniHBaseCluster
    // supports an explicit hostnames parameter, just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  StartTestingClusterOption option =
      StartTestingClusterOption.builder().numRegionServers(hostCount).dataNodeHosts(hostnames).build();
  util.startMiniCluster(option);
  Map<String, Table> allTables = new HashMap<>(tableStr.size());
  List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
  boolean writeMultipleTables = tableStr.size() > 1;
  for (String tableStrSingle : tableStr) {
    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    TableName tableName = TableName.valueOf(tableStrSingle);
    Table table = util.createTable(tableName, FAMILIES, splitKeys);
    RegionLocator r = util.getConnection().getRegionLocator(tableName);
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
    allTables.put(tableStrSingle, table);
    tableInfo.add(new HFileOutputFormat2.TableInfo(table.getDescriptor(), r));
  }
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  // Generate the bulk load files
  runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
  if (writeMultipleTables) {
    testDir = new Path(testDir, "default");
  }
  for (Table tableSingle : allTables.values()) {
    // This doesn't write into the table, it just creates files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
  }
  int numTableDirs = 0;
  FileStatus[] fss = testDir.getFileSystem(conf).listStatus(testDir);
  for (FileStatus tf : fss) {
    Path tablePath = testDir;
    if (writeMultipleTables) {
      if (allTables.containsKey(tf.getPath().getName())) {
        ++numTableDirs;
        tablePath = tf.getPath();
      } else {
        continue;
      }
    }
    // Make sure that a directory was created for every CF
    int dir = 0;
    fss = tablePath.getFileSystem(conf).listStatus(tablePath);
    for (FileStatus f : fss) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);
  }
  if (writeMultipleTables) {
    assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
  }
  Admin admin = util.getConnection().getAdmin();
  try {
    // handle the split case
    if (shouldChangeRegions) {
      Table chosenTable = allTables.values().iterator().next();
      // Choose a semi-random table if multiple tables are available
      LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
      admin.disableTable(chosenTable.getName());
      util.waitUntilNoRegionsInTransition();
      util.deleteTable(chosenTable.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
      while (util.getConnection().getRegionLocator(chosenTable.getName())
          .getAllRegionLocations().size() != 15 || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }
    // Perform the actual load
    for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
      Path tableDir = testDir;
      String tableNameStr = singleTableInfo.getTableDescriptor().getTableName().getNameAsString();
      LOG.info("Running BulkLoadHFiles on table " + tableNameStr);
      if (writeMultipleTables) {
        tableDir = new Path(testDir, tableNameStr);
      }
      Table currentTable = allTables.get(tableNameStr);
      TableName currentTableName = currentTable.getName();
      BulkLoadHFiles.create(conf).bulkLoad(currentTableName, tableDir);
      // Ensure data shows up
      int expectedRows = 0;
      if (putSortReducer) {
        // no rows should be extracted
        assertEquals("BulkLoadHFiles should put expected data in table", expectedRows,
            util.countRows(currentTable));
      } else {
        expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals("BulkLoadHFiles should put expected data in table", expectedRows,
            util.countRows(currentTable));
        Scan scan = new Scan();
        ResultScanner results = currentTable.getScanner(scan);
        for (Result res : results) {
          assertEquals(FAMILIES.length, res.rawCells().length);
          Cell first = res.rawCells()[0];
          for (Cell kv : res.rawCells()) {
            assertTrue(CellUtil.matchingRows(first, kv));
            assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
          }
        }
        results.close();
      }
      String tableDigestBefore = util.checksumRows(currentTable);
      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }
      // Cause regions to reopen
      admin.disableTable(currentTableName);
      while (!admin.isTableDisabled(currentTableName)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(currentTableName);
      util.waitTableAvailable(currentTableName);
      assertEquals("Data should remain after reopening of regions", tableDigestBefore,
          util.checksumRows(currentTable));
    }
  } finally {
    for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
      tableInfoSingle.getRegionLocator().close();
    }
    for (Entry<String, Table> singleTable : allTables.entrySet()) {
      singleTable.getValue().close();
      util.deleteTable(singleTable.getValue().getName());
    }
    testDir.getFileSystem(conf).delete(testDir, true);
    util.shutdownMiniCluster();
  }
}
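The locality check in the load loop above is a two-step recipe: aggregate the per-region distributions with add(), then query getBlockLocalityIndex() for each host. A hedged sketch of that recipe as a small helper; the class and method names are illustrative, not part of HBase.

import java.util.List;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.regionserver.HRegion;

final class LocalityCheck {
  private LocalityCheck() {}

  /** Aggregates per-region distributions and returns the locality index (0.0 to 1.0) for a host. */
  static float clusterLocalityFor(List<HRegion> regions, String hostname) {
    HDFSBlocksDistribution aggregate = new HDFSBlocksDistribution();
    for (HRegion region : regions) {
      aggregate.add(region.getHDFSBlocksDistribution());
    }
    return aggregate.getBlockLocalityIndex(hostname);
  }
}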