Example 16 with HDFSBlocksDistribution

use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.

the class RegionLocationFinder method refreshAndWait.

public void refreshAndWait(Collection<HRegionInfo> hris) {
    ArrayList<ListenableFuture<HDFSBlocksDistribution>> regionLocationFutures = new ArrayList<>(hris.size());
    for (HRegionInfo hregionInfo : hris) {
        regionLocationFutures.add(asyncGetBlockDistribution(hregionInfo));
    }
    int index = 0;
    for (HRegionInfo hregionInfo : hris) {
        ListenableFuture<HDFSBlocksDistribution> future = regionLocationFutures.get(index);
        try {
            cache.put(hregionInfo, future.get());
        } catch (InterruptedException ite) {
            Thread.currentThread().interrupt();
        } catch (ExecutionException ee) {
            LOG.debug("ExecutionException during HDFSBlocksDistribution computation. for region = " + hregionInfo.getEncodedName(), ee);
        }
        index++;
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ArrayList(java.util.ArrayList) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ExecutionException(java.util.concurrent.ExecutionException) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)
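
To illustrate what the cached distributions can answer once refreshAndWait has populated the cache, here is a minimal, hypothetical consumer. It is not part of RegionLocationFinder; the class name and the Map parameter are assumptions for the sketch, but the HDFSBlocksDistribution accessors (getTopHosts, getBlockLocalityIndex, getUniqueBlocksTotalWeight) are the same ones used throughout these examples.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;

public class BlockDistributionReport {

    // Hypothetical helper: print the top host and its locality for each cached region.
    public static void report(Map<HRegionInfo, HDFSBlocksDistribution> cache) {
        for (Map.Entry<HRegionInfo, HDFSBlocksDistribution> entry : cache.entrySet()) {
            HDFSBlocksDistribution dist = entry.getValue();
            List<String> topHosts = dist.getTopHosts();
            String topHost = topHosts.isEmpty() ? "<none>" : topHosts.get(0);
            // Fraction (0.0f..1.0f) of the region's block weight stored on topHost.
            float locality = topHosts.isEmpty() ? 0.0f : dist.getBlockLocalityIndex(topHost);
            System.out.println(entry.getKey().getEncodedName() + ": topHost=" + topHost
                + " locality=" + locality + " uniqueWeight=" + dist.getUniqueBlocksTotalWeight());
        }
    }
}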

Example 17 with HDFSBlocksDistribution

use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.

the class TestFSUtils method testcomputeHDFSBlocksDistribution.

@Test
public void testcomputeHDFSBlocksDistribution() throws Exception {
    HBaseTestingUtility htu = new HBaseTestingUtility();
    final int DEFAULT_BLOCK_SIZE = 1024;
    htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
    MiniDFSCluster cluster = null;
    Path testFile = null;
    try {
        // set up a cluster with 3 nodes
        String[] hosts = new String[] { "host1", "host2", "host3" };
        cluster = htu.startMiniDFSCluster(hosts);
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        // create a file with two blocks
        testFile = new Path("/test1.txt");
        WriteDataToHDFS(fs, testFile, 2 * DEFAULT_BLOCK_SIZE);
        // given the default replication factor is 3, the same as the number of
        // datanodes, the locality index for each host should be 100%,
        // i.e. getWeight for each host should equal getUniqueBlocksTotalWeight
        final long maxTime = System.currentTimeMillis() + 2000;
        boolean ok;
        do {
            ok = true;
            FileStatus status = fs.getFileStatus(testFile);
            HDFSBlocksDistribution blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
            long uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
            for (String host : hosts) {
                long weight = blocksDistribution.getWeight(host);
                ok = (ok && uniqueBlocksTotalWeight == weight);
            }
        } while (!ok && System.currentTimeMillis() < maxTime);
        assertTrue(ok);
    } finally {
        htu.shutdownMiniDFSCluster();
    }
    try {
        // set up a cluster with 4 nodes
        String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
        cluster = htu.startMiniDFSCluster(hosts);
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        // create a file with three blocks
        testFile = new Path("/test2.txt");
        WriteDataToHDFS(fs, testFile, 3 * DEFAULT_BLOCK_SIZE);
        // given the default replication factor is 3, we will have total of 9
        // replica of blocks; thus the host with the highest weight should have
        // weight == 3 * DEFAULT_BLOCK_SIZE
        final long maxTime = System.currentTimeMillis() + 2000;
        long weight;
        long uniqueBlocksTotalWeight;
        do {
            FileStatus status = fs.getFileStatus(testFile);
            HDFSBlocksDistribution blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
            uniqueBlocksTotalWeight = blocksDistribution.getUniqueBlocksTotalWeight();
            String tophost = blocksDistribution.getTopHosts().get(0);
            weight = blocksDistribution.getWeight(tophost);
        // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
        } while (uniqueBlocksTotalWeight != weight && System.currentTimeMillis() < maxTime);
        assertTrue(uniqueBlocksTotalWeight == weight);
    } finally {
        htu.shutdownMiniDFSCluster();
    }
    try {
        // set up a cluster with 4 nodes
        String[] hosts = new String[] { "host1", "host2", "host3", "host4" };
        cluster = htu.startMiniDFSCluster(hosts);
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        // create a file with one block
        testFile = new Path("/test3.txt");
        WriteDataToHDFS(fs, testFile, DEFAULT_BLOCK_SIZE);
        // given the default replication factor is 3, we will have total of 3
        // replica of blocks; thus there is one host without weight
        final long maxTime = System.currentTimeMillis() + 2000;
        HDFSBlocksDistribution blocksDistribution;
        do {
            FileStatus status = fs.getFileStatus(testFile);
            blocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
        // NameNode is informed asynchronously, so we may have a delay. See HBASE-6175
        } while (blocksDistribution.getTopHosts().size() != 3 && System.currentTimeMillis() < maxTime);
        assertEquals("Wrong number of hosts distributing blocks.", 3, blocksDistribution.getTopHosts().size());
    } finally {
        htu.shutdownMiniDFSCluster();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileStatus(org.apache.hadoop.fs.FileStatus) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) FileSystem(org.apache.hadoop.fs.FileSystem) HFileSystem(org.apache.hadoop.hbase.fs.HFileSystem) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution) Test(org.junit.Test)
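
The weight arithmetic this test waits for can be reproduced without a MiniDFSCluster, since HDFSBlocksDistribution is plain bookkeeping. A minimal sketch, assuming only the public accessors seen in these examples plus the two-argument addHostsAndBlockWeight overload; it mirrors the two-block, three-host /test1.txt case above.

import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class BlockWeightDemo {

    public static void main(String[] args) {
        HDFSBlocksDistribution dist = new HDFSBlocksDistribution();
        String[] hosts = { "host1", "host2", "host3" };
        // Two 1024-byte blocks, each replicated on all three hosts.
        dist.addHostsAndBlockWeight(hosts, 1024);
        dist.addHostsAndBlockWeight(hosts, 1024);
        // Every host holds every unique block, so the per-host weight equals the
        // unique-blocks total weight and the locality index is 1.0.
        System.out.println(dist.getUniqueBlocksTotalWeight());   // 2048
        System.out.println(dist.getWeight("host1"));             // 2048
        System.out.println(dist.getBlockLocalityIndex("host1")); // 1.0
    }
}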

Example 18 with HDFSBlocksDistribution

use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.

the class HRegion method computeHDFSBlocksDistribution.

/**
   * Helper to compute the HDFS block distribution for a region on demand.
   * @param conf configuration
   * @param tableDescriptor HTableDescriptor of the table
   * @param regionInfo the region to compute the distribution for
   * @param tablePath the table directory
   * @return The HDFS blocks distribution for the given region.
   * @throws IOException if the region's store files cannot be listed or read
   */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf, final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath) throws IOException {
    HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
    FileSystem fs = tablePath.getFileSystem(conf);
    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
    for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
        List<LocatedFileStatus> locatedFileStatusList = HRegionFileSystem.getStoreFilesLocatedStatus(regionFs, family.getNameAsString(), true);
        if (locatedFileStatusList == null) {
            continue;
        }
        for (LocatedFileStatus status : locatedFileStatusList) {
            Path p = status.getPath();
            if (StoreFileInfo.isReference(p) || HFileLink.isHFileLink(p)) {
                // Only construct a StoreFileInfo object if it's not a plain HFile,
                // to save object creation
                StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
                hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
            } else if (StoreFileInfo.isHFile(p)) {
                // If it's an HFile, just add it to the block distribution;
                // don't create more objects here, not even another HDFSBlocksDistribution
                FSUtils.addToHDFSBlocksDistribution(hdfsBlocksDistribution, status.getBlockLocations());
            } else {
                throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
            }
        }
    }
    return hdfsBlocksDistribution;
}
Also used : Path(org.apache.hadoop.fs.Path) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution)
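
A hedged sketch of how a caller might use the static method above to check how local a region is to a given host. The surrounding class is hypothetical, and it assumes the same era of the API as the method itself (HTableDescriptor, HRegionInfo, and FSUtils.getRootDir/getTableDir for locating the table directory).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.FSUtils;

public class RegionLocalityCheck {

    // Hypothetical caller: compute the region's block distribution on demand
    // and return the locality index for the candidate host.
    public static float localityFor(HTableDescriptor tableDescriptor, HRegionInfo regionInfo,
            String host) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Table directory under the HBase root dir.
        Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), regionInfo.getTable());
        HDFSBlocksDistribution dist =
            HRegion.computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo, tableDir);
        return dist.getBlockLocalityIndex(host);
    }
}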

Example 19 with HDFSBlocksDistribution

use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.

the class TestRegionLocationFinder method testInternalGetTopBlockLocation.

@Test
public void testInternalGetTopBlockLocation() throws Exception {
    for (int i = 0; i < ServerNum; i++) {
        HRegionServer server = cluster.getRegionServer(i);
        for (Region region : server.getOnlineRegions(tableName)) {
            // get the region's HDFS block distribution from the region itself and from
            // RegionLocationFinder; both should produce the same result
            HDFSBlocksDistribution blocksDistribution1 = region.getHDFSBlocksDistribution();
            HDFSBlocksDistribution blocksDistribution2 = finder.getBlockDistribution(region.getRegionInfo());
            assertEquals(blocksDistribution1.getUniqueBlocksTotalWeight(), blocksDistribution2.getUniqueBlocksTotalWeight());
            if (blocksDistribution1.getUniqueBlocksTotalWeight() != 0) {
                assertEquals(blocksDistribution1.getTopHosts().get(0), blocksDistribution2.getTopHosts().get(0));
            }
        }
    }
}
Also used : Region(org.apache.hadoop.hbase.regionserver.Region) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Test(org.junit.Test)
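
The balancer consumes these distributions to prefer hosts that already hold a region's blocks. A small illustrative helper (not HBase code) that ranks candidate hosts by locality index, assuming only the HDFSBlocksDistribution accessors shown above.

import java.util.List;

import org.apache.hadoop.hbase.HDFSBlocksDistribution;

public class LocalityRanker {

    // Hypothetical helper: return the candidate host with the highest locality
    // index for the given distribution, or null if the list is empty.
    public static String bestHost(HDFSBlocksDistribution dist, List<String> candidateHosts) {
        String best = null;
        float bestLocality = -1.0f;
        for (String host : candidateHosts) {
            float locality = dist.getBlockLocalityIndex(host);
            if (locality > bestLocality) {
                bestLocality = locality;
                best = host;
            }
        }
        return best;
    }
}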

Example 20 with HDFSBlocksDistribution

use of org.apache.hadoop.hbase.HDFSBlocksDistribution in project hbase by apache.

the class TestHFileOutputFormat2 method doIncrementalLoadTest.

private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality, boolean putSortReducer, List<String> tableStr) throws Exception {
    util = new HBaseTestingUtil();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
        // Raise the host count above the HDFS replica count once MiniHBaseCluster
        // supports an explicit hostnames parameter, just like MiniDFSCluster does.
        hostCount = 3;
        regionNum = 20;
    }
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
        hostnames[i] = "datanode_" + i;
    }
    StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(hostCount).dataNodeHosts(hostnames).build();
    util.startMiniCluster(option);
    Map<String, Table> allTables = new HashMap<>(tableStr.size());
    List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
    boolean writeMultipleTables = tableStr.size() > 1;
    for (String tableStrSingle : tableStr) {
        byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
        TableName tableName = TableName.valueOf(tableStrSingle);
        Table table = util.createTable(tableName, FAMILIES, splitKeys);
        RegionLocator r = util.getConnection().getRegionLocator(tableName);
        assertEquals("Should start with empty table", 0, util.countRows(table));
        int numRegions = r.getStartKeys().length;
        assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
        allTables.put(tableStrSingle, table);
        tableInfo.add(new HFileOutputFormat2.TableInfo(table.getDescriptor(), r));
    }
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    // Generate the bulk load files
    runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
    if (writeMultipleTables) {
        testDir = new Path(testDir, "default");
    }
    for (Table tableSingle : allTables.values()) {
        // This doesn't write into the table, just makes files
        assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
    }
    int numTableDirs = 0;
    FileStatus[] fss = testDir.getFileSystem(conf).listStatus(testDir);
    for (FileStatus tf : fss) {
        Path tablePath = testDir;
        if (writeMultipleTables) {
            if (allTables.containsKey(tf.getPath().getName())) {
                ++numTableDirs;
                tablePath = tf.getPath();
            } else {
                continue;
            }
        }
        // Make sure that a directory was created for every CF
        int dir = 0;
        fss = tablePath.getFileSystem(conf).listStatus(tablePath);
        for (FileStatus f : fss) {
            for (byte[] family : FAMILIES) {
                if (Bytes.toString(family).equals(f.getPath().getName())) {
                    ++dir;
                }
            }
        }
        assertEquals("Column family not found in FS.", FAMILIES.length, dir);
    }
    if (writeMultipleTables) {
        assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
    }
    Admin admin = util.getConnection().getAdmin();
    try {
        // handle the split case
        if (shouldChangeRegions) {
            Table chosenTable = allTables.values().iterator().next();
            // Choose a semi-random table if multiple tables are available
            LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
            admin.disableTable(chosenTable.getName());
            util.waitUntilNoRegionsInTransition();
            util.deleteTable(chosenTable.getName());
            byte[][] newSplitKeys = generateRandomSplitKeys(14);
            Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
            while (util.getConnection().getRegionLocator(chosenTable.getName()).getAllRegionLocations().size() != 15 || !admin.isTableAvailable(table.getName())) {
                Thread.sleep(200);
                LOG.info("Waiting for new region assignment to happen");
            }
        }
        // Perform the actual load
        for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
            Path tableDir = testDir;
            String tableNameStr = singleTableInfo.getTableDescriptor().getTableName().getNameAsString();
            LOG.info("Running BulkLoadHFiles on table" + tableNameStr);
            if (writeMultipleTables) {
                tableDir = new Path(testDir, tableNameStr);
            }
            Table currentTable = allTables.get(tableNameStr);
            TableName currentTableName = currentTable.getName();
            BulkLoadHFiles.create(conf).bulkLoad(currentTableName, tableDir);
            // Ensure data shows up
            int expectedRows = 0;
            if (putSortReducer) {
                // no rows should be extracted
                assertEquals("BulkLoadHFiles should put expected data in table", expectedRows, util.countRows(currentTable));
            } else {
                expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
                assertEquals("BulkLoadHFiles should put expected data in table", expectedRows, util.countRows(currentTable));
                Scan scan = new Scan();
                ResultScanner results = currentTable.getScanner(scan);
                for (Result res : results) {
                    assertEquals(FAMILIES.length, res.rawCells().length);
                    Cell first = res.rawCells()[0];
                    for (Cell kv : res.rawCells()) {
                        assertTrue(CellUtil.matchingRows(first, kv));
                        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
                    }
                }
                results.close();
            }
            String tableDigestBefore = util.checksumRows(currentTable);
            // Check region locality
            HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
            for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
                hbd.add(region.getHDFSBlocksDistribution());
            }
            for (String hostname : hostnames) {
                float locality = hbd.getBlockLocalityIndex(hostname);
                LOG.info("locality of [" + hostname + "]: " + locality);
                assertEquals(100, (int) (locality * 100));
            }
            // Cause regions to reopen
            admin.disableTable(currentTableName);
            while (!admin.isTableDisabled(currentTableName)) {
                Thread.sleep(200);
                LOG.info("Waiting for table to disable");
            }
            admin.enableTable(currentTableName);
            util.waitTableAvailable(currentTableName);
            assertEquals("Data should remain after reopening of regions", tableDigestBefore, util.checksumRows(currentTable));
        }
    } finally {
        for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
            tableInfoSingle.getRegionLocator().close();
        }
        for (Entry<String, Table> singleTable : allTables.entrySet()) {
            singleTable.getValue().close();
            util.deleteTable(singleTable.getValue().getName());
        }
        testDir.getFileSystem(conf).delete(testDir, true);
        util.shutdownMiniCluster();
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Result(org.apache.hadoop.hbase.client.Result) Cell(org.apache.hadoop.hbase.Cell) Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) Admin(org.apache.hadoop.hbase.client.Admin) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution) TableName(org.apache.hadoop.hbase.TableName) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) Scan(org.apache.hadoop.hbase.client.Scan) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption)
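
The locality check buried in the middle of the test (aggregating per-region distributions and asserting 100% locality per datanode) can be read in isolation. A minimal sketch of that step as a standalone helper; the class and method names are assumptions, while the HDFSBlocksDistribution.add, getBlockLocalityIndex, and HRegion.getHDFSBlocksDistribution calls are the ones used in the test itself.

import static org.junit.Assert.assertEquals;

import java.util.List;

import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.regionserver.HRegion;

public class LocalityAssertions {

    // Hypothetical helper: merge each region's distribution and require full
    // locality on every datanode hostname.
    public static void assertFullLocality(List<HRegion> regions, String[] hostnames) {
        HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
        for (HRegion region : regions) {
            hbd.add(region.getHDFSBlocksDistribution());
        }
        for (String hostname : hostnames) {
            float locality = hbd.getBlockLocalityIndex(hostname);
            assertEquals("Expected 100% locality on " + hostname, 100, (int) (locality * 100));
        }
    }
}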

Aggregations

HDFSBlocksDistribution (org.apache.hadoop.hbase.HDFSBlocksDistribution): 29 usages
Test (org.junit.Test): 11 usages
Path (org.apache.hadoop.fs.Path): 8 usages
ArrayList (java.util.ArrayList): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
HashMap (java.util.HashMap): 5 usages
ExecutionException (java.util.concurrent.ExecutionException): 5 usages
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5 usages
RegionInfo (org.apache.hadoop.hbase.client.RegionInfo): 5 usages
IOException (java.io.IOException): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 4 usages
InterruptedIOException (java.io.InterruptedIOException): 3 usages
Cell (org.apache.hadoop.hbase.Cell): 3 usages
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 3 usages
Result (org.apache.hadoop.hbase.client.Result): 3 usages
Scan (org.apache.hadoop.hbase.client.Scan): 3 usages
TimeoutIOException (org.apache.hadoop.hbase.exceptions.TimeoutIOException): 3 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2 usages