Example 1 with LoadQueueItem

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem in project hbase by apache.

The runTest method of the TestLoadIncrementalHFiles class:

private void runTest(String testName, HTableDescriptor htd, BloomType bloomType, boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap, boolean copyFiles) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    int hfileIdx = 0;
    Map<byte[], List<Path>> map = null;
    List<Path> list = null;
    if (useMap || copyFiles) {
        list = new ArrayList<>();
    }
    if (useMap) {
        map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        map.put(FAMILY, list);
    }
    Path last = null;
    for (byte[][] range : hfileRanges) {
        byte[] from = range[0];
        byte[] to = range[1];
        Path path = new Path(familyDir, "hfile_" + hfileIdx++);
        HFileTestUtil.createHFile(util.getConfiguration(), fs, path, FAMILY, QUALIFIER, from, to, 1000);
        if (useMap) {
            last = path;
            list.add(path);
        }
    }
    int expectedRows = hfileIdx * 1000;
    if (preCreateTable || map != null) {
        util.getAdmin().createTable(htd, tableSplitKeys);
    }
    final TableName tableName = htd.getTableName();
    Configuration conf = util.getConfiguration();
    if (copyFiles) {
        conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
    }
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName.toString() };
    if (useMap) {
        // Remove the last HFile so the load is expected to skip it.
        fs.delete(last, false);
        Map<LoadQueueItem, ByteBuffer> loaded = loader.run(null, map, tableName);
        expectedRows -= 1000;
        for (LoadQueueItem item : loaded.keySet()) {
            if (item.hfilePath.getName().equals(last.getName())) {
                fail(last + " should be missing");
            }
        }
    } else {
        loader.run(args);
    }
    if (copyFiles) {
        for (Path p : list) {
            assertTrue(fs.exists(p));
        }
    }
    Table table = util.getConnection().getTable(tableName);
    try {
        assertEquals(expectedRows, util.countRows(table));
    } finally {
        table.close();
    }
    // verify staging folder has been cleaned up
    Path stagingBasePath = new Path(FSUtils.getRootDir(util.getConfiguration()), HConstants.BULKLOAD_STAGING_DIR_NAME);
    if (fs.exists(stagingBasePath)) {
        FileStatus[] files = fs.listStatus(stagingBasePath);
        for (FileStatus file : files) {
            assertTrue("Folder=" + file.getPath() + " is not cleaned up.", file.getPath().getName() != "DONOTERASE");
        }
    }
    util.deleteTable(tableName);
}
Also used: Path (org.apache.hadoop.fs.Path), Table (org.apache.hadoop.hbase.client.Table), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ByteBuffer (java.nio.ByteBuffer), TableName (org.apache.hadoop.hbase.TableName), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), List (java.util.List), LoadQueueItem (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem)
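
For reference, the same loader can be driven directly outside the test harness. The following is a minimal sketch, not taken from the HBase sources: the directory /bulk (containing one subdirectory per column family full of HFiles) and the table name my_table are hypothetical placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadSketch {
    public static void main(String[] args) throws Exception {
        // Standard client configuration; picks up hbase-site.xml from the classpath.
        Configuration conf = HBaseConfiguration.create();
        // Same switch the test toggles for its copyFiles case: copy HFiles into place
        // instead of moving them, leaving the originals where they are.
        conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Same command-line form the test builds in args: <hfile dir> <table name>.
        loader.run(new String[] { "/bulk", "my_table" });
    }
}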

Example 2 with LoadQueueItem

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem in project hbase by apache.

The replicate method of the HFileReplicator class:

public Void replicate() throws IOException {
    // Copy all the hfiles to the local file system
    Map<String, Path> tableStagingDirsMap = copyHFilesToStagingDir();
    int maxRetries = conf.getInt(HConstants.BULKLOAD_MAX_RETRIES_NUMBER, 10);
    for (Entry<String, Path> tableStagingDir : tableStagingDirsMap.entrySet()) {
        String tableNameString = tableStagingDir.getKey();
        Path stagingDir = tableStagingDir.getValue();
        LoadIncrementalHFiles loadHFiles = null;
        try {
            loadHFiles = new LoadIncrementalHFiles(conf);
        } catch (Exception e) {
            LOG.error("Failed to initialize LoadIncrementalHFiles for replicating bulk loaded" + " data.", e);
            throw new IOException(e);
        }
        Configuration newConf = HBaseConfiguration.create(conf);
        newConf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
        loadHFiles.setConf(newConf);
        TableName tableName = TableName.valueOf(tableNameString);
        Table table = this.connection.getTable(tableName);
        // Prepare the queue of HFiles to be loaded (replicated)
        Deque<LoadQueueItem> queue = new LinkedList<>();
        loadHFiles.prepareHFileQueue(stagingDir, table, queue, false);
        if (queue.isEmpty()) {
            LOG.warn("Replication process did not find any files to replicate in directory " + stagingDir.toUri());
            return null;
        }
        try (RegionLocator locator = connection.getRegionLocator(tableName)) {
            fsDelegationToken.acquireDelegationToken(sinkFs);
            // Set the staging directory which will be used by LoadIncrementalHFiles for loading the
            // data
            loadHFiles.setBulkToken(stagingDir.toString());
            doBulkLoad(loadHFiles, table, queue, locator, maxRetries);
        } finally {
            cleanup(stagingDir.toString(), table);
        }
    }
    return null;
}
Also used: Path (org.apache.hadoop.fs.Path), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), Table (org.apache.hadoop.hbase.client.Table), Configuration (org.apache.hadoop.conf.Configuration), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), FileNotFoundException (java.io.FileNotFoundException), ExecutionException (java.util.concurrent.ExecutionException), LinkedList (java.util.LinkedList), TableName (org.apache.hadoop.hbase.TableName), LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles), LoadQueueItem (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem)
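
doBulkLoad above is a private helper of HFileReplicator whose body is not shown here. As a rough sketch of what such a retry loop can look like when built only on the public loadHFileQueue API of LoadIncrementalHFiles (the method name bulkLoadWithRetries and its exact error handling are assumptions, not the HBase implementation):

import java.io.IOException;
import java.util.Deque;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem;
import org.apache.hadoop.hbase.util.Pair;

public class BulkLoadRetrySketch {
    // Keep loading until the queue drains or the retry budget is exhausted. Items that
    // fail transiently (e.g. an HFile had to be re-split after a region split) are put
    // back on the queue by loadHFileQueue, so each pass retries only the leftovers.
    static void bulkLoadWithRetries(LoadIncrementalHFiles loader, Connection conn,
            Table table, RegionLocator locator, Deque<LoadQueueItem> queue,
            int maxRetries) throws IOException {
        int attempt = 0;
        while (!queue.isEmpty()) {
            if (attempt >= maxRetries) {
                throw new IOException("Bulk load still incomplete after " + attempt + " attempts");
            }
            // Region boundaries may have moved since the last attempt; refresh them.
            Pair<byte[][], byte[][]> startEndKeys = locator.getStartEndKeys();
            loader.loadHFileQueue(table, conn, queue, startEndKeys);
            attempt++;
        }
    }
}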

Aggregations

Configuration (org.apache.hadoop.conf.Configuration) 2
Path (org.apache.hadoop.fs.Path) 2
TableName (org.apache.hadoop.hbase.TableName) 2
Table (org.apache.hadoop.hbase.client.Table) 2
LoadQueueItem (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem) 2
FileNotFoundException (java.io.FileNotFoundException) 1
IOException (java.io.IOException) 1
InterruptedIOException (java.io.InterruptedIOException) 1
ByteBuffer (java.nio.ByteBuffer) 1
ArrayList (java.util.ArrayList) 1
LinkedList (java.util.LinkedList) 1
List (java.util.List) 1
ExecutionException (java.util.concurrent.ExecutionException) 1
FileStatus (org.apache.hadoop.fs.FileStatus) 1
FileSystem (org.apache.hadoop.fs.FileSystem) 1
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 1
RegionLocator (org.apache.hadoop.hbase.client.RegionLocator) 1
LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) 1