Example 11 with LoadIncrementalHFiles

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by Apache.

From the class HFileReplicator, method replicate().

public Void replicate() throws IOException {
    // Copy all the hfiles to the local file system
    Map<String, Path> tableStagingDirsMap = copyHFilesToStagingDir();
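    // Cap on retries when hfiles must be re-split because regions split mid-load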
    int maxRetries = conf.getInt(HConstants.BULKLOAD_MAX_RETRIES_NUMBER, 10);
    for (Entry<String, Path> tableStagingDir : tableStagingDirsMap.entrySet()) {
        String tableNameString = tableStagingDir.getKey();
        Path stagingDir = tableStagingDir.getValue();
        LoadIncrementalHFiles loadHFiles = null;
        try {
            loadHFiles = new LoadIncrementalHFiles(conf);
        } catch (Exception e) {
            LOG.error("Failed to initialize LoadIncrementalHFiles for replicating bulk loaded" + " data.", e);
            throw new IOException(e);
        }
        Configuration newConf = HBaseConfiguration.create(conf);
        newConf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
        loadHFiles.setConf(newConf);
        TableName tableName = TableName.valueOf(tableNameString);
        Table table = this.connection.getTable(tableName);
        // Prepare collection of queue of hfiles to be loaded(replicated)
        Deque<LoadQueueItem> queue = new LinkedList<>();
        loadHFiles.prepareHFileQueue(stagingDir, table, queue, false);
        if (queue.isEmpty()) {
            LOG.warn("Replication process did not find any files to replicate in directory " + stagingDir.toUri());
            return null;
        }
        try (RegionLocator locator = connection.getRegionLocator(tableName)) {
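            // Needed for secure bulk load: obtain an HDFS delegation token for the sink filesystem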
            fsDelegationToken.acquireDelegationToken(sinkFs);
            // Set the staging directory which will be used by LoadIncrementalHFiles for loading the data
            loadHFiles.setBulkToken(stagingDir.toString());
            doBulkLoad(loadHFiles, table, queue, locator, maxRetries);
        } finally {
            cleanup(stagingDir.toString(), table);
        }
    }
    return null;
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ExecutionException(java.util.concurrent.ExecutionException) LinkedList(java.util.LinkedList) TableName(org.apache.hadoop.hbase.TableName) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) LoadQueueItem(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem)
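
For orientation, the queue-based API used above (prepareHFileQueue, setBulkToken, loadHFileQueue) can also be exercised on its own. The sketch below is not from the HBase sources: the helper name loadStagedHFiles is hypothetical, and it assumes connection, table, and stagingDir already exist.

import java.util.ArrayDeque;
import java.util.Deque;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem;

// Hypothetical helper: queue-based bulk load of one staging directory into an existing table.
static void loadStagedHFiles(Connection connection, Table table, Path stagingDir) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Never auto-create the table, mirroring what replicate() does above
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    Deque<LoadQueueItem> queue = new ArrayDeque<>();
    // Walk stagingDir and queue every hfile found; 'false' skips per-file validation
    loader.prepareHFileQueue(stagingDir, table, queue, false);
    // Reuse the staging directory as the bulk token, as replicate() does
    loader.setBulkToken(stagingDir.toString());
    try (RegionLocator locator = connection.getRegionLocator(table.getName())) {
        loader.loadHFileQueue(table, connection, queue, locator.getStartEndKeys());
    }
}

Unlike the one-shot doBulkLoad(Path, ...) entry point, this path leaves the caller holding the queue between attempts, which is what HFileReplicator's private doBulkLoad helper relies on for its maxRetries loop.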

Example 12 with LoadIncrementalHFiles

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by Apache.

From the class TestScannerWithBulkload, method testBulkLoadWithParallelScan().

@Test
public void testBulkLoadWithParallelScan() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final long l = System.currentTimeMillis();
    final Admin admin = TEST_UTIL.getAdmin();
    createTable(admin, tableName);
    Scan scan = createScan();
    scan.setCaching(1);
    final Table table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadWithParallelScan/", "/temp/testBulkLoadWithParallelScan/col/file", false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    // Create a scanner and then do bulk load
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {

        public void run() {
            try {
                Put put1 = new Put(Bytes.toBytes("row5"));
                put1.add(new KeyValue(Bytes.toBytes("row5"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes.toBytes("version0")));
                table.put(put1);
                try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
                    bulkload.doBulkLoad(hfilePath, admin, table, locator);
                }
            } catch (TableNotFoundException e) {
                // Not expected: the table is created earlier in the test
            } catch (IOException e) {
                // Swallowed here; a failed load will surface as an assertion failure below
            } finally {
                // Count down even on failure so latch.await() cannot hang the test
                latch.countDown();
            }
        }
    }.start();
    latch.await();
    // By the time we do next() the bulk loaded files are also added to the kv scanner
    scanAfterBulkLoad(scanner, result, "version1");
    scanner.close();
    table.close();
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) CountDownLatch(java.util.concurrent.CountDownLatch) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) TableName(org.apache.hadoop.hbase.TableName) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)
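
The writeToHFile helper is not shown in this excerpt. As a hedged sketch of what such a helper plausibly does (names and paths are illustrative, not the test's actual code), the following writes one KeyValue into an hfile via the public HFile writer API. Note the layout: doBulkLoad expects one subdirectory per column family under the bulk directory, which is why the test writes to .../col/file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical stand-in for writeToHFile: one KeyValue written to <bulkDir>/col/file.
static Path writeSingleCellHFile(Configuration conf, long ts) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    Path bulkDir = new Path("/temp/example/");
    // One subdirectory per column family, holding the hfile itself
    Path hfile = new Path(new Path(bulkDir, "col"), "file");
    HFileContext context = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
        .withPath(fs, hfile)
        .withFileContext(context)
        .create();
    try {
        writer.append(new KeyValue(Bytes.toBytes("row5"), Bytes.toBytes("col"),
            Bytes.toBytes("q"), ts, Bytes.toBytes("version1")));
    } finally {
        writer.close();
    }
    // doBulkLoad takes the directory that contains the family subdirectories
    return bulkDir;
}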

Example 13 with LoadIncrementalHFiles

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by Apache.

From the class TestScannerWithBulkload, method testBulkLoadNativeHFile().

@Test
public void testBulkLoadNativeHFile() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    long l = System.currentTimeMillis();
    Admin admin = TEST_UTIL.getAdmin();
    createTable(admin, tableName);
    Scan scan = createScan();
    final Table table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadNativeHFile/", "/temp/testBulkLoadNativeHFile/col/file", true);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        bulkload.doBulkLoad(hfilePath, admin, table, locator);
    }
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    // We had 'version0', 'version1' for 'row1,col:q' in the table.
    // Bulk load added 'version2'; the scanner should be able to see 'version2'
    result = scanAfterBulkLoad(scanner, result, "version2");
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes.toBytes("version3")));
    table.put(put0);
    admin.flush(tableName);
    scanner = table.getScanner(scan);
    result = scanner.next();
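    // Every cell returned for row1 col:q must now carry the newest value, 'version3'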
    while (result != null) {
        List<Cell> cells = result.getColumnCells(Bytes.toBytes("col"), Bytes.toBytes("q"));
        for (Cell _c : cells) {
            if (Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength()).equals("row1")) {
                System.out.println(Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength()));
                System.out.println(Bytes.toString(_c.getQualifierArray(), _c.getQualifierOffset(), _c.getQualifierLength()));
                System.out.println(Bytes.toString(_c.getValueArray(), _c.getValueOffset(), _c.getValueLength()));
                Assert.assertEquals("version3", Bytes.toString(_c.getValueArray(), _c.getValueOffset(), _c.getValueLength()));
            }
        }
        result = scanner.next();
    }
    scanner.close();
    table.close();
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) Admin(org.apache.hadoop.hbase.client.Admin) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) TableName(org.apache.hadoop.hbase.TableName) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)

Example 14 with LoadIncrementalHFiles

Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by Apache.

From the class TestScannerWithBulkload, method testBulkLoad().

@Test
public void testBulkLoad() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    long l = System.currentTimeMillis();
    Admin admin = TEST_UTIL.getAdmin();
    createTable(admin, tableName);
    Scan scan = createScan();
    final Table table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoad/", "/temp/testBulkLoad/col/file", false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        bulkload.doBulkLoad(hfilePath, admin, table, locator);
    }
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    result = scanAfterBulkLoad(scanner, result, "version2");
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes.toBytes("version3")));
    table.put(put0);
    admin.flush(tableName);
    scanner = table.getScanner(scan);
    result = scanner.next();
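    // Every cell returned for row1 col:q must now carry the newest value, 'version3'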
    while (result != null) {
        List<Cell> cells = result.getColumnCells(Bytes.toBytes("col"), Bytes.toBytes("q"));
        for (Cell _c : cells) {
            if (Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength()).equals("row1")) {
                System.out.println(Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength()));
                System.out.println(Bytes.toString(_c.getQualifierArray(), _c.getQualifierOffset(), _c.getQualifierLength()));
                System.out.println(Bytes.toString(_c.getValueArray(), _c.getValueOffset(), _c.getValueLength()));
                Assert.assertEquals("version3", Bytes.toString(_c.getValueArray(), _c.getValueOffset(), _c.getValueLength()));
            }
        }
        result = scanner.next();
    }
    scanner.close();
    table.close();
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) Admin(org.apache.hadoop.hbase.client.Admin) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) TableName(org.apache.hadoop.hbase.TableName) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
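
Outside of tests, LoadIncrementalHFiles also implements the Hadoop Tool interface, so the same load can be driven from a small driver class or from the shell (hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfile-dir> <table>). A minimal sketch, assuming the hfile directory and target table already exist; BulkLoadDriver is a hypothetical class name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.util.ToolRunner;

public class BulkLoadDriver {
    // args: <hfile-output-dir> <table-name>
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        System.exit(ToolRunner.run(conf, new LoadIncrementalHFiles(conf), args));
    }
}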

Aggregations

LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles): 14
Path (org.apache.hadoop.fs.Path): 11
Configuration (org.apache.hadoop.conf.Configuration): 8
IOException (java.io.IOException): 7
TableName (org.apache.hadoop.hbase.TableName): 7
Table (org.apache.hadoop.hbase.client.Table): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 5
RegionLocator (org.apache.hadoop.hbase.client.RegionLocator): 5
FileNotFoundException (java.io.FileNotFoundException): 4
Test (org.junit.Test): 4
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Put (org.apache.hadoop.hbase.client.Put): 3
Result (org.apache.hadoop.hbase.client.Result): 3
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 3
Scan (org.apache.hadoop.hbase.client.Scan): 3
Cell (org.apache.hadoop.hbase.Cell): 2
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2
HTable (org.apache.hadoop.hbase.client.HTable): 2
InterruptedIOException (java.io.InterruptedIOException): 1