Search in sources :

Example 1 with Multimap

use of org.apache.hbase.thirdparty.com.google.common.collect.Multimap in project hbase by apache.

the class TestBulkLoadHFilesSplitRecovery method testBulkLoadPhaseFailure.

/**
 * Checks that an error injected into the connection used by the bulk load phase (standing in
 * for a failure on the RS side) reaches the bulk load client as an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testBulkLoadPhaseFailure() throws Exception {
    final TableName table = TableName.valueOf(name.getMethodName());
    final AtomicInteger phaseInvocations = new AtomicInteger();
    // Copy the shared configuration and set the client retries number to 2 on the copy.
    Configuration limitedRetryConf = new Configuration(util.getConfiguration());
    limitedRetryConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(limitedRetryConf) {

        @Override
        protected void bulkLoadPhase(AsyncClusterConnection conn, TableName tableName,
                Deque<LoadQueueItem> queue, Multimap<ByteBuffer, LoadQueueItem> regionGroups,
                boolean copyFiles, Map<LoadQueueItem, ByteBuffer> item2RegionMap)
                throws IOException {
            // Only the very first invocation goes through the error-injecting connection;
            // every later one uses the connection it was handed.
            final AsyncClusterConnection target;
            if (phaseInvocations.incrementAndGet() == 1) {
                target = mockAndInjectError(conn);
            } else {
                target = conn;
            }
            super.bulkLoadPhase(target, tableName, queue, regionGroups, copyFiles,
                item2RegionMap);
        }
    };
    Path hfileDir = buildBulkFiles(table, 1);
    loader.bulkLoad(table, hfileDir);
}
Also used : Path(org.apache.hadoop.fs.Path) TableName(org.apache.hadoop.hbase.TableName) Multimap(org.apache.hbase.thirdparty.com.google.common.collect.Multimap) Configuration(org.apache.hadoop.conf.Configuration) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AsyncClusterConnection(org.apache.hadoop.hbase.client.AsyncClusterConnection) Deque(java.util.Deque) Map(java.util.Map) Test(org.junit.Test)

Example 2 with Multimap

use of org.apache.hbase.thirdparty.com.google.common.collect.Multimap in project hbase by apache.

the class TestSecureBulkLoadManager method doBulkloadWithoutRetry.

/**
 * Bulk loads {@code dir} into {@code TABLE} with a tool whose bulk load phase always finishes
 * by throwing {@code MyExceptionToAvoidRetry}, and fails the test if that exception does not
 * propagate out of {@code bulkLoad}.
 *
 * @param dir directory handed to the bulk load tool
 */
private void doBulkloadWithoutRetry(Path dir) throws Exception {
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf) {

        @Override
        protected void bulkLoadPhase(AsyncClusterConnection conn, TableName tableName,
                Deque<LoadQueueItem> queue, Multimap<ByteBuffer, LoadQueueItem> regionGroups,
                boolean copyFiles, Map<LoadQueueItem, ByteBuffer> item2RegionMap)
                throws IOException {
            super.bulkLoadPhase(conn, tableName, queue, regionGroups, copyFiles,
                item2RegionMap);
            // The delegate call has returned; throw so that no retry is attempted.
            throw new MyExceptionToAvoidRetry();
        }
    };
    try {
        loader.bulkLoad(TABLE, dir);
    } catch (MyExceptionToAvoidRetry expected) {
        // This is the outcome the caller is looking for.
        return;
    }
    Assert.fail("MyExceptionToAvoidRetry is expected");
}
Also used : TableName(org.apache.hadoop.hbase.TableName) Multimap(org.apache.hbase.thirdparty.com.google.common.collect.Multimap) BulkLoadHFilesTool(org.apache.hadoop.hbase.tool.BulkLoadHFilesTool) AsyncClusterConnection(org.apache.hadoop.hbase.client.AsyncClusterConnection) Deque(java.util.Deque) Map(java.util.Map)

Example 3 with Multimap

use of org.apache.hbase.thirdparty.com.google.common.collect.Multimap in project hbase by apache.

the class BulkLoadHFilesTool method groupOrSplitPhase.

/**
 * Drains {@code queue}, submitting one {@code groupOrSplit} task per item to {@code pool}, and
 * then waits for every task to complete. When a task returns a list of items, those items are
 * put back on {@code queue}; when it returns no list, the second value of its result is recorded
 * as a missing hfile.
 *
 * @param conn the HBase cluster connection
 * @param tableName the table name of the table to load into
 * @param pool the ExecutorService
 * @param queue the queue for LoadQueueItem
 * @param startEndKeys start and end keys
 * @return A map that groups LQI by likely bulk load region targets and Set of missing hfiles.
 * @throws IOException if a task failed with an IOException (rethrown as is), or an
 *         {@link InterruptedIOException} if the calling thread was interrupted while waiting;
 *         in the latter case the thread's interrupt status is restored before throwing
 * @throws IllegalStateException if a task failed with any other kind of throwable
 */
private Pair<Multimap<ByteBuffer, LoadQueueItem>, Set<String>> groupOrSplitPhase(AsyncClusterConnection conn, TableName tableName, ExecutorService pool, Deque<LoadQueueItem> queue, List<Pair<byte[], byte[]>> startEndKeys) throws IOException {
    // <region start key, LQI> need synchronized only within this scope of this
    // phase because of the puts that happen in futures.
    Multimap<ByteBuffer, LoadQueueItem> rgs = HashMultimap.create();
    final Multimap<ByteBuffer, LoadQueueItem> regionGroups = Multimaps.synchronizedMultimap(rgs);
    Set<String> missingHFiles = new HashSet<>();
    Pair<Multimap<ByteBuffer, LoadQueueItem>, Set<String>> pair = new Pair<>(regionGroups, missingHFiles);
    // drain LQIs and figure out bulk load groups
    Set<Future<Pair<List<LoadQueueItem>, String>>> splittingFutures = new HashSet<>();
    while (!queue.isEmpty()) {
        final LoadQueueItem item = queue.remove();
        final Callable<Pair<List<LoadQueueItem>, String>> call = () -> groupOrSplit(conn, tableName, regionGroups, item, startEndKeys);
        splittingFutures.add(pool.submit(call));
    }
    // Collect every result. All grouping and splitting must finish before
    // we can attempt the atomic loads.
    for (Future<Pair<List<LoadQueueItem>, String>> lqis : splittingFutures) {
        try {
            Pair<List<LoadQueueItem>, String> splits = lqis.get();
            if (splits != null) {
                if (splits.getFirst() != null) {
                    // Items handed back by the task go onto the queue again.
                    queue.addAll(splits.getFirst());
                } else {
                    missingHFiles.add(splits.getSecond());
                }
            }
        } catch (ExecutionException e1) {
            Throwable t = e1.getCause();
            if (t instanceof IOException) {
                LOG.error("IOException during splitting", e1);
                // would have been thrown if not parallelized,
                throw (IOException) t;
            }
            LOG.error("Unexpected execution exception during splitting", e1);
            throw new IllegalStateException(t);
        } catch (InterruptedException e1) {
            LOG.error("Unexpected interrupted exception during splitting", e1);
            // Catching InterruptedException clears the thread's interrupt status. Restore it
            // so that code further up the stack which only sees an IOException can still
            // observe that the thread was interrupted.
            Thread.currentThread().interrupt();
            throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
        }
    }
    return pair;
}
Also used : InterruptedIOException(java.io.InterruptedIOException) Set(java.util.Set) HashSet(java.util.HashSet) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Multimap(org.apache.hbase.thirdparty.com.google.common.collect.Multimap) HashMultimap(org.apache.hbase.thirdparty.com.google.common.collect.HashMultimap) Future(java.util.concurrent.Future) CompletableFuture(java.util.concurrent.CompletableFuture) List(java.util.List) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException) Pair(org.apache.hadoop.hbase.util.Pair)

Example 4 with Multimap

use of org.apache.hbase.thirdparty.com.google.common.collect.Multimap in project hbase by apache.

the class TestBulkLoadHFilesSplitRecovery method testSplitWhileBulkLoadPhase.

/**
 * Covers the case where a region split happens after the initial validation but before the
 * atomic bulk load call. Presplitting cannot reach this path, so a split is injected right
 * before the first atomic region load.
 */
@Test
public void testSplitWhileBulkLoadPhase() throws Exception {
    final TableName table = TableName.valueOf(name.getMethodName());
    setupTable(util.getConnection(), table, 10);
    populateTable(util.getConnection(), table, 1);
    assertExpectedTable(table, ROWCOUNT, 1);
    // Time to cause trouble: the split below happens after the checks, so the atomic import
    // of the bulk files fails on the first try. The loader is expected to recover from that.
    final AtomicInteger phaseInvocations = new AtomicInteger();
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(util.getConfiguration()) {

        @Override
        protected void bulkLoadPhase(AsyncClusterConnection conn, TableName tableName,
                Deque<LoadQueueItem> queue, Multimap<ByteBuffer, LoadQueueItem> regionGroups,
                boolean copyFiles, Map<LoadQueueItem, ByteBuffer> item2RegionMap)
                throws IOException {
            // Force a split on the first attempt only.
            if (phaseInvocations.incrementAndGet() == 1) {
                forceSplit(table);
            }
            super.bulkLoadPhase(conn, tableName, queue, regionGroups, copyFiles,
                item2RegionMap);
        }
    };
    // HFiles are generated for different column families.
    Path hfileDir = buildBulkFiles(table, 2);
    loader.bulkLoad(table, hfileDir);
    // Verify the load. Three attempts are expected: 1) a failure because a split is needed,
    // 2) the load of the split top, 3) the load of the split bottom.
    assertEquals(3, phaseInvocations.get());
    assertExpectedTable(table, ROWCOUNT, 2);
}
Also used : Path(org.apache.hadoop.fs.Path) TableName(org.apache.hadoop.hbase.TableName) Multimap(org.apache.hbase.thirdparty.com.google.common.collect.Multimap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AsyncClusterConnection(org.apache.hadoop.hbase.client.AsyncClusterConnection) Deque(java.util.Deque) Map(java.util.Map) Test(org.junit.Test)

Example 5 with Multimap

use of org.apache.hbase.thirdparty.com.google.common.collect.Multimap in project hbase by apache.

the class TestBulkLoadHFilesSplitRecovery method testCorrectSplitPoint.

/**
 * Bulk loads one set of HFiles into a table created with seven split keys and checks how many
 * times the bulk load phase is entered.
 */
@Test
public void testCorrectSplitPoint() throws Exception {
    final TableName table = TableName.valueOf(name.getMethodName());
    byte[][] splitKeys = {
        Bytes.toBytes("row_00000010"),
        Bytes.toBytes("row_00000020"),
        Bytes.toBytes("row_00000030"),
        Bytes.toBytes("row_00000040"),
        Bytes.toBytes("row_00000050"),
        Bytes.toBytes("row_00000060"),
        Bytes.toBytes("row_00000070")
    };
    setupTableWithSplitkeys(table, NUM_CFS, splitKeys);
    final AtomicInteger phaseInvocations = new AtomicInteger();
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(util.getConfiguration()) {

        @Override
        protected void bulkLoadPhase(AsyncClusterConnection conn, TableName tableName,
                Deque<LoadQueueItem> queue, Multimap<ByteBuffer, LoadQueueItem> regionGroups,
                boolean copyFiles, Map<LoadQueueItem, ByteBuffer> item2RegionMap)
                throws IOException {
            // Count each entry into the bulk load phase before delegating.
            phaseInvocations.incrementAndGet();
            super.bulkLoadPhase(conn, tableName, queue, regionGroups, copyFiles,
                item2RegionMap);
        }
    };
    Path hfileDir = buildBulkFiles(table, 1);
    loader.bulkLoad(table, hfileDir);
    // Prior to HBASE-25281 the bulkload rpc had to be invoked 8 times.
    assertEquals(4, phaseInvocations.get());
}
Also used : Path(org.apache.hadoop.fs.Path) TableName(org.apache.hadoop.hbase.TableName) Multimap(org.apache.hbase.thirdparty.com.google.common.collect.Multimap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AsyncClusterConnection(org.apache.hadoop.hbase.client.AsyncClusterConnection) Deque(java.util.Deque) Map(java.util.Map) Test(org.junit.Test)

Aggregations

Multimap (org.apache.hbase.thirdparty.com.google.common.collect.Multimap): 9, TableName (org.apache.hadoop.hbase.TableName): 8, AsyncClusterConnection (org.apache.hadoop.hbase.client.AsyncClusterConnection): 8, Path (org.apache.hadoop.fs.Path): 7, Test (org.junit.Test): 7, Deque (java.util.Deque): 6, Map (java.util.Map): 6, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 6, List (java.util.List): 3, IOException (java.io.IOException): 2, Configuration (org.apache.hadoop.conf.Configuration): 2, ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList): 2, InterruptedIOException (java.io.InterruptedIOException): 1, ByteBuffer (java.nio.ByteBuffer): 1, ArrayList (java.util.ArrayList): 1, HashSet (java.util.HashSet): 1, Set (java.util.Set): 1, CompletableFuture (java.util.concurrent.CompletableFuture): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Future (java.util.concurrent.Future): 1