Example 1 with BulkLoadDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor in project hbase by apache.

From the class ReplicationSourceWALReaderThread, method countDistinctRowKeysAndHFiles.

/**
   * Count the number of distinct row keys in the given edit (needed because of mini-batching)
   * and the number of HFiles referenced by bulk load markers. We assume that there is at least
   * one Cell in the WALEdit.
   * @param edit edit to count row keys and HFiles from
   * @return number of distinct row keys and number of HFiles
   */
private Pair<Integer, Integer> countDistinctRowKeysAndHFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
    int distinctRowKeys = 1;
    int totalHFileEntries = 0;
    Cell lastCell = cells.get(0);
    int totalCells = edit.size();
    for (int i = 0; i < totalCells; i++) {
        // Count HFiles to be replicated
        if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
            try {
                BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
                List<StoreDescriptor> stores = bld.getStoresList();
                int totalStores = stores.size();
                for (int j = 0; j < totalStores; j++) {
                    totalHFileEntries += stores.get(j).getStoreFileList().size();
                }
            } catch (IOException e) {
                LOG.error("Failed to deserialize bulk load entry from wal edit. " + "Then its hfiles count will not be added into metric.");
            }
        }
        if (!CellUtil.matchingRows(cells.get(i), lastCell)) {
            distinctRowKeys++;
        }
        lastCell = cells.get(i);
    }
    Pair<Integer, Integer> result = new Pair<>(distinctRowKeys, totalHFileEntries);
    return result;
}
Also used : BulkLoadDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) IOException(java.io.IOException) Cell(org.apache.hadoop.hbase.Cell) StoreDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor) Pair(org.apache.hadoop.hbase.util.Pair)
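
Below is a minimal sketch of how a caller in the reader loop might consume the returned Pair when sizing a replication batch. The entries list and the two counters are assumed locals introduced only for illustration; Pair.getFirst()/getSecond() and WAL.Entry.getEdit() are the HBase accessors already used in these examples.

// Hypothetical caller: accumulate per-batch totals from the edits read off the WAL.
// 'entries', 'batchDistinctRowKeys' and 'batchHFiles' are assumed locals; the private
// helper shown above is assumed to be visible from here.
int batchDistinctRowKeys = 0;
int batchHFiles = 0;
for (WAL.Entry entry : entries) {
    Pair<Integer, Integer> counts = countDistinctRowKeysAndHFiles(entry.getEdit());
    batchDistinctRowKeys += counts.getFirst();   // distinct row keys in this edit
    batchHFiles += counts.getSecond();           // HFiles referenced by bulk load markers
}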

Example 2 with BulkLoadDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor in project hbase by apache.

From the class ReplicationSourceWALReaderThread, method calculateTotalSizeOfStoreFiles.

/**
   * Calculate the total size of all the store files referenced by bulk load markers in the edit.
   * @param edit edit to get the store files from
   * @return the total size of the store files, in bytes
   */
private int calculateTotalSizeOfStoreFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
    int totalStoreFilesSize = 0;
    int totalCells = edit.size();
    for (int i = 0; i < totalCells; i++) {
        if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
            try {
                BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
                List<StoreDescriptor> stores = bld.getStoresList();
                int totalStores = stores.size();
                for (int j = 0; j < totalStores; j++) {
                    totalStoreFilesSize += stores.get(j).getStoreFileSizeBytes();
                }
            } catch (IOException e) {
                LOG.error("Failed to deserialize bulk load entry from wal edit. " + "Size of HFiles part of cell will not be considered in replication " + "request size calculation.", e);
            }
        }
    }
    return totalStoreFilesSize;
}
Also used : BulkLoadDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) IOException(java.io.IOException) Cell(org.apache.hadoop.hbase.Cell) StoreDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor)
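
A small hypothetical sketch of how the computed size could feed a batch-size cap check. The names batchSize and replicationBatchSizeCapacity are invented for illustration and are not fields confirmed by the snippet above.

// Hypothetical use in the reader loop: count bulk-loaded HFile bytes toward the
// replication batch size so the cap also covers the sideband file data.
long entrySize = calculateTotalSizeOfStoreFiles(entry.getEdit());   // helper shown above
if (batchSize + entrySize > replicationBatchSizeCapacity) {         // assumed locals
    // the current batch is full; ship it before appending this entry
} else {
    batchSize += entrySize;
}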

Example 3 with BulkLoadDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor in project hbase by apache.

From the class BulkLoadCellFilter, method filterCell.

/**
   * Filters the bulk load cell using the supplied predicate.
   * @param cell The WAL cell to filter.
   * @param famPredicate Returns true if the given family should be removed.
   * @return The filtered cell.
   */
public Cell filterCell(Cell cell, Predicate<byte[]> famPredicate) {
    byte[] fam;
    BulkLoadDescriptor bld = null;
    try {
        bld = WALEdit.getBulkLoadDescriptor(cell);
    } catch (IOException e) {
        LOG.warn("Failed to get bulk load events information from the WAL file.", e);
        return cell;
    }
    List<StoreDescriptor> storesList = bld.getStoresList();
    // Copy the StoreDescriptor list and update it, as storesList is an unmodifiable list
    List<StoreDescriptor> copiedStoresList = new ArrayList<>(storesList);
    Iterator<StoreDescriptor> copiedStoresListIterator = copiedStoresList.iterator();
    boolean anyStoreRemoved = false;
    while (copiedStoresListIterator.hasNext()) {
        StoreDescriptor sd = copiedStoresListIterator.next();
        fam = sd.getFamilyName().toByteArray();
        if (famPredicate.apply(fam)) {
            copiedStoresListIterator.remove();
            anyStoreRemoved = true;
        }
    }
    if (!anyStoreRemoved) {
        return cell;
    } else if (copiedStoresList.isEmpty()) {
        return null;
    }
    BulkLoadDescriptor.Builder newDesc = BulkLoadDescriptor.newBuilder()
        .setTableName(bld.getTableName())
        .setEncodedRegionName(bld.getEncodedRegionName())
        .setBulkloadSeqNum(bld.getBulkloadSeqNum());
    newDesc.addAllStores(copiedStoresList);
    BulkLoadDescriptor newBulkLoadDescriptor = newDesc.build();
    return CellUtil.createCell(CellUtil.cloneRow(cell), WALEdit.METAFAMILY, WALEdit.BULK_LOAD, cell.getTimestamp(), cell.getTypeByte(), newBulkLoadDescriptor.toByteArray());
}
Also used : BulkLoadDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StoreDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor)
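
A sketch, under assumptions, of calling filterCell with a family predicate. The Predicate type is the (shaded) Guava interface implied by famPredicate.apply(fam), bulkLoadCell is an assumed local, the family name "cf_not_replicated" is made up, and Bytes.equals is used so the comparison does not rely on byte[] identity.

// Hypothetical caller: strip one column family's store files from a bulk load marker.
BulkLoadCellFilter bulkLoadFilter = new BulkLoadCellFilter();
Cell filtered = bulkLoadFilter.filterCell(bulkLoadCell, new Predicate<byte[]>() {
    @Override
    public boolean apply(byte[] family) {
        // return true to REMOVE this family's store files from the marker
        return Bytes.equals(family, Bytes.toBytes("cf_not_replicated"));
    }
});
if (filtered == null) {
    // every family was filtered out; drop the cell from the edit entirely
} else if (filtered != bulkLoadCell) {
    // a rebuilt cell containing only the remaining store descriptors was returned
}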

Example 4 with BulkLoadDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor in project hbase by apache.

From the class RSRpcServices, method doReplayBatchOp.

/**
   * Execute a list of Put/Delete mutations. The function returns OperationStatus instead of
   * constructing MultiResponse to save a possible loop if the caller doesn't need MultiResponse.
   * @param region the region to apply the mutations to
   * @param mutations the replayed Put/Delete mutations, read back from the WAL
   * @param replaySeqId sequence id of the replayed batch
   * @return an array of OperationStatus which internally contains the OperationStatusCode and the
   *         exceptionMessage if any
   * @throws IOException if replaying a marker or the batch itself fails
   */
private OperationStatus[] doReplayBatchOp(final Region region, final List<WALSplitter.MutationReplay> mutations, long replaySeqId) throws IOException {
    long before = EnvironmentEdgeManager.currentTime();
    boolean batchContainsPuts = false, batchContainsDelete = false;
    try {
        for (Iterator<WALSplitter.MutationReplay> it = mutations.iterator(); it.hasNext(); ) {
            WALSplitter.MutationReplay m = it.next();
            if (m.type == MutationType.PUT) {
                batchContainsPuts = true;
            } else {
                batchContainsDelete = true;
            }
            NavigableMap<byte[], List<Cell>> map = m.mutation.getFamilyCellMap();
            List<Cell> metaCells = map.get(WALEdit.METAFAMILY);
            if (metaCells != null && !metaCells.isEmpty()) {
                for (Cell metaCell : metaCells) {
                    CompactionDescriptor compactionDesc = WALEdit.getCompaction(metaCell);
                    boolean isDefaultReplica = RegionReplicaUtil.isDefaultReplica(region.getRegionInfo());
                    HRegion hRegion = (HRegion) region;
                    if (compactionDesc != null) {
                        // replay the compaction. Remove the files from stores only if we are the primary
                        // region replica (thus own the files)
                        hRegion.replayWALCompactionMarker(compactionDesc, !isDefaultReplica, isDefaultReplica, replaySeqId);
                        continue;
                    }
                    FlushDescriptor flushDesc = WALEdit.getFlushDescriptor(metaCell);
                    if (flushDesc != null && !isDefaultReplica) {
                        hRegion.replayWALFlushMarker(flushDesc, replaySeqId);
                        continue;
                    }
                    RegionEventDescriptor regionEvent = WALEdit.getRegionEventDescriptor(metaCell);
                    if (regionEvent != null && !isDefaultReplica) {
                        hRegion.replayWALRegionEventMarker(regionEvent);
                        continue;
                    }
                    BulkLoadDescriptor bulkLoadEvent = WALEdit.getBulkLoadDescriptor(metaCell);
                    if (bulkLoadEvent != null) {
                        hRegion.replayWALBulkLoadEventMarker(bulkLoadEvent);
                        continue;
                    }
                }
                it.remove();
            }
        }
        requestCount.add(mutations.size());
        if (!region.getRegionInfo().isMetaTable()) {
            regionServer.cacheFlusher.reclaimMemStoreMemory();
        }
        return region.batchReplay(mutations.toArray(new WALSplitter.MutationReplay[mutations.size()]), replaySeqId);
    } finally {
        if (regionServer.metricsRegionServer != null) {
            long after = EnvironmentEdgeManager.currentTime();
            if (batchContainsPuts) {
                regionServer.metricsRegionServer.updatePut(after - before);
            }
            if (batchContainsDelete) {
                regionServer.metricsRegionServer.updateDelete(after - before);
            }
        }
    }
}
Also used : BulkLoadDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) FlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor) RegionEventDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor) ArrayList(java.util.ArrayList) List(java.util.List) CompactionDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor) WALSplitter(org.apache.hadoop.hbase.wal.WALSplitter) Cell(org.apache.hadoop.hbase.Cell) ByteBufferCell(org.apache.hadoop.hbase.ByteBufferCell)
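
For reference, a minimal hypothetical helper that classifies a METAFAMILY cell in the same order the replay loop above probes it. The method name and the returned tags are invented; the WALEdit accessors are the ones used in doReplayBatchOp.

// Hypothetical helper: report which marker type a METAFAMILY cell carries,
// checking in the same order as the replay loop above.
static String classifyMetaCell(Cell metaCell) throws IOException {
    if (WALEdit.getCompaction(metaCell) != null) {
        return "compaction";
    }
    if (WALEdit.getFlushDescriptor(metaCell) != null) {
        return "flush";
    }
    if (WALEdit.getRegionEventDescriptor(metaCell) != null) {
        return "region event";
    }
    if (WALEdit.getBulkLoadDescriptor(metaCell) != null) {
        return "bulk load";
    }
    return "unknown";
}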

Example 5 with BulkLoadDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor in project hbase by apache.

From the class TestHRegionReplayEvents, method testReplayBulkLoadEvent.

/**
   * Tests replaying bulk load markers from the primary region. Checks whether the bulk loaded
   * files are picked up by the secondary region.
   */
@Test
public void testReplayBulkLoadEvent() throws IOException {
    LOG.info("testReplayBulkLoadEvent starts");
    // no flush
    putDataWithFlushes(primaryRegion, 100, 0, 100);
    // close the region and open again.
    primaryRegion.close();
    primaryRegion = HRegion.openHRegion(rootDir, primaryHri, htd, walPrimary, CONF, rss, null);
    // bulk load a file into primary region
    Random random = new Random();
    byte[] randomValues = new byte[20];
    random.nextBytes(randomValues);
    Path testPath = TEST_UTIL.getDataTestDirOnTestFS();
    List<Pair<byte[], String>> familyPaths = new ArrayList<>();
    int expectedLoadFileCount = 0;
    for (byte[] family : families) {
        familyPaths.add(new Pair<>(family, createHFileForFamilies(testPath, family, randomValues)));
        expectedLoadFileCount++;
    }
    primaryRegion.bulkLoadHFiles(familyPaths, false, null);
    // now replay the edits and the bulk load marker
    reader = createWALReaderForPrimary();
    LOG.info("-- Replaying edits and region events in secondary");
    BulkLoadDescriptor bulkloadEvent = null;
    while (true) {
        WAL.Entry entry = reader.next();
        if (entry == null) {
            break;
        }
        bulkloadEvent = WALEdit.getBulkLoadDescriptor(entry.getEdit().getCells().get(0));
        if (bulkloadEvent != null) {
            break;
        }
    }
    // we should have 1 bulk load event
    assertTrue(bulkloadEvent != null);
    assertEquals(expectedLoadFileCount, bulkloadEvent.getStoresCount());
    // replay the bulk load event
    secondaryRegion.replayWALBulkLoadEventMarker(bulkloadEvent);
    List<String> storeFileName = new ArrayList<>();
    for (StoreDescriptor storeDesc : bulkloadEvent.getStoresList()) {
        storeFileName.addAll(storeDesc.getStoreFileList());
    }
    // assert that the bulk loaded files are picked
    for (Store s : secondaryRegion.getStores()) {
        for (StoreFile sf : s.getStorefiles()) {
            storeFileName.remove(sf.getPath().getName());
        }
    }
    assertTrue("Found some store file isn't loaded:" + storeFileName, storeFileName.isEmpty());
    LOG.info("-- Verifying edits from secondary");
    for (byte[] family : families) {
        assertGet(secondaryRegion, family, randomValues);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WAL(org.apache.hadoop.hbase.wal.WAL) BulkLoadDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) ArrayList(java.util.ArrayList) StoreDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor) Random(java.util.Random) Pair(org.apache.hadoop.hbase.util.Pair) Test(org.junit.Test)
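
The scan-for-marker loop in the test can be distilled into a small reusable helper. This is a sketch only; it assumes a WAL.Reader positioned at the start of the log and, like the test, that a bulk load marker is the first cell of its edit.

// Hypothetical helper distilled from the test: scan a WAL until the first
// bulk load marker is found, or return null when the log is exhausted.
static BulkLoadDescriptor findFirstBulkLoadMarker(WAL.Reader reader) throws IOException {
    for (WAL.Entry entry = reader.next(); entry != null; entry = reader.next()) {
        BulkLoadDescriptor bld =
            WALEdit.getBulkLoadDescriptor(entry.getEdit().getCells().get(0));
        if (bld != null) {
            return bld;
        }
    }
    return null;
}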

Aggregations

BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor) 7
ArrayList (java.util.ArrayList) 5
StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor) 5
IOException (java.io.IOException) 4
List (java.util.List) 3
Cell (org.apache.hadoop.hbase.Cell) 3
Pair (org.apache.hadoop.hbase.util.Pair) 3
Path (org.apache.hadoop.fs.Path) 2
HashMap (java.util.HashMap) 1
Random (java.util.Random) 1
ByteBufferCell (org.apache.hadoop.hbase.ByteBufferCell) 1
WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit) 1
CompactionDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor) 1
FlushDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor) 1
RegionEventDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor) 1
WAL (org.apache.hadoop.hbase.wal.WAL) 1
WALSplitter (org.apache.hadoop.hbase.wal.WALSplitter) 1
Test (org.junit.Test) 1