
Example 1 with StoreDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor in project hbase by apache.

Class ReplicationSourceWALReaderThread, method countDistinctRowKeysAndHFiles.

/**
   * Count the number of different row keys in the given edit because of mini-batching. We assume
   * that there's at least one Cell in the WALEdit.
   * @param edit edit to count row keys from
   * @return number of different row keys and HFiles
   */
private Pair<Integer, Integer> countDistinctRowKeysAndHFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
    int distinctRowKeys = 1;
    int totalHFileEntries = 0;
    Cell lastCell = cells.get(0);
    int totalCells = edit.size();
    for (int i = 0; i < totalCells; i++) {
        // Count HFiles to be replicated
        if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
            try {
                BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
                List<StoreDescriptor> stores = bld.getStoresList();
                int totalStores = stores.size();
                for (int j = 0; j < totalStores; j++) {
                    totalHFileEntries += stores.get(j).getStoreFileList().size();
                }
            } catch (IOException e) {
                LOG.error("Failed to deserialize bulk load entry from wal edit. " + "Then its hfiles count will not be added into metric.");
            }
        }
        // A new distinct row starts whenever the row key changes from the previous cell
        if (!CellUtil.matchingRows(cells.get(i), lastCell)) {
            distinctRowKeys++;
        }
        lastCell = cells.get(i);
    }
    Pair<Integer, Integer> result = new Pair<>(distinctRowKeys, totalHFileEntries);
    return result;
}
Also used: BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor), IOException (java.io.IOException), Cell (org.apache.hadoop.hbase.Cell), StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor), Pair (org.apache.hadoop.hbase.util.Pair)
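A hypothetical caller sketch, not taken from the HBase source: how the returned pair might be consumed when sizing a replication batch. The counter fields are assumptions, named here for illustration only.

// Hypothetical usage inside the reader thread (illustrative names)
Pair<Integer, Integer> counts = countDistinctRowKeysAndHFiles(edit);
// getFirst() carries the distinct row key count, getSecond() the bulk-loaded HFile count
currentBatchRowCount += counts.getFirst();
currentBatchHFileCount += counts.getSecond();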

Example 2 with StoreDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor in project hbase by apache.

Class ReplicationSourceWALReaderThread, method calculateTotalSizeOfStoreFiles.

/**
   * Calculate the total size, in bytes, of all the store files referenced by bulk load
   * entries in the given edit.
   * @param edit edit to sum store file sizes from
   * @return the total size of the store files in bytes
   */
private long calculateTotalSizeOfStoreFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
    // Use a long accumulator: store file sizes in bytes can exceed Integer.MAX_VALUE
    long totalStoreFilesSize = 0;
    int totalCells = edit.size();
    for (int i = 0; i < totalCells; i++) {
        if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
            try {
                BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
                List<StoreDescriptor> stores = bld.getStoresList();
                int totalStores = stores.size();
                for (int j = 0; j < totalStores; j++) {
                    totalStoreFilesSize += stores.get(j).getStoreFileSizeBytes();
                }
            } catch (IOException e) {
                LOG.error("Failed to deserialize bulk load entry from wal edit. " + "Size of HFiles part of cell will not be considered in replication " + "request size calculation.", e);
            }
        }
    }
    return totalStoreFilesSize;
}
Also used: BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor), IOException (java.io.IOException), Cell (org.apache.hadoop.hbase.Cell), StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor)
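For context, a minimal sketch of building the StoreDescriptor message this method reads. The shaded ByteString import below matches the HBase 2.x shading layout; treat the exact package as an assumption, since it varies across versions.

import org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString; // assumed shaded path
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;
import org.apache.hadoop.hbase.util.Bytes;

StoreDescriptor store = StoreDescriptor.newBuilder()
    .setFamilyName(ByteString.copyFrom(Bytes.toBytes("cf")))
    .setStoreHomeDir("cf")                      // store directory, relative to the region
    .addStoreFile("hfile-0001")                 // file names only, not full paths
    .setStoreFileSizeBytes(128L * 1024 * 1024)  // the value summed by the loop above
    .build();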

Example 3 with StoreDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor in project hbase by apache.

Class ReplicationSink, method buildBulkLoadHFileMap.

private void buildBulkLoadHFileMap(
        final Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap,
        TableName table, BulkLoadDescriptor bld) throws IOException {
    List<StoreDescriptor> storesList = bld.getStoresList();
    int storesSize = storesList.size();
    for (int j = 0; j < storesSize; j++) {
        StoreDescriptor storeDescriptor = storesList.get(j);
        List<String> storeFileList = storeDescriptor.getStoreFileList();
        int storeFilesSize = storeFileList.size();
        hfilesReplicated += storeFilesSize;
        // The family name is loop-invariant for this store; resolve it once
        byte[] family = storeDescriptor.getFamilyName().toByteArray();
        for (int k = 0; k < storeFilesSize; k++) {
            // Build hfile relative path from its namespace
            String pathToHfileFromNS = getHFilePath(table, bld, storeFileList.get(k), family);
            String tableName = table.getNameWithNamespaceInclAsString();
            List<Pair<byte[], List<String>>> familyHFilePathsList = bulkLoadHFileMap.get(tableName);
            if (familyHFilePathsList != null) {
                boolean foundFamily = false;
                for (Pair<byte[], List<String>> familyHFilePathsPair : familyHFilePathsList) {
                    if (Bytes.equals(familyHFilePathsPair.getFirst(), family)) {
                        // Found family already present, just add the path to the existing list
                        familyHFilePathsPair.getSecond().add(pathToHfileFromNS);
                        foundFamily = true;
                        break;
                    }
                }
                if (!foundFamily) {
                    // Family not found, add this family and its hfile paths pair to the list
                    addFamilyAndItsHFilePathToTableInMap(family, pathToHfileFromNS, familyHFilePathsList);
                }
            } else {
                // Add this table entry into the map
                addNewTableEntryInMap(bulkLoadHFileMap, family, pathToHfileFromNS, tableName);
            }
        }
    }
}
Also used: ArrayList (java.util.ArrayList), List (java.util.List), StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor), Pair (org.apache.hadoop.hbase.util.Pair)
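A hypothetical sketch of walking the map this method populates; the loop variable names are illustrative, and the real consumer lives elsewhere in ReplicationSink.

for (Map.Entry<String, List<Pair<byte[], List<String>>>> entry : bulkLoadHFileMap.entrySet()) {
    String tableName = entry.getKey();
    for (Pair<byte[], List<String>> familyPaths : entry.getValue()) {
        byte[] family = familyPaths.getFirst();            // column family bytes
        List<String> hfilePaths = familyPaths.getSecond(); // namespace-relative HFile paths
        // hand (family, hfilePaths) for tableName to the bulk load machinery
    }
}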

Example 4 with StoreDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor in project hbase by apache.

Class BulkLoadCellFilter, method filterCell.

/**
 * Filters the bulk load cell using the supplied predicate.
 * @param cell The WAL cell to filter.
 * @param famPredicate Returns true if the given family should be removed.
 * @return The filtered cell.
 */
public Cell filterCell(Cell cell, Predicate<byte[]> famPredicate) {
    byte[] fam;
    BulkLoadDescriptor bld = null;
    try {
        bld = WALEdit.getBulkLoadDescriptor(cell);
    } catch (IOException e) {
        LOG.warn("Failed to get bulk load events information from the WAL file.", e);
        return cell;
    }
    List<StoreDescriptor> storesList = bld.getStoresList();
    // Copy the StoreDescriptor list so it can be mutated, since storesList is an unmodifiable list
    List<StoreDescriptor> copiedStoresList = new ArrayList<>(storesList);
    Iterator<StoreDescriptor> copiedStoresListIterator = copiedStoresList.iterator();
    boolean anyStoreRemoved = false;
    while (copiedStoresListIterator.hasNext()) {
        StoreDescriptor sd = copiedStoresListIterator.next();
        fam = sd.getFamilyName().toByteArray();
        if (famPredicate.apply(fam)) {
            copiedStoresListIterator.remove();
            anyStoreRemoved = true;
        }
    }
    if (!anyStoreRemoved) {
        return cell;
    } else if (copiedStoresList.isEmpty()) {
        return null;
    }
    BulkLoadDescriptor.Builder newDesc = BulkLoadDescriptor.newBuilder()
        .setTableName(bld.getTableName())
        .setEncodedRegionName(bld.getEncodedRegionName())
        .setBulkloadSeqNum(bld.getBulkloadSeqNum());
    newDesc.addAllStores(copiedStoresList);
    BulkLoadDescriptor newBulkLoadDescriptor = newDesc.build();
    return cellBuilder.clear()
        .setRow(CellUtil.cloneRow(cell))
        .setFamily(WALEdit.METAFAMILY)
        .setQualifier(WALEdit.BULK_LOAD)
        .setTimestamp(cell.getTimestamp())
        .setType(cell.getTypeByte())
        .setValue(newBulkLoadDescriptor.toByteArray())
        .build();
}
Also used: BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor), ArrayList (java.util.ArrayList), IOException (java.io.IOException), StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor)
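A hypothetical usage sketch of filterCell: dropping one family's stores from a bulk load marker cell. It assumes a no-arg BulkLoadCellFilter constructor and relies on the predicate being a single-method (Guava-style) interface, so a lambda works.

BulkLoadCellFilter filter = new BulkLoadCellFilter();
byte[] excludedFamily = Bytes.toBytes("not_replicated"); // illustrative family name
Cell filtered = filter.filterCell(bulkLoadCell,
    fam -> Bytes.equals(fam, excludedFamily)); // true means: remove this family's stores
if (filtered == null) {
    // every store was filtered out, so the whole marker cell should be dropped
} else if (filtered == bulkLoadCell) {
    // the descriptor could not be parsed or nothing matched; the original cell survives
}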

Example 5 with StoreDescriptor

Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor in project hbase by apache.

Class TestHRegionReplayEvents, method testReplayBulkLoadEvent.

/**
 * Tests replaying bulk load event markers from the primary region. Checks whether the bulk
 * loaded files are picked up by the secondary region replica.
 */
@Test
public void testReplayBulkLoadEvent() throws IOException {
    LOG.info("testReplayBulkLoadEvent starts");
    // no flush
    putDataWithFlushes(primaryRegion, 100, 0, 100);
    // close the region and open again.
    primaryRegion.close();
    primaryRegion = HRegion.openHRegion(rootDir, primaryHri, htd, walPrimary, CONF, rss, null);
    // bulk load a file into primary region
    Random random = new Random();
    byte[] randomValues = new byte[20];
    random.nextBytes(randomValues);
    Path testPath = TEST_UTIL.getDataTestDirOnTestFS();
    List<Pair<byte[], String>> familyPaths = new ArrayList<>();
    int expectedLoadFileCount = 0;
    for (byte[] family : families) {
        familyPaths.add(new Pair<>(family, createHFileForFamilies(testPath, family, randomValues)));
        expectedLoadFileCount++;
    }
    primaryRegion.bulkLoadHFiles(familyPaths, false, null);
    // now replay the edits and the bulk load marker
    reader = createWALReaderForPrimary();
    LOG.info("-- Replaying edits and region events in secondary");
    BulkLoadDescriptor bulkloadEvent = null;
    while (true) {
        WAL.Entry entry = reader.next();
        if (entry == null) {
            break;
        }
        bulkloadEvent = WALEdit.getBulkLoadDescriptor(entry.getEdit().getCells().get(0));
        if (bulkloadEvent != null) {
            break;
        }
    }
    // we should have 1 bulk load event
    assertTrue(bulkloadEvent != null);
    assertEquals(expectedLoadFileCount, bulkloadEvent.getStoresCount());
    // replay the bulk load event
    secondaryRegion.replayWALBulkLoadEventMarker(bulkloadEvent);
    List<String> storeFileName = new ArrayList<>();
    for (StoreDescriptor storeDesc : bulkloadEvent.getStoresList()) {
        storeFileName.addAll(storeDesc.getStoreFileList());
    }
    // assert that the bulk loaded files are picked
    for (HStore s : secondaryRegion.getStores()) {
        for (HStoreFile sf : s.getStorefiles()) {
            storeFileName.remove(sf.getPath().getName());
        }
    }
    assertTrue("Found some store file isn't loaded:" + storeFileName, storeFileName.isEmpty());
    LOG.info("-- Verifying edits from secondary");
    for (byte[] family : families) {
        assertGet(secondaryRegion, family, randomValues);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), WAL (org.apache.hadoop.hbase.wal.WAL), BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor), ArrayList (java.util.ArrayList), StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor), Random (java.util.Random), Pair (org.apache.hadoop.hbase.util.Pair), Test (org.junit.Test)
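The scan loop in the test distills into a small helper. This is a hypothetical refactoring, not part of the test class, and like the test it assumes the first cell of each edit carries the marker when one is present.

// Hypothetical helper: find the first bulk load marker in a WAL
private static BulkLoadDescriptor findFirstBulkLoadEvent(WAL.Reader reader) throws IOException {
    WAL.Entry entry;
    while ((entry = reader.next()) != null) {
        // getBulkLoadDescriptor returns null for cells that are not BULK_LOAD markers
        BulkLoadDescriptor bld =
            WALEdit.getBulkLoadDescriptor(entry.getEdit().getCells().get(0));
        if (bld != null) {
            return bld;
        }
    }
    return null; // no bulk load event found in this WAL
}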

Aggregations

StoreDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor): 14 usages
BulkLoadDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor): 8 usages
IOException (java.io.IOException): 5 usages
Cell (org.apache.hadoop.hbase.Cell): 5 usages
Pair (org.apache.hadoop.hbase.util.Pair): 5 usages
ArrayList (java.util.ArrayList): 4 usages
WAL (org.apache.hadoop.hbase.wal.WAL): 4 usages
Test (org.junit.Test): 4 usages
Path (org.apache.hadoop.fs.Path): 3 usages
ServerName (org.apache.hadoop.hbase.ServerName): 3 usages
RegionEventDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor): 3 usages
FileNotFoundException (java.io.FileNotFoundException): 2 usages
List (java.util.List): 2 usages
Put (org.apache.hadoop.hbase.client.Put): 2 usages
RegionInfo (org.apache.hadoop.hbase.client.RegionInfo): 2 usages
TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor): 2 usages
WALEdit (org.apache.hadoop.hbase.wal.WALEdit): 2 usages
Random (java.util.Random): 1 usage
Configuration (org.apache.hadoop.conf.Configuration): 1 usage
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 1 usage