
Example 11 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class TestHFileOutputFormat2 method setupMockColumnFamiliesForBloomType.

private void setupMockColumnFamiliesForBloomType(Table table, Map<String, BloomType> familyToBloomType) throws IOException {
    TableDescriptorBuilder mockTableDescriptor = TableDescriptorBuilder.newBuilder(TABLE_NAMES[0]);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        ColumnFamilyDescriptor columnFamilyDescriptor = ColumnFamilyDescriptorBuilder
            .newBuilder(Bytes.toBytes(entry.getKey()))
            .setMaxVersions(1)
            .setBloomFilterType(entry.getValue())
            .setBlockCacheEnabled(false)
            .setTimeToLive(0)
            .build();
        mockTableDescriptor.setColumnFamily(columnFamilyDescriptor);
    }
    // getDescriptor() returns a TableDescriptor, so build the descriptor before stubbing it.
    Mockito.doReturn(mockTableDescriptor.build()).when(table).getDescriptor();
}
Also used : BloomType(org.apache.hadoop.hbase.regionserver.BloomType) TableDescriptorBuilder(org.apache.hadoop.hbase.client.TableDescriptorBuilder) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)
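A minimal sketch of how this helper might be exercised from another test method in the same class. The family names, bloom types, and the closing assertion are illustrative assumptions, not part of the HBase test:

Table table = Mockito.mock(Table.class);
Map<String, BloomType> familyToBloomType = new TreeMap<>();
// Hypothetical family names; any String -> BloomType mapping works here.
familyToBloomType.put("info", BloomType.ROW);
familyToBloomType.put("edges", BloomType.ROWCOL);
setupMockColumnFamiliesForBloomType(table, familyToBloomType);
// The mocked Table now answers getDescriptor() with the configured families,
// so the bloom filter type of each family can be verified.
for (ColumnFamilyDescriptor cfd : table.getDescriptor().getColumnFamilies()) {
    assertEquals(familyToBloomType.get(cfd.getNameAsString()), cfd.getBloomFilterType());
}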

Example 12 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class HBaseTestingUtil method generateColumnDescriptors.

/**
 * Create a set of column descriptors covering every combination of the available compression
 * algorithms, data block encodings, and bloom filter types.
 * @param prefix prefix for the generated family names
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
    long familyId = 0;
    for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
        for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
            for (BloomType bloomType : BloomType.values()) {
                String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
                ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
                columnFamilyDescriptorBuilder.setCompressionType(compressionType);
                columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
                columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
                columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
                familyId++;
            }
        }
    }
    return columnFamilyDescriptors;
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) Compression(org.apache.hadoop.hbase.io.compress.Compression) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) ArrayList(java.util.ArrayList) ColumnFamilyDescriptorBuilder(org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)
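A hedged usage sketch for the generator above: collect the generated families into one table descriptor and create the table. The table name and the Admin handle (admin) are assumptions made for illustration:

// Build a table carrying one column family per compression/encoding/bloom combination.
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(TableName.valueOf("allCodecsTable"));
for (ColumnFamilyDescriptor cfd : HBaseTestingUtil.generateColumnDescriptors("test")) {
    builder.setColumnFamily(cfd);
}
// 'admin' is an org.apache.hadoop.hbase.client.Admin obtained elsewhere (assumed).
admin.createTable(builder.build());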

Example 13 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class TestSeekBeforeWithInlineBlocks method testMultiIndexLevelRandomHFileWithBlooms.

/**
 * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
 * to know the size of that data block, which it calculates using current data block offset and
 * the previous data block offset.  This fails to work when there are leaf-level index blocks in
 * the scannable section of the HFile, i.e. starting with HFileV2. This test runs seekBefore()
 * against flat (single-level) and multi-level (two- and three-level) HFiles to confirm the bug
 * is fixed. The same bug also occurs for inline Bloom blocks, for the same reason.
 */
@Test
public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
    // Try out different HFile versions to ensure reverse scan works on each version
    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS; hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
        conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
        fs = HFileSystem.get(conf);
        // Try out different bloom types because inline Bloom blocks break seekBefore()
        for (BloomType bloomType : BloomType.values()) {
            // Test out HFile block indices of various sizes/levels
            for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
                int indexBlockSize = INDEX_CHUNK_SIZES[testI];
                int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
                LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s", hfileVersion, bloomType, expectedNumLevels));
                conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
                conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
                conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
                Cell[] cells = new Cell[NUM_KV];
                Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s", hfileVersion, bloomType, testI));
                // Disable caching to prevent it from hiding any bugs in block seeks/reads
                conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
                CacheConfig cacheConf = new CacheConfig(conf);
                // Write the HFile
                {
                    HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build();
                    StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs).withFilePath(hfilePath).withFileContext(meta).withBloomType(bloomType).build();
                    for (int i = 0; i < NUM_KV; i++) {
                        byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
                        byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
                        byte[] value = RandomKeyValueUtil.randomValue(RAND);
                        KeyValue kv = new KeyValue(row, FAM, qual, value);
                        storeFileWriter.append(kv);
                        cells[i] = kv;
                    }
                    storeFileWriter.close();
                }
                // Read the HFile
                HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
                // Sanity check the HFile index level
                assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
                // Exercise the scanner with positional read (pread) disabled and enabled
                for (boolean pread : new boolean[] { false, true }) {
                    HFileScanner scanner = reader.getScanner(conf, true, pread);
                    checkNoSeekBefore(cells, scanner, 0);
                    for (int i = 1; i < NUM_KV; i++) {
                        checkSeekBefore(cells, scanner, i);
                        checkCell(cells[i - 1], scanner.getCell());
                    }
                    assertTrue(scanner.seekTo());
                    for (int i = NUM_KV - 1; i >= 1; i--) {
                        checkSeekBefore(cells, scanner, i);
                        checkCell(cells[i - 1], scanner.getCell());
                    }
                    checkNoSeekBefore(cells, scanner, 0);
                    scanner.close();
                }
                reader.close();
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) KeyValue(org.apache.hadoop.hbase.KeyValue) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
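The checkSeekBefore(), checkNoSeekBefore() and checkCell() helpers are referenced but not shown in this snippet. A plausible sketch of them, assuming they simply wrap HFileScanner.seekBefore() and a CellUtil.equals() assertion (not the actual HBase test code):

// Assumed reconstructions of the helpers used by the test above.
private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    // Seeking before cells[i] should succeed because cells[i - 1] exists in the file.
    assertTrue("Failed to seek before cell " + i, scanner.seekBefore(cells[i]));
}

private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i) throws IOException {
    // There is nothing before the first cell, so seekBefore() must return false.
    assertFalse("Unexpectedly seeked before cell " + i, scanner.seekBefore(cells[i]));
}

private void checkCell(Cell expected, Cell actual) {
    // CellUtil.equals compares row, family, qualifier, timestamp and type.
    assertTrue("Expected " + expected + " but got " + actual, CellUtil.equals(expected, actual));
}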

Aggregations

BloomType (org.apache.hadoop.hbase.regionserver.BloomType) 13
Map (java.util.Map) 6
TreeMap (java.util.TreeMap) 6
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm) 6
FileSystem (org.apache.hadoop.fs.FileSystem) 4
DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) 4
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext) 4
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) 4
StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter) 4
Configuration (org.apache.hadoop.conf.Configuration) 3
Path (org.apache.hadoop.fs.Path) 3
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) 3
ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder) 3
Compression (org.apache.hadoop.hbase.io.compress.Compression) 3
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig) 3
ArrayList (java.util.ArrayList) 2
HashMap (java.util.HashMap) 2
Cell (org.apache.hadoop.hbase.Cell) 2
KeyValue (org.apache.hadoop.hbase.KeyValue) 2
HalfStoreFileReader (org.apache.hadoop.hbase.io.HalfStoreFileReader) 2