Example 1 with CompoundBloomFilterWriter

use of org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter in project hbase by apache.

The class BloomFilterFactory, method createGeneralBloomAtWrite.

/**
   * Creates a new general (Row or RowCol) Bloom filter at the time of
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
   *
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf the cache configuration; controls whether Bloom blocks
   *        are cached on write
   * @param bloomType the type of Bloom filter to create (ROW or ROWCOL)
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *        Irrelevant if compound Bloom filters are enabled.
   * @param writer the HFile writer
   * @return the new Bloom filter, or null if Bloom filters are disabled
   *         or one could not be created
   */
public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf, CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) {
    if (!isGeneralBloomEnabled(conf)) {
        LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath() + (conf == null ? " (configuration is null)" : ""));
        return null;
    } else if (bloomType == BloomType.NONE) {
        LOG.trace("Bloom filter is turned off for the column family");
        return null;
    }
    float err = getErrorRate(conf);
    // For a ROWCOL Bloom filter, a lookup is effectively an OR of two
    // separate probes, so if each probe has false-positive rate p, the
    // combined rate is err = 1 - (1 - p)^2. Solving for p:
    // p = 1 - sqrt(1 - err).
    if (bloomType == BloomType.ROWCOL) {
        err = (float) (1 - Math.sqrt(1 - err));
    }
    int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
    // Do we support compound bloom filters?
    // In case of compound Bloom filters we ignore the maxKeys hint.
    CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
        err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
        bloomType == BloomType.ROWCOL ? CellComparator.COMPARATOR : null, bloomType);
    writer.addInlineBlockWriter(bloomWriter);
    return bloomWriter;
}
Also used : CompoundBloomFilterWriter(org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter)
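
For context, here is a minimal sketch of how this factory method is typically wired in when a store file is written. The class and helper names are hypothetical, and the assumption that StoreFileWriter performs equivalent wiring internally is ours, not stated above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterWriter;

public class GeneralBloomSketch {
    // Hypothetical helper; we assume StoreFileWriter does essentially this
    // wiring internally when a store file is created.
    static BloomFilterWriter attachGeneralBloom(Configuration conf, CacheConfig cacheConf,
            BloomType bloomType, HFile.Writer writer) {
        // The maxKeys estimate is ignored when compound Bloom filters are in
        // use, so a rough value (here 0) is acceptable.
        BloomFilterWriter bloom =
            BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, bloomType, 0, writer);
        if (bloom == null) {
            // Blooms are disabled by configuration, or bloomType was NONE.
            return null;
        }
        // createGeneralBloomAtWrite has already registered the Bloom writer as
        // an inline block writer on the HFile writer, so its blocks will be
        // flushed interleaved with data blocks as the file is written.
        return bloom;
    }
}

Because the factory already calls addInlineBlockWriter, a caller only needs to feed keys to the returned writer and close the HFile writer normally.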

Example 2 with CompoundBloomFilterWriter

use of org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter in project hbase by apache.

The class BloomFilterFactory, method createDeleteBloomAtWrite.

/**
   * Creates a new Delete Family Bloom filter at the time of
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf the cache configuration; controls whether Bloom blocks
   *        are cached on write
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *        Irrelevant if compound Bloom filters are enabled.
   * @param writer the HFile writer
   * @return the new Bloom filter, or null if Delete Family Bloom filters
   *         are disabled or one could not be created
   */
public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf, CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
    if (!isDeleteFamilyBloomEnabled(conf)) {
        LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath() + (conf == null ? " (configuration is null)" : ""));
        return null;
    }
    float err = getErrorRate(conf);
    int maxFold = getMaxFold(conf);
    // In case of compound Bloom filters we ignore the maxKeys hint.
    CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
        err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
        null, BloomType.ROW);
    writer.addInlineBlockWriter(bloomWriter);
    return bloomWriter;
}
Also used : CompoundBloomFilterWriter(org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter)
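
Both factory methods read their tuning from the Configuration. A minimal sketch of the relevant knobs, using the BloomFilterFactory constants referenced in the examples above (the concrete values shown are illustrative assumptions, not recommendations):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.BloomFilterFactory;

public class BloomConfigSketch {
    static Configuration tuneBloomSettings(Configuration conf) {
        // Enable or disable the general and Delete Family Bloom filters.
        conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
        conf.setBoolean(BloomFilterFactory.IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
        // Target false-positive rate, read by getErrorRate(conf).
        conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0.01f);
        // Folding and chunk-size knobs used when sizing compound Bloom blocks.
        conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_MAX_FOLD, 7);
        conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
        return conf;
    }
}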

Example 3 with CompoundBloomFilterWriter

use of org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter in project hbase by apache.

The class TestCompoundBloomFilter, method writeStoreFile.

private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs) throws IOException {
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZES[t]);
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCK_SIZES[t]).build();
    StoreFileWriter w = new StoreFileWriter.Builder(conf, cacheConf, fs)
        .withOutputDir(TEST_UTIL.getDataTestDir())
        .withBloomType(bt)
        .withFileContext(meta)
        .build();
    assertTrue(w.hasGeneralBloom());
    assertTrue(w.getGeneralBloomWriter() instanceof CompoundBloomFilterWriter);
    CompoundBloomFilterWriter cbbf = (CompoundBloomFilterWriter) w.getGeneralBloomWriter();
    int keyCount = 0;
    KeyValue prev = null;
    LOG.debug("Total keys/values to insert: " + kvs.size());
    for (KeyValue kv : kvs) {
        w.append(kv);
        // Validate the key count in the Bloom filter.
        boolean newKey = true;
        if (prev != null) {
            newKey = !(bt == BloomType.ROW ? CellUtil.matchingRows(kv, prev) : CellUtil.matchingRowColumn(kv, prev));
        }
        if (newKey)
            ++keyCount;
        assertEquals(keyCount, cbbf.getKeyCount());
        prev = kv;
    }
    w.close();
    return w.getPath();
}
Also used : CompoundBloomFilterWriter(org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter) KeyValue(org.apache.hadoop.hbase.KeyValue) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext)
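
The key-count assertion in the loop above relies on the compound Bloom writer adding one entry per distinct row (ROW) or row plus column (ROWCOL). A self-contained sketch of that distinct-key counting, with a hypothetical class name and made-up rows and values for illustration:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;

public class DistinctBloomKeys {
    // Counts how many Bloom keys a sorted sequence of KeyValues contributes:
    // one per distinct row for ROW blooms, one per distinct row+column for ROWCOL.
    static int countBloomKeys(List<KeyValue> kvs, BloomType bt) {
        int keyCount = 0;
        KeyValue prev = null;
        for (KeyValue kv : kvs) {
            boolean newKey = prev == null
                || !(bt == BloomType.ROW
                    ? CellUtil.matchingRows(kv, prev)
                    : CellUtil.matchingRowColumn(kv, prev));
            if (newKey) {
                ++keyCount;
            }
            prev = kv;
        }
        return keyCount;
    }

    public static void main(String[] args) {
        byte[] cf = Bytes.toBytes("f");
        byte[] q1 = Bytes.toBytes("q1");
        byte[] q2 = Bytes.toBytes("q2");
        // Made-up data: two cells in row1 (different columns), one in row2.
        List<KeyValue> kvs = Arrays.asList(
            new KeyValue(Bytes.toBytes("row1"), cf, q1, Bytes.toBytes("v")),
            new KeyValue(Bytes.toBytes("row1"), cf, q2, Bytes.toBytes("v")),
            new KeyValue(Bytes.toBytes("row2"), cf, q1, Bytes.toBytes("v")));
        System.out.println(countBloomKeys(kvs, BloomType.ROW));    // 2 distinct rows
        System.out.println(countBloomKeys(kvs, BloomType.ROWCOL)); // 3 distinct row+cols
    }
}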

Aggregations

CompoundBloomFilterWriter (org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter): 3 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 1 usage
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 1 usage
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 1 usage
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 1 usage