Search in sources :

Example 1 with BloomFilter

use of org.apache.hadoop.hbase.util.BloomFilter in project hbase by apache.

the class StoreFileReader method passesGeneralRowBloomFilter.

/**
 * Checks a row key against the general Bloom filter of this reader.
 *
 * <p>When no general Bloom filter is set the check trivially passes.
 * Otherwise the row array itself is handed to
 * {@code checkGeneralBloomFilter} as the key, which is why the row has
 * to span the entire array.
 *
 * @param row       array holding the row key
 * @param rowOffset offset of the row inside {@code row}; must be 0
 * @param rowLen    length of the row; must equal {@code row.length}
 * @return True if passes
 * @throws AssertionError if the row does not occupy the whole array
 */
private boolean passesGeneralRowBloomFilter(byte[] row, int rowOffset, int rowLen) {
    final BloomFilter filter = this.generalBloomFilter;
    if (filter == null) {
        // Nothing to check against.
        return true;
    }
    // ROW bloom: the key is the row array as-is, so no sub-range is allowed.
    final boolean coversWholeArray = rowOffset == 0 && rowLen == row.length;
    if (!coversWholeArray) {
        throw new AssertionError("For row-only Bloom filters the row must occupy the whole array");
    }
    return checkGeneralBloomFilter(row, null, filter);
}
Also used : BloomFilter(org.apache.hadoop.hbase.util.BloomFilter)

Example 2 with BloomFilter

use of org.apache.hadoop.hbase.util.BloomFilter in project hbase by apache.

the class HFilePrettyPrinter method printMeta.

/**
 * Prints the metadata of an HFile to {@code out}: the block index size, the
 * reader and trailer descriptions, every file-info entry (decoded according
 * to its key), the mid-key, and the general and delete-family Bloom filter
 * summaries.
 *
 * @param reader   reader of the HFile being described
 * @param fileInfo file-info entries to print
 * @throws IOException if a time range or Bloom filter cannot be read from its metadata
 */
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException {
    out.println("Block index size as per heapsize: " + reader.indexSize());
    out.println(asSeparateLines(reader.toString()));
    out.println("Trailer:\n    " + asSeparateLines(reader.getTrailer().toString()));
    out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
        final byte[] key = e.getKey();
        final byte[] value = e.getValue();
        out.print(FOUR_SPACES + Bytes.toString(key) + " = ");
        // The value encoding depends on the key, so decode accordingly.
        if (Bytes.equals(key, HStoreFile.MAX_SEQ_ID_KEY)
                || Bytes.equals(key, HStoreFile.DELETE_FAMILY_COUNT)
                || Bytes.equals(key, HStoreFile.EARLIEST_PUT_TS)
                || Bytes.equals(key, HFileWriterImpl.MAX_MEMSTORE_TS_KEY)
                || Bytes.equals(key, HFileInfo.CREATE_TIME_TS)
                || Bytes.equals(key, HStoreFile.BULKLOAD_TIME_KEY)) {
            out.println(Bytes.toLong(value));
        } else if (Bytes.equals(key, HStoreFile.TIMERANGE_KEY)) {
            TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(value);
            out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
        } else if (Bytes.equals(key, HFileInfo.AVG_KEY_LEN)
                || Bytes.equals(key, HFileInfo.AVG_VALUE_LEN)
                || Bytes.equals(key, HFileWriterImpl.KEY_VALUE_VERSION)
                || Bytes.equals(key, HFileInfo.MAX_TAGS_LEN)) {
            out.println(Bytes.toInt(value));
        } else if (Bytes.equals(key, HStoreFile.MAJOR_COMPACTION_KEY)
                || Bytes.equals(key, HFileInfo.TAGS_COMPRESSED)
                || Bytes.equals(key, HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY)) {
            out.println(Bytes.toBoolean(value));
        } else if (Bytes.equals(key, HFileInfo.LASTKEY)) {
            out.println(new KeyValue.KeyOnlyKeyValue(value).toString());
        } else {
            out.println(Bytes.toStringBinary(value));
        }
    }
    try {
        out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString));
    } catch (Exception ignored) {
        // Best effort: a missing mid-key must not abort the rest of the report.
        out.println("Unable to retrieve the midkey");
    }
    // Printing general bloom information
    printBloomFilterSection("Bloom filter:", reader.getGeneralBloomFilterMetadata(), reader);
    // Printing delete bloom information
    printBloomFilterSection("Delete Family Bloom filter:", reader.getDeleteBloomFilterMetadata(), reader);
}

/**
 * Prints one Bloom filter section: the heading followed by either the
 * filter statistics (one record per line) or "Not present".
 *
 * @param heading   heading line of the section
 * @param bloomMeta Bloom filter metadata, or {@code null} when there is none
 * @param reader    reader passed to the Bloom filter factory
 * @throws IOException if the Bloom filter cannot be created from the metadata
 */
private void printBloomFilterSection(String heading, DataInput bloomMeta, HFile.Reader reader) throws IOException {
    // Build the filter before printing the heading, so a failure leaves no dangling heading.
    BloomFilter bloomFilter = null;
    if (bloomMeta != null) {
        bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
    }
    out.println(heading);
    if (bloomFilter != null) {
        out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
        out.println(FOUR_SPACES + "Not present");
    }
}
Also used : DataInput(java.io.DataInput) TimeRangeTracker(org.apache.hadoop.hbase.regionserver.TimeRangeTracker) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) PrivateCellUtil(org.apache.hadoop.hbase.PrivateCellUtil) CellUtil(org.apache.hadoop.hbase.CellUtil) ParseException(org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException) IOException(java.io.IOException) BloomFilter(org.apache.hadoop.hbase.util.BloomFilter)

Example 3 with BloomFilter

use of org.apache.hadoop.hbase.util.BloomFilter in project hbase by apache.

the class StoreFileReader method passesGeneralRowPrefixBloomFilter.

/**
 * Checks the row prefix of a scan against the general Bloom filter of
 * this reader.
 *
 * <p>When no general Bloom filter is set the check trivially passes. For
 * a get-scan the key is the first {@code prefixLength} bytes of the start
 * row (or the whole start row when it is shorter). For any other scan the
 * filter is consulted only when the start and stop rows share a common
 * prefix of at least {@code prefixLength} bytes; otherwise the check
 * passes without looking at the filter.
 *
 * @param scan the scan whose start (and stop) row is examined
 * @return True if passes
 */
private boolean passesGeneralRowPrefixBloomFilter(Scan scan) {
    final BloomFilter filter = this.generalBloomFilter;
    if (filter == null) {
        return true;
    }
    final byte[] startRow = scan.getStartRow();
    final int keyLength;
    if (scan.isGetScan()) {
        // The start row may be shorter than the configured prefix.
        keyLength = Math.min(prefixLength, startRow.length);
    } else {
        // For non-get scans, measure the common prefix of startRow and stopRow.
        final byte[] stopRow = scan.getStopRow();
        final int sharedLength =
            Bytes.findCommonPrefix(startRow, stopRow, startRow.length, stopRow.length, 0, 0);
        // No common prefix, or one shorter than prefixLength: the filter is not consulted.
        if (sharedLength <= 0 || sharedLength < prefixLength) {
            return true;
        }
        keyLength = prefixLength;
    }
    final byte[] rowPrefix = Bytes.copy(startRow, 0, keyLength);
    return checkGeneralBloomFilter(rowPrefix, null, filter);
}
Also used : BloomFilter(org.apache.hadoop.hbase.util.BloomFilter)

Example 4 with BloomFilter

use of org.apache.hadoop.hbase.util.BloomFilter in project hbase by apache.

the class StoreFileReader method passesGeneralRowColBloomFilter.

/**
 * Checks a cell against the general Bloom filter of this reader, using a
 * row/column key.
 *
 * <p>When no general Bloom filter is set the check trivially passes.
 *
 * @param cell
 *          the cell to check if present in BloomFilter
 * @return True if passes
 */
public boolean passesGeneralRowColBloomFilter(Cell cell) {
    final BloomFilter filter = this.generalBloomFilter;
    if (filter == null) {
        return true;
    }
    // ROW_COL bloom: a cell of type Maximum with an empty family is treated as
    // an already-built fake row/col key and is used unchanged.
    final boolean alreadyFakeRowColKey =
        cell.getTypeByte() == KeyValue.Type.Maximum.getCode() && cell.getFamilyLength() == 0;
    final Cell rowColKey = alreadyFakeRowColKey ? cell : PrivateCellUtil.createFirstOnRowCol(cell);
    return checkGeneralBloomFilter(null, rowColKey, filter);
}
Also used : Cell(org.apache.hadoop.hbase.Cell) BloomFilter(org.apache.hadoop.hbase.util.BloomFilter)

Aggregations

BloomFilter (org.apache.hadoop.hbase.util.BloomFilter): 4, DataInput (java.io.DataInput): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, Map (java.util.Map): 1, SortedMap (java.util.SortedMap): 1, Cell (org.apache.hadoop.hbase.Cell): 1, CellUtil (org.apache.hadoop.hbase.CellUtil): 1, PrivateCellUtil (org.apache.hadoop.hbase.PrivateCellUtil): 1, TimeRangeTracker (org.apache.hadoop.hbase.regionserver.TimeRangeTracker): 1, ParseException (org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException): 1