Search in sources :

Example 1 with BloomFilterHeader

use of org.apache.parquet.format.BloomFilterHeader in project parquet-mr by apache.

the class ParquetFileReader method readBloomFilter.

/**
 * Reads the Bloom filter of the given column chunk.
 *
 * <p>This method is best-effort: instead of failing it returns {@code null} when the
 * column chunk metadata reports a negative Bloom filter offset, when the Bloom filter
 * header cannot be read, when the size declared in the header is not within
 * {@code (0, BlockSplitBloomFilter.UPPER_BOUND_BYTES]}, or when the header declares a
 * hash, algorithm or compression other than XXHASH / BLOCK / UNCOMPRESSED.
 *
 * @param meta the column chunk's ColumnChunkMetaData, which supplies the Bloom filter offset
 * @return the BloomFilter read from the file, or {@code null} if no usable Bloom filter
 *         could be read (see above)
 * @throws IOException if an I/O error occurs while seeking to the Bloom filter or reading
 *         its bitset (a failure to read the header is logged and reported as {@code null})
 * @throws ParquetCryptoRuntimeException if the decrypted bitset length does not match the
 *         size declared in the header
 */
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
    long bloomFilterOffset = meta.getBloomFilterOffset();
    if (bloomFilterOffset < 0) {
        return null;
    }
    // Prepare to decrypt Bloom filter (for encrypted columns).
    // All three stay null for plaintext files and for unencrypted columns.
    BlockCipher.Decryptor bloomFilterDecryptor = null;
    byte[] bloomFilterHeaderAAD = null;
    byte[] bloomFilterBitsetAAD = null;
    if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
        InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
        if (columnDecryptionSetup.isEncrypted()) {
            bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
            // The header and the bitset are separate modules, each with its own AAD.
            bloomFilterHeaderAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterHeader, meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
            bloomFilterBitsetAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterBitset, meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
        }
    }
    // Read Bloom filter data header.
    f.seek(bloomFilterOffset);
    BloomFilterHeader bloomFilterHeader;
    try {
        bloomFilterHeader = Util.readBloomFilterHeader(f, bloomFilterDecryptor, bloomFilterHeaderAAD);
    } catch (IOException e) {
        // Best effort: an unreadable header means "no Bloom filter", but keep the cause
        // in the log so a corrupt or undecryptable header can be diagnosed.
        LOG.warn("read no bloom filter", e);
        return null;
    }
    // Reject implausible sizes before allocating a buffer of that size.
    int numBytes = bloomFilterHeader.getNumBytes();
    if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
        LOG.warn("the read bloom filter size is wrong, size is {}", numBytes);
        return null;
    }
    // Only the XXHASH / BLOCK / UNCOMPRESSED combination is supported by this reader.
    if (!bloomFilterHeader.getHash().isSetXXHASH() || !bloomFilterHeader.getAlgorithm().isSetBLOCK() || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
        LOG.warn("the read bloom filter is not supported yet,  algorithm = {}, hash = {}, compression = {}", bloomFilterHeader.getAlgorithm(), bloomFilterHeader.getHash(), bloomFilterHeader.getCompression());
        return null;
    }
    byte[] bitset;
    if (null == bloomFilterDecryptor) {
        // Plaintext bitset: read exactly the number of bytes declared in the header.
        bitset = new byte[numBytes];
        f.readFully(bitset);
    } else {
        // Encrypted bitset: the decryptor reads from the stream itself; verify the
        // plaintext length against the header afterwards.
        bitset = bloomFilterDecryptor.decrypt(f, bloomFilterBitsetAAD);
        if (bitset.length != numBytes) {
            throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
        }
    }
    return new BlockSplitBloomFilter(bitset);
}
Also used : BlockSplitBloomFilter(org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter) BlockCipher(org.apache.parquet.format.BlockCipher) InternalColumnDecryptionSetup(org.apache.parquet.crypto.InternalColumnDecryptionSetup) ParquetCryptoRuntimeException(org.apache.parquet.crypto.ParquetCryptoRuntimeException) BloomFilterHeader(org.apache.parquet.format.BloomFilterHeader) IOException(java.io.IOException)

Example 2 with BloomFilterHeader

use of org.apache.parquet.format.BloomFilterHeader in project parquet-mr by apache.

the class ParquetMetadataConverter method toBloomFilterHeader.

/**
 * Builds the Thrift {@code BloomFilterHeader} describing the given Bloom filter.
 *
 * <p>Only one value of each setting is mapped to a Thrift structure: the BLOCK
 * algorithm, the XXH64 hash strategy and UNCOMPRESSED compression. The header carries
 * the filter's bitset size together with these three mapped settings.
 *
 * @param bloomFilter the Bloom filter whose settings and bitset size are described
 * @return the Thrift header for {@code bloomFilter}
 * @throws IllegalArgumentException if the filter's algorithm, hash strategy or
 *         compression could not be mapped to a Thrift structure
 */
public static BloomFilterHeader toBloomFilterHeader(org.apache.parquet.column.values.bloomfilter.BloomFilter bloomFilter) {
    // Map each setting to its Thrift union value; null marks "no mapping available".
    final BloomFilterAlgorithm thriftAlgorithm = bloomFilter.getAlgorithm() == BloomFilter.Algorithm.BLOCK
            ? BloomFilterAlgorithm.BLOCK(new SplitBlockAlgorithm())
            : null;
    final BloomFilterHash thriftHash = bloomFilter.getHashStrategy() == BloomFilter.HashStrategy.XXH64
            ? BloomFilterHash.XXHASH(new XxHash())
            : null;
    final BloomFilterCompression thriftCompression = bloomFilter.getCompression() == BloomFilter.Compression.UNCOMPRESSED
            ? BloomFilterCompression.UNCOMPRESSED(new Uncompressed())
            : null;

    // Guard clause: refuse to build a header if any setting has no mapping.
    if (thriftAlgorithm == null || thriftHash == null || thriftCompression == null) {
        throw new IllegalArgumentException(String.format(
                "Failed to build thrift structure for BloomFilterHeader,algorithm=%s, hash=%s, compression=%s",
                bloomFilter.getAlgorithm(), bloomFilter.getHashStrategy(), bloomFilter.getCompression()));
    }
    return new BloomFilterHeader(bloomFilter.getBitsetSize(), thriftAlgorithm, thriftHash, thriftCompression);
}
Also used : SplitBlockAlgorithm(org.apache.parquet.format.SplitBlockAlgorithm) Uncompressed(org.apache.parquet.format.Uncompressed) BloomFilterAlgorithm(org.apache.parquet.format.BloomFilterAlgorithm) BloomFilterCompression(org.apache.parquet.format.BloomFilterCompression) XxHash(org.apache.parquet.format.XxHash) BloomFilterHeader(org.apache.parquet.format.BloomFilterHeader) BloomFilterHash(org.apache.parquet.format.BloomFilterHash)

Aggregations

BloomFilterHeader (org.apache.parquet.format.BloomFilterHeader)2 IOException (java.io.IOException)1 BlockSplitBloomFilter (org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter)1 InternalColumnDecryptionSetup (org.apache.parquet.crypto.InternalColumnDecryptionSetup)1 ParquetCryptoRuntimeException (org.apache.parquet.crypto.ParquetCryptoRuntimeException)1 BlockCipher (org.apache.parquet.format.BlockCipher)1 BloomFilterAlgorithm (org.apache.parquet.format.BloomFilterAlgorithm)1 BloomFilterCompression (org.apache.parquet.format.BloomFilterCompression)1 BloomFilterHash (org.apache.parquet.format.BloomFilterHash)1 SplitBlockAlgorithm (org.apache.parquet.format.SplitBlockAlgorithm)1 Uncompressed (org.apache.parquet.format.Uncompressed)1 XxHash (org.apache.parquet.format.XxHash)1