Use of org.apache.parquet.format.BloomFilterHeader in project parquet-mr by apache:
class ParquetFileReader, method readBloomFilter.
/**
 * Reads the Bloom filter data for the given column chunk.
 *
 * @param meta a column's ColumnChunkMetaData to read the Bloom filter from
 * @return a BloomFilter object, or {@code null} if the chunk has no Bloom filter,
 *         the header cannot be read, or the filter is of an unsupported kind
 * @throws IOException if there is an error while reading the Bloom filter.
 */
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
  long bloomFilterOffset = meta.getBloomFilterOffset();
  // A negative offset means no Bloom filter was written for this column chunk.
  if (bloomFilterOffset < 0) {
    return null;
  }

  // Prepare to decrypt Bloom filter (for encrypted columns). The header and the
  // bitset are encrypted as separate modules, so each needs its own AAD.
  BlockCipher.Decryptor bloomFilterDecryptor = null;
  byte[] bloomFilterHeaderAAD = null;
  byte[] bloomFilterBitsetAAD = null;
  if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
    InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
    if (columnDecryptionSetup.isEncrypted()) {
      bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
      bloomFilterHeaderAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterHeader,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
      bloomFilterBitsetAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.BloomFilterBitset,
          meta.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
    }
  }

  // Read Bloom filter data header.
  f.seek(bloomFilterOffset);
  BloomFilterHeader bloomFilterHeader;
  try {
    bloomFilterHeader = Util.readBloomFilterHeader(f, bloomFilterDecryptor, bloomFilterHeaderAAD);
  } catch (IOException e) {
    // Log the cause instead of swallowing it; a missing/corrupt filter is non-fatal.
    LOG.warn("read no bloom filter", e);
    return null;
  }

  // Sanity-check the declared bitset size before allocating a buffer for it.
  int numBytes = bloomFilterHeader.getNumBytes();
  if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
    LOG.warn("the read bloom filter size is wrong, size is {}", numBytes);
    return null;
  }

  // Only the block-split algorithm with xxHash and no compression is supported.
  if (!bloomFilterHeader.getHash().isSetXXHASH()
      || !bloomFilterHeader.getAlgorithm().isSetBLOCK()
      || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
    LOG.warn("the read bloom filter is not supported yet, algorithm = {}, hash = {}, compression = {}",
        bloomFilterHeader.getAlgorithm(), bloomFilterHeader.getHash(), bloomFilterHeader.getCompression());
    return null;
  }

  // Read (and, for encrypted columns, decrypt) the bitset that follows the header.
  byte[] bitset;
  if (null == bloomFilterDecryptor) {
    bitset = new byte[numBytes];
    f.readFully(bitset);
  } else {
    bitset = bloomFilterDecryptor.decrypt(f, bloomFilterBitsetAAD);
    if (bitset.length != numBytes) {
      throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
    }
  }
  return new BlockSplitBloomFilter(bitset);
}
Use of org.apache.parquet.format.BloomFilterHeader in project parquet-mr by apache:
class ParquetMetadataConverter, method toBloomFilterHeader.
/**
 * Converts an internal Bloom filter to its thrift {@link BloomFilterHeader} representation.
 *
 * @param bloomFilter the Bloom filter whose header is to be built
 * @return a thrift header carrying the bitset size, algorithm, hash strategy and compression
 * @throws IllegalArgumentException if the filter's algorithm, hash strategy or compression
 *         has no thrift counterpart (only BLOCK / XXH64 / UNCOMPRESSED are supported)
 */
public static BloomFilterHeader toBloomFilterHeader(org.apache.parquet.column.values.bloomfilter.BloomFilter bloomFilter) {
  BloomFilterAlgorithm algorithm = null;
  BloomFilterHash hashStrategy = null;
  BloomFilterCompression compression = null;
  if (bloomFilter.getAlgorithm() == BloomFilter.Algorithm.BLOCK) {
    algorithm = BloomFilterAlgorithm.BLOCK(new SplitBlockAlgorithm());
  }
  if (bloomFilter.getHashStrategy() == BloomFilter.HashStrategy.XXH64) {
    hashStrategy = BloomFilterHash.XXHASH(new XxHash());
  }
  if (bloomFilter.getCompression() == BloomFilter.Compression.UNCOMPRESSED) {
    compression = BloomFilterCompression.UNCOMPRESSED(new Uncompressed());
  }
  // Fail fast if any component could not be mapped to a thrift equivalent.
  if (algorithm == null || hashStrategy == null || compression == null) {
    // Note: original message was missing a space after the comma ("...Header,algorithm=...").
    throw new IllegalArgumentException(String.format(
        "Failed to build thrift structure for BloomFilterHeader, algorithm=%s, hash=%s, compression=%s",
        bloomFilter.getAlgorithm(), bloomFilter.getHashStrategy(), bloomFilter.getCompression()));
  }
  return new BloomFilterHeader(bloomFilter.getBitsetSize(), algorithm, hashStrategy, compression);
}
Aggregations