Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in project hbase by apache.
From the class TestRowPrefixBloomFilter, the method testRowPrefixBloomFilterWithScan:
@Test
public void testRowPrefixBloomFilterWithScan() throws Exception {
FileSystem fs = FileSystem.getLocal(conf);
int expKeys = fixedLengthExpKeys;
// write the file
Path f = new Path(testDir, name.getMethodName());
writeStoreFile(f, bt, expKeys);
ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
HFileInfo fileInfo = new HFileInfo(context, conf);
StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
fileInfo.initMetaAndIndex(reader.getHFileReader());
reader.loadFileInfo();
reader.loadBloomfilter();
StoreFileScanner scanner = getStoreFileScanner(reader);
HStore store = mock(HStore.class);
when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
// Scan with a valid row range: startRow and stopRow share a common prefix,
// and the length of the common prefix is at least prefixLength.
// prefix row in bloom
String prefixRow = String.format(prefixFormatter, prefixRowCount - 2);
String startRow = generateRowWithSuffix(prefixRow, 0);
String stopRow = generateRowWithSuffix(prefixRow, 1);
Scan scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
assertTrue(exists);
// prefix row not in bloom
prefixRow = String.format(prefixFormatter, prefixRowCount - 1);
startRow = generateRowWithSuffix(prefixRow, 0);
stopRow = generateRowWithSuffix(prefixRow, 1);
scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
assertFalse(exists);
// No stopRow is set, so there is no common prefix between startRow and stopRow.
prefixRow = String.format(prefixFormatter, prefixRowCount - 2);
startRow = generateRowWithSuffix(prefixRow, 0);
scan = new Scan().withStartRow(Bytes.toBytes(startRow));
exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
assertTrue(exists);
// startRow and stopRow have a common prefix.
// But the length of the common prefix is less than prefixLength.
String prefixStartRow = String.format(prefixFormatter, prefixRowCount - 2);
String prefixStopRow = String.format(prefixFormatter, prefixRowCount - 1);
startRow = generateRowWithSuffix(prefixStartRow, 0);
stopRow = generateRowWithSuffix(prefixStopRow, 0);
scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
assertTrue(exists);
// evict because we are about to delete the file
reader.close(true);
fs.delete(f, true);
}
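The ReaderContext-based open sequence above (build a ReaderContext, wrap it in an HFileInfo, construct the StoreFileReader, then initialize metadata, file info and Bloom filters) is repeated by several of these tests. A minimal sketch of that sequence as a reusable test helper, assuming the same test-class fields (conf, cacheConf) and imports used in the examples:
// Sketch of a test helper that opens a StoreFileReader the same way the tests above do.
// Assumes conf and cacheConf are the test-class fields used in the surrounding examples.
private StoreFileReader openReader(FileSystem fs, Path path) throws IOException {
  ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
  HFileInfo fileInfo = new HFileInfo(context, conf);
  StoreFileReader reader =
    new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
  // Metadata and block indexes must be initialized before file info and Bloom filters load.
  fileInfo.initMetaAndIndex(reader.getHFileReader());
  reader.loadFileInfo();
  reader.loadBloomfilter();
  return reader;
}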
Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in project hbase by apache.
From the class BulkLoadHFilesTool, the method copyHFileHalf:
/**
* Copy half of an HFile into a new HFile.
*/
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException {
FileSystem fs = inFile.getFileSystem(conf);
CacheConfig cacheConf = CacheConfig.DISABLED;
HalfStoreFileReader halfReader = null;
StoreFileWriter halfWriter = null;
try {
ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, inFile).build();
HFileInfo hfile = new HFileInfo(context, conf);
halfReader = new HalfStoreFileReader(context, hfile, cacheConf, reference, new AtomicInteger(0), conf);
hfile.initMetaAndIndex(halfReader.getHFileReader());
Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();
int blocksize = familyDescriptor.getBlocksize();
Algorithm compression = familyDescriptor.getCompressionType();
BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
HFileContext hFileContext = new HFileContextBuilder().withCompression(compression).withChecksumType(StoreUtils.getChecksumType(conf)).withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf)).withBlockSize(blocksize).withDataBlockEncoding(familyDescriptor.getDataBlockEncoding()).withIncludesTags(true).build();
halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs).withFilePath(outFile).withBloomType(bloomFilterType).withFileContext(hFileContext).build();
HFileScanner scanner = halfReader.getScanner(false, false, false);
scanner.seekTo();
do {
halfWriter.append(scanner.getCell());
} while (scanner.next());
for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
if (shouldCopyHFileMetaKey(entry.getKey())) {
halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
}
}
} finally {
if (halfReader != null) {
try {
halfReader.close(cacheConf.shouldEvictOnClose());
} catch (IOException e) {
LOG.warn("failed to close hfile reader for " + inFile, e);
}
}
if (halfWriter != null) {
halfWriter.close();
}
}
}
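copyHFileHalf is private to BulkLoadHFilesTool and is driven by a Reference that marks which half of a split an HFile belongs to. A hedged caller sketch, assuming hypothetical splitRow, inFile, outDir and familyDescriptor values, and using the Reference factory methods from org.apache.hadoop.hbase.io.Reference:
// Hypothetical caller sketch: materialize both halves of an HFile around a split row.
// splitRow, inFile, outDir and familyDescriptor are assumed to be defined by the caller.
Reference bottom = Reference.createBottomReference(splitRow);
Reference top = Reference.createTopReference(splitRow);
copyHFileHalf(conf, inFile, new Path(outDir, "bottom_half"), bottom, familyDescriptor);
copyHFileHalf(conf, inFile, new Path(outDir, "top_half"), top, familyDescriptor);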
Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in project hbase by apache.
From the class TestHStoreFile, the method testReseek:
/**
* Test for HBASE-8012
*/
@Test
public void testReseek() throws Exception {
// write the file
Path f = new Path(ROOT_DIR, name.getMethodName());
HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
// Make a store file and write data to it.
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f).withFileContext(meta).build();
writeStoreFile(writer);
writer.close();
ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
HFileInfo fileInfo = new HFileInfo(context, conf);
StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
fileInfo.initMetaAndIndex(reader.getHFileReader());
// Now do reseek with empty KV to position to the beginning of the file
KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
StoreFileScanner s = getStoreFileScanner(reader, false, false);
s.reseek(k);
assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
}
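Once the reseek has positioned the scanner at the first cell, the StoreFileScanner can be drained with the same peek/next API the test already exercises; a minimal sketch:
// Minimal sketch: iterate the cells after the initial reseek, using only the
// StoreFileScanner API (peek/next/close) used in the test above.
Cell cell;
while ((cell = s.next()) != null) {
  // process the cell, e.g. assert ordering or count rows
}
s.close();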
Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in project hbase by apache.
From the class HStoreFile, the method open:
/**
* Opens the reader on this store file. Called by the constructor.
* @see #closeStoreFile(boolean)
*/
private void open() throws IOException {
fileInfo.initHDFSBlocksDistribution();
long readahead = fileInfo.isNoReadahead() ? 0L : -1L;
ReaderContext context = fileInfo.createReaderContext(false, readahead, ReaderType.PREAD);
fileInfo.initHFileInfo(context);
StoreFileReader reader = fileInfo.preStoreFileReaderOpen(context, cacheConf);
if (reader == null) {
reader = fileInfo.createReader(context, cacheConf);
fileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
}
this.initialReader = fileInfo.postStoreFileReaderOpen(context, cacheConf, reader);
if (InputStreamBlockDistribution.isEnabled(fileInfo.getConf())) {
boolean useHBaseChecksum = context.getInputStreamWrapper().shouldUseHBaseChecksum();
FSDataInputStream stream = context.getInputStreamWrapper().getStream(useHBaseChecksum);
this.initialReaderBlockDistribution = new InputStreamBlockDistribution(stream, fileInfo);
}
// Load up indices and fileinfo. This also loads Bloom filter type.
metadataMap = Collections.unmodifiableMap(initialReader.loadFileInfo());
// Read in our metadata.
byte[] b = metadataMap.get(MAX_SEQ_ID_KEY);
if (b != null) {
// By convention, if this is a half HFile, the top half has a sequence number greater than
// the bottom half. That's why we add one below. It's done in case the two halves are ever
// merged back together (rare). Without it, on open of the store, since store files are
// distinguished by sequence id, one half would subsume the other.
this.sequenceid = Bytes.toLong(b);
if (fileInfo.isTopReference()) {
this.sequenceid += 1;
}
}
if (isBulkLoadResult()) {
// generate the sequenceId from the fileName
// fileName is of the form <randomName>_SeqId_<id-when-loaded>_
String fileName = this.getPath().getName();
// Use lastIndexOf() to get the last, most recent bulk load seqId.
int startPos = fileName.lastIndexOf("SeqId_");
if (startPos != -1) {
this.sequenceid = Long.parseLong(fileName.substring(startPos + 6, fileName.indexOf('_', startPos + 6)));
// Handle reference files as done above.
if (fileInfo.isTopReference()) {
this.sequenceid += 1;
}
}
// SKIP_RESET_SEQ_ID only works in bulk loaded file.
// In mob compaction, the hfile whose cells contain the paths of new mob files is bulk
// loaded into hbase; these cells keep the same seqIds as the old ones. We do not want
// to assign new seqIds to them, since that could break the visibility of cells that
// have the same row key but different seqIds.
boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID));
if (skipResetSeqId) {
// increase the seqId when it is a bulk loaded file from mob compaction.
this.sequenceid += 1;
}
initialReader.setSkipResetSeqId(skipResetSeqId);
initialReader.setBulkLoaded(true);
}
initialReader.setSequenceID(this.sequenceid);
b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
if (b != null) {
this.maxMemstoreTS = Bytes.toLong(b);
}
b = metadataMap.get(MAJOR_COMPACTION_KEY);
if (b != null) {
boolean mc = Bytes.toBoolean(b);
if (this.majorCompaction == null) {
this.majorCompaction = new AtomicBoolean(mc);
} else {
this.majorCompaction.set(mc);
}
} else {
// Presume it is not major compacted if it doesn't explicitly say so.
// HFileOutputFormat explicitly sets the major-compacted key.
this.majorCompaction = new AtomicBoolean(false);
}
b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
BloomType hfileBloomType = initialReader.getBloomFilterType();
if (cfBloomType != BloomType.NONE) {
initialReader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
if (hfileBloomType != cfBloomType) {
LOG.debug("HFile Bloom filter type for " + initialReader.getHFileReader().getName() + ": " + hfileBloomType + ", but " + cfBloomType + " specified in column family " + "configuration");
}
} else if (hfileBloomType != BloomType.NONE) {
LOG.info("Bloom filter turned off by CF config for " + initialReader.getHFileReader().getName());
}
// load delete family bloom filter
initialReader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
try {
byte[] data = metadataMap.get(TIMERANGE_KEY);
initialReader.timeRange = data == null ? null : TimeRangeTracker.parseFrom(data).toTimeRange();
} catch (IllegalArgumentException e) {
LOG.error("Error reading timestamp range data from meta -- " + "proceeding without", e);
this.initialReader.timeRange = null;
}
try {
byte[] data = metadataMap.get(COMPACTION_EVENT_KEY);
this.compactedStoreFiles.addAll(ProtobufUtil.toCompactedStoreFiles(data));
} catch (IOException e) {
LOG.error("Error reading compacted storefiles from meta data", e);
}
// Initialize these so we can reuse them after the reader is closed.
firstKey = initialReader.getFirstKey();
lastKey = initialReader.getLastKey();
comparator = initialReader.getComparator();
}
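The bulk-load branch above derives the sequence id from a file name of the form <randomName>_SeqId_<id-when-loaded>_. An illustrative standalone sketch of just that parsing step (not an HBase API; the helper name is hypothetical):
// Illustrative helper mirroring the parsing logic in open(); not part of HBase.
static long parseBulkLoadSeqId(String fileName) {
  int startPos = fileName.lastIndexOf("SeqId_");
  if (startPos == -1) {
    return -1L; // not a bulk-loaded file name
  }
  int idStart = startPos + "SeqId_".length();
  return Long.parseLong(fileName.substring(idStart, fileName.indexOf('_', idStart)));
}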
Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in project hbase by apache.
From the class TestRowPrefixBloomFilter, the method testRowPrefixBloomFilter:
@Test
public void testRowPrefixBloomFilter() throws Exception {
FileSystem fs = FileSystem.getLocal(conf);
float expErr = 2 * prefixRowCount * suffixRowCount * err;
int expKeys = fixedLengthExpKeys;
// write the file
Path f = new Path(testDir, name.getMethodName());
writeStoreFile(f, bt, expKeys);
// read the file
ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
HFileInfo fileInfo = new HFileInfo(context, conf);
StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
fileInfo.initMetaAndIndex(reader.getHFileReader());
reader.loadFileInfo();
reader.loadBloomfilter();
// check basic param
assertEquals(bt, reader.getBloomFilterType());
assertEquals(prefixLength, reader.getPrefixLength());
assertEquals(expKeys, reader.getGeneralBloomFilter().getKeyCount());
StoreFileScanner scanner = getStoreFileScanner(reader);
HStore store = mock(HStore.class);
when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
// check false positives rate
int falsePos = 0;
int falseNeg = 0;
for (int i = 0; i < prefixRowCount; i++) {
// prefix rows
String prefixRow = String.format(prefixFormatter, i);
for (int j = 0; j < suffixRowCount; j++) {
// suffix rows
String startRow = generateRowWithSuffix(prefixRow, j);
String stopRow = generateRowWithSuffix(prefixRow, j + 1);
Scan scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
boolean shouldPrefixRowExist = i % 2 == 0;
if (shouldPrefixRowExist) {
if (!exists) {
falseNeg++;
}
} else {
if (exists) {
falsePos++;
}
}
}
}
for (int i = prefixRowCount; i < prefixRowCount * 2; i++) {
// prefix rows
String row = String.format(invalidFormatter, i);
Scan scan = new Scan(new Get(Bytes.toBytes(row)));
boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
boolean shouldPrefixRowExist = i % 2 == 0;
if (shouldPrefixRowExist) {
if (!exists) {
falseNeg++;
}
} else {
if (exists) {
falsePos++;
}
}
}
// evict because we are about to delete the file
reader.close(true);
fs.delete(f, true);
assertEquals("False negatives: " + falseNeg, 0, falseNeg);
int maxFalsePos = (int) (2 * expErr);
assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than " + maxFalsePos + ")", falsePos <= maxFalsePos);
}
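The tests above exercise a fixed-length row-prefix Bloom filter. A hedged sketch of how such a filter is typically enabled on a column family (the prefix-length configuration key name is an assumption here; verify it against the HBase version in use):
// Hedged sketch: configure a ROWPREFIX_FIXED_LENGTH Bloom filter on a column family.
// "RowPrefixBloomFilter.prefix_length" is assumed to be the prefix-length setting key.
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("family"))
  .setBloomFilterType(BloomType.ROWPREFIX_FIXED_LENGTH)
  .setConfiguration("RowPrefixBloomFilter.prefix_length", "10")
  .build();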