Use of org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder in project hbase by apache.
From the class StoreFileInfo, method createReaderContext.
ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type)
  throws IOException {
  FSDataInputStreamWrapper in;
  FileStatus status;
  if (this.link != null) {
    // HFileLink
    in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
    status = this.link.getFileStatus(fs);
  } else if (this.reference != null) {
    // HFile Reference
    Path referencePath = getReferredToFile(this.getPath());
    try {
      in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
    } catch (FileNotFoundException fnfe) {
      // Intercept the exception so we can insert more info about the Reference; otherwise the
      // exception just complains about some random file -- the operator doesn't realize it is
      // the other end of a Reference
      FileNotFoundException newFnfe = new FileNotFoundException(toString());
      newFnfe.initCause(fnfe);
      throw newFnfe;
    }
    status = fs.getFileStatus(referencePath);
  } else {
    in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
    status = fs.getFileStatus(initialPath);
  }
  long length = status.getLen();
  ReaderContextBuilder contextBuilder =
    new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length)
      .withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).withFileSystem(fs);
  if (this.reference != null) {
    contextBuilder.withFilePath(this.getPath());
  } else {
    contextBuilder.withFilePath(status.getPath());
  }
  return contextBuilder.build();
}
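For reference, here is a minimal sketch of the same builder used directly against an ordinary store file, outside StoreFileInfo. It assumes fs is an open FileSystem and path names an existing HFile with no HFileLink or Reference indirection; the primary-replica and reader-type values are assumptions, not taken from the method above.

// A minimal sketch, not HBase source: build a ReaderContext for a plain file.
FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fs, path);
long fileSize = fs.getFileStatus(path).getLen();
ReaderContext readerContext = new ReaderContextBuilder()
  .withInputStreamWrapper(wrapper)
  .withFileSize(fileSize)
  .withFileSystem(fs)
  .withFilePath(path)
  .withPrimaryReplicaReader(true) // assumption: reading as the primary replica
  .withReaderType(ReaderContext.ReaderType.PREAD) // assumption: positional-read reader
  .build();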
Use of org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder in project hbase by apache.
From the class TestRowPrefixBloomFilter, method testRowPrefixBloomFilter.
@Test
public void testRowPrefixBloomFilter() throws Exception {
  FileSystem fs = FileSystem.getLocal(conf);
  float expErr = 2 * prefixRowCount * suffixRowCount * err;
  int expKeys = fixedLengthExpKeys;
  // write the file
  Path f = new Path(testDir, name.getMethodName());
  writeStoreFile(f, bt, expKeys);
  // read the file
  ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
  HFileInfo fileInfo = new HFileInfo(context, conf);
  StoreFileReader reader =
    new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
  fileInfo.initMetaAndIndex(reader.getHFileReader());
  reader.loadFileInfo();
  reader.loadBloomfilter();
  // check basic params
  assertEquals(bt, reader.getBloomFilterType());
  assertEquals(prefixLength, reader.getPrefixLength());
  assertEquals(expKeys, reader.getGeneralBloomFilter().getKeyCount());
  StoreFileScanner scanner = getStoreFileScanner(reader);
  HStore store = mock(HStore.class);
  when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
  // check false positives rate
  int falsePos = 0;
  int falseNeg = 0;
  for (int i = 0; i < prefixRowCount; i++) {
    // prefix rows
    String prefixRow = String.format(prefixFormatter, i);
    for (int j = 0; j < suffixRowCount; j++) {
      // suffix rows
      String startRow = generateRowWithSuffix(prefixRow, j);
      String stopRow = generateRowWithSuffix(prefixRow, j + 1);
      Scan scan =
        new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      boolean shouldPrefixRowExist = i % 2 == 0;
      if (shouldPrefixRowExist) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
  }
  for (int i = prefixRowCount; i < prefixRowCount * 2; i++) {
    // rows with invalid (never-written) prefixes
    String row = String.format(invalidFormatter, i);
    Scan scan = new Scan(new Get(Bytes.toBytes(row)));
    boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
    boolean shouldPrefixRowExist = i % 2 == 0;
    if (shouldPrefixRowExist) {
      if (!exists) {
        falseNeg++;
      }
    } else {
      if (exists) {
        falsePos++;
      }
    }
  }
  // evict because we are about to delete the file
  reader.close(true);
  fs.delete(f, true);
  assertEquals("False negatives: " + falseNeg, 0, falseNeg);
  int maxFalsePos = (int) (2 * expErr);
  assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
    + maxFalsePos + ")", falsePos <= maxFalsePos);
}
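The open sequence above (ReaderContext, then HFileInfo, then StoreFileReader, then initMetaAndIndex, loadFileInfo, loadBloomfilter) recurs nearly verbatim in the snippets below. A hypothetical helper that factors it out might look like this; openStoreFileReader is our name, not an HBase API, and it assumes conf and cacheConf are in scope as in these tests.

// A hypothetical helper, not part of HBase: the reader-open sequence shared by these tests.
private StoreFileReader openStoreFileReader(FileSystem fs, Path f) throws IOException {
  ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
  HFileInfo fileInfo = new HFileInfo(context, conf);
  StoreFileReader reader =
    new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
  // Meta and index blocks must be initialized before the reader is usable.
  fileInfo.initMetaAndIndex(reader.getHFileReader());
  reader.loadFileInfo();
  reader.loadBloomfilter();
  return reader;
}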
Use of org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder in project hbase by apache.
From the class TestStoreFileScannerWithTagCompression, method testReseek.
@Test
public void testReseek() throws Exception {
  // write the file
  Path f = new Path(ROOT_DIR, "testReseek");
  HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).withIncludesTags(true)
    .withCompressTags(true).withDataBlockEncoding(DataBlockEncoding.PREFIX).build();
  // Make a store file and write data to it.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs).withFilePath(f)
    .withFileContext(meta).build();
  writeStoreFile(writer);
  writer.close();
  ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
  HFileInfo fileInfo = new HFileInfo(context, conf);
  StoreFileReader reader =
    new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
  fileInfo.initMetaAndIndex(reader.getHFileReader());
  StoreFileScanner s = reader.getStoreFileScanner(false, false, false, 0, 0, false);
  try {
    // Reseek with a first-on-row KV to position at the start of row "k2"
    KeyValue k = KeyValueUtil.createFirstOnRow(Bytes.toBytes("k2"));
    s.reseek(k);
    Cell kv = s.next();
    kv = s.next();
    kv = s.next();
    byte[] key5 = Bytes.toBytes("k5");
    assertTrue(
      Bytes.equals(key5, 0, key5.length, kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()));
    List<Tag> tags = PrivateCellUtil.getTags(kv);
    assertEquals(1, tags.size());
    assertEquals("tag3", Bytes.toString(Tag.cloneValue(tags.get(0))));
  } finally {
    s.close();
  }
}
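The body of writeStoreFile(writer) is not shown in this snippet. A plausible minimal sketch of what it must do, appending a cell that carries a tag so the compressed-tag read path above has something to decode; the tag type, family, qualifier, and value here are assumptions for illustration only.

// A minimal sketch, assuming the real writeStoreFile is similar: append one tagged cell.
Tag t = new ArrayBackedTag((byte) 1, "tag3"); // hypothetical tag type 1
KeyValue kv = new KeyValue(Bytes.toBytes("k5"), Bytes.toBytes("f"), Bytes.toBytes("q"),
  EnvironmentEdgeManager.currentTime(), Bytes.toBytes("value"), new Tag[] { t });
writer.append(kv);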
Use of org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder in project hbase by apache.
From the class TestHStoreFile, method testBloomTypes.
@Test
public void testBloomTypes() throws Exception {
  float err = (float) 0.01;
  FileSystem fs = FileSystem.getLocal(conf);
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  int rowCount = 50;
  int colCount = 10;
  int versions = 2;
  // run once using columns and once using rows
  BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
  int[] expKeys = { rowCount * colCount, rowCount };
  // The line below deserves commentary: it is the expected number of bloom false positives.
  // column-level = rowCount*2*colCount inserts
  // row-level = only rowCount*2 inserts, but failures will be magnified by the
  // 2nd for loop over every column (2*colCount)
  float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
  for (int x : new int[] { 0, 1 }) {
    // write the file
    Path f = new Path(ROOT_DIR, name.getMethodName() + x);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(f).withBloomType(bt[x]).withMaxKeyCount(expKeys[x])
      .withFileContext(meta).build();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < rowCount * 2; i += 2) {
      // rows
      for (int j = 0; j < colCount * 2; j += 2) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        for (int k = 0; k < versions; ++k) {
          // versions
          KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
            Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
          writer.append(kv);
        }
      }
    }
    writer.close();
    ReaderContext context = new ReaderContextBuilder().withFilePath(f)
      .withFileSize(fs.getFileStatus(f).getLen()).withFileSystem(fs)
      .withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    StoreFileReader reader =
      new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
    fileInfo.initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < rowCount * 2; ++i) {
      // rows
      for (int j = 0; j < colCount * 2; ++j) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
        columns.add(Bytes.toBytes("col" + col));
        Scan scan =
          new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
        scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("col" + col));
        boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
        boolean shouldRowExist = i % 2 == 0;
        boolean shouldColExist = j % 2 == 0;
        shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
        if (shouldRowExist && shouldColExist) {
          if (!exists) {
            falseNeg++;
          }
        } else {
          if (exists) {
            falsePos++;
          }
        }
      }
    }
    // evict because we are about to delete the file
    reader.close(true);
    fs.delete(f, true);
    System.out.println(bt[x].toString());
    System.out.println("  False negatives: " + falseNeg);
    System.out.println("  False positives: " + falsePos);
    assertEquals(0, falseNeg);
    assertTrue(falsePos < 2 * expErr[x]);
  }
}
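Note the verbose builder form in this test; the other snippets use withFileSystemAndPath(fs, f), which reads as shorthand for the same path/size/filesystem/stream wiring. A minimal sketch of the apparent equivalence, assuming f is an existing file on fs:

// The two ReaderContext constructions below appear interchangeable for a plain store file.
ReaderContext shorthand = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
ReaderContext explicit = new ReaderContextBuilder()
  .withFilePath(f)
  .withFileSize(fs.getFileStatus(f).getLen())
  .withFileSystem(fs)
  .withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f))
  .build();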
Use of org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder in project hbase by apache.
From the class TestHStoreFile, method testDeleteFamilyBloomFilter.
@Test
public void testDeleteFamilyBloomFilter() throws Exception {
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
  // write the file
  Path f = new Path(ROOT_DIR, name.getMethodName());
  HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
    .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
  // Make a store file and write data to it.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
    .withFilePath(f).withMaxKeyCount(2000).withFileContext(meta).build();
  // add delete family markers
  long now = EnvironmentEdgeManager.currentTime();
  for (int i = 0; i < 2000; i += 2) {
    String row = String.format(localFormatter, i);
    KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
      now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
    writer.append(kv);
  }
  writer.close();
  ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
  HFileInfo fileInfo = new HFileInfo(context, conf);
  StoreFileReader reader =
    new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
  fileInfo.initMetaAndIndex(reader.getHFileReader());
  reader.loadFileInfo();
  reader.loadBloomfilter();
  // check false positives rate
  int falsePos = 0;
  int falseNeg = 0;
  for (int i = 0; i < 2000; i++) {
    String row = String.format(localFormatter, i);
    byte[] rowKey = Bytes.toBytes(row);
    boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
    if (i % 2 == 0) {
      if (!exists) {
        falseNeg++;
      }
    } else {
      if (exists) {
        falsePos++;
      }
    }
  }
  assertEquals(1000, reader.getDeleteFamilyCnt());
  // evict because we are about to delete the file
  reader.close(true);
  fs.delete(f, true);
  assertEquals("False negatives: " + falseNeg, 0, falseNeg);
  int maxFalsePos = (int) (2 * 2000 * err);
  assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
    + maxFalsePos + ")", falsePos <= maxFalsePos);
}
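As a sanity check on the bound used above: only even-numbered rows were written, so 1000 of the 2000 probed rows are absent, and with err = 0.01 the expected false-positive count is about 1000 * 0.01 = 10, comfortably under the asserted ceiling of (int) (2 * 2000 * err) = 40. A small sketch of that arithmetic, with values taken from the test:

// A sanity-check sketch of the false-positive budget behind the assertion above.
int probedRows = 2000;           // every row index 0..1999 is probed
int absentRows = probedRows / 2; // only even rows were written, so 1000 are absent
double errRate = 0.01;           // configured bloom error rate
double expectedFalsePos = absentRows * errRate;         // ~10 expected false positives
int assertedCeiling = (int) (2 * probedRows * errRate); // 40, the test's upper bound
System.out.println(expectedFalsePos + " expected vs ceiling " + assertedCeiling);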