use of org.apache.hadoop.hbase.io.hfile.HFileContext in project hbase by apache.
the class TestStoreFile method testBasicHalfMapFile.
/**
* Write a file and then assert that we can read from top and bottom halves
* using two HalfMapFiles.
* @throws Exception
*/
@Test
public void testBasicHalfMapFile() throws Exception {
final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, new Path(testDir, hri.getTable().getNameAsString()), hri);
HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(regionFs.createTempName()).withFileContext(meta).build();
writeStoreFile(writer);
Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE);
checkHalfHFile(regionFs, sf);
}
use of org.apache.hadoop.hbase.io.hfile.HFileContext in project hbase by apache.
the class TestStoreFile method testBloomFilter.
@Test
public void testBloomFilter() throws Exception {
FileSystem fs = FileSystem.getLocal(conf);
conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
// write the file
Path f = new Path(ROOT_DIR, getName());
HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
// Make a store file and write data to it.
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f).withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
bloomWriteRead(writer, fs);
}
use of org.apache.hadoop.hbase.io.hfile.HFileContext in project hbase by apache.
the class TestStoreFile method testBloomTypes.
@Test
public void testBloomTypes() throws Exception {
float err = (float) 0.01;
FileSystem fs = FileSystem.getLocal(conf);
conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
int rowCount = 50;
int colCount = 10;
int versions = 2;
// run once using columns and once using rows
BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
int[] expKeys = { rowCount * colCount, rowCount };
// below line deserves commentary. it is expected bloom false positives
// column = rowCount*2*colCount inserts
// row-level = only rowCount*2 inserts, but failures will be magnified by
// 2nd for loop for every column (2*colCount)
float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
for (int x : new int[] { 0, 1 }) {
// write the file
Path f = new Path(ROOT_DIR, getName() + x);
HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
// Make a store file and write data to it.
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f).withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();
long now = System.currentTimeMillis();
for (int i = 0; i < rowCount * 2; i += 2) {
// rows
for (int j = 0; j < colCount * 2; j += 2) {
// column qualifiers
String row = String.format(localFormatter, i);
String col = String.format(localFormatter, j);
for (int k = 0; k < versions; ++k) {
// versions
KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(), ("col" + col).getBytes(), now - k, Bytes.toBytes((long) -1));
writer.append(kv);
}
}
}
writer.close();
StoreFileReader reader = new StoreFileReader(fs, f, cacheConf, conf);
reader.loadFileInfo();
reader.loadBloomfilter();
StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());
Store store = mock(Store.class);
HColumnDescriptor hcd = mock(HColumnDescriptor.class);
when(hcd.getName()).thenReturn(Bytes.toBytes("family"));
when(store.getFamily()).thenReturn(hcd);
// check false positives rate
int falsePos = 0;
int falseNeg = 0;
for (int i = 0; i < rowCount * 2; ++i) {
// rows
for (int j = 0; j < colCount * 2; ++j) {
// column qualifiers
String row = String.format(localFormatter, i);
String col = String.format(localFormatter, j);
TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
columns.add(("col" + col).getBytes());
Scan scan = new Scan(row.getBytes(), row.getBytes());
scan.addColumn("family".getBytes(), ("col" + col).getBytes());
boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
boolean shouldRowExist = i % 2 == 0;
boolean shouldColExist = j % 2 == 0;
shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
if (shouldRowExist && shouldColExist) {
if (!exists)
falseNeg++;
} else {
if (exists)
falsePos++;
}
}
}
// evict because we are about to delete the file
reader.close(true);
fs.delete(f, true);
System.out.println(bt[x].toString());
System.out.println(" False negatives: " + falseNeg);
System.out.println(" False positives: " + falsePos);
assertEquals(0, falseNeg);
assertTrue(falsePos < 2 * expErr[x]);
}
}
use of org.apache.hadoop.hbase.io.hfile.HFileContext in project hbase by apache.
the class TestStoreFileScannerWithTagCompression method testReseek.
@Test
public void testReseek() throws Exception {
// write the file
Path f = new Path(ROOT_DIR, "testReseek");
HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).withIncludesTags(true).withCompressTags(true).withDataBlockEncoding(DataBlockEncoding.PREFIX).build();
// Make a store file and write data to it.
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs).withFilePath(f).withFileContext(meta).build();
writeStoreFile(writer);
writer.close();
StoreFileReader reader = new StoreFileReader(fs, f, cacheConf, conf);
StoreFileScanner s = reader.getStoreFileScanner(false, false, false, 0, 0, false);
try {
// Now do reseek with empty KV to position to the beginning of the file
KeyValue k = KeyValueUtil.createFirstOnRow(Bytes.toBytes("k2"));
s.reseek(k);
Cell kv = s.next();
kv = s.next();
kv = s.next();
byte[] key5 = Bytes.toBytes("k5");
assertTrue(Bytes.equals(key5, 0, key5.length, kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()));
List<Tag> tags = KeyValueUtil.ensureKeyValue(kv).getTags();
assertEquals(1, tags.size());
assertEquals("tag3", Bytes.toString(TagUtil.cloneValue(tags.get(0))));
} finally {
s.close();
}
}
use of org.apache.hadoop.hbase.io.hfile.HFileContext in project phoenix by apache.
the class MultiHfileOutputFormat method createRecordWriter.
/**
*
* @param context
* @return
* @throws IOException
*/
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
// Get the path of the temporary output file
final Path outputPath = FileOutputFormat.getOutputPath(context);
final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputdir.getFileSystem(conf);
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
// Invented config. Add to hbase-*.xml if other than default compression.
final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
return new RecordWriter<TableRowkeyPair, V>() {
// Map of families to writers and how much has been output on the writer.
private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
private boolean rollRequested = false;
@Override
public void write(TableRowkeyPair row, V cell) throws IOException {
KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
// null input == user explicitly wants to flush
if (row == null && kv == null) {
rollWriters();
return;
}
// phoenix-2216: start : extract table name from the rowkey
String tableName = row.getTableName();
byte[] rowKey = row.getRowkey().get();
long length = kv.getLength();
byte[] family = CellUtil.cloneFamily(kv);
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
WriterLength wl = this.writers.get(tableAndFamily);
// If this is a new column family, verify that the directory exists
if (wl == null) {
// phoenix-2216: start : create a directory for table and family within the output dir
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
// phoenix-2216: end
}
// maxsize, we need to roll all the writers
if (wl != null && wl.written + length >= maxsize) {
this.rollRequested = true;
}
// This can only happen once a row is finished though
if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
rollWriters();
}
// create a new WAL writer, if necessary
if (wl == null || wl.writer == null) {
// phoenix-2216: start : passed even the table name
wl = getNewWriter(tableName, family, conf);
// phoenix-2216: end
}
// we now have the proper WAL writer. full steam ahead
kv.updateLatestStamp(this.now);
wl.writer.append(kv);
wl.written += length;
// Copy the row so we know when a row transition.
this.previousRow = rowKey;
}
private void rollWriters() throws IOException {
for (WriterLength wl : this.writers.values()) {
if (wl.writer != null) {
LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
close(wl.writer);
}
wl.writer = null;
wl.written = 0;
}
this.rollRequested = false;
}
/* Create a new StoreFile.Writer.
* @param family
* @return A WriterLength, containing a new StoreFile.Writer.
* @throws IOException
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
WriterLength wl = new WriterLength();
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
Path familydir = new Path(tableOutputPath, Bytes.toString(family));
// phoenix-2216: start : fetching the configuration properties that were set to the table.
// create a map from column family to the compression algorithm for the table.
final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
// phoenix-2216: end
String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
final DataBlockEncoding overriddenEncoding;
if (dataBlockEncodingStr != null) {
overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} else {
overriddenEncoding = null;
}
Algorithm compression = compressionMap.get(family);
compression = compression == null ? defaultCompression : compression;
BloomType bloomType = bloomTypeMap.get(family);
bloomType = bloomType == null ? BloomType.NONE : bloomType;
Integer blockSize = blockSizeMap.get(family);
blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
DataBlockEncoding encoding = overriddenEncoding;
encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression).withChecksumType(HStore.getChecksumType(conf)).withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
contextBuilder.withDataBlockEncoding(encoding);
HFileContext hFileContext = contextBuilder.build();
wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs).withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext).build();
// join and put it in the writers map .
// phoenix-2216: start : holds a map of writers where the
// key in the map is a join byte array of table name and family.
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
this.writers.put(tableAndFamily, wl);
// phoenix-2216: end
return wl;
}
private void close(final StoreFile.Writer w) throws IOException {
if (w != null) {
w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
w.appendTrackedTimestampsToMetadata();
w.close();
}
}
@Override
public void close(TaskAttemptContext c) throws IOException, InterruptedException {
for (WriterLength wl : this.writers.values()) {
close(wl.writer);
}
}
};
}
Aggregations