Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBasicHalfMapFile.
/**
 * Write a file and then assert that we can read from top and bottom halves
 * using two HalfMapFiles.
 * @throws Exception
 */
@Test
public void testBasicHalfMapFile() throws Exception {
  final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
  HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
  HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName())
      .withFileContext(meta)
      .build();
  writeStoreFile(writer);
  Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
  StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE);
  checkHalfHFile(regionFs, sf);
}
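The usages above and below all follow the same pattern: HFileContextBuilder is a fluent builder that produces an HFileContext, which is then handed to a store file writer via withFileContext(...). The following is a minimal sketch, not taken from either project, that combines only builder options appearing on this page; the wrapping class, method name, and concrete values are illustrative placeholders.

import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.ChecksumType;

public class HFileContextExample {
  // Builds an HFileContext with explicit block, compression, encoding and checksum settings.
  // All values below are placeholders, not recommendations.
  static HFileContext buildContext() {
    return new HFileContextBuilder()
        .withBlockSize(64 * 1024)                       // HFile data block size in bytes
        .withCompression(Compression.Algorithm.NONE)    // block compression algorithm
        .withDataBlockEncoding(DataBlockEncoding.NONE)  // data block encoding
        .withChecksumType(ChecksumType.CRC32C)          // checksum algorithm
        .withBytesPerCheckSum(16 * 1024)                // bytes covered by each checksum
        .build();
  }
}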
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBloomFilter.
@Test
public void testBloomFilter() throws Exception {
  FileSystem fs = FileSystem.getLocal(conf);
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  // write the file
  Path f = new Path(ROOT_DIR, getName());
  HFileContext meta = new HFileContextBuilder()
      .withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES)
      .build();
  // Make a store file and write data to it.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(f)
      .withBloomType(BloomType.ROW)
      .withMaxKeyCount(2000)
      .withFileContext(meta)
      .build();
  bloomWriteRead(writer, fs);
}
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBloomTypes.
@Test
public void testBloomTypes() throws Exception {
  float err = (float) 0.01;
  FileSystem fs = FileSystem.getLocal(conf);
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  int rowCount = 50;
  int colCount = 10;
  int versions = 2;
  // run once using columns and once using rows
  BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
  int[] expKeys = { rowCount * colCount, rowCount };
  // The line below deserves commentary: it is the expected number of bloom false positives.
  // Column-level (ROWCOL): rowCount*2*colCount inserts.
  // Row-level (ROW): only rowCount*2 inserts, but failures will be magnified by the
  // 2nd for loop over every column (2*colCount).
  float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
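  // Worked numbers (added for clarity, not in the original test): with rowCount = 50,
  // colCount = 10 and err = 0.01, expErr is { 2*50*10*0.01, 2*50*2*10*0.01 } = { 10, 20 },
  // so the assertion at the end of the loop tolerates fewer than 20 (ROWCOL) or
  // 40 (ROW) false positives among the keys probed below that were never inserted.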
  for (int x : new int[] { 0, 1 }) {
    // write the file
    Path f = new Path(ROOT_DIR, getName() + x);
    HFileContext meta = new HFileContextBuilder()
        .withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES)
        .build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
        .withFilePath(f)
        .withBloomType(bt[x])
        .withMaxKeyCount(expKeys[x])
        .withFileContext(meta)
        .build();
    long now = System.currentTimeMillis();
    for (int i = 0; i < rowCount * 2; i += 2) {
      // rows
      for (int j = 0; j < colCount * 2; j += 2) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        for (int k = 0; k < versions; ++k) {
          // versions
          KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
              ("col" + col).getBytes(), now - k, Bytes.toBytes((long) -1));
          writer.append(kv);
        }
      }
    }
    writer.close();
    StoreFileReader reader = new StoreFileReader(fs, f, cacheConf, conf);
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());
    Store store = mock(Store.class);
    HColumnDescriptor hcd = mock(HColumnDescriptor.class);
    when(hcd.getName()).thenReturn(Bytes.toBytes("family"));
    when(store.getFamily()).thenReturn(hcd);
    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < rowCount * 2; ++i) {
      // rows
      for (int j = 0; j < colCount * 2; ++j) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
        columns.add(("col" + col).getBytes());
        Scan scan = new Scan(row.getBytes(), row.getBytes());
        scan.addColumn("family".getBytes(), ("col" + col).getBytes());
        boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
        boolean shouldRowExist = i % 2 == 0;
        boolean shouldColExist = j % 2 == 0;
        shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
        if (shouldRowExist && shouldColExist) {
          if (!exists)
            falseNeg++;
        } else {
          if (exists)
            falsePos++;
        }
      }
    }
    // evict because we are about to delete the file
    reader.close(true);
    fs.delete(f, true);
    System.out.println(bt[x].toString());
    System.out.println(" False negatives: " + falseNeg);
    System.out.println(" False positives: " + falsePos);
    assertEquals(0, falseNeg);
    assertTrue(falsePos < 2 * expErr[x]);
  }
}
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project phoenix by apache.
The class MultiHfileOutputFormat, method createRecordWriter.
/**
 * Creates a RecordWriter that writes each incoming cell to a per-table, per-column-family HFile
 * under the task's work directory.
 * @param context the task attempt context
 * @return the record writer
 * @throws IOException
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
  return new RecordWriter<TableRowkeyPair, V>() {

    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
    private boolean rollRequested = false;
    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }
      // If the writer for this family has reached maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new store file writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : pass the table name as well
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }
      // we now have the proper store file writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transition happens.
      this.previousRow = rowKey;
    }
    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }
    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));
      // phoenix-2216: start : fetch the configuration properties that were set on the table and
      // create a map from column family to the compression algorithm for the table.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end
      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }
      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder()
          .withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
          .withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir)
          .withBloomType(bloomType)
          .withComparator(KeyValue.COMPARATOR)
          .withFileContext(hFileContext)
          .build();
      // phoenix-2216: start : the writers map is keyed by a join byte array of table name and family,
      // so join them and put the new writer in the map.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }
    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
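The snippet relies on a WriterLength holder and a join(...) helper that are not reproduced on this page. Based only on how they are used above (a mutable writer/bytes-written pair, and a byte[] map key combining table name and column family), a hedged sketch of what they might look like follows; the actual Phoenix implementations may differ, for example in how the key is delimited.

import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical reconstruction from usage; not copied from MultiHfileOutputFormat.
class WriterLength {
  long written = 0;               // bytes written through this writer so far
  StoreFile.Writer writer = null; // the current store file writer, null until first use
}

// Builds the writers-map key by concatenating table name and family name bytes.
// The real helper may use a separator or a different encoding.
static byte[] join(String tableName, String family) {
  return Bytes.add(Bytes.toBytes(tableName), Bytes.toBytes(family));
}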
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class MobUtils, method createWriter.
/**
 * Creates a writer for the mob file in temp directory.
 * @param conf The current configuration.
 * @param fs The current file system.
 * @param family The descriptor of the current column family.
 * @param path The path for a temp directory.
 * @param maxKeyCount The key count.
 * @param compression The compression algorithm.
 * @param cacheConfig The current cache config.
 * @param cryptoContext The encryption context.
 * @param checksumType The checksum type.
 * @param bytesPerChecksum The bytes per checksum.
 * @param blocksize The HFile block size.
 * @param bloomType The bloom filter type.
 * @param isCompaction If the writer is used in compaction.
 * @return The writer for the mob file.
 */
public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, ColumnFamilyDescriptor family,
    Path path, long maxKeyCount, Compression.Algorithm compression, CacheConfig cacheConfig,
    Encryption.Context cryptoContext, ChecksumType checksumType, int bytesPerChecksum, int blocksize,
    BloomType bloomType, boolean isCompaction) throws IOException {
  if (compression == null) {
    compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
  }
  final CacheConfig writerCacheConf;
  if (isCompaction) {
    writerCacheConf = new CacheConfig(cacheConfig);
    writerCacheConf.setCacheDataOnWrite(false);
  } else {
    writerCacheConf = cacheConfig;
  }
  HFileContext hFileContext = new HFileContextBuilder()
      .withCompression(compression)
      .withIncludesMvcc(true)
      .withIncludesTags(true)
      .withCompressTags(family.isCompressTags())
      .withChecksumType(checksumType)
      .withBytesPerCheckSum(bytesPerChecksum)
      .withBlockSize(blocksize)
      .withHBaseCheckSum(true)
      .withDataBlockEncoding(family.getDataBlockEncoding())
      .withEncryptionContext(cryptoContext)
      .withCreateTime(EnvironmentEdgeManager.currentTime())
      .build();
  StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs)
      .withFilePath(path)
      .withBloomType(bloomType)
      .withMaxKeyCount(maxKeyCount)
      .withFileContext(hFileContext)
      .build();
  return w;
}
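A hedged example of calling this method follows; it is not taken from the HBase code base. The variables family, tempPath, cacheConfig and cryptoContext are assumed to exist in the caller's scope, and every concrete value is a placeholder chosen to match the parameter types in the signature above, not a recommended configuration.

// Sketch only: argument values are illustrative placeholders.
StoreFileWriter mobWriter = MobUtils.createWriter(
    conf,                                   // current configuration
    fs,                                     // current file system
    family,                                 // ColumnFamilyDescriptor of the MOB-enabled family
    tempPath,                               // path under the temp directory
    10000L,                                 // max key count, used to size the bloom filter
    Compression.Algorithm.NONE,             // compression (null falls back to the HFile default)
    cacheConfig,                            // current cache config
    cryptoContext,                          // encryption context
    ChecksumType.CRC32C,                    // checksum type
    HConstants.DEFAULT_BYTES_PER_CHECKSUM,  // bytes per checksum
    HConstants.DEFAULT_BLOCKSIZE,           // HFile block size
    BloomType.NONE,                         // bloom filter type
    false);                                 // not a compaction writer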