use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.
the class ThriftUtilities method colDescFromThrift.
/**
 * Creates an HBase {@link ColumnFamilyDescriptor} from a Thrift ColumnDescriptor "struct".
 *
 * <p>The column name is validated before any other field is interpreted, so an empty or
 * missing name is always reported as {@link IllegalArgument} regardless of what the
 * compression or bloom filter fields contain. Only the family part of the name (the first
 * element returned by {@code CellUtil.parseColumn}) is used. A time-to-live that is not
 * strictly positive is replaced by {@code Integer.MAX_VALUE}.
 *
 * @param in Thrift ColumnDescriptor object
 * @return the column family descriptor built from {@code in}
 * @throws IllegalArgument if the column name is null or has no remaining bytes
 */
public static ColumnFamilyDescriptor colDescFromThrift(ColumnDescriptor in) throws IllegalArgument {
  // Fail fast on the documented precondition before touching the other fields.
  if (in.name == null || !in.name.hasRemaining()) {
    throw new IllegalArgument("column name is empty");
  }
  // Lower-case with Locale.ROOT so the lookup does not depend on the default locale.
  Compression.Algorithm comp =
    Compression.getCompressionAlgorithmByName(in.compression.toLowerCase(Locale.ROOT));
  BloomType bt = BloomType.valueOf(in.bloomFilterType);
  byte[] parsedName = CellUtil.parseColumn(Bytes.getBytes(in.name))[0];
  // Non-positive TTL values are mapped to Integer.MAX_VALUE.
  int timeToLive = in.timeToLive > 0 ? in.timeToLive : Integer.MAX_VALUE;
  return ColumnFamilyDescriptorBuilder.newBuilder(parsedName)
    .setMaxVersions(in.maxVersions)
    .setCompressionType(comp)
    .setInMemory(in.inMemory)
    .setBlockCacheEnabled(in.blockCacheEnabled)
    .setTimeToLive(timeToLive)
    .setBloomFilterType(bt)
    .build();
}
use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.
the class BulkLoadHFilesTool method copyHFileHalf.
/**
 * Copy half of an HFile into a new HFile.
 *
 * <p>The half to copy is selected by {@code reference}. The output file is written with the
 * block size, compression, data block encoding and bloom filter type taken from
 * {@code familyDescriptor}. File-info entries of the source are copied only when
 * {@code shouldCopyHFileMetaKey} accepts their key.
 *
 * @param conf configuration used to resolve the file system and checksum settings
 * @param inFile the HFile to read from
 * @param outFile the HFile to create
 * @param reference selects which half of {@code inFile} is read
 * @param familyDescriptor column family settings applied to the output file
 * @throws IOException if reading the input or writing the output fails
 */
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException {
  FileSystem fs = inFile.getFileSystem(conf);
  CacheConfig cacheConf = CacheConfig.DISABLED;
  HalfStoreFileReader halfReader = null;
  StoreFileWriter halfWriter = null;
  try {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, inFile).build();
    HFileInfo hfile = new HFileInfo(context, conf);
    halfReader = new HalfStoreFileReader(context, hfile, cacheConf, reference, new AtomicInteger(0), conf);
    hfile.initMetaAndIndex(halfReader.getHFileReader());
    Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();

    int blocksize = familyDescriptor.getBlocksize();
    Algorithm compression = familyDescriptor.getCompressionType();
    BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
    HFileContext hFileContext = new HFileContextBuilder()
      .withCompression(compression)
      .withChecksumType(StoreUtils.getChecksumType(conf))
      .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
      .withBlockSize(blocksize)
      .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
      .withIncludesTags(true)
      .build();
    halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
      .withFilePath(outFile)
      .withBloomType(bloomFilterType)
      .withFileContext(hFileContext)
      .build();

    HFileScanner scanner = halfReader.getScanner(false, false, false);
    // seekTo() returns false when there is nothing to position on; in that case the current
    // cell is undefined, so the copy loop must be skipped instead of appending it.
    if (scanner.seekTo()) {
      do {
        halfWriter.append(scanner.getCell());
      } while (scanner.next());
    }

    for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
      if (shouldCopyHFileMetaKey(entry.getKey())) {
        halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
      }
    }
  } finally {
    if (halfReader != null) {
      try {
        halfReader.close(cacheConf.shouldEvictOnClose());
      } catch (IOException e) {
        // A failed reader close must not prevent the writer from being closed below.
        LOG.warn("failed to close hfile reader for " + inFile, e);
      }
    }
    if (halfWriter != null) {
      halfWriter.close();
    }
  }
}
use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.
the class HBaseTestingUtility method generateColumnDescriptors.
/**
 * Builds one column family descriptor for every combination of supported compression
 * algorithm, data block encoding and bloom type. Each family gets a unique name derived
 * from {@code prefix} and a running counter.
 * @param prefix family names prefix
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
  final List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();
  long nextId = 0;
  for (Compression.Algorithm compression : getSupportedCompressionAlgorithms()) {
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      for (BloomType bloom : BloomType.values()) {
        // The counter keeps family names distinct across all combinations.
        final byte[] familyName =
          Bytes.toBytes(String.format("%s-cf-!@#&-%d!@#", prefix, nextId));
        nextId++;
        descriptors.add(ColumnFamilyDescriptorBuilder.newBuilder(familyName)
          .setCompressionType(compression)
          .setDataBlockEncoding(encoding)
          .setBloomFilterType(bloom)
          .build());
      }
    }
  }
  return descriptors;
}
use of org.apache.hadoop.hbase.regionserver.BloomType in project phoenix by apache.
the class MultiHfileOutputFormat method createRecordWriter.
/**
 * Builds a {@link RecordWriter} that appends each cell to a StoreFile writer selected by the
 * (table name, column family) pair. Writers are created on first use and placed under
 * {@code <work path>/<table output path>/<family>}. Once any writer would reach the configured
 * maximum file size, all writers are rolled as soon as the row key changes.
 *
 * @param context task attempt context supplying the configuration, the output path and the
 *          task attempt id recorded in each written file
 * @return a record writer keyed by {@link TableRowkeyPair}
 * @throws IOException if the committer work path or its file system cannot be resolved
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
// Get the path of the temporary output file
final Path outputPath = FileOutputFormat.getOutputPath(context);
final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputdir.getFileSystem(conf);
// Size threshold at which a roll of all writers is requested.
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
// Invented config. Add to hbase-*.xml if other than default compression.
final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
// Value stored under EXCLUDE_FROM_MINOR_COMPACTION_KEY when a writer is closed.
final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
return new RecordWriter<TableRowkeyPair, V>() {
// Map of (table name + family) keys to writers and how much has been output on the writer.
private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
// Row key of the most recently written cell; used to detect row transitions.
private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
// Timestamp captured when this record writer is created; handed to updateLatestStamp for each cell.
private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
// Set when a writer would reach maxsize; cleared by rollWriters().
private boolean rollRequested = false;
@Override
public void write(TableRowkeyPair row, V cell) throws IOException {
KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
// null input == user explicitly wants to flush
// NOTE(review): only the case where BOTH are null is handled; if exactly one of them is
// null the dereferences below (row.getTableName() / kv.getLength()) will throw.
if (row == null && kv == null) {
rollWriters();
return;
}
// phoenix-2216: start : extract table name from the rowkey
String tableName = row.getTableName();
byte[] rowKey = row.getRowkey().get();
long length = kv.getLength();
byte[] family = CellUtil.cloneFamily(kv);
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
WriterLength wl = this.writers.get(tableAndFamily);
// If this is a new (table, column family) pair, make sure its output directory exists
if (wl == null) {
// phoenix-2216: start : create a directory for table and family within the output dir
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
// phoenix-2216: end
}
// If this family's writer would reach maxsize with this cell, request a roll of all the writers
if (wl != null && wl.written + length >= maxsize) {
this.rollRequested = true;
}
// This can only happen once a row is finished though
if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
rollWriters();
}
// create a new StoreFile writer, if necessary (first use, or the previous one was rolled)
if (wl == null || wl.writer == null) {
// phoenix-2216: start : passed even the table name
wl = getNewWriter(tableName, family, conf);
// phoenix-2216: end
}
// we now have the proper StoreFile writer. full steam ahead
kv.updateLatestStamp(this.now);
wl.writer.append(kv);
wl.written += length;
// Remember the row key so the next call can detect a row transition.
this.previousRow = rowKey;
}
/**
 * Closes every open writer and resets its written-byte counter. The map entries are kept
 * with a null writer, so the next write for that key creates a fresh writer.
 */
private void rollWriters() throws IOException {
for (WriterLength wl : this.writers.values()) {
if (wl.writer != null) {
LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
close(wl.writer);
}
wl.writer = null;
wl.written = 0;
}
this.rollRequested = false;
}
/**
 * Create a new StoreFile.Writer for the given table and family and register it in
 * {@code writers} under the joined table/family key.
 * @param tableName table the family belongs to
 * @param family column family name
 * @param conf configuration the per-family settings are read from
 * @return A WriterLength, containing a new StoreFile.Writer.
 * @throws IOException
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
WriterLength wl = new WriterLength();
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
Path familydir = new Path(tableOutputPath, Bytes.toString(family));
// phoenix-2216: start : fetching the configuration properties that were set to the table.
// create a map from column family to the compression algorithm for the table.
// NOTE(review): these per-family maps are rebuilt on every call, i.e. for every new or rolled writer.
final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
// phoenix-2216: end
String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
// When the override key is set it takes precedence over the per-family encoding.
final DataBlockEncoding overriddenEncoding;
if (dataBlockEncodingStr != null) {
overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} else {
overriddenEncoding = null;
}
// Each setting falls back to a default when the family has no entry in its map.
Algorithm compression = compressionMap.get(family);
compression = compression == null ? defaultCompression : compression;
BloomType bloomType = bloomTypeMap.get(family);
bloomType = bloomType == null ? BloomType.NONE : bloomType;
Integer blockSize = blockSizeMap.get(family);
blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
// Encoding precedence: override, then per-family value, then NONE.
DataBlockEncoding encoding = overriddenEncoding;
encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
// The CacheConfig is built from a copy of the configuration with the block cache size set to 0.
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression).withChecksumType(HStore.getChecksumType(conf)).withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
contextBuilder.withDataBlockEncoding(encoding);
HFileContext hFileContext = contextBuilder.build();
wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs).withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext).build();
// join and put it in the writers map .
// phoenix-2216: start : holds a map of writers where the
// key in the map is a join byte array of table name and family.
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
this.writers.put(tableAndFamily, wl);
// phoenix-2216: end
return wl;
}
/**
 * Appends the bulk-load file-info entries (load time, task attempt id, major-compaction flag,
 * minor-compaction exclusion flag) and the tracked timestamps, then closes the writer.
 * Does nothing when {@code w} is null.
 */
private void close(final StoreFile.Writer w) throws IOException {
if (w != null) {
w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
w.appendTrackedTimestampsToMetadata();
w.close();
}
}
// Closes every writer still registered; entries whose writer was already rolled are skipped by close(w).
@Override
public void close(TaskAttemptContext c) throws IOException, InterruptedException {
for (WriterLength wl : this.writers.values()) {
close(wl.writer);
}
}
};
}
use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.
the class ChangeBloomFilterAction method perform.
/**
 * Assigns a randomly chosen bloom filter type to every column family of the table. When the
 * chosen type is {@code ROWPREFIX_FIXED_LENGTH}, the prefix length setting is set to "10" on
 * that column as well.
 */
@Override
public void perform() throws Exception {
  final BloomType[] candidates = BloomType.values();
  final Random rng = new Random();
  getLogger().info("Performing action: Change bloom filter on all columns of table " + tableName);
  modifyAllTableColumns(tableName, (columnName, columnBuilder) -> {
    // Pick one bloom type uniformly from all declared values.
    final BloomType chosen = candidates[rng.nextInt(candidates.length)];
    getLogger().debug("Performing action: About to set bloom filter type to " + chosen + " on column " + columnName + " of table " + tableName);
    columnBuilder.setBloomFilterType(chosen);
    if (BloomType.ROWPREFIX_FIXED_LENGTH == chosen) {
      columnBuilder.setConfiguration(BloomFilterUtil.PREFIX_LENGTH_KEY, "10");
    }
  });
  getLogger().debug("Performing action: Just set bloom filter types on table " + tableName);
}
Aggregations