use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.
the class HFileOutputFormat2 method configureDataBlockEncoding.
/**
 * Stores the data block encoding of every column family of a table in the given
 * configuration, under {@code DATABLOCK_ENCODING_FAMILIES_CONF_KEY}.
 * <p>
 * The stored value is a list of {@code family=encoding} pairs joined by {@code '&'}; both
 * sides of each pair are URL-encoded as UTF-8. A family whose descriptor reports no encoding
 * is written as {@link DataBlockEncoding#NONE}. When {@code tableDescriptor} is {@code null}
 * the configuration is left untouched.
 *
 * @param tableDescriptor descriptor whose column families are read; may be {@code null}
 * @param conf configuration that receives the serialized value
 * @throws UnsupportedEncodingException if the UTF-8 charset is rejected by {@link URLEncoder}
 */
@VisibleForTesting
static void configureDataBlockEncoding(HTableDescriptor tableDescriptor, Configuration conf) throws UnsupportedEncodingException {
  // A mocked table may hand us no descriptor at all; nothing to serialize then.
  if (tableDescriptor == null) {
    return;
  }
  final StringBuilder serialized = new StringBuilder();
  boolean firstFamily = true;
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    // Pairs are separated by '&', so every pair but the first is prefixed with it.
    if (firstFamily) {
      firstFamily = false;
    } else {
      serialized.append('&');
    }
    serialized.append(URLEncoder.encode(family.getNameAsString(), "UTF-8")).append('=');
    DataBlockEncoding familyEncoding = family.getDataBlockEncoding();
    String encodingName = (familyEncoding != null ? familyEncoding : DataBlockEncoding.NONE).toString();
    serialized.append(URLEncoder.encode(encodingName, "UTF-8"));
  }
  conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, serialized.toString());
}
use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.
the class TestStoreFile method testDataBlockEncodingMetaData.
/**
 * Checks that the data block encoding configured on the {@link HFileContext} of a store file
 * is recorded in the file info of the written HFile.
 * <p>
 * The test writes an empty store file with {@link DataBlockEncoding#FAST_DIFF}, reopens it
 * through a {@code StoreFile} reader, and compares the value stored under
 * {@code HFileDataBlockEncoder.DATA_BLOCK_ENCODING} with the encoding's name bytes.
 *
 * @throws IOException if writing or reading the store file fails
 */
@Test
public void testDataBlockEncodingMetaData() throws IOException {
  // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
  Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
  Path path = new Path(dir, "1234567890");
  DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
  cacheConf = new CacheConfig(conf);
  // The encoding reaches the writer only through the file context built here.
  HFileContext meta = new HFileContextBuilder()
      .withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES)
      .withDataBlockEncoding(dataBlockEncoderAlgo)
      .build();
  // Make a store file; no cells are appended, only the metadata matters here.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path)
      .withMaxKeyCount(2000)
      .withFileContext(meta)
      .build();
  writer.close();
  // Reopen the file and read back its file info.
  StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE);
  StoreFileReader reader = storeFile.createReader();
  Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
  byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
  assertEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
}
use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project phoenix by apache.
the class MultiHfileOutputFormat method configureDataBlockEncoding.
/**
 * Serializes the data block encoding of every column family of a table into a single string.
 * <p>
 * The result is a list of {@code family=encoding} pairs joined by {@code '&'}; both sides of
 * each pair are URL-encoded as UTF-8. A family whose descriptor reports no encoding is written
 * as {@link DataBlockEncoding#NONE}.
 *
 * @param tableDescriptor descriptor whose column families are read; may be {@code null}
 * @return the serialized family-to-encoding pairs, or an empty string when
 *         {@code tableDescriptor} is {@code null} or has no families
 * @throws UnsupportedEncodingException if the UTF-8 charset is rejected by {@link URLEncoder}
 */
static String configureDataBlockEncoding(HTableDescriptor tableDescriptor) throws UnsupportedEncodingException {
  // A mocked table may hand us no descriptor at all; there is nothing to serialize then.
  if (tableDescriptor == null) {
    return "";
  }
  final StringBuilder serialized = new StringBuilder();
  boolean firstFamily = true;
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    // Pairs are separated by '&', so every pair but the first is prefixed with it.
    if (firstFamily) {
      firstFamily = false;
    } else {
      serialized.append('&');
    }
    serialized.append(URLEncoder.encode(family.getNameAsString(), "UTF-8")).append('=');
    DataBlockEncoding familyEncoding = family.getDataBlockEncoding();
    String encodingName = (familyEncoding != null ? familyEncoding : DataBlockEncoding.NONE).toString();
    serialized.append(URLEncoder.encode(encodingName, "UTF-8"));
  }
  return serialized.toString();
}
use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project phoenix by apache.
the class MultiHfileOutputFormat method createRecordWriter.
/**
 * Creates a {@link RecordWriter} that appends each incoming cell to a store file writer
 * selected by the table name carried in the key and by the cell's column family.
 * <p>
 * Writers are created lazily, one per (table, family) pair, below the work path of the task's
 * {@link FileOutputCommitter}. Once the bytes written through any one writer reach the size
 * configured under {@code HConstants.HREGION_MAX_FILESIZE}, all writers are closed and reset
 * as soon as a cell with a different row key arrives.
 *
 * @param context task attempt context supplying the configuration, the output path and the
 *          task attempt id recorded in each written file
 * @return a record writer keyed by {@code TableRowkeyPair}
 * @throws IOException declared by the signature; the work path and its file system are
 *           resolved here before the writer is returned
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
// Get the path of the temporary output file
final Path outputPath = FileOutputFormat.getOutputPath(context);
final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputdir.getFileSystem(conf);
// Size threshold at which a roll of all writers is requested.
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
// Invented config. Add to hbase-*.xml if other than default compression.
final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
// Written into each file's info on close (see close(StoreFile.Writer)); defaults to false.
final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
return new RecordWriter<TableRowkeyPair, V>() {
// Map of joined (table name, family) keys to writers and how much has been output on the writer.
private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
// Row key of the most recently written cell; used to detect row transitions.
private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
// Time at which this record writer was created, passed to updateLatestStamp for every cell.
private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
// Set when a writer reaches maxsize; the roll itself waits for the next row boundary.
private boolean rollRequested = false;
/**
 * Appends one cell to the writer of its table and column family, creating the writer (and
 * its output directory) on first use and rolling all writers when a roll was requested and
 * the row key changes.
 */
@Override
public void write(TableRowkeyPair row, V cell) throws IOException {
KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
// null input == user explicitly wants to flush
// NOTE(review): only the both-null case is handled; a null row with a non-null cell
// would fail on row.getTableName() below.
if (row == null && kv == null) {
rollWriters();
return;
}
// phoenix-2216: start : extract table name from the rowkey
String tableName = row.getTableName();
byte[] rowKey = row.getRowkey().get();
long length = kv.getLength();
byte[] family = CellUtil.cloneFamily(kv);
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
WriterLength wl = this.writers.get(tableAndFamily);
// If this is a new column family, verify that the directory exists
if (wl == null) {
// phoenix-2216: start : create a directory for table and family within the output dir
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
// phoenix-2216: end
}
// If the writer for this table and family would reach maxsize with this cell,
// we need to roll all the writers
if (wl != null && wl.written + length >= maxsize) {
this.rollRequested = true;
}
// This can only happen once a row is finished though
if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
rollWriters();
}
// create a new store file writer, if necessary: either no entry exists for this
// table and family yet, or rollWriters() just reset the entry's writer to null
if (wl == null || wl.writer == null) {
// phoenix-2216: start : passed even the table name
wl = getNewWriter(tableName, family, conf);
// phoenix-2216: end
}
// we now have the proper store file writer. full steam ahead
kv.updateLatestStamp(this.now);
wl.writer.append(kv);
wl.written += length;
// Copy the row so we know when a row transition.
this.previousRow = rowKey;
}
/**
 * Closes every open writer, resets each entry's writer and byte count, and clears the
 * pending roll request. The map entries themselves are kept.
 */
private void rollWriters() throws IOException {
for (WriterLength wl : this.writers.values()) {
if (wl.writer != null) {
LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
close(wl.writer);
}
wl.writer = null;
wl.written = 0;
}
this.rollRequested = false;
}
/* Create a new StoreFile.Writer for the given table and column family and register it in
 * the writers map under the joined (table name, family) key, replacing any previous entry.
 * Compression, bloom type, block size and data block encoding are looked up per family from
 * maps built from the configuration; families without an entry fall back to the default
 * compression, BloomType.NONE, HConstants.DEFAULT_BLOCKSIZE and DataBlockEncoding.NONE.
 * A value under DATABLOCK_ENCODING_OVERRIDE_CONF_KEY takes precedence over the per-family
 * encoding.
 * @param tableName table the writer belongs to
 * @param family column family the writer belongs to
 * @param conf configuration the per-family settings are read from
 * @return A WriterLength, containing a new StoreFile.Writer.
 * @throws IOException
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
WriterLength wl = new WriterLength();
Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
Path familydir = new Path(tableOutputPath, Bytes.toString(family));
// phoenix-2216: start : fetching the configuration properties that were set to the table.
// create a map from column family to the compression algorithm for the table.
final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
// phoenix-2216: end
String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
// An explicitly configured override wins over the per-family encoding.
final DataBlockEncoding overriddenEncoding;
if (dataBlockEncodingStr != null) {
overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} else {
overriddenEncoding = null;
}
// Resolve each setting for this family, falling back to a default when the map has no entry.
Algorithm compression = compressionMap.get(family);
compression = compression == null ? defaultCompression : compression;
BloomType bloomType = bloomTypeMap.get(family);
bloomType = bloomType == null ? BloomType.NONE : bloomType;
Integer blockSize = blockSizeMap.get(family);
blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
DataBlockEncoding encoding = overriddenEncoding;
encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
// Copy of the configuration with the block cache size set to zero; it is used only for the
// CacheConfig handed to the writer (presumably to keep the block cache off - confirm).
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression).withChecksumType(HStore.getChecksumType(conf)).withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
contextBuilder.withDataBlockEncoding(encoding);
HFileContext hFileContext = contextBuilder.build();
wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs).withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext).build();
// join and put it in the writers map .
// phoenix-2216: start : holds a map of writers where the
// key in the map is a join byte array of table name and family.
byte[] tableAndFamily = join(tableName, Bytes.toString(family));
this.writers.put(tableAndFamily, wl);
// phoenix-2216: end
return wl;
}
/**
 * Records the bulk load time, the task attempt id, the major compaction flag and the
 * exclude-from-minor-compaction flag in the file info, appends the tracked timestamps and
 * closes the writer. Does nothing when {@code w} is null.
 */
private void close(final StoreFile.Writer w) throws IOException {
if (w != null) {
w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
w.appendTrackedTimestampsToMetadata();
w.close();
}
}
/**
 * Closes every writer still held in the writers map; entries already reset by a roll hold a
 * null writer and are skipped by {@code close(StoreFile.Writer)}.
 */
@Override
public void close(TaskAttemptContext c) throws IOException, InterruptedException {
for (WriterLength wl : this.writers.values()) {
close(wl.writer);
}
}
};
}
use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.
the class HFileDataBlockEncoderImpl method createFromFileInfo.
/**
 * Builds the data block encoder that matches the encoding recorded in an HFile's file info.
 * <p>
 * The encoding name is read from the {@code DATA_BLOCK_ENCODING} entry. A missing entry, or an
 * entry naming {@link DataBlockEncoding#NONE}, yields the shared
 * {@code NoOpDataBlockEncoder.INSTANCE}; any other valid name yields a new
 * {@code HFileDataBlockEncoderImpl} for that encoding.
 *
 * @param fileInfo file info to read the encoding entry from
 * @return the encoder corresponding to the recorded encoding
 * @throws IOException if the recorded name is not a {@link DataBlockEncoding} constant
 */
public static HFileDataBlockEncoder createFromFileInfo(HFileInfo fileInfo) throws IOException {
  final byte[] rawEncodingName = fileInfo.get(DATA_BLOCK_ENCODING);
  // No entry means the file was written without data block encoding.
  if (rawEncodingName == null) {
    return NoOpDataBlockEncoder.INSTANCE;
  }
  final String encodingName = Bytes.toString(rawEncodingName);
  final DataBlockEncoding parsed;
  try {
    parsed = DataBlockEncoding.valueOf(encodingName);
  } catch (IllegalArgumentException ex) {
    // Surface an unknown name as an I/O problem with the file, keeping the original cause.
    throw new IOException("Invalid data block encoding type in file info: " + encodingName, ex);
  }
  if (parsed != DataBlockEncoding.NONE) {
    return new HFileDataBlockEncoderImpl(parsed);
  }
  return NoOpDataBlockEncoder.INSTANCE;
}
Aggregations