Use of org.apache.hadoop.hbase.io.compress.Compression.Algorithm in project hbase by apache.
The class TestHFileBlock, method testPreviousOffsetInternals.
protected void testPreviousOffsetInternals() throws IOException {
  // TODO: parameterize these nested loops.
  Configuration conf = TEST_UTIL.getConfiguration();
  for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
    for (boolean pread : BOOLEAN_VALUES) {
      for (boolean cacheOnWrite : BOOLEAN_VALUES) {
        Random rand = defaultRandom();
        LOG.info("testPreviousOffset: Compression algorithm={}, pread={}, cacheOnWrite={}",
          algo.toString(), pread, cacheOnWrite);
        Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
        List<Long> expectedOffsets = new ArrayList<>();
        List<Long> expectedPrevOffsets = new ArrayList<>();
        List<BlockType> expectedTypes = new ArrayList<>();
        List<ByteBuffer> expectedContents = cacheOnWrite ? new ArrayList<>() : null;
        long totalSize = writeBlocks(TEST_UTIL.getConfiguration(), rand, algo, path,
          expectedOffsets, expectedPrevOffsets, expectedTypes, expectedContents);
        FSDataInputStream is = fs.open(path);
        HFileContext meta = new HFileContextBuilder()
          .withHBaseCheckSum(true)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(includesTag)
          .withCompression(algo)
          .build();
        ReaderContext context = new ReaderContextBuilder()
          .withInputStreamWrapper(new FSDataInputStreamWrapper(is))
          .withFileSize(totalSize)
          .withFilePath(path)
          .withFileSystem(fs)
          .build();
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
        long curOffset = 0;
        for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
          if (!pread) {
            assertEquals(is.getPos(),
              curOffset + (i == 0 ? 0 : HConstants.HFILEBLOCK_HEADER_SIZE));
          }
          assertEquals(expectedOffsets.get(i).longValue(), curOffset);
          if (detailedLogging) {
            LOG.info("Reading block #" + i + " at offset " + curOffset);
          }
          HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, false);
          if (detailedLogging) {
            LOG.info("Block #" + i + ": " + b);
          }
          assertEquals("Invalid block #" + i + "'s type:", expectedTypes.get(i),
            b.getBlockType());
          assertEquals("Invalid previous block offset for block " + i + " of type "
            + b.getBlockType() + ":", (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
          b.sanityCheck();
          assertEquals(curOffset, b.getOffset());
          // Now re-load this block knowing the on-disk size. This tests a
          // different branch in the loader.
          HFileBlock b2 =
            hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, false);
          b2.sanityCheck();
          assertEquals(b.getBlockType(), b2.getBlockType());
          assertEquals(b.getOnDiskSizeWithoutHeader(), b2.getOnDiskSizeWithoutHeader());
          assertEquals(b.getOnDiskSizeWithHeader(), b2.getOnDiskSizeWithHeader());
          assertEquals(b.getUncompressedSizeWithoutHeader(),
            b2.getUncompressedSizeWithoutHeader());
          assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
          assertEquals(curOffset, b2.getOffset());
          assertEquals(b.getBytesPerChecksum(), b2.getBytesPerChecksum());
          assertEquals(b.getOnDiskDataSizeWithHeader(), b2.getOnDiskDataSizeWithHeader());
          assertEquals(0, HFile.getAndResetChecksumFailuresCount());
          assertRelease(b2);
          curOffset += b.getOnDiskSizeWithHeader();
          if (cacheOnWrite) {
            // NOTE: cache-on-write testing doesn't actually involve a BlockCache. It simply
            // verifies that the unpacked value read back off disk matches the unpacked value
            // generated before writing to disk.
            HFileBlock newBlock = b.unpack(meta, hbr);
            // b's buffer has header + data + checksum while
            // expectedContents have header + data only
            ByteBuff bufRead = newBlock.getBufferReadOnly();
            ByteBuffer bufExpected = expectedContents.get(i);
            byte[] tmp = new byte[bufRead.limit() - newBlock.totalChecksumBytes()];
            bufRead.get(tmp, 0, tmp.length);
            boolean bytesAreCorrect = Bytes.compareTo(tmp, 0, tmp.length,
              bufExpected.array(), bufExpected.arrayOffset(), bufExpected.limit()) == 0;
            String wrongBytesMsg = "";
            if (!bytesAreCorrect) {
              // Optimization: only construct an error message in case we
              // will need it.
              wrongBytesMsg = "Expected bytes in block #" + i + " (algo=" + algo
                + ", pread=" + pread + ", cacheOnWrite=" + cacheOnWrite + "):\n";
              wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(),
                  bufExpected.arrayOffset(), Math.min(32 + 10, bufExpected.limit()))
                + ", actual:\n"
                + Bytes.toStringBinary(bufRead.array(), bufRead.arrayOffset(),
                  Math.min(32 + 10, bufRead.limit()));
              if (detailedLogging) {
                LOG.warn("expected header"
                  + HFileBlock.toStringHeader(new SingleByteBuff(bufExpected))
                  + "\nfound header" + HFileBlock.toStringHeader(bufRead));
                LOG.warn("bufread offset " + bufRead.arrayOffset() + " limit " + bufRead.limit()
                  + " expected offset " + bufExpected.arrayOffset()
                  + " limit " + bufExpected.limit());
                LOG.warn(wrongBytesMsg);
              }
            }
            assertTrue(wrongBytesMsg, bytesAreCorrect);
            assertRelease(newBlock);
            if (newBlock != b) {
              assertRelease(b);
            }
          } else {
            assertRelease(b);
          }
        }
        assertEquals(curOffset, fs.getFileStatus(path).getLen());
        is.close();
      }
    }
  }
}
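Aside: the pattern the test exercises for every entry in COMPRESSION_ALGORITHMS boils down to "resolve an Algorithm, hand it to an HFileContextBuilder". A minimal standalone sketch of just that step, assuming the public Compression.getCompressionAlgorithmByName helper; the class and method names below are hypothetical:

import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

public class CompressionContextSketch {
  // Resolve an Algorithm from its configured name ("none", "gz", "snappy", ...)
  // and wire it into an HFileContext, mirroring the loop body above.
  static HFileContext contextFor(String compressionName) {
    Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(compressionName);
    return new HFileContextBuilder()
      .withHBaseCheckSum(true)
      .withCompression(algo)
      .build();
  }
}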
Use of org.apache.hadoop.hbase.io.compress.Compression.Algorithm in project hbase by apache.
The class BulkLoadHFilesTool, method copyHFileHalf.
/**
* Copy half of an HFile into a new HFile.
*/
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile,
    Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException {
  FileSystem fs = inFile.getFileSystem(conf);
  CacheConfig cacheConf = CacheConfig.DISABLED;
  HalfStoreFileReader halfReader = null;
  StoreFileWriter halfWriter = null;
  try {
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, inFile).build();
    HFileInfo hfile = new HFileInfo(context, conf);
    halfReader =
      new HalfStoreFileReader(context, hfile, cacheConf, reference, new AtomicInteger(0), conf);
    hfile.initMetaAndIndex(halfReader.getHFileReader());
    Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();
    int blocksize = familyDescriptor.getBlocksize();
    Algorithm compression = familyDescriptor.getCompressionType();
    BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
    HFileContext hFileContext = new HFileContextBuilder()
      .withCompression(compression)
      .withChecksumType(StoreUtils.getChecksumType(conf))
      .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
      .withBlockSize(blocksize)
      .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
      .withIncludesTags(true)
      .build();
    halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
      .withFilePath(outFile)
      .withBloomType(bloomFilterType)
      .withFileContext(hFileContext)
      .build();
    HFileScanner scanner = halfReader.getScanner(false, false, false);
    scanner.seekTo();
    do {
      halfWriter.append(scanner.getCell());
    } while (scanner.next());
    for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
      if (shouldCopyHFileMetaKey(entry.getKey())) {
        halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
      }
    }
  } finally {
    if (halfReader != null) {
      try {
        halfReader.close(cacheConf.shouldEvictOnClose());
      } catch (IOException e) {
        LOG.warn("failed to close hfile reader for " + inFile, e);
      }
    }
    if (halfWriter != null) {
      halfWriter.close();
    }
  }
}
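The Algorithm here is read directly off the ColumnFamilyDescriptor, so the half-file keeps the family's configured compression. As a hedged sketch of where that value typically originates (the family name "cf" and class name below are illustrative):

import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;

public class FamilyCompressionSketch {
  // Declare a family with GZ compression; copyHFileHalf later recovers the
  // same value via familyDescriptor.getCompressionType() and feeds it to
  // HFileContextBuilder.withCompression.
  static ColumnFamilyDescriptor gzFamily() {
    return ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
      .setCompressionType(Compression.Algorithm.GZ)
      .build();
  }
}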
Use of org.apache.hadoop.hbase.io.compress.Compression.Algorithm in project phoenix by apache.
The class MultiHfileOutputFormat, method createRecordWriter.
/**
 * @param context the task attempt context supplying the work path and configuration
 * @return a record writer that writes each cell into a per-table, per-family HFile
 * @throws IOException if the output file system or an HFile writer cannot be created
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(
    final TaskAttemptContext context) throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  final long maxsize =
    conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr =
    conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression =
    AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude =
    conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
  return new RecordWriter<TableRowkeyPair, V>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers =
      new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
    private boolean rollRequested = false;

    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }
      // If this writer's HFile has reached maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new HFile writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : passed even the table name
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }
      // we now have the proper HFile writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transitions.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /*
     * Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
        justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf)
        throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));
      // phoenix-2216: start : fetch the configuration properties that were set on the table.
      // create a map from column family to the compression algorithm for the table.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end
      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap =
        createFamilyDataBlockEncodingMap(conf, tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }
      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder()
        .withCompression(compression)
        .withChecksumType(HStore.getChecksumType(conf))
        .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
        .withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
        .withOutputDir(familydir)
        .withBloomType(bloomType)
        .withComparator(KeyValue.COMPARATOR)
        .withFileContext(hFileContext)
        .build();
      // phoenix-2216: start : the key in the writers map is a join byte array
      // of table name and family.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
          Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
          Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
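Note how the default compression for every writer comes from the invented "hfile.compression" key. A minimal driver-side sketch for overriding it, using only Configuration.set and Algorithm.getName (the class name below is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.Compression;

public class DefaultCompressionSketch {
  // Any name accepted by compressionByName works; SNAPPY is an illustrative choice.
  static void configure(Configuration conf) {
    conf.set("hfile.compression", Compression.Algorithm.SNAPPY.getName());
  }
}

With this set, AbstractHFileWriter.compressionByName resolves the string back to Algorithm.SNAPPY, and any family without an explicit entry in the per-family compression map falls back to it.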
Use of org.apache.hadoop.hbase.io.compress.Compression.Algorithm in project hbase by apache.
The class TestHFileOutputFormat2, method testSerializeDeserializeFamilyCompressionMap.
/**
 * Test for {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
 * Tests that the family compression map is correctly serialized into
 * and deserialized from configuration.
 *
 * @throws IOException
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, Compression.Algorithm> familyToCompression =
      getMockColumnFamiliesForCompression(numCfs);
    Table table = Mockito.mock(Table.class);
    setupMockColumnFamiliesForCompression(table, familyToCompression);
    conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
      HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.compressionDetails,
        Arrays.asList(table.getDescriptor())));
    // read back family specific compression setting from the configuration
    Map<byte[], Algorithm> retrievedFamilyToCompressionMap =
      HFileOutputFormat2.createFamilyCompressionMap(conf);
    // verify each mocked family maps back to its configured algorithm
    for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
      assertEquals("Compression configuration incorrect for column family: " + entry.getKey(),
        entry.getValue(), retrievedFamilyToCompressionMap.get(Bytes.toBytes(entry.getKey())));
    }
  }
}
Use of org.apache.hadoop.hbase.io.compress.Compression.Algorithm in project hbase by apache.
The class HFileOutputFormat2, method createFamilyCompressionMap.
/**
 * Runs inside the task to deserialize the column family to compression algorithm
 * map from the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map from column family to the configured compression algorithm
 */
@InterfaceAudience.Private
static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
  Map<byte[], String> stringMap = createFamilyConfValueMap(conf, COMPRESSION_FAMILIES_CONF_KEY);
  Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
    Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
    compressionMap.put(e.getKey(), algorithm);
  }
  return compressionMap;
}
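A hedged usage sketch for the deserialized map: since the method is @InterfaceAudience.Private and package-private, the caller below is assumed to live in the same package as HFileOutputFormat2; the family name "cf" and class CompressionMapSketch are illustrative.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

class CompressionMapSketch {
  // Look up the configured algorithm for one family, falling back to NONE.
  static Algorithm lookup(Configuration conf, String family) {
    Map<byte[], Algorithm> map = HFileOutputFormat2.createFamilyCompressionMap(conf);
    return map.getOrDefault(Bytes.toBytes(family), Algorithm.NONE);
  }
}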