Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.
The class DataBlockEncodingTool, method checkStatistics.
/**
 * Check statistics for the given HFile across the different data block encoders.
 * @param scanner scanner over the file whose blocks will be compressed
 * @param kvLimit maximum number of KeyValues to process
 * @throws IOException thrown if the scanner is invalid
 */
public void checkStatistics(final KeyValueScanner scanner, final int kvLimit)
    throws IOException {
  scanner.seek(KeyValue.LOWESTKEY);

  KeyValue currentKV;
  byte[] previousKey = null;
  byte[] currentKey;

  DataBlockEncoding[] encodings = DataBlockEncoding.values();

  ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream();

  // Remember the last KeyValue seen: the loop below exits with currentKV set to
  // null once the scanner is exhausted, so reading tag information from
  // currentKV afterwards would throw a NullPointerException.
  KeyValue lastKV = null;

  int j = 0;
  while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
    // Iterate through key/value pairs
    j++;
    currentKey = currentKV.getKey();
    if (previousKey != null) {
      // Count the leading bytes the current key shares with the previous one
      for (int i = 0; i < previousKey.length && i < currentKey.length
          && previousKey[i] == currentKey[i]; ++i) {
        totalKeyRedundancyLength++;
      }
    }

    uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(),
        currentKV.getLength());

    previousKey = currentKey;
    lastKV = currentKV;

    int kLen = currentKV.getKeyLength();
    int vLen = currentKV.getValueLength();
    int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
    int restLen = currentKV.getLength() - kLen - vLen;

    totalKeyLength += kLen;
    totalValueLength += vLen;
    totalPrefixLength += restLen;
    totalCFLength += cfLen;
  }

  rawKVs = uncompressedOutputStream.toByteArray();
  boolean useTag = (lastKV != null && lastKV.getTagsLength() > 0);

  for (DataBlockEncoding encoding : encodings) {
    if (encoding == DataBlockEncoding.NONE) {
      continue;
    }
    DataBlockEncoder d = encoding.getEncoder();
    HFileContext meta = new HFileContextBuilder()
        .withCompression(Compression.Algorithm.NONE)
        .withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTag)
        .build();
    codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta));
  }
}
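The totalKeyRedundancyLength counter above adds up the bytes each key shares as a common prefix with its predecessor; this is exactly the redundancy that prefix-based encoders exploit. As a standalone illustration, a minimal sketch of that comparison (the helper name commonPrefixLength is hypothetical, not part of the tool):

static int commonPrefixLength(byte[] previousKey, byte[] currentKey) {
  // Walk both keys in lockstep until the first differing byte
  int i = 0;
  while (i < previousKey.length && i < currentKey.length
      && previousKey[i] == currentKey[i]) {
    i++;
  }
  return i;
}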
Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.
The class DataBlockEncodingTool, method verifyCodecs.
/**
 * Verify that all data block encoders are working properly.
 *
 * @param scanner scanner over the file that was compressed
 * @param kvLimit maximum number of KeyValues to process
 * @return true if all data block encoders compressed and decompressed correctly
 * @throws IOException thrown if the scanner is invalid
 */
public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit)
    throws IOException {
  KeyValue currentKv;

  scanner.seek(KeyValue.LOWESTKEY);
  List<Iterator<Cell>> codecIterators = new ArrayList<>();
  for (EncodedDataBlock codec : codecs) {
    codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)));
  }

  int j = 0;
  while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
    // Iterate through key/value pairs
    ++j;
    for (Iterator<Cell> it : codecIterators) {
      Cell c = it.next();
      KeyValue codecKv = KeyValueUtil.ensureKeyValue(c);
      if (codecKv == null || 0 != Bytes.compareTo(codecKv.getBuffer(), codecKv.getOffset(),
          codecKv.getLength(), currentKv.getBuffer(), currentKv.getOffset(),
          currentKv.getLength())) {
        if (codecKv == null) {
          LOG.error("There is a bug in codec " + it + ": it returned a null KeyValue");
        } else {
          // Find how many leading bytes the two KeyValues have in common
          int prefix = 0;
          int limitLength = 2 * Bytes.SIZEOF_INT
              + Math.min(codecKv.getLength(), currentKv.getLength());
          while (prefix < limitLength
              && codecKv.getBuffer()[prefix + codecKv.getOffset()]
                  == currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
            prefix++;
          }
          LOG.error("There is a bug in codec " + it.toString()
              + "\n on element " + j
              + "\n codecKv.getKeyLength() " + codecKv.getKeyLength()
              + "\n codecKv.getValueLength() " + codecKv.getValueLength()
              + "\n codecKv.getLength() " + codecKv.getLength()
              + "\n currentKv.getKeyLength() " + currentKv.getKeyLength()
              + "\n currentKv.getValueLength() " + currentKv.getValueLength()
              + "\n currentKv.getLength() " + currentKv.getLength()
              + "\n currentKV rowLength " + currentKv.getRowLength()
              + " familyName " + currentKv.getFamilyLength()
              + " qualifier " + currentKv.getQualifierLength()
              + "\n prefix " + prefix
              + "\n codecKv '"
              + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset(), prefix)
              + "' diff '"
              + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset() + prefix,
                  codecKv.getLength() - prefix)
              + "'"
              + "\n currentKv '"
              + Bytes.toStringBinary(currentKv.getBuffer(), currentKv.getOffset(), prefix)
              + "' diff '"
              + Bytes.toStringBinary(currentKv.getBuffer(), currentKv.getOffset() + prefix,
                  currentKv.getLength() - prefix)
              + "'");
        }
        return false;
      }
    }
  }

  LOG.info("Verification was successful!");
  return true;
}
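The verification relies on EncodedDataBlock.getIterator(int) returning the decoded Cells in the same order the raw scanner produced them. A minimal sketch of walking one encoded block on its own, assuming a codec built as in checkStatistics above (the method countCells is hypothetical):

int countCells(EncodedDataBlock codec) {
  // The iterator skips the HFile block header before decoding cells
  Iterator<Cell> it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum));
  int count = 0;
  while (it.hasNext()) {
    it.next();
    count++;
  }
  return count;
}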
Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.
The class DataBlockEncodingTool, method benchmarkCodecs.
/**
 * Benchmark the speed of every configured data block codec.
 */
public void benchmarkCodecs() throws IOException {
  LOG.info("Starting a throughput benchmark for data block encoding codecs");
  int prevTotalSize = -1;
  for (EncodedDataBlock codec : codecs) {
    prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
  }
  benchmarkDefaultCompression(prevTotalSize, rawKVs);
}
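benchmarkEncoder and benchmarkDefaultCompression are private helpers not shown here. A hedged sketch of what such a timing loop could look like, assuming EncodedDataBlock.encodeData() re-encodes the block's raw KV buffer (the harness itself is hypothetical, not the tool's actual measurement code):

long timeEncoding(EncodedDataBlock codec, int iterations) {
  long start = System.nanoTime();
  for (int i = 0; i < iterations; i++) {
    // Each pass runs the codec's encoder over the raw KV bytes
    codec.encodeData();
  }
  return System.nanoTime() - start;
}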
Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.
The class DataBlockEncodingTool, method displayStatistics.
/**
 * Display statistics for the different compression algorithms.
 * @throws IOException thrown if compressing the raw data fails
 */
public void displayStatistics() throws IOException {
  final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT);
  long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;

  System.out.println("Raw data size:");
  outputTuple("Raw bytes", INT_FORMAT, rawBytes);
  outputTuplePct("Key bytes", totalKeyLength);
  outputTuplePct("Value bytes", totalValueLength);
  outputTuplePct("KV infrastructure", totalPrefixLength);
  outputTuplePct("CF overhead", totalCFLength);
  outputTuplePct("Total key redundancy", totalKeyRedundancyLength);

  int compressedSize = EncodedDataBlock.getCompressedSize(compressionAlgorithm,
      compressor, rawKVs, 0, rawKVs.length);
  outputTuple(comprAlgo + " only size", INT_FORMAT, compressedSize);
  outputSavings(comprAlgo + " only", compressedSize, rawBytes);
  System.out.println();

  for (EncodedDataBlock codec : codecs) {
    System.out.println(codec.toString());
    long encodedBytes = codec.getSize();
    outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);
    outputSavings("Key encoding", encodedBytes - totalValueLength,
        rawBytes - totalValueLength);
    outputSavings("Total encoding", encodedBytes, rawBytes);

    int encodedCompressedSize = codec.getEncodedCompressedSize(compressionAlgorithm,
        compressor);
    outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT, encodedCompressedSize);
    outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);
    outputSavings("Encoding with " + comprAlgo, encodedCompressedSize, compressedSize);
    System.out.println();
  }
}
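outputSavings itself is not shown above; a plausible sketch of the percentage it could report when comparing a compressed size against the raw size (the formula is an assumption, for illustration only):

static double savingsPercent(long compressedBytes, long rawBytes) {
  if (rawBytes <= 0) {
    // Avoid division by zero on an empty input
    return 0.0;
  }
  // Fraction of the raw size eliminated, expressed as a percentage
  return 100.0 * (rawBytes - compressedBytes) / rawBytes;
}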