use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoder in project hbase by apache.
the class DataBlockEncodingTool method checkStatistics.
/**
* Check statistics for given HFile for different data block encoders.
* @param scanner Of file which will be compressed.
* @param kvLimit Maximal count of KeyValue which will be processed.
* @throws IOException thrown if scanner is invalid
*/
public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException {
scanner.seek(KeyValue.LOWESTKEY);
KeyValue currentKV;
byte[] previousKey = null;
byte[] currentKey;
DataBlockEncoding[] encodings = DataBlockEncoding.values();
ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream();
int j = 0;
while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
// Iterates through key/value pairs
j++;
currentKey = currentKV.getKey();
if (previousKey != null) {
for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) {
totalKeyRedundancyLength++;
}
}
uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength());
previousKey = currentKey;
int kLen = currentKV.getKeyLength();
int vLen = currentKV.getValueLength();
int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
int restLen = currentKV.getLength() - kLen - vLen;
totalKeyLength += kLen;
totalValueLength += vLen;
totalPrefixLength += restLen;
totalCFLength += cfLen;
}
rawKVs = uncompressedOutputStream.toByteArray();
boolean useTag = (currentKV.getTagsLength() > 0);
for (DataBlockEncoding encoding : encodings) {
if (encoding == DataBlockEncoding.NONE) {
continue;
}
DataBlockEncoder d = encoding.getEncoder();
HFileContext meta = new HFileContextBuilder().withCompression(Compression.Algorithm.NONE).withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTag).build();
codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta));
}
}
Aggregations