Example 1 with EncodedDataBlock

Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.

From the class DataBlockEncodingTool, method checkStatistics:

/**
   * Check statistics for the given HFile across the different data block encoders.
   * @param scanner scanner over the file that will be compressed
   * @param kvLimit maximum number of KeyValues to process
   * @throws IOException if the scanner is invalid
   */
public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException {
    scanner.seek(KeyValue.LOWESTKEY);
    KeyValue currentKV;
    byte[] previousKey = null;
    byte[] currentKey;
    DataBlockEncoding[] encodings = DataBlockEncoding.values();
    ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream();
    int j = 0;
    while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
        // Iterates through key/value pairs
        j++;
        currentKey = currentKV.getKey();
        if (previousKey != null) {
            // Count the leading bytes this key shares with the previous key.
            for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) {
                totalKeyRedundancyLength++;
            }
        }
        uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength());
        previousKey = currentKey;
        int kLen = currentKV.getKeyLength();
        int vLen = currentKV.getValueLength();
        int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
        // Everything that is neither key nor value bytes: length headers, tags, etc.
        int restLen = currentKV.getLength() - kLen - vLen;
        totalKeyLength += kLen;
        totalValueLength += vLen;
        totalPrefixLength += restLen;
        totalCFLength += cfLen;
    }
    rawKVs = uncompressedOutputStream.toByteArray();
    // Guard: currentKV is null here when the scanner was exhausted before
    // kvLimit was reached, so check it before reading the tags length.
    boolean useTag = currentKV != null && currentKV.getTagsLength() > 0;
    for (DataBlockEncoding encoding : encodings) {
        if (encoding == DataBlockEncoding.NONE) {
            continue;
        }
        DataBlockEncoder d = encoding.getEncoder();
        HFileContext meta = new HFileContextBuilder().withCompression(Compression.Algorithm.NONE).withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTag).build();
        codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta));
    }
}
Also used: DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding), KeyValue (org.apache.hadoop.hbase.KeyValue), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), ByteArrayOutputStream (java.io.ByteArrayOutputStream), DataBlockEncoder (org.apache.hadoop.hbase.io.encoding.DataBlockEncoder), EncodedDataBlock (org.apache.hadoop.hbase.io.encoding.EncodedDataBlock), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext)
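
For reference, the codec-list construction above can be pulled out into a small self-contained helper. This is a minimal sketch, assuming a rawKVs buffer already filled with serialized KeyValues as in the loop above; the helper name buildCodecs and its parameters are illustrative, while the EncodedDataBlock constructor, the HFileContextBuilder calls, and the skip of DataBlockEncoding.NONE come directly from the snippet.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

// Build one EncodedDataBlock per available encoding for a raw KeyValue buffer.
public static List<EncodedDataBlock> buildCodecs(byte[] rawKVs, boolean includesMvcc, boolean includesTags) {
    List<EncodedDataBlock> codecs = new ArrayList<>();
    HFileContext meta = new HFileContextBuilder()
        .withCompression(Compression.Algorithm.NONE)
        .withIncludesMvcc(includesMvcc)
        .withIncludesTags(includesTags)
        .build();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
        // NONE carries no encoder, so there is nothing to measure for it.
        if (encoding == DataBlockEncoding.NONE) {
            continue;
        }
        DataBlockEncoder encoder = encoding.getEncoder();
        codecs.add(new EncodedDataBlock(encoder, encoding, rawKVs, meta));
    }
    return codecs;
}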

Example 2 with EncodedDataBlock

Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.

From the class DataBlockEncodingTool, method verifyCodecs:

/**
   * Verify that all data block encoders are working properly.
   *
   * @param scanner scanner over the file that was compressed
   * @param kvLimit maximum number of KeyValues to process
   * @return true if all data block encoders compressed and decompressed correctly
   * @throws IOException if the scanner is invalid
   */
public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit) throws IOException {
    KeyValue currentKv;
    scanner.seek(KeyValue.LOWESTKEY);
    List<Iterator<Cell>> codecIterators = new ArrayList<>();
    for (EncodedDataBlock codec : codecs) {
        codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)));
    }
    int j = 0;
    while ((currentKv = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
        // Iterates through key/value pairs
        ++j;
        for (Iterator<Cell> it : codecIterators) {
            Cell c = it.next();
            KeyValue codecKv = KeyValueUtil.ensureKeyValue(c);
            if (codecKv == null || 0 != Bytes.compareTo(
                    codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),
                    currentKv.getBuffer(), currentKv.getOffset(), currentKv.getLength())) {
                if (codecKv == null) {
                    LOG.error("There is a bug in codec " + it + " it returned null KeyValue,");
                } else {
                    int prefix = 0;
                    int limitLength = 2 * Bytes.SIZEOF_INT + Math.min(codecKv.getLength(), currentKv.getLength());
                    while (prefix < limitLength && codecKv.getBuffer()[prefix + codecKv.getOffset()] == currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
                        prefix++;
                    }
                    LOG.error("There is bug in codec " + it.toString() + "\n on element " + j + "\n codecKv.getKeyLength() " + codecKv.getKeyLength() + "\n codecKv.getValueLength() " + codecKv.getValueLength() + "\n codecKv.getLength() " + codecKv.getLength() + "\n currentKv.getKeyLength() " + currentKv.getKeyLength() + "\n currentKv.getValueLength() " + currentKv.getValueLength() + "\n codecKv.getLength() " + currentKv.getLength() + "\n currentKV rowLength " + currentKv.getRowLength() + " familyName " + currentKv.getFamilyLength() + " qualifier " + currentKv.getQualifierLength() + "\n prefix " + prefix + "\n codecKv   '" + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset(), prefix) + "' diff '" + Bytes.toStringBinary(codecKv.getBuffer(), codecKv.getOffset() + prefix, codecKv.getLength() - prefix) + "'" + "\n currentKv '" + Bytes.toStringBinary(currentKv.getBuffer(), currentKv.getOffset(), prefix) + "' diff '" + Bytes.toStringBinary(currentKv.getBuffer(), currentKv.getOffset() + prefix, currentKv.getLength() - prefix) + "'");
                }
                return false;
            }
        }
    }
    LOG.info("Verification was successful!");
    return true;
}
Also used: KeyValue (org.apache.hadoop.hbase.KeyValue), ArrayList (java.util.ArrayList), Iterator (java.util.Iterator), EncodedDataBlock (org.apache.hadoop.hbase.io.encoding.EncodedDataBlock), Cell (org.apache.hadoop.hbase.Cell)
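
The comparison logic above reduces to a byte-for-byte round-trip check per codec. Here is a minimal sketch of that check for a single codec, assuming the caller kept the original KeyValues in a list; the helper name roundTripMatches and the expectedKvs parameter are illustrative, while getIterator, HFileBlock.headerSize, KeyValueUtil.ensureKeyValue, and Bytes.compareTo are the same calls the method itself uses.

import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.util.Bytes;

// Check that one codec reproduces the expected KeyValues byte-for-byte.
static boolean roundTripMatches(EncodedDataBlock codec, List<KeyValue> expectedKvs, boolean useHBaseChecksum) {
    Iterator<Cell> it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum));
    for (KeyValue expected : expectedKvs) {
        if (!it.hasNext()) {
            return false; // codec returned fewer cells than expected
        }
        KeyValue decoded = KeyValueUtil.ensureKeyValue(it.next());
        if (decoded == null || 0 != Bytes.compareTo(
                decoded.getBuffer(), decoded.getOffset(), decoded.getLength(),
                expected.getBuffer(), expected.getOffset(), expected.getLength())) {
            return false; // null or byte-level mismatch against the original
        }
    }
    return true;
}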

Example 3 with EncodedDataBlock

Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.

From the class DataBlockEncodingTool, method benchmarkCodecs:

/**
   * Benchmark the speed of each codec, followed by the default compression.
   */
public void benchmarkCodecs() throws IOException {
    LOG.info("Starting a throughput benchmark for data block encoding codecs");
    int prevTotalSize = -1;
    for (EncodedDataBlock codec : codecs) {
        prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
    }
    benchmarkDefaultCompression(prevTotalSize, rawKVs);
}
Also used: EncodedDataBlock (org.apache.hadoop.hbase.io.encoding.EncodedDataBlock)
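
benchmarkEncoder() itself is not part of this listing. As a rough idea of what a decode-throughput measurement over an EncodedDataBlock can look like, here is a hedged sketch that times full passes over a codec's iterator; the helper name timeDecodeMillis, the iteration count, and the System.nanoTime() timing are illustrative and not the tool's actual implementation.

import java.util.Iterator;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;

// Time full decoding passes over one codec's iterator.
static double timeDecodeMillis(EncodedDataBlock codec, boolean useHBaseChecksum, int iterations) {
    int headerSize = HFileBlock.headerSize(useHBaseChecksum);
    long start = System.nanoTime();
    for (int i = 0; i < iterations; i++) {
        Iterator<Cell> it = codec.getIterator(headerSize);
        // Walk every cell so the block is actually decoded end to end.
        while (it.hasNext()) {
            it.next();
        }
    }
    long elapsedNs = System.nanoTime() - start;
    return (elapsedNs / 1_000_000.0) / iterations; // average millis per pass
}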

Example 4 with EncodedDataBlock

Use of org.apache.hadoop.hbase.io.encoding.EncodedDataBlock in project hbase by apache.

From the class DataBlockEncodingTool, method displayStatistics:

/**
   * Display statistics for the different compression algorithms.
   * @throws IOException if compressing the raw data fails
   */
public void displayStatistics() throws IOException {
    final String comprAlgo = compressionAlgorithmName.toUpperCase(Locale.ROOT);
    long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;
    System.out.println("Raw data size:");
    outputTuple("Raw bytes", INT_FORMAT, rawBytes);
    outputTuplePct("Key bytes", totalKeyLength);
    outputTuplePct("Value bytes", totalValueLength);
    outputTuplePct("KV infrastructure", totalPrefixLength);
    outputTuplePct("CF overhead", totalCFLength);
    outputTuplePct("Total key redundancy", totalKeyRedundancyLength);
    int compressedSize = EncodedDataBlock.getCompressedSize(compressionAlgorithm, compressor, rawKVs, 0, rawKVs.length);
    outputTuple(comprAlgo + " only size", INT_FORMAT, compressedSize);
    outputSavings(comprAlgo + " only", compressedSize, rawBytes);
    System.out.println();
    for (EncodedDataBlock codec : codecs) {
        System.out.println(codec.toString());
        long encodedBytes = codec.getSize();
        outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);
        outputSavings("Key encoding", encodedBytes - totalValueLength, rawBytes - totalValueLength);
        outputSavings("Total encoding", encodedBytes, rawBytes);
        int encodedCompressedSize = codec.getEncodedCompressedSize(compressionAlgorithm, compressor);
        outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT, encodedCompressedSize);
        outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);
        outputSavings("Encoding with " + comprAlgo, encodedCompressedSize, compressedSize);
        System.out.println();
    }
}
Also used: EncodedDataBlock (org.apache.hadoop.hbase.io.encoding.EncodedDataBlock)
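
The outputTuple, outputTuplePct, and outputSavings helpers are not shown in this listing. The savings figures they report boil down to the percentage of raw size saved by a given representation; a minimal sketch of that arithmetic, with the helper name printSavings being illustrative:

// Print how much a compressed/encoded representation saves relative to raw bytes.
static void printSavings(String label, long compressedBytes, long rawBytes) {
    double savedPct = rawBytes == 0 ? 0.0 : 100.0 * (rawBytes - compressedBytes) / rawBytes;
    System.out.printf("%-25s %d bytes, %.2f%% saved vs. %d raw bytes%n",
        label, compressedBytes, savedPct, rawBytes);
}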

Aggregations

EncodedDataBlock (org.apache.hadoop.hbase.io.encoding.EncodedDataBlock): 4 uses
KeyValue (org.apache.hadoop.hbase.KeyValue): 2 uses
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 use
ArrayList (java.util.ArrayList): 1 use
Iterator (java.util.Iterator): 1 use
Cell (org.apache.hadoop.hbase.Cell): 1 use
DataBlockEncoder (org.apache.hadoop.hbase.io.encoding.DataBlockEncoder): 1 use
DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding): 1 use
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 1 use
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 1 use