Use of org.apache.parquet.column.values.dictionary.IntList.IntIterator in the Apache project parquet-mr.
The getBytes method of the class DictionaryValuesWriter.
/**
 * Encodes every value currently held in {@code encodedValues} with a
 * run-length / bit-packing hybrid encoder and returns the result prefixed
 * by a single byte carrying the bit width.
 *
 * <p>The bit width is derived from the maximum dictionary id
 * ({@code getDictionarySize() - 1}). The encoder created here is added to
 * {@code encoders}, and the dictionary size and dictionary byte size at the
 * time of this call are stored in {@code lastUsedDictionarySize} and
 * {@code lastUsedDictionaryByteSize}.
 *
 * @return the one-byte bit-width header followed by the encoded values
 * @throws ParquetEncodingException if encoding raises an {@link IOException}
 */
@Override
public BytesInput getBytes() {
  final int highestId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", highestId);

  final int width = BytesUtils.getWidthFromMaxInt(highestId);
  final int slabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(
      MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  final RunLengthBitPackingHybridEncoder rle = new RunLengthBitPackingHybridEncoder(
      width, slabSize, maxDictionaryByteSize, this.allocator);
  // The encoder is added to the writer's encoder list before any value is written.
  encoders.add(rle);

  try {
    for (IntIterator ids = encodedValues.iterator(); ids.hasNext(); ) {
      rle.writeInt(ids.next());
    }

    final BytesInput payload = rle.toBytes();
    LOG.debug("rle encoded bytes {}", payload.size());

    // Output layout: one byte holding the bit width, then the encoded values.
    final BytesInput page = concat(BytesInput.from(new byte[] { (byte) width }), payload);

    // Remember the dictionary size as of the last page we wrote.
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return page;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
Aggregations