Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
Class TestDictionary, method testBinaryDictionaryFallBack.
/**
 * Writes 100 distinct strings through a binary dictionary writer whose maximum
 * dictionary byte size is 50, and checks the reported encoding after every write:
 * PLAIN_DICTIONARY while the running size estimate is below that limit, PLAIN
 * afterwards. The page is then decoded with a {@link BinaryPlainValuesReader}
 * and the writer is reset.
 *
 * @throws IOException if reading the written page fails
 */
@Test
public void testBinaryDictionaryFallBack() throws IOException {
  final int valueCount = 100;
  final int slabSize = 100;
  final int maxDictionaryByteSize = 50;
  final ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, slabSize);

  // Running estimate of the bytes written so far; the fallback is expected
  // once it reaches the dictionary size limit.
  int writtenSoFar = 0;
  for (int n = 0; n < valueCount; n++) {
    final Binary value = Binary.fromString("str" + n);
    cw.writeBytes(value);
    // presumably the extra 4 bytes account for a per-value length prefix -- TODO confirm
    writtenSoFar += value.length() + 4;
    assertEquals(
        writtenSoFar < maxDictionaryByteSize ? PLAIN_DICTIONARY : PLAIN,
        cw.getEncoding());
  }

  // The writer ended up on PLAIN, so the page is decoded with the plain binary reader.
  final ValuesReader plainReader = new BinaryPlainValuesReader();
  plainReader.initFromPage(valueCount, cw.getBytes().toInputStream());
  for (int n = 0; n < valueCount; n++) {
    assertEquals(Binary.fromString("str" + n), plainReader.readBytes());
  }

  // Simulate cutting the page: after reset nothing should remain buffered.
  cw.reset();
  assertEquals(0, cw.getBufferedSize());
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
Class TestDictionary, method testSecondPageFallBack.
/**
 * Writes three pages through one dictionary writer: repeated "a" values
 * (expected PLAIN_DICTIONARY), distinct "b" values (expected PLAIN), and
 * repeated "a" values again (expected to remain PLAIN). The first page is then
 * checked through the dictionary reader and the other two through a
 * {@link BinaryPlainValuesReader}.
 *
 * @throws IOException if writing or reading a page fails
 */
@Test
public void testSecondPageFallBack() throws IOException {
  final int valuesPerPage = 1000;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);

  // Page 1: repeated values, dictionary encoding expected.
  writeRepeated(valuesPerPage, writer, "a");
  final BytesInput dictionaryPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

  // Page 2: distinct values make the dictionary inefficient, so the writer falls back.
  writeDistinct(valuesPerPage, writer, "b");
  final BytesInput distinctPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

  // Page 3: repeated values again, but still plain because of the earlier fallback.
  writeRepeated(valuesPerPage, writer, "a");
  final BytesInput repeatedPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

  // The first page is read back through the dictionary reader.
  final ValuesReader dictionaryReader = initDicReader(writer, BINARY);
  checkRepeated(valuesPerPage, dictionaryPage, dictionaryReader, "a");

  // Both later pages are read back with one shared plain reader.
  final ValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valuesPerPage, distinctPlainPage, plainReader, "b");
  checkRepeated(valuesPerPage, repeatedPlainPage, plainReader, "a");
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
Class BenchmarkDeltaByteArray, method benchmarkSortedStringsWithPlainValuesWriter.
/**
 * Benchmark round: writes {@code sortedVals} with a {@link PlainValuesWriter},
 * reads {@code values.length} entries back through a
 * {@link BinaryPlainValuesReader}, and prints the input stream position
 * reached after reading.
 *
 * @throws IOException if writing or reading the data fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithPlainValuesWriter() throws IOException {
  final int bufferSize = 64 * 1024;
  final BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();
  final PlainValuesWriter plainWriter =
      new PlainValuesWriter(bufferSize, bufferSize, new DirectByteBufferAllocator());

  Utils.writeData(plainWriter, sortedVals);

  final ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();
  // The decoded values are not inspected; the read is the work being timed.
  final Binary[] decoded = Utils.readData(plainReader, encoded, values.length);

  System.out.println("size " + encoded.position());
}
Aggregations