Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
From the class TestDictionary, method testBinaryDictionary.
/**
 * Writes two batches of repeated binary values and one batch of distinct values
 * through a plain binary dictionary writer, then reads every batch back.
 * The first two batches are checked against PLAIN_DICTIONARY; the third, written
 * after the fallback, is checked against PLAIN.
 *
 * @throws IOException if one of the write/read helpers fails
 */
@Test
public void testBinaryDictionary() throws IOException {
  final int valuesPerBatch = 100;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(200, 10000);

  // Two batches, each holding a single repeated value.
  writeRepeated(valuesPerBatch, writer, "a");
  final BytesInput batchA = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
  writeRepeated(valuesPerBatch, writer, "b");
  final BytesInput batchB = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

  // From here on the writer is expected to fall back, hence the PLAIN check.
  writeDistinct(valuesPerBatch, writer, "c");
  final BytesInput batchC = getBytesAndCheckEncoding(writer, PLAIN);

  // The repeated batches are read through a dictionary reader built from the writer.
  final DictionaryValuesReader dictionaryReader = initDicReader(writer, BINARY);
  checkRepeated(valuesPerBatch, batchA, dictionaryReader, "a");
  checkRepeated(valuesPerBatch, batchB, dictionaryReader, "b");

  // The fallback batch is read with a plain binary reader.
  checkDistinct(valuesPerBatch, batchC, new BinaryPlainValuesReader(), "c");
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
From the class TestDictionary, method testBinaryDictionaryChangedValues.
/**
 * Same scenario as a plain repeated-value dictionary test, except that the repeated
 * batches are produced with {@code writeRepeatedWithReuse}. Two repeated batches are
 * checked against PLAIN_DICTIONARY, a distinct batch written after the fallback is
 * checked against PLAIN, and all three are read back.
 *
 * @throws IOException if one of the write/read helpers fails
 */
@Test
public void testBinaryDictionaryChangedValues() throws IOException {
  final int valuesPerBatch = 100;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(200, 10000);

  // Two batches, each holding a single repeated value.
  writeRepeatedWithReuse(valuesPerBatch, writer, "a");
  final BytesInput batchA = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
  writeRepeatedWithReuse(valuesPerBatch, writer, "b");
  final BytesInput batchB = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

  // From here on the writer is expected to fall back, hence the PLAIN check.
  writeDistinct(valuesPerBatch, writer, "c");
  final BytesInput batchC = getBytesAndCheckEncoding(writer, PLAIN);

  // The repeated batches are read through a dictionary reader built from the writer.
  final DictionaryValuesReader dictionaryReader = initDicReader(writer, BINARY);
  checkRepeated(valuesPerBatch, batchA, dictionaryReader, "a");
  checkRepeated(valuesPerBatch, batchB, dictionaryReader, "b");

  // The fallback batch is read with a plain binary reader.
  checkDistinct(valuesPerBatch, batchC, new BinaryPlainValuesReader(), "c");
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
From the class BenchmarkDeltaLengthByteArray, method benchmarkRandomStringsWithPlainValuesWriter.
/**
 * Benchmark round trip: writes {@code values} with a {@code PlainValuesWriter},
 * reads them back with a {@code BinaryPlainValuesReader}, and prints the position
 * of the input stream after reading.
 *
 * @throws IOException if writing or reading the data fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
  // Both numeric constructor arguments of the writer share this value.
  final int sixtyFourKiB = 64 * 1024;
  final PlainValuesWriter plainWriter =
      new PlainValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  final BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

  Utils.writeData(plainWriter, values);
  final ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();

  // The decoded array is not inspected; only the stream position is reported.
  final Binary[] decoded = Utils.readData(plainReader, encoded, values.length);
  System.out.println("size " + encoded.position());
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
From the class TestDictionary, method testFirstPageFallBack.
/**
 * Covers the case where the dictionary writer falls back on the very first page:
 * a page of distinct values is checked against PLAIN, and a following page of
 * repeated values is checked against PLAIN as well. Both pages are then read back
 * with the same plain binary reader.
 *
 * @throws IOException if one of the write/read helpers fails
 */
@Test
public void testFirstPageFallBack() throws IOException {
  final int valuesPerPage = 1000;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(10000, 10000);

  // Distinct values are not efficient for a dictionary, so the writer falls back.
  writeDistinct(valuesPerPage, writer, "a");
  final BytesInput distinctPage = getBytesAndCheckEncoding(writer, PLAIN);

  // Having fallen back on the first page, the writer stays on PLAIN for repeated values too.
  writeRepeated(valuesPerPage, writer, "b");
  final BytesInput repeatedPage = getBytesAndCheckEncoding(writer, PLAIN);

  final ValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valuesPerPage, distinctPage, plainReader, "a");
  checkRepeated(valuesPerPage, repeatedPage, plainReader, "b");
}
Use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by Apache.
From the class BenchmarkDeltaByteArray, method benchmarkRandomStringsWithPlainValuesWriter.
/**
 * Benchmark round trip: writes {@code values} with a {@code PlainValuesWriter},
 * reads them back with a {@code BinaryPlainValuesReader}, and prints the position
 * of the input stream after reading.
 *
 * @throws IOException if writing or reading the data fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
  // Both numeric constructor arguments of the writer share this value.
  final int sixtyFourKiB = 64 * 1024;
  final PlainValuesWriter plainWriter =
      new PlainValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  final BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

  Utils.writeData(plainWriter, values);
  final ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();

  // The decoded array is not inspected; only the stream position is reported.
  final Binary[] decoded = Utils.readData(plainReader, encoded, values.length);
  System.out.println("size " + encoded.position());
}
Aggregations