use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.
the class DefaultValuesWriterFactoryTest method doTestValueWriter.
/**
 * Builds a values writer for a mocked column of the given primitive type and checks its class.
 *
 * @param typeName primitive type of the mocked column
 * @param version writer version handed to the default factory
 * @param enableDictionary dictionary flag handed to the default factory
 * @param expectedValueWriterClass class the produced writer is validated against
 */
private void doTestValueWriter(PrimitiveTypeName typeName, WriterVersion version, boolean enableDictionary, Class<? extends ValuesWriter> expectedValueWriterClass) {
  final ColumnDescriptor column = getMockColumn(typeName);
  final ValuesWriterFactory writerFactory = getDefaultFactory(version, enableDictionary);
  final ValuesWriter produced = writerFactory.newValuesWriter(column);

  validateWriterType(produced, expectedValueWriterClass);
}
use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.
the class TestBitPackingColumn method validateEncodeDecode.
/**
 * Round-trips {@code vals} through the writer and reader of every {@code PACKING_TYPE}.
 *
 * <p>For each packing type the values are written, the encoded bytes are compared (via
 * {@code TestBitPacking.toString}) with {@code expected}, and the bytes are then read back and
 * compared with the original values.
 *
 * @param bitLength number of bits per value; the bound passed to the writer and reader is
 *     {@code 2^bitLength - 1}, so this is meaningful for 0 to 31
 * @param vals values to encode
 * @param expected expected string rendering of the encoded bytes
 * @throws IOException if the writer or reader fails
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
  // Largest value representable in bitLength bits. Computed with integer arithmetic: the previous
  // (int) Math.pow(2, bitLength) - 1 saturated in the double-to-int cast and was off by one at
  // bitLength == 31. The bound does not depend on the packing type, so it is computed once.
  final int bound = (int) ((1L << bitLength) - 1);
  for (PACKING_TYPE type : PACKING_TYPE.values()) {
    LOG.debug("{}", type);
    ValuesWriter w = type.getWriter(bound);
    for (int value : vals) {
      w.writeInteger(value);
    }
    byte[] bytes = w.getBytes().toByteArray();
    LOG.debug("vals ({}): {}", bitLength, TestBitPacking.toString(vals));
    LOG.debug("bytes: {}", TestBitPacking.toString(bytes));
    assertEquals(type.toString(), expected, TestBitPacking.toString(bytes));

    // Decode the bytes just produced and make sure the original values come back.
    ValuesReader r = type.getReader(bound);
    r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
    int[] result = new int[vals.length];
    for (int i = 0; i < result.length; i++) {
      result[i] = r.readInteger();
    }
    LOG.debug("result: {}", TestBitPacking.toString(result));
    assertArrayEquals(type + " result: " + TestBitPacking.toString(result), vals, result);
  }
}
use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.
the class TestDictionary method testFirstPageFallBack.
/**
 * Checks that a page of distinct values is reported as PLAIN, and that the following page of
 * repeated values is reported as PLAIN as well. Both pages are then read back with a plain reader.
 */
@Test
public void testFirstPageFallBack() throws IOException {
  final int valueCount = 1000;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(10000, 10000);

  // First page: all-distinct values, for which the dictionary is not efficient, so PLAIN is expected.
  writeDistinct(valueCount, writer, "a");
  final BytesInput firstPage = getBytesAndCheckEncoding(writer, PLAIN);

  // Second page: repeated values, yet PLAIN is still expected since the fallback happened on page one.
  writeRepeated(valueCount, writer, "b");
  final BytesInput secondPage = getBytesAndCheckEncoding(writer, PLAIN);

  final ValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valueCount, firstPage, plainReader, "a");
  checkRepeated(valueCount, secondPage, plainReader, "b");
}
use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.
the class TestDictionary method testBinaryDictionaryFallBack.
/**
 * Writes 100 distinct strings and, after each write, checks the encoding the writer reports:
 * PLAIN_DICTIONARY while the running size estimate stays below the dictionary byte limit, PLAIN
 * once it reaches it. The page is then read back with a plain reader and the writer is reset.
 */
@Test
public void testBinaryDictionaryFallBack() throws IOException {
  final int valueCount = 100;
  final int slabSize = 100;
  final int maxDictionaryByteSize = 50;
  final ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, slabSize);

  // Running estimate of the written size: each value's length plus 4 bytes.
  int writtenSize = 0;
  for (int n = 0; n < valueCount; n++) {
    final Binary value = Binary.fromString("str" + n);
    cw.writeBytes(value);
    writtenSize += value.length() + 4;
    // The dictionary byte limit doubles as the fallback threshold.
    assertEquals(writtenSize < maxDictionaryByteSize ? PLAIN_DICTIONARY : PLAIN, cw.getEncoding());
  }

  // The writer fell back to plain encoding, so a plain reader is used to read the page back.
  final ValuesReader plainReader = new BinaryPlainValuesReader();
  plainReader.initFromPage(valueCount, cw.getBytes().toInputStream());
  for (int n = 0; n < valueCount; n++) {
    assertEquals(Binary.fromString("str" + n), plainReader.readBytes());
  }

  // Simulate cutting the page: after a reset nothing may remain buffered.
  cw.reset();
  assertEquals(0, cw.getBufferedSize());
}
use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.
the class TestDictionary method testSecondPageFallBack.
/**
 * Checks three consecutive pages from one writer: repeated values reported as PLAIN_DICTIONARY,
 * then distinct values reported as PLAIN, then repeated values still reported as PLAIN. The first
 * page is read back through a dictionary reader, the other two through a plain reader.
 */
@Test
public void testSecondPageFallBack() throws IOException {
  final int valueCount = 1000;
  final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);

  // Page 1: repeated values, dictionary encoding expected.
  writeRepeated(valueCount, writer, "a");
  final BytesInput dictionaryPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

  // Page 2: distinct values, for which the dictionary is not efficient, so PLAIN is expected.
  writeDistinct(valueCount, writer, "b");
  final BytesInput distinctPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

  // Page 3: repeated values again, yet PLAIN is still expected after the earlier fallback.
  writeRepeated(valueCount, writer, "a");
  final BytesInput repeatedPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

  final ValuesReader dictionaryReader = initDicReader(writer, BINARY);
  checkRepeated(valueCount, dictionaryPage, dictionaryReader, "a");

  final ValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valueCount, distinctPlainPage, plainReader, "b");
  checkRepeated(valueCount, repeatedPlainPage, plainReader, "a");
}
Aggregations