Use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by Apache.
Class SmallRangeWritingBenchmarkTest, method writeRLEWithSmallBitWidthTest.
/**
 * Benchmarks {@code runWriteTest} against a
 * {@link RunLengthBitPackingHybridValuesWriter} backed by a direct byte buffer allocator.
 * The first constructor argument (2) is presumably the bit width, going by the test
 * name -- confirm against the writer's constructor.
 */
@BenchmarkOptions(benchmarkRounds = 10, warmupRounds = 2)
@Test
public void writeRLEWithSmallBitWidthTest() {
  DirectByteBufferAllocator directAllocator = new DirectByteBufferAllocator();
  // Declared as ValuesWriter so the same runWriteTest overload is selected.
  ValuesWriter rleWriter =
      new RunLengthBitPackingHybridValuesWriter(2, 100, 20000, directAllocator);
  runWriteTest(rleWriter);
}
Use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by Apache.
Class TestDictionary, method testBinaryDictionary.
/**
 * Pushes two runs of repeated values and then one run of distinct values through a
 * single plain binary dictionary writer. After each run the bytes are captured and the
 * reported encoding is checked (PLAIN_DICTIONARY twice, then PLAIN); finally every
 * captured page is read back and verified.
 */
@Test
public void testBinaryDictionary() throws IOException {
  final int valuesPerPage = 100;
  ValuesWriter dictWriter = newPlainBinaryDictionaryValuesWriter(200, 10000);

  writeRepeated(valuesPerPage, dictWriter, "a");
  BytesInput pageA = getBytesAndCheckEncoding(dictWriter, PLAIN_DICTIONARY);

  writeRepeated(valuesPerPage, dictWriter, "b");
  BytesInput pageB = getBytesAndCheckEncoding(dictWriter, PLAIN_DICTIONARY);

  // now we will fall back
  writeDistinct(valuesPerPage, dictWriter, "c");
  BytesInput pageC = getBytesAndCheckEncoding(dictWriter, PLAIN);

  // The first two pages are read through the dictionary reader.
  DictionaryValuesReader dictReader = initDicReader(dictWriter, BINARY);
  checkRepeated(valuesPerPage, pageA, dictReader, "a");
  checkRepeated(valuesPerPage, pageB, dictReader, "b");

  // The fallback page is read with a plain binary reader.
  BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valuesPerPage, pageC, plainReader, "c");
}
Use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by Apache.
Class TestDictionary, method testBinaryDictionaryChangedValues.
/**
 * Same flow as a plain dictionary round trip, except that the two repeated runs are
 * written with {@code writeRepeatedWithReuse}. The encoding is checked after each run
 * (PLAIN_DICTIONARY twice, then PLAIN after the distinct run) and every captured page
 * is read back and verified.
 */
@Test
public void testBinaryDictionaryChangedValues() throws IOException {
  final int valuesPerPage = 100;
  ValuesWriter dictWriter = newPlainBinaryDictionaryValuesWriter(200, 10000);

  writeRepeatedWithReuse(valuesPerPage, dictWriter, "a");
  BytesInput pageA = getBytesAndCheckEncoding(dictWriter, PLAIN_DICTIONARY);

  writeRepeatedWithReuse(valuesPerPage, dictWriter, "b");
  BytesInput pageB = getBytesAndCheckEncoding(dictWriter, PLAIN_DICTIONARY);

  // now we will fall back
  writeDistinct(valuesPerPage, dictWriter, "c");
  BytesInput pageC = getBytesAndCheckEncoding(dictWriter, PLAIN);

  // The first two pages are read through the dictionary reader.
  DictionaryValuesReader dictReader = initDicReader(dictWriter, BINARY);
  checkRepeated(valuesPerPage, pageA, dictReader, "a");
  checkRepeated(valuesPerPage, pageB, dictReader, "b");

  // The fallback page is read with a plain binary reader.
  BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();
  checkDistinct(valuesPerPage, pageC, plainReader, "c");
}
Use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by Apache.
Class TestCorruptDeltaByteArrays, method testColumnReaderImplWithCorruptPage.
/**
 * Writes two pages of ten binary values each with a delta byte array writer, calling
 * {@code corruptWriter} on the writer before the second page is filled. Both pages are
 * then read back through a {@link ColumnReaderImpl} created with the writer version
 * {@code parquet-mr 1.6.0}, and the values read are asserted equal to the values written.
 */
@Test
public void testColumnReaderImplWithCorruptPage() throws Exception {
  ColumnDescriptor binaryColumn =
      new ColumnDescriptor(new String[] { "s" }, PrimitiveType.PrimitiveTypeName.BINARY, 0, 0);
  MemPageStore pageStore = new MemPageStore(0);
  PageWriter pageWriter = pageStore.getPageWriter(binaryColumn);
  ParquetProperties props = ParquetProperties.builder().withDictionaryEncoding(false).build();

  // get generic repetition and definition level bytes to use for pages
  ValuesWriter levelWriter = props.newDefinitionLevelWriter(binaryColumn);
  for (int n = 0; n < 10; n++) {
    levelWriter.writeInteger(0);
  }
  // use a byte array backed BytesInput because it is reused
  BytesInput levelBytes = BytesInput.from(levelWriter.getBytes().toByteArray());

  DeltaByteArrayWriter deltaWriter = getDeltaByteArrayWriter();
  String lastWritten = null;
  List<String> expected = new ArrayList<String>();

  // First page: values 0..9.
  for (int n = 0; n < 10; n++) {
    lastWritten = str(n);
    deltaWriter.writeBytes(Binary.fromString(lastWritten));
    expected.add(lastWritten);
  }
  BytesInput firstPage = BytesInput.concat(levelBytes, levelBytes, deltaWriter.getBytes());
  pageWriter.writePage(
      firstPage,
      10, /* number of values in the page */
      new BinaryStatistics(),
      levelWriter.getEncoding(),
      levelWriter.getEncoding(),
      deltaWriter.getEncoding());
  pageStore.addRowCount(10);

  // sets previous to new byte[0]
  deltaWriter.reset();
  corruptWriter(deltaWriter, lastWritten);

  // Second page: values 10..19, written after the writer has been corrupted.
  for (int n = 10; n < 20; n++) {
    String current = str(n);
    deltaWriter.writeBytes(Binary.fromString(current));
    expected.add(current);
  }
  BytesInput secondPage = BytesInput.concat(levelBytes, levelBytes, deltaWriter.getBytes());
  pageWriter.writePage(
      secondPage,
      10, /* number of values in the page */
      new BinaryStatistics(),
      levelWriter.getEncoding(),
      levelWriter.getEncoding(),
      deltaWriter.getEncoding());
  pageStore.addRowCount(10);

  // Collect every binary value the reader hands to the converter.
  final List<String> decoded = new ArrayList<String>();
  PrimitiveConverter collector = new PrimitiveConverter() {
    @Override
    public void addBinary(Binary value) {
      decoded.add(value.toStringUsingUTF8());
    }
  };

  ColumnReaderImpl reader = new ColumnReaderImpl(
      binaryColumn,
      pageStore.getPageReader(binaryColumn),
      collector,
      new ParsedVersion("parquet-mr", "1.6.0", "abcd"));
  while (decoded.size() < reader.getTotalValueCount()) {
    reader.writeCurrentValueToConverter();
    reader.consume();
  }

  Assert.assertEquals(expected, decoded);
}
Use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by Apache.
Class RandomWritingBenchmarkTest, method writeRLETest.
/**
 * Benchmarks {@code runWriteTest} against a
 * {@link RunLengthBitPackingHybridValuesWriter} backed by a direct byte buffer allocator.
 * The first constructor argument (32) is presumably the bit width -- confirm against the
 * writer's constructor.
 */
@BenchmarkOptions(benchmarkRounds = 10, warmupRounds = 2)
@Test
public void writeRLETest() {
  DirectByteBufferAllocator directAllocator = new DirectByteBufferAllocator();
  // Declared as ValuesWriter so the same runWriteTest overload is selected.
  ValuesWriter rleWriter =
      new RunLengthBitPackingHybridValuesWriter(32, 100, 20000, directAllocator);
  runWriteTest(rleWriter);
}
Aggregations