use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.
the class TestCorruptDeltaByteArrays method testColumnReaderImplWithCorruptPage.
/**
 * Writes two pages of ten delta-byte-array encoded strings into an in-memory page store and
 * checks that {@code ColumnReaderImpl} returns all twenty values in the order they were written.
 *
 * <p>Between the two pages the writer is reset and then handed to {@code corruptWriter} together
 * with the last value of the first page. NOTE(review): {@code corruptWriter} is defined outside
 * this method; presumably it seeds the writer's "previous value" state so the second page is
 * encoded relative to the first page -- confirm against the helper.
 */
@Test
public void testColumnReaderImplWithCorruptPage() throws Exception {
  final int valuesPerPage = 10;

  ColumnDescriptor column = new ColumnDescriptor(new String[] { "s" }, PrimitiveType.PrimitiveTypeName.BINARY, 0, 0);
  MemPageStore pageStore = new MemPageStore(0);
  PageWriter pageWriter = pageStore.getPageWriter(column);
  ParquetProperties props = ParquetProperties.builder().withDictionaryEncoding(false).build();

  // Build one generic run of level bytes; it is used for both the repetition and the
  // definition levels of every page written below.
  ValuesWriter levelWriter = props.newDefinitionLevelWriter(column);
  for (int n = 0; n < valuesPerPage; n++) {
    levelWriter.writeInteger(0);
  }
  // Copy into a byte array backed BytesInput because the same instance is reused several times.
  BytesInput levelBytes = BytesInput.from(levelWriter.getBytes().toByteArray());

  DeltaByteArrayWriter valueWriter = getDeltaByteArrayWriter();
  List<String> expected = new ArrayList<String>();

  // ---- first page: values 0..9 ----
  String lastOfFirstPage = null;
  for (int n = 0; n < valuesPerPage; n++) {
    lastOfFirstPage = str(n);
    valueWriter.writeBytes(Binary.fromString(lastOfFirstPage));
    expected.add(lastOfFirstPage);
  }
  BytesInput firstPage = BytesInput.concat(levelBytes, levelBytes, valueWriter.getBytes());
  pageWriter.writePage(
      firstPage,
      valuesPerPage, /* number of values in the page */
      new BinaryStatistics(),
      levelWriter.getEncoding(),
      levelWriter.getEncoding(),
      valueWriter.getEncoding());
  pageStore.addRowCount(valuesPerPage);

  // ---- second page: values 10..19, written after the writer has been tampered with ----
  // reset() sets previous to new byte[0]
  valueWriter.reset();
  corruptWriter(valueWriter, lastOfFirstPage);
  for (int n = valuesPerPage; n < 2 * valuesPerPage; n++) {
    String current = str(n);
    valueWriter.writeBytes(Binary.fromString(current));
    expected.add(current);
  }
  BytesInput secondPage = BytesInput.concat(levelBytes, levelBytes, valueWriter.getBytes());
  pageWriter.writePage(
      secondPage,
      valuesPerPage, /* number of values in the page */
      new BinaryStatistics(),
      levelWriter.getEncoding(),
      levelWriter.getEncoding(),
      valueWriter.getEncoding());
  pageStore.addRowCount(valuesPerPage);

  // Collect every binary value the reader pushes to the converter, decoded as UTF-8.
  final List<String> decoded = new ArrayList<String>();
  PrimitiveConverter collector = new PrimitiveConverter() {
    @Override
    public void addBinary(Binary value) {
      decoded.add(value.toStringUsingUTF8());
    }
  };

  // NOTE(review): the writer version "parquet-mr 1.6.0" is presumably chosen because the reader
  // treats files from that version specially -- confirm against ColumnReaderImpl.
  ParsedVersion writerVersion = new ParsedVersion("parquet-mr", "1.6.0", "abcd");
  ColumnReaderImpl reader = new ColumnReaderImpl(column, pageStore.getPageReader(column), collector, writerVersion);

  // Drain the column: emit the current value, then advance.
  while (decoded.size() < reader.getTotalValueCount()) {
    reader.writeCurrentValueToConverter();
    reader.consume();
  }

  Assert.assertEquals(expected, decoded);
}
use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testReadUsingRequestedSchemaWithExtraFields.
/**
 * Reads records back through a requested schema ({@code b, a, c}, all optional) that reorders
 * the written fields and adds a field {@code c}.
 *
 * <p>Two stores are read with that requested schema: one written with the two-field schema
 * ({@code a}, {@code b}) and one written with the three-field schema itself. The expected rows
 * are {@code {2, 1, null}} and {@code {2, 1, 3}} respectively.
 */
@Test
public void testReadUsingRequestedSchemaWithExtraFields() {
  // Data written with only fields a and b.
  MessageType originalSchema = new MessageType(
      "schema",
      new PrimitiveType(REQUIRED, INT32, "a"),
      new PrimitiveType(OPTIONAL, INT32, "b"));
  MemPageStore originalStore = new MemPageStore(1);
  SimpleGroupFactory originalFactory = new SimpleGroupFactory(originalSchema);
  writeGroups(originalSchema, originalStore, originalFactory.newGroup().append("a", 1).append("b", 2));

  // Data written with fields b, a and the additional field c.
  MessageType extendedSchema = new MessageType(
      "schema",
      new PrimitiveType(OPTIONAL, INT32, "b"),
      new PrimitiveType(OPTIONAL, INT32, "a"),
      new PrimitiveType(OPTIONAL, INT32, "c"));
  MemPageStore extendedStore = new MemPageStore(1);
  SimpleGroupFactory extendedFactory = new SimpleGroupFactory(extendedSchema);
  writeGroups(extendedSchema, extendedStore, extendedFactory.newGroup().append("a", 1).append("b", 2).append("c", 3));

  // Read both stores using the extended schema as the requested schema.
  List<Group> readBack = new ArrayList<Group>();
  readBack.addAll(readGroups(originalStore, originalSchema, extendedSchema, 1));
  readBack.addAll(readGroups(extendedStore, extendedSchema, extendedSchema, 1));
  // TODO: add once we have the support for empty projection
  // groups1.addAll(readGroups(memPageStore3, schema3, schema2, 1));

  Object[][] expectedRows = {
    { 2, 1, null },
    { 2, 1, 3 }
    // { null, null}
  };
  validateGroups(readBack, expectedRows);
}
use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testReadUsingProjectedSchema.
/**
 * Writes one record with required fields {@code a = 1} and {@code b = 2}, then reads it back
 * through a schema containing only an optional {@code b}; the single expected row is {@code {2}}.
 */
@Test
public void testReadUsingProjectedSchema() {
  MessageType writtenSchema = new MessageType(
      "schema",
      new PrimitiveType(REQUIRED, INT32, "a"),
      new PrimitiveType(REQUIRED, INT32, "b"));
  MessageType projection = new MessageType(
      "schema",
      new PrimitiveType(OPTIONAL, INT32, "b"));

  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(writtenSchema);
  writeGroups(writtenSchema, pageStore, factory.newGroup().append("a", 1).append("b", 2));

  // Only field b should come back from the projected read.
  List<Group> readBack = new ArrayList<Group>();
  readBack.addAll(readGroups(pageStore, writtenSchema, projection, 1));

  Object[][] expectedRows = { { 2 } };
  validateGroups(readBack, expectedRows);
}
use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testEmptyField.
/**
 * Verifies that starting a field and ending it immediately, without adding any value, is
 * rejected with a {@code ParquetEncodingException} carrying the expected message.
 */
@Test
public void testEmptyField() {
  final String docIdField = "DocId";
  final String linksField = "Links";

  MemPageStore pageStore = new MemPageStore(1);
  ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
  MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(schema);
  final RecordConsumer consumer = messageIO.getRecordWriter(writeStore);

  consumer.startMessage();

  // A properly populated field first.
  consumer.startField(docIdField, 0);
  consumer.addLong(0);
  consumer.endField(docIdField, 0);

  // Then a field that is opened and closed with nothing in between.
  consumer.startField(linksField, 1);
  try {
    consumer.endField(linksField, 1);
    Assert.fail("expected exception because of empty field");
  } catch (ParquetEncodingException expected) {
    // The message text (including its spelling) is compared verbatim with what is thrown.
    Assert.assertEquals(
        "empty fields are illegal, the field should be ommited completely instead",
        expected.getMessage());
  }
}
use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.
the class TestFiltered method writeTestRecords.
/**
 * Writes {@code number} copies of the record pair ({@code r1}, {@code r2}) into a fresh
 * in-memory page store and returns that store.
 *
 * @param columnIO column IO used to obtain the record writer
 * @param number how many times the (r1, r2) pair is written; the store is created with
 *     {@code number * 2} as its constructor argument
 * @return the page store holding the flushed records
 */
private MemPageStore writeTestRecords(MessageColumnIO columnIO, int number) {
  ParquetProperties writeProps = ParquetProperties.builder()
      .withPageSize(800)
      .withDictionaryEncoding(false)
      .build();

  MemPageStore pageStore = new MemPageStore(number * 2);
  ColumnWriteStoreV1 writeStore = new ColumnWriteStoreV1(pageStore, writeProps);
  RecordConsumer consumer = columnIO.getRecordWriter(writeStore);
  GroupWriter writer = new GroupWriter(consumer, schema);

  // Emit the pair once per requested repetition.
  for (int remaining = number; remaining > 0; remaining--) {
    writer.write(r1);
    writer.write(r2);
  }

  // Flush the record consumer first, then the column store.
  consumer.flush();
  writeStore.flush();
  return pageStore;
}
Aggregations