Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache: class ValidatingColumnWriteStore, method testSchema.
private void testSchema(MessageType messageSchema, List<Group> groups) {
  MemPageStore memPageStore = new MemPageStore(groups.size());
  ColumnWriteStoreV1 columns = newColumnWriteStore(memPageStore);

  ColumnIOFactory columnIOFactory = new ColumnIOFactory(true);
  MessageColumnIO columnIO = columnIOFactory.getColumnIO(messageSchema);
  log(columnIO);

  // Write groups.
  RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
  GroupWriter groupWriter = new GroupWriter(recordWriter, messageSchema);
  for (Group group : groups) {
    groupWriter.write(group);
  }
  recordWriter.flush();
  columns.flush();

  // Read groups and verify.
  RecordReaderImplementation<Group> recordReader = getRecordReader(columnIO, messageSchema, memPageStore);
  for (Group group : groups) {
    final Group got = recordReader.read();
    assertEquals("deserialization does not display the same result", group.toString(), got.toString());
  }
}
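The newColumnWriteStore and getRecordReader helpers called above are not part of the listing. Below is a minimal sketch of what they might look like, assuming the ColumnWriteStoreV1(MessageType, PageWriteStore, ParquetProperties) constructor from recent parquet-mr releases and the GroupRecordConverter materializer from the example object model; the schema field, page size, and dictionary settings are placeholders, not taken from the original test.

// Hypothetical helper: wrap the in-memory page store in a V1 column write store.
// Assumes a ColumnWriteStoreV1(MessageType, PageWriteStore, ParquetProperties) constructor
// and a `schema` field on the test class; the property values are placeholders.
private ColumnWriteStoreV1 newColumnWriteStore(MemPageStore memPageStore) {
  return new ColumnWriteStoreV1(
      schema,
      memPageStore,
      ParquetProperties.builder()
          .withPageSize(1024)
          .withDictionaryEncoding(false)
          .build());
}

// Hypothetical helper: build a record reader that materializes Groups from the page store.
private RecordReaderImplementation<Group> getRecordReader(MessageColumnIO columnIO, MessageType schema, PageReadStore pageReadStore) {
  return (RecordReaderImplementation<Group>) columnIO.getRecordReader(
      pageReadStore, new GroupRecordConverter(schema));
}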
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache: class ValidatingColumnWriteStore, method testPushParser.
@Test
public void testPushParser() {
  MemPageStore memPageStore = new MemPageStore(1);
  ColumnWriteStoreV1 columns = newColumnWriteStore(memPageStore);
  MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);

  RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
  new GroupWriter(recordWriter, schema).write(r1);
  recordWriter.flush();
  columns.flush();

  RecordReader<Void> recordReader = columnIO.getRecordReader(
      memPageStore, new ExpectationValidatingConverter(expectedEventsForR1, schema));
  recordReader.read();
}
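For comparison, the same push-style read can materialize an actual record instead of validating an event stream, by plugging in the standard GroupRecordConverter. A minimal sketch, assuming the memPageStore, columnIO, schema, and r1 from the listing above:

// Sketch: re-read the record written above, materializing it as a Group
// rather than checking the pushed events against expectedEventsForR1.
RecordReader<Group> groupReader = columnIO.getRecordReader(memPageStore, new GroupRecordConverter(schema));
Group roundTripped = groupReader.read();
assertEquals(r1.toString(), roundTripped.toString());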
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache: class ValidatingColumnWriteStore, method testColumnIO.
@Test
public void testColumnIO() {
  log(schema);
  log("r1");
  log(r1);
  log("r2");
  log(r2);

  MemPageStore memPageStore = new MemPageStore(2);
  ColumnWriteStoreV1 columns = newColumnWriteStore(memPageStore);
  ColumnIOFactory columnIOFactory = new ColumnIOFactory(true);
  {
    MessageColumnIO columnIO = columnIOFactory.getColumnIO(schema);
    log(columnIO);

    RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
    GroupWriter groupWriter = new GroupWriter(recordWriter, schema);
    groupWriter.write(r1);
    groupWriter.write(r2);
    recordWriter.flush();
    columns.flush();
    log(columns);
    log("=========");

    RecordReaderImplementation<Group> recordReader = getRecordReader(columnIO, schema, memPageStore);
    validateFSA(expectedFSA, columnIO, recordReader);

    List<Group> records = new ArrayList<Group>();
    records.add(recordReader.read());
    records.add(recordReader.read());

    int i = 0;
    for (Group record : records) {
      log("r" + (++i));
      log(record);
    }

    assertEquals("deserialization does not display the same result", r1.toString(), records.get(0).toString());
    assertEquals("deserialization does not display the same result", r2.toString(), records.get(1).toString());
  }
  {
    // Read the same pages back using a second schema (schema2); the expected results are pr1 and pr2.
    MessageColumnIO columnIO2 = columnIOFactory.getColumnIO(schema2);
    List<Group> records = new ArrayList<Group>();
    RecordReaderImplementation<Group> recordReader = getRecordReader(columnIO2, schema2, memPageStore);
    validateFSA(expectedFSA2, columnIO2, recordReader);

    records.add(recordReader.read());
    records.add(recordReader.read());

    int i = 0;
    for (Group record : records) {
      log("r" + (++i));
      log(record);
    }

    assertEquals("deserialization does not display the expected result", pr1.toString(), records.get(0).toString());
    assertEquals("deserialization does not display the expected result", pr2.toString(), records.get(1).toString());
  }
}
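The schema, schema2, r1, r2, pr1, and pr2 fields come from the surrounding test class and are not shown. The following self-contained sketch exercises the same write/read round trip through a MemPageStore with a placeholder schema and record; the schema string, field names, values, and the ColumnWriteStoreV1 constructor and properties are assumptions for illustration, not the test's actual code.

@Test
public void demoInMemoryRoundTrip() {
  // Placeholder schema and record, not the test's schema/r1/r2.
  MessageType demoSchema = MessageTypeParser.parseMessageType(
      "message Document { required int64 DocId; optional binary Name (UTF8); }");
  Group g1 = new SimpleGroupFactory(demoSchema).newGroup().append("DocId", 10L).append("Name", "first");

  // Write the record into an in-memory page store.
  // Assumes the ColumnWriteStoreV1(MessageType, PageWriteStore, ParquetProperties) constructor; values are placeholders.
  MemPageStore pageStore = new MemPageStore(1);
  ColumnWriteStoreV1 writeStore = new ColumnWriteStoreV1(demoSchema, pageStore,
      ParquetProperties.builder().withPageSize(1024).withDictionaryEncoding(false).build());
  MessageColumnIO io = new ColumnIOFactory(true).getColumnIO(demoSchema);
  RecordConsumer consumer = io.getRecordWriter(writeStore);
  new GroupWriter(consumer, demoSchema).write(g1);
  consumer.flush();
  writeStore.flush();

  // Read it back through the example object model and compare string renderings.
  RecordReader<Group> reader = io.getRecordReader(pageStore, new GroupRecordConverter(demoSchema));
  assertEquals(g1.toString(), reader.read().toString());
}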
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache: class ValidatingColumnWriteStore, method testReadUsingSchemaWithRequiredFieldThatWasOptional.
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional() {
  MessageType originalSchema = new MessageType("schema", new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore store = new MemPageStore(1);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(originalSchema);
  writeGroups(originalSchema, store, groupFactory.newGroup().append("e", 4));

  try {
    // Incompatible schema: required when it was optional
    MessageType schemaWithRequiredFieldThatWasOptional = new MessageType("schema",
        new PrimitiveType(REQUIRED, INT32, "e"));
    readGroups(store, originalSchema, schemaWithRequiredFieldThatWasOptional, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    assertEquals("The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e", e.getMessage());
  }
}
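The writeGroups and readGroups helpers are not shown. Below is a plausible sketch of readGroups, assuming the compatibility check that throws the asserted ParquetDecodingException happens when ColumnIOFactory resolves the requested schema against the file schema; the helper body is an assumption that mirrors the call site, not the test's actual code.

// Hypothetical helper mirroring readGroups(store, fileSchema, requestedSchema, count).
// The strict schema check (required vs. optional) fires while building the MessageColumnIO.
private List<Group> readGroups(MemPageStore store, MessageType fileSchema, MessageType requestedSchema, int count) {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(requestedSchema, fileSchema);
  RecordReader<Group> reader = columnIO.getRecordReader(store, new GroupRecordConverter(requestedSchema));
  List<Group> result = new ArrayList<Group>();
  for (int i = 0; i < count; i++) {
    result.add(reader.read());
  }
  return result;
}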
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache: class TestFiltered, method testFilterOnInteger.
@Test
public void testFilterOnInteger() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  // Get the first record: DocId == 10 keeps r1 and filters out r2.
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(
      memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(10L))));
  readOne(recordReader, "r2 filtered out", r1);

  // Get the second record: DocId == 20 keeps r2 and filters out r1.
  recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(
      memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(20L))));
  readOne(recordReader, "r1 filtered out", r2);
}
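writeTestRecords and readOne are helpers of the surrounding TestFiltered class and are not shown; column(...) and equalTo(...) are record-filter builders from the older org.apache.parquet.filter API, wrapped for the reader via FilterCompat.get(...). A minimal sketch of what readOne likely does, assuming it reads a single record and compares string renderings the way the other tests do:

// Hypothetical helper mirroring the readOne(reader, message, expected) call sites above.
private void readOne(RecordReaderImplementation<Group> recordReader, String message, Group expected) {
  Group got = recordReader.read();
  assertEquals(message, expected.toString(), got.toString());
}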