Search in sources :

Example 16 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class ValidatingColumnWriteStore, method testSchema.

/**
 * Round-trips {@code groups} through an in-memory page store using
 * {@code messageSchema}: every group is written, the stores are flushed, and
 * the records are then read back in the same order.
 *
 * <p>Each record read back is compared with the corresponding input group by
 * its {@code toString()} representation.
 *
 * @param messageSchema schema used for both writing and reading
 * @param groups        records to write and then expect back, in order
 */
private void testSchema(MessageType messageSchema, List<Group> groups) {
    final MemPageStore pageStore = new MemPageStore(groups.size());
    final ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    final MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(messageSchema);
    log(messageIO);

    // Write phase: push every group through the record consumer, then flush
    // the consumer before the underlying column store.
    final RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
    final GroupWriter writer = new GroupWriter(consumer, messageSchema);
    for (Group toWrite : groups) {
        writer.write(toWrite);
    }
    consumer.flush();
    writeStore.flush();

    // Read phase: records must come back in the order they were written.
    final RecordReaderImplementation<Group> reader = getRecordReader(messageIO, messageSchema, pageStore);
    for (Group expected : groups) {
        final Group actual = reader.read();
        assertEquals("deserialization does not display the same result", expected.toString(), actual.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter)

Example 17 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class ValidatingColumnWriteStore, method testPushParser.

/**
 * Writes the single record {@code r1} to an in-memory page store and reads it
 * back through an {@code ExpectationValidatingConverter} built from
 * {@code expectedEventsForR1}.
 *
 * <p>The value returned by {@code read()} is discarded; any checking is
 * presumably done by the converter while the record is read (verify against
 * ExpectationValidatingConverter).
 */
@Test
public void testPushParser() {
    final MemPageStore pageStore = new MemPageStore(1);
    final ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    final MessageColumnIO messageIO = new ColumnIOFactory().getColumnIO(schema);

    // Write r1, flushing the consumer before the column store.
    final RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
    final GroupWriter writer = new GroupWriter(consumer, schema);
    writer.write(r1);
    consumer.flush();
    writeStore.flush();

    // Read the record back through the validating converter.
    final ExpectationValidatingConverter validator = new ExpectationValidatingConverter(expectedEventsForR1, schema);
    final RecordReader<Void> reader = messageIO.getRecordReader(pageStore, validator);
    reader.read();
}
Also used : ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Example 18 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class ValidatingColumnWriteStore, method testColumnIO.

/**
 * Writes {@code r1} and {@code r2} with {@code schema} into an in-memory page
 * store, then reads the same store twice:
 * <ol>
 *   <li>with {@code schema}, expecting records whose {@code toString()}
 *       matches {@code r1} and {@code r2};</li>
 *   <li>with {@code schema2}, expecting records whose {@code toString()}
 *       matches {@code pr1} and {@code pr2}.</li>
 * </ol>
 * Before reading, each reader is passed to {@code validateFSA} along with the
 * matching expected FSA ({@code expectedFSA} / {@code expectedFSA2}).
 */
@Test
public void testColumnIO() {
    log(schema);
    log("r1");
    log(r1);
    log("r2");
    log(r2);
    final MemPageStore pageStore = new MemPageStore(2);
    final ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    final ColumnIOFactory ioFactory = new ColumnIOFactory(true);

    // Pass 1: write both records, then read them back with the writing schema.
    {
        final MessageColumnIO fullIO = ioFactory.getColumnIO(schema);
        log(fullIO);
        final RecordConsumer consumer = fullIO.getRecordWriter(writeStore);
        final GroupWriter writer = new GroupWriter(consumer, schema);
        writer.write(r1);
        writer.write(r2);
        consumer.flush();
        writeStore.flush();
        log(writeStore);
        log("=========");

        final RecordReaderImplementation<Group> reader = getRecordReader(fullIO, schema, pageStore);
        validateFSA(expectedFSA, fullIO, reader);
        final List<Group> readBack = new ArrayList<Group>();
        readBack.add(reader.read());
        readBack.add(reader.read());
        for (int idx = 0; idx < readBack.size(); idx++) {
            // Labels are 1-based: "r1", "r2".
            log("r" + (idx + 1));
            log(readBack.get(idx));
        }
        assertEquals("deserialization does not display the same result", r1.toString(), readBack.get(0).toString());
        assertEquals("deserialization does not display the same result", r2.toString(), readBack.get(1).toString());
    }

    // Pass 2: read the same pages with schema2 (presumably a projection of
    // schema - confirm against the fixture definitions).
    {
        final MessageColumnIO secondIO = ioFactory.getColumnIO(schema2);
        final List<Group> readBack = new ArrayList<Group>();
        final RecordReaderImplementation<Group> reader = getRecordReader(secondIO, schema2, pageStore);
        validateFSA(expectedFSA2, secondIO, reader);
        readBack.add(reader.read());
        readBack.add(reader.read());
        for (int idx = 0; idx < readBack.size(); idx++) {
            log("r" + (idx + 1));
            log(readBack.get(idx));
        }
        assertEquals("deserialization does not display the expected result", pr1.toString(), readBack.get(0).toString());
        assertEquals("deserialization does not display the expected result", pr2.toString(), readBack.get(1).toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) ArrayList(java.util.ArrayList) List(java.util.List) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Example 19 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class ValidatingColumnWriteStore, method testReadUsingSchemaWithRequiredFieldThatWasOptional.

/**
 * Writes one group with an OPTIONAL int32 field {@code e} and then tries to
 * read it back with a schema that declares {@code e} as REQUIRED.
 *
 * <p>The read is expected to throw a {@code ParquetDecodingException} with
 * the exact incompatible-types message asserted below; the test fails if no
 * exception is thrown.
 */
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional() {
    final String expectedMessage = "The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e";
    final MessageType originalSchema = new MessageType("schema", new PrimitiveType(OPTIONAL, INT32, "e"));
    final MemPageStore pageStore = new MemPageStore(1);
    writeGroups(originalSchema, pageStore, new SimpleGroupFactory(originalSchema).newGroup().append("e", 4));
    try {
        // Incompatible schema: field "e" is required here but was written as optional.
        final MessageType requestedSchema = new MessageType("schema", new PrimitiveType(REQUIRED, INT32, "e"));
        readGroups(pageStore, originalSchema, requestedSchema, 1);
        fail("should have thrown an incompatible schema exception");
    } catch (ParquetDecodingException incompatible) {
        assertEquals(expectedMessage, incompatible.getMessage());
    }
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 20 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class TestFiltered, method testFilterOnInteger.

/**
 * Reads the test records through a column filter on {@code "DocId"}, once
 * with {@code equalTo(10L)} and once with {@code equalTo(20L)}.
 *
 * <p>Each filtered reader is handed to {@code readOne} together with the
 * record expected to pass the filter ({@code r1} for 10, {@code r2} for 20)
 * and a message naming the record that should have been filtered out.
 */
@Test
public void testFilterOnInteger() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 1);
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);

    // Get first record. Long literals use the upper-case 'L' suffix: a
    // lower-case 'l' is easily misread as the digit '1' (10l vs 101).
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(10L))));
    readOne(recordReader, "r2 filtered out", r1);

    // Get second record
    recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(20L))));
    readOne(recordReader, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Aggregations

MemPageStore (org.apache.parquet.column.page.mem.MemPageStore) 26, Test (org.junit.Test) 21, Group (org.apache.parquet.example.data.Group) 12, ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1) 11, MessageType (org.apache.parquet.schema.MessageType) 10, GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter) 8, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor) 6, RecordConsumer (org.apache.parquet.io.api.RecordConsumer) 6, ArrayList (java.util.ArrayList) 4, ColumnReader (org.apache.parquet.column.ColumnReader) 4, ColumnWriter (org.apache.parquet.column.ColumnWriter) 4, GroupWriter (org.apache.parquet.example.data.GroupWriter) 4, SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory) 4, PrimitiveType (org.apache.parquet.schema.PrimitiveType) 4, List (java.util.List) 3, PageWriter (org.apache.parquet.column.page.PageWriter) 2, ParsedVersion (org.apache.parquet.VersionParser.ParsedVersion) 1, BytesInput (org.apache.parquet.bytes.BytesInput) 1, ParquetProperties (org.apache.parquet.column.ParquetProperties) 1, DataPage (org.apache.parquet.column.page.DataPage) 1