Search in sources :

Example 6 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

In the class ValidatingColumnWriteStore, method testSchema:

/**
 * Writes every group through a {@link GroupWriter} into a {@link MemPageStore},
 * then reads the same number of records back and asserts that each one has the
 * same {@code toString()} rendering as the group that was written.
 *
 * @param messageSchema schema used to build the column IO, the writer and the reader
 * @param groups        records to write; they are expected back in the same order
 */
private void testSchema(MessageType messageSchema, List<Group> groups) {
    MemPageStore pageStore = new MemPageStore(groups.size());
    ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(messageSchema);
    log(messageIO);

    // Write phase: push each group through the record consumer, then flush
    // the consumer before the column store.
    RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
    GroupWriter writer = new GroupWriter(consumer, messageSchema);
    for (Group toWrite : groups) {
        writer.write(toWrite);
    }
    consumer.flush();
    writeStore.flush();

    // Read phase: one read per written group, compared by string form.
    RecordReaderImplementation<Group> reader = getRecordReader(messageIO, messageSchema, pageStore);
    for (int i = 0; i < groups.size(); i++) {
        Group expected = groups.get(i);
        Group actual = reader.read();
        assertEquals("deserialization does not display the same result", expected.toString(), actual.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter)

Example 7 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

In the class ValidatingColumnWriteStore, method testPushParser:

/**
 * Writes the single record {@code r1} into a {@link MemPageStore} and reads it
 * back through an {@code ExpectationValidatingConverter} built from
 * {@code expectedEventsForR1}. The test contains no explicit assertion of its
 * own; any checking happens inside that converter during the read.
 */
@Test
public void testPushParser() {
    MemPageStore pageStore = new MemPageStore(1);
    ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    ColumnIOFactory ioFactory = new ColumnIOFactory();
    MessageColumnIO messageIO = ioFactory.getColumnIO(schema);

    // Write r1, then flush the consumer before the column store.
    RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
    GroupWriter writer = new GroupWriter(consumer, schema);
    writer.write(r1);
    consumer.flush();
    writeStore.flush();

    // The converter is created only after the data has been flushed,
    // exactly where the read is set up.
    ExpectationValidatingConverter validatingConverter = new ExpectationValidatingConverter(expectedEventsForR1, schema);
    RecordReader<Void> reader = messageIO.getRecordReader(pageStore, validatingConverter);
    reader.read();
}
Also used : ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Example 8 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

In the class ValidatingColumnWriteStore, method testColumnIO:

/**
 * Writes {@code r1} and {@code r2} with {@code schema}, then reads the stored
 * pages back twice:
 * <ol>
 *   <li>with {@code schema}, expecting the records to print like {@code r1} and {@code r2};</li>
 *   <li>with {@code schema2}, expecting them to print like {@code pr1} and {@code pr2}.</li>
 * </ol>
 * Before reading, each reader is passed to {@code validateFSA} together with
 * the matching expected FSA ({@code expectedFSA} / {@code expectedFSA2}).
 */
@Test
public void testColumnIO() {
    log(schema);
    log("r1");
    log(r1);
    log("r2");
    log(r2);

    MemPageStore pageStore = new MemPageStore(2);
    ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
    ColumnIOFactory ioFactory = new ColumnIOFactory(true);

    // Pass 1: write both records and read them back with the writing schema.
    {
        MessageColumnIO fullIO = ioFactory.getColumnIO(schema);
        log(fullIO);

        RecordConsumer consumer = fullIO.getRecordWriter(writeStore);
        GroupWriter writer = new GroupWriter(consumer, schema);
        writer.write(r1);
        writer.write(r2);
        consumer.flush();
        writeStore.flush();
        log(writeStore);
        log("=========");

        RecordReaderImplementation<Group> reader = getRecordReader(fullIO, schema, pageStore);
        validateFSA(expectedFSA, fullIO, reader);

        List<Group> readBack = new ArrayList<Group>();
        readBack.add(reader.read());
        readBack.add(reader.read());
        for (int idx = 0; idx < readBack.size(); idx++) {
            // Labels are 1-based: "r1", "r2".
            log("r" + (idx + 1));
            log(readBack.get(idx));
        }

        assertEquals("deserialization does not display the same result", r1.toString(), readBack.get(0).toString());
        assertEquals("deserialization does not display the same result", r2.toString(), readBack.get(1).toString());
    }

    // Pass 2: read the same page store again, this time with schema2.
    {
        MessageColumnIO secondIO = ioFactory.getColumnIO(schema2);
        RecordReaderImplementation<Group> reader = getRecordReader(secondIO, schema2, pageStore);
        validateFSA(expectedFSA2, secondIO, reader);

        List<Group> readBack = new ArrayList<Group>();
        readBack.add(reader.read());
        readBack.add(reader.read());
        for (int idx = 0; idx < readBack.size(); idx++) {
            log("r" + (idx + 1));
            log(readBack.get(idx));
        }

        assertEquals("deserialization does not display the expected result", pr1.toString(), readBack.get(0).toString());
        assertEquals("deserialization does not display the expected result", pr2.toString(), readBack.get(1).toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) ArrayList(java.util.ArrayList) List(java.util.List) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Example 9 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

In the class TestTupleRecordConsumer, method testFromGroups:

/**
 * Converts each input group to a {@link Tuple} and back to a {@link Group},
 * then asserts that the round-tripped groups match the input one-to-one by
 * their {@code toString()} rendering.
 *
 * @param pigSchemaString schema string passed to {@code getMessageType},
 *                        {@code newPigRecordConsumer} and {@code newTupleWriter}
 * @param input           groups to push through the Group -> Tuple -> Group round trip
 * @throws ParserException declared by this method; which helper raises it is
 *                         not visible in this block
 */
private void testFromGroups(String pigSchemaString, List<Group> input) throws ParserException {
    MessageType schema = getMessageType(pigSchemaString);
    RecordMaterializer<Tuple> pigRecordConsumer = newPigRecordConsumer(pigSchemaString);

    // Group -> Tuple: the writer feeds a logging wrapper around a consumer
    // that drives the materializer's root converter.
    ConverterConsumer converterConsumer = new ConverterConsumer(pigRecordConsumer.getRootConverter(), schema);
    RecordConsumerLoggingWrapper loggingConsumer = new RecordConsumerLoggingWrapper(converterConsumer);
    GroupWriter groupWriter = new GroupWriter(loggingConsumer, schema);

    List<Tuple> tuples = new ArrayList<Tuple>();
    for (Group source : input) {
        groupWriter.write(source);
        final Tuple converted = pigRecordConsumer.getCurrentRecord();
        tuples.add(converted);
        LOG.debug("in: {}\nout:{}", source, converted);
    }

    // Tuple -> Group: write each tuple and collect the record it produces.
    GroupRecordConverter recordConsumer = new GroupRecordConverter(schema);
    TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
    List<Group> groups = new ArrayList<Group>();
    for (Tuple tuple : tuples) {
        LOG.debug("{}", tuple);
        tupleWriter.write(tuple);
        groups.add(recordConsumer.getCurrentRecord());
    }

    // Same count, and each position prints identically.
    assertEquals(input.size(), groups.size());
    int position = 0;
    for (Group expected : input) {
        LOG.debug("{}", expected);
        Group actual = groups.get(position++);
        assertEquals(expected.toString(), actual.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) SimpleGroup(org.apache.parquet.example.data.simple.SimpleGroup) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) RecordConsumerLoggingWrapper(org.apache.parquet.io.RecordConsumerLoggingWrapper) ArrayList(java.util.ArrayList) GroupWriter(org.apache.parquet.example.data.GroupWriter) ConverterConsumer(org.apache.parquet.io.ConverterConsumer) Tuple(org.apache.pig.data.Tuple) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

GroupWriter (org.apache.parquet.example.data.GroupWriter): 9, RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 7, ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1): 6, Group (org.apache.parquet.example.data.Group): 5, MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 4, Test (org.junit.Test): 4, ArrayList (java.util.ArrayList): 3, GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter): 2, List (java.util.List): 1, SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup): 1, ConverterConsumer (org.apache.parquet.io.ConverterConsumer): 1, RecordConsumerLoggingWrapper (org.apache.parquet.io.RecordConsumerLoggingWrapper): 1, MessageType (org.apache.parquet.schema.MessageType): 1, Tuple (org.apache.pig.data.Tuple): 1