
Example 6 with GroupRecordConverter

Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class ValidatingColumnWriteStore, the method testGroupWriter:

@Test
public void testGroupWriter() {
    List<Group> result = new ArrayList<Group>();
    final GroupRecordConverter groupRecordConverter = new GroupRecordConverter(schema);
    RecordConsumer groupConsumer = new ConverterConsumer(groupRecordConverter.getRootConverter(), schema);
    GroupWriter groupWriter = new GroupWriter(new RecordConsumerLoggingWrapper(groupConsumer), schema);
    groupWriter.write(r1);
    result.add(groupRecordConverter.getCurrentRecord());
    groupWriter.write(r2);
    result.add(groupRecordConverter.getCurrentRecord());
    assertEquals("deserialization does not display the expected result", result.get(0).toString(), r1.toString());
    assertEquals("deserialization does not display the expected result", result.get(1).toString(), r2.toString());
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) ArrayList(java.util.ArrayList) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)
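The test above depends on the schema, r1, and r2 fields defined elsewhere in the test class. Below is a minimal, self-contained sketch of the same round trip with an illustrative one-field schema and record built inline; the schema string and values are assumptions, and it additionally needs org.apache.parquet.schema.MessageTypeParser and org.apache.parquet.example.data.simple.SimpleGroupFactory.

// Sketch only: the schema and the record are illustrative, not from the test.
MessageType schema = MessageTypeParser.parseMessageType(
    "message Document { required int64 DocId; }");
Group record = new SimpleGroupFactory(schema).newGroup().append("DocId", 10L);

// Replay the Group through a ConverterConsumer so the GroupRecordConverter
// materializes it back into a fresh Group.
GroupRecordConverter converter = new GroupRecordConverter(schema);
RecordConsumer consumer = new ConverterConsumer(converter.getRootConverter(), schema);
new GroupWriter(consumer, schema).write(record);

Group roundTripped = converter.getCurrentRecord();
assertEquals(record.toString(), roundTripped.toString());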

Example 7 with GroupRecordConverter

Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, the method testFilterOnInteger:

@Test
public void testFilterOnInteger() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 1);
    // Get first record
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(10L))));
    readOne(recordReader, "r2 filtered out", r1);
    // Get second record
    recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(20L))));
    readOne(recordReader, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)
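Note that column(...) and equalTo(...) are not among the classes listed above; they most likely come from static imports of the old record-filter API (org.apache.parquet.filter.ColumnRecordFilter.column and org.apache.parquet.filter.ColumnPredicates.equalTo). A small sketch of just the filter construction, under that assumption:

// Build an unbound record filter on DocId and wrap it for the reader API.
FilterCompat.Filter docIdIs10 = FilterCompat.get(column("DocId", equalTo(10L)));
RecordReader<Group> reader =
    columnIO.getRecordReader(memPageStore, new GroupRecordConverter(schema), docIdIs10);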

Example 8 with GroupRecordConverter

Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, the method testFilteredOrPaged:

@Test
public void testFilteredOrPaged() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 8);
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(or(column("DocId", equalTo(10L)), column("DocId", equalTo(20L)))));
    List<Group> all = readAll(recordReader);
    assertEquals("expecting 8 records " + all, 16, all.size());
    for (int i = 0; i < all.size() / 2; i++) {
        assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
        assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)
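Since writeTestRecords(columnIO, 8) appears to write eight copies of each of r1 and r2, the or-filter matches all 16 records, which is why the loop then checks them in r1/r2 pairs. A sketch of just the composed predicate, assuming or(...) is a static import of org.apache.parquet.filter.OrRecordFilter.or used alongside column(...) and equalTo(...):

// UnboundRecordFilter is org.apache.parquet.filter.UnboundRecordFilter.
UnboundRecordFilter eitherDocId = or(
    column("DocId", equalTo(10L)),
    column("DocId", equalTo(20L)));
FilterCompat.Filter filter = FilterCompat.get(eitherDocId);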

Example 9 with GroupRecordConverter

Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestTupleRecordConsumer, the method testFromGroups:

private void testFromGroups(String pigSchemaString, List<Group> input) throws ParserException {
    List<Tuple> tuples = new ArrayList<Tuple>();
    MessageType schema = getMessageType(pigSchemaString);
    RecordMaterializer<Tuple> pigRecordConsumer = newPigRecordConsumer(pigSchemaString);
    GroupWriter groupWriter = new GroupWriter(new RecordConsumerLoggingWrapper(new ConverterConsumer(pigRecordConsumer.getRootConverter(), schema)), schema);
    for (Group group : input) {
        groupWriter.write(group);
        final Tuple tuple = pigRecordConsumer.getCurrentRecord();
        tuples.add(tuple);
        LOG.debug("in: {}\nout:{}", group, tuple);
    }
    List<Group> groups = new ArrayList<Group>();
    GroupRecordConverter recordConsumer = new GroupRecordConverter(schema);
    TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
    for (Tuple t : tuples) {
        LOG.debug("{}", t);
        tupleWriter.write(t);
        groups.add(recordConsumer.getCurrentRecord());
    }
    assertEquals(input.size(), groups.size());
    for (int i = 0; i < input.size(); i++) {
        Group in = input.get(i);
        LOG.debug("{}", in);
        Group out = groups.get(i);
        assertEquals(in.toString(), out.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) SimpleGroup(org.apache.parquet.example.data.simple.SimpleGroup) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) RecordConsumerLoggingWrapper(org.apache.parquet.io.RecordConsumerLoggingWrapper) ArrayList(java.util.ArrayList) GroupWriter(org.apache.parquet.example.data.GroupWriter) ConverterConsumer(org.apache.parquet.io.ConverterConsumer) Tuple(org.apache.pig.data.Tuple) MessageType(org.apache.parquet.schema.MessageType)
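Both directions of this round trip use the same trick: replay a record through a ConverterConsumer that wraps another RecordMaterializer's root converter, then pull the materialized result out with getCurrentRecord(). A generic sketch of that pattern for the Group-to-anything direction (the helper name is illustrative, not part of the test):

// Hypothetical helper: re-materialize a Group in another object model
// (for example a Pig Tuple) by replaying it through the target materializer.
private static <T> T convert(Group group, MessageType schema, RecordMaterializer<T> target) {
    RecordConsumer consumer = new ConverterConsumer(target.getRootConverter(), schema);
    new GroupWriter(consumer, schema).write(group);
    return target.getCurrentRecord();
}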

Example 10 with GroupRecordConverter

Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, the method testApplyFunctionFilterOnString:

@Test
public void testApplyFunctionFilterOnString() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 1);
    // First try matching against the A url in record 1
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("Name.Url", applyFunctionToString(new StringEndsWithAPredicate()))));
    readOne(recordReader, "r2 filtered out", r1);
    // Second try matching against the B url in record 1 - it should fail because the filter
    // only matches against the first instance of a url
    recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("Name.Url", equalTo("http://B"))));
    List<Group> all = readAll(recordReader);
    assertEquals("There should be no matching records: " + all, 0, all.size());
    // Finally try matching against the C url in record 2
    recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("Name.Url", equalTo("http://C"))));
    readOne(recordReader, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)
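StringEndsWithAPredicate is defined elsewhere in the test class, and applyFunctionToString(...) presumably comes from a static import of org.apache.parquet.filter.ColumnPredicates. A hypothetical predicate of the same shape, assuming that API's ColumnPredicates.PredicateFunction interface (the class name and matching rule below are illustrative only):

// Illustrative only: matches values ending in "C", in the style of the
// StringEndsWithAPredicate used above.
public static class StringEndsWithCPredicate implements ColumnPredicates.PredicateFunction<String> {
    @Override
    public boolean functionToApply(String input) {
        return input != null && input.endsWith("C");
    }
}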

Aggregations

Group (org.apache.parquet.example.data.Group): 10
GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter): 10
Test (org.junit.Test): 9
MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 8
ArrayList (java.util.ArrayList): 2
GroupWriter (org.apache.parquet.example.data.GroupWriter): 2
SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup): 1
ConverterConsumer (org.apache.parquet.io.ConverterConsumer): 1
RecordConsumerLoggingWrapper (org.apache.parquet.io.RecordConsumerLoggingWrapper): 1
RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 1
MessageType (org.apache.parquet.schema.MessageType): 1
Tuple (org.apache.pig.data.Tuple): 1