Search in sources :

Example 1 with GroupRecordConverter

use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, method testPaged.

/**
 * Writes test records with {@code writeTestRecords(..., 6)}, reads them back through a
 * {@code page(4, 4)} filter, and checks that exactly four records are returned whose
 * string forms alternate between {@code r2} (even positions) and {@code r1} (odd positions).
 */
@Test
public void testPaged() {
    final MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(schema);
    final MemPageStore pageStore = writeTestRecords(messageIO, 6);
    final RecordMaterializer<Group> materializer = new GroupRecordConverter(schema);
    final RecordReaderImplementation<Group> reader =
            (RecordReaderImplementation<Group>) messageIO.getRecordReader(
                    pageStore, materializer, FilterCompat.get(page(4, 4)));

    final List<Group> records = readAll(reader);
    assertEquals("expecting records " + records, 4, records.size());

    // Walk the result in order, tracking the position to decide which sample record is expected.
    int position = 0;
    for (Group actual : records) {
        final String expected;
        if (position % 2 == 0) {
            expected = r2.toString();
        } else {
            expected = r1.toString();
        }
        assertEquals("expecting record", expected, actual.toString());
        position++;
    }
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 2 with GroupRecordConverter

use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, method testFilteredNotPaged.

/**
 * Reads the test records through a filter that negates "DocId equals 10" and checks that
 * eight records are returned, each with the same string form as {@code r2}.
 */
@Test
public void testFilteredNotPaged() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 8);
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    // Long literal written with an uppercase suffix so it cannot be misread as 101.
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(not(column("DocId", equalTo(10L)))));
    List<Group> all = readAll(recordReader);
    assertEquals("expecting 8 records " + all, 8, all.size());
    // Every surviving record must look like r2; the position is irrelevant here.
    for (Group record : all) {
        assertEquals("expecting record2", r2.toString(), record.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 3 with GroupRecordConverter

use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, method testApplyFunctionFilterOnLong.

/**
 * Exercises two filters on the "DocId" column against the same page store:
 * first an equality filter on 10, expected to yield {@code r1}; then a function filter
 * built from {@code LongGreaterThan15Predicate}, expected to yield {@code r2}.
 */
@Test
public void testApplyFunctionFilterOnLong() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 1);
    // Get first record
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    // Long literal written with an uppercase suffix so it cannot be misread as 101.
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(10L))));
    readOne(recordReader, "r2 filtered out", r1);
    // Get second record
    recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", applyFunctionToLong(new LongGreaterThan15Predicate()))));
    readOne(recordReader, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 4 with GroupRecordConverter

use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, method testFilteredAndPaged.

/**
 * Reads the test records through the conjunction of "DocId equals 10" and a
 * {@code page(2, 4)} filter, and checks that four records are returned, each with the
 * same string form as {@code r1}.
 */
@Test
public void testFilteredAndPaged() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 8);
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    // Long literal written with an uppercase suffix so it cannot be misread as 101.
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(and(column("DocId", equalTo(10L)), page(2, 4))));
    List<Group> all = readAll(recordReader);
    assertEquals("expecting 4 records " + all, 4, all.size());
    // Every returned record must look like r1; the position is irrelevant here.
    for (Group record : all) {
        assertEquals("expecting record1", r1.toString(), record.toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 5 with GroupRecordConverter

use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in project parquet-mr by apache.

From the class TestFiltered, method testFilterOnString.

/**
 * Runs three equality filters on the "Name.Url" column against the same page store:
 * "http://A" is expected to yield {@code r1}, "http://B" is expected to yield no records,
 * and "http://C" is expected to yield {@code r2}.
 */
@Test
public void testFilterOnString() {
    final MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(schema);
    final MemPageStore pageStore = writeTestRecords(messageIO, 1);
    final RecordMaterializer<Group> materializer = new GroupRecordConverter(schema);

    // Phase 1: the A url appears in record 1, so r1 is the record read back.
    final RecordReaderImplementation<Group> readerForA =
            (RecordReaderImplementation<Group>) messageIO.getRecordReader(
                    pageStore, materializer, FilterCompat.get(column("Name.Url", equalTo("http://A"))));
    readOne(readerForA, "r2 filtered out", r1);

    // Phase 2: the B url is also in record 1, but per the original author's note only the
    // first instance of the column is matched, so nothing should come back.
    final RecordReaderImplementation<Group> readerForB =
            (RecordReaderImplementation<Group>) messageIO.getRecordReader(
                    pageStore, materializer, FilterCompat.get(column("Name.Url", equalTo("http://B"))));
    final List<Group> matchesForB = readAll(readerForB);
    assertEquals("There should be no matching records: " + matchesForB, 0, matchesForB.size());

    // Phase 3: the C url appears in record 2, so r2 is the record read back.
    final RecordReaderImplementation<Group> readerForC =
            (RecordReaderImplementation<Group>) messageIO.getRecordReader(
                    pageStore, materializer, FilterCompat.get(column("Name.Url", equalTo("http://C"))));
    readOne(readerForC, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Aggregations

Group (org.apache.parquet.example.data.Group): 10, GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter): 10, Test (org.junit.Test): 9, MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 8, ArrayList (java.util.ArrayList): 2, GroupWriter (org.apache.parquet.example.data.GroupWriter): 2, SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup): 1, ConverterConsumer (org.apache.parquet.io.ConverterConsumer): 1, RecordConsumerLoggingWrapper (org.apache.parquet.io.RecordConsumerLoggingWrapper): 1, RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 1, MessageType (org.apache.parquet.schema.MessageType): 1, Tuple (org.apache.pig.data.Tuple): 1