Search in sources :

Example 21 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

the class TestFiltered method testFilteredOrPaged.

/**
 * Reads the written test records back through an OR filter on {@code DocId}
 * (equal to 10 or equal to 20) and checks that 16 records are returned,
 * alternating between {@code r1} and {@code r2}.
 */
@Test
public void testFilteredOrPaged() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore memPageStore = writeTestRecords(columnIO, 8);
    RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
    RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(or(column("DocId", equalTo(10L)), column("DocId", equalTo(20L)))));
    List<Group> all = readAll(recordReader);
    // The failure message now states the same count that is asserted (16);
    // it previously claimed 8, which contradicted the expected value.
    assertEquals("expecting 16 records " + all, 16, all.size());
    // Records are expected in (r1, r2) pairs: even indexes hold r1, odd indexes hold r2.
    for (int i = 0; i < all.size() / 2; i++) {
        assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
        assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
    }
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 22 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

the class TestMemPageStore method test.

/**
 * Writes four pages for a single INT64 column into a {@code MemPageStore}, then
 * reads pages back, printing each one, until the number of values read reaches
 * the total value count reported by the page reader.
 */
@Test
public void test() throws IOException {
    MemPageStore store = new MemPageStore(10);
    ColumnDescriptor column = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
    LongStatistics statistics = new LongStatistics();
    PageWriter writer = store.getPageWriter(column);
    // Payload sizes in write order; every page is written with a value count of 209.
    int[] pageSizes = { 735, 743, 743, 735 };
    for (int pageSize : pageSizes) {
        writer.writePage(BytesInput.from(new byte[pageSize]), 209, statistics, BIT_PACKED, BIT_PACKED, PLAIN);
    }
    PageReader reader = store.getPageReader(column);
    long expectedValues = reader.getTotalValueCount();
    System.out.println(expectedValues);
    int valuesRead = 0;
    // At least one page is always read; the loop stops once the running value
    // count is no longer below the reported total.
    do {
        DataPage page = reader.readPage();
        valuesRead += page.getValueCount();
        System.out.println(page);
        // TODO: assert
    } while (valuesRead < expectedValues);
}
Also used : LongStatistics(org.apache.parquet.column.statistics.LongStatistics) DataPage(org.apache.parquet.column.page.DataPage) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PageReader(org.apache.parquet.column.page.PageReader) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) PageWriter(org.apache.parquet.column.page.PageWriter) Test(org.junit.Test)

Example 23 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

the class TestMemColumn method testMemColumnBinary.

/**
 * Writes one binary value ("42") to the required column {@code foo.bar} through a
 * column write store backed by a {@code MemPageStore}, flushes it, and checks that
 * every value read back has repetition level 0, definition level 0 and the
 * UTF-8 string value "42".
 */
@Test
public void testMemColumnBinary() throws Exception {
    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
    String[] col = new String[] { "foo", "bar" };
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnDescriptor path = mt.getColumnDescription(col);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    columnWriter.write(Binary.fromString("42"), 0, 0);
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // JUnit's assertEquals takes (expected, actual); keeping that order makes
        // failure messages report the right side as "expected".
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 24 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

the class TupleConsumerPerfTest method main.

/**
 * Entry point of the perf test: builds three Pig schema strings, converts the
 * first one to a Parquet {@code MessageType}, writes into an in-memory page
 * store, flushes the column store, reads the data back, and finally prints the
 * buffered size and the maximum column memory size of the column store.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if any step of the write/read cycle fails
 */
public static void main(String[] args) throws Exception {
    String fullSchema = pigSchema(false, false);
    String projectedSchema = pigSchema(true, false);
    String noStringSchema = pigSchema(true, true);
    MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(fullSchema));
    MemPageStore pageStore = new MemPageStore(0);
    // 50 MiB page size with dictionary encoding switched off.
    ParquetProperties properties = ParquetProperties.builder()
            .withPageSize(50 * 1024 * 1024)
            .withDictionaryEncoding(false)
            .build();
    ColumnWriteStoreV1 columnStore = new ColumnWriteStoreV1(pageStore, properties);
    write(pageStore, columnStore, schema, fullSchema);
    columnStore.flush();
    read(pageStore, fullSchema, projectedSchema, noStringSchema);
    System.out.println(columnStore.getBufferedSize() + " bytes used total");
    System.out.println("max col size: " + columnStore.maxColMemSize() + " bytes");
}
Also used : ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType)

Example 25 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

the class TestFiltered method testApplyFunctionFilterOnString.

/**
 * Exercises string filters on the {@code Name.Url} column against one written
 * set of test records: a predicate-function filter expected to yield r1, an
 * equality filter on "http://B" expected to yield nothing, and an equality
 * filter on "http://C" expected to yield r2.
 */
@Test
public void testApplyFunctionFilterOnString() {
    MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore pageStore = writeTestRecords(columnIO, 1);
    RecordMaterializer<Group> converter = new GroupRecordConverter(schema);

    // Step 1: match the A url in record 1 via the predicate function.
    FilterCompat.Filter endsWithA = FilterCompat.get(
            column("Name.Url", applyFunctionToString(new StringEndsWithAPredicate())));
    RecordReaderImplementation<Group> reader =
            (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, endsWithA);
    readOne(reader, "r2 filtered out", r1);

    // Step 2: the B url in record 1 should not match, because only the first
    // instance of the value is matched against.
    FilterCompat.Filter equalsB = FilterCompat.get(column("Name.Url", equalTo("http://B")));
    reader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, equalsB);
    List<Group> matches = readAll(reader);
    assertEquals("There should be no matching records: " + matches, 0, matches.size());

    // Step 3: match the C url in record 2.
    FilterCompat.Filter equalsC = FilterCompat.get(column("Name.Url", equalTo("http://C")));
    reader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, equalsC);
    readOne(reader, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Aggregations

MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 26, Test (org.junit.Test): 21, Group (org.apache.parquet.example.data.Group): 12, ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1): 11, MessageType (org.apache.parquet.schema.MessageType): 10, GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter): 8, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 6, RecordConsumer (org.apache.parquet.io.api.RecordConsumer): 6, ArrayList (java.util.ArrayList): 4, ColumnReader (org.apache.parquet.column.ColumnReader): 4, ColumnWriter (org.apache.parquet.column.ColumnWriter): 4, GroupWriter (org.apache.parquet.example.data.GroupWriter): 4, SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory): 4, PrimitiveType (org.apache.parquet.schema.PrimitiveType): 4, List (java.util.List): 3, PageWriter (org.apache.parquet.column.page.PageWriter): 2, ParsedVersion (org.apache.parquet.VersionParser.ParsedVersion): 1, BytesInput (org.apache.parquet.bytes.BytesInput): 1, ParquetProperties (org.apache.parquet.column.ParquetProperties): 1, DataPage (org.apache.parquet.column.page.DataPage): 1