Search in sources :

Example 11 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class TestFiltered, method testFilterOnString:

/**
 * Reads back the records produced by {@code writeTestRecords} three times, each time filtering
 * on an equality match against the {@code Name.Url} column, and checks which record (if any)
 * survives the filter.
 */
@Test
public void testFilterOnString() {
    MessageColumnIO messageColumnIO = new ColumnIOFactory(true).getColumnIO(schema);
    MemPageStore pageStore = writeTestRecords(messageColumnIO, 1);
    RecordMaterializer<Group> materializer = new GroupRecordConverter(schema);
    final String urlColumn = "Name.Url";

    // Case 1: the A url of record 1 -- only r1 is expected to come back.
    RecordReaderImplementation<Group> readerForA = (RecordReaderImplementation<Group>) messageColumnIO.getRecordReader(
            pageStore,
            materializer,
            FilterCompat.get(column(urlColumn, equalTo("http://A"))));
    readOne(readerForA, "r2 filtered out", r1);

    // Case 2: the B url of record 1 -- nothing is expected to match. Per the original note,
    // the filter only matches against the first instance (presumably of the repeated value;
    // the original comment is truncated -- confirm).
    RecordReaderImplementation<Group> readerForB = (RecordReaderImplementation<Group>) messageColumnIO.getRecordReader(
            pageStore,
            materializer,
            FilterCompat.get(column(urlColumn, equalTo("http://B"))));
    List<Group> matches = readAll(readerForB);
    assertEquals("There should be no matching records: " + matches, 0, matches.size());

    // Case 3: the C url of record 2 -- only r2 is expected to come back.
    RecordReaderImplementation<Group> readerForC = (RecordReaderImplementation<Group>) messageColumnIO.getRecordReader(
            pageStore,
            materializer,
            FilterCompat.get(column(urlColumn, equalTo("http://C"))));
    readOne(readerForC, "r1 filtered out", r2);
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) Test(org.junit.Test)

Example 12 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class TestMemColumn, method testMemColumn:

/**
 * Writes a single {@code int64} value (42, repetition level 0, definition level 0) to the
 * required column {@code foo.bar} through the write store obtained from
 * {@code newColumnWriteStoreImpl(memPageStore)}, flushes it, and then checks the levels and
 * value of every entry the column reader reports.
 *
 * @throws Exception if any of the write or read helpers fails
 */
@Test
public void testMemColumn() throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
    ColumnDescriptor path = schema.getColumnDescription(new String[] { "foo", "bar" });
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
    columnWriter.write(42L, 0, 0);
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // assertEquals takes (expected, actual); keeping that order makes failure
        // messages report the right value as "expected".
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals(42L, columnReader.getLong());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 13 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class TestMemColumn, method testMemColumnSeveralPages:

/**
 * Writes 2000 identical {@code int64} values (42, repetition level 0, definition level 0) to
 * the required column {@code foo.bar}, flushes the write store, and then checks the levels and
 * value of every entry the column reader reports. Judging by the test name, 2000 values are
 * presumably enough to spread the data over several pages -- confirm against the write store
 * configuration.
 *
 * @throws Exception if any of the write or read helpers fails
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
    String[] col = new String[] { "foo", "bar" };
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnDescriptor path = mt.getColumnDescription(col);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    for (int i = 0; i < 2000; i++) {
        // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
        columnWriter.write(42L, 0, 0);
    }
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // assertEquals takes (expected, actual); keeping that order makes failure
        // messages report the right value as "expected".
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals(42L, columnReader.getLong());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 14 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class TestMemColumn, method testMemColumnSeveralPagesRepeated:

/**
 * Writes 837 entries to the repeated column {@code foo.bar}, cycling through every combination
 * of repetition level 0..2 and definition level 0..2. An entry carries an actual value (its own
 * index) only when its definition level is 2; otherwise a null is written. The column is then
 * read back and each entry's levels (and value, where present) are compared with what was
 * written.
 *
 * @throws Exception if any of the write or read helpers fails
 */
@Test
public void testMemColumnSeveralPagesRepeated() throws Exception {
    MessageType messageType = MessageTypeParser.parseMessageType("message msg { repeated group foo { repeated int64 bar; } }");
    String[] columnPath = { "foo", "bar" };
    MemPageStore pageStore = new MemPageStore(10);
    ColumnWriteStoreV1 writeStore = newColumnWriteStoreImpl(pageStore);
    ColumnDescriptor descriptor = messageType.getColumnDescription(columnPath);
    ColumnWriter writer = writeStore.getColumnWriter(descriptor);

    // Both tables have the same length, so indexing them with the same counter walks
    // through the (repetition, definition) pairs in a fixed cycle of nine.
    int[] repetitionLevels = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    int[] definitionLevels = { 0, 1, 2, 0, 1, 2, 0, 1, 2 };

    for (int index = 0; index < 837; index++) {
        int repetition = repetitionLevels[index % repetitionLevels.length];
        int definition = definitionLevels[index % definitionLevels.length];
        LOG.debug("write i: {}", index);
        if (definition != 2) {
            writer.writeNull(repetition, definition);
        } else {
            writer.write((long) index, repetition, definition);
        }
    }
    writeStore.flush();

    ColumnReader reader = getColumnReader(pageStore, descriptor, messageType);
    for (int index = 0; index < reader.getTotalValueCount(); index++) {
        int expectedRepetition = repetitionLevels[index % repetitionLevels.length];
        int expectedDefinition = definitionLevels[index % definitionLevels.length];
        LOG.debug("read i: {}", index);
        assertEquals("r row " + index, expectedRepetition, reader.getCurrentRepetitionLevel());
        assertEquals("d row " + index, expectedDefinition, reader.getCurrentDefinitionLevel());
        if (expectedDefinition == 2) {
            // Only fully defined entries carry a value to compare.
            assertEquals("data row " + index, (long) index, reader.getLong());
        }
        reader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 15 with MemPageStore

use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by apache.

In class ValidatingColumnWriteStore, method testReadUsingRequestedSchemaWithIncompatibleField:

/**
 * Writes one group with an optional INT32 field {@code e}, then tries to read it back using a
 * requested schema that declares {@code e} as optional BINARY. The read is expected to fail
 * with a {@code ParquetDecodingException} whose message names the two incompatible types.
 */
@Test
public void testReadUsingRequestedSchemaWithIncompatibleField() {
    PrimitiveType int32Field = new PrimitiveType(OPTIONAL, INT32, "e");
    MessageType fileSchema = new MessageType("schema", int32Field);
    MemPageStore pageStore = new MemPageStore(1);
    SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
    writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));

    String expectedMessage = "The requested schema is not compatible with the file schema. incompatible types: optional binary e != optional int32 e";
    try {
        // Incompatible schema: same field name, different primitive type.
        PrimitiveType binaryField = new PrimitiveType(OPTIONAL, BINARY, "e");
        MessageType requestedSchema = new MessageType("schema", binaryField);
        readGroups(pageStore, fileSchema, requestedSchema, 1);
        fail("should have thrown an incompatible schema exception");
    } catch (ParquetDecodingException e) {
        assertEquals(expectedMessage, e.getMessage());
    }
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

MemPageStore (org.apache.parquet.column.page.mem.MemPageStore)26 Test (org.junit.Test)21 Group (org.apache.parquet.example.data.Group)12 ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1)11 MessageType (org.apache.parquet.schema.MessageType)10 GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter)8 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)6 RecordConsumer (org.apache.parquet.io.api.RecordConsumer)6 ArrayList (java.util.ArrayList)4 ColumnReader (org.apache.parquet.column.ColumnReader)4 ColumnWriter (org.apache.parquet.column.ColumnWriter)4 GroupWriter (org.apache.parquet.example.data.GroupWriter)4 SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory)4 PrimitiveType (org.apache.parquet.schema.PrimitiveType)4 List (java.util.List)3 PageWriter (org.apache.parquet.column.page.PageWriter)2 ParsedVersion (org.apache.parquet.VersionParser.ParsedVersion)1 BytesInput (org.apache.parquet.bytes.BytesInput)1 ParquetProperties (org.apache.parquet.column.ParquetProperties)1 DataPage (org.apache.parquet.column.page.DataPage)1