Search in sources :

Example 1 with ColumnReader

use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.

the class TestMemColumn method testMemColumn.

/**
 * Writes one required int64 value through the in-memory column write store,
 * flushes it, and reads it back through a {@code ColumnReader}, checking the
 * repetition level, definition level and value of every entry returned.
 */
@Test
public void testMemColumn() throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
    ColumnDescriptor path = schema.getColumnDescription(new String[] { "foo", "bar" });
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    // Upper-case L suffix: a lower-case 'l' is easily misread as the digit 1.
    columnWriter.write(42L, 0, 0);
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // assertEquals takes (expected, actual); keeping that order makes a
        // failure message report the two values the right way round.
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals(42L, columnReader.getLong());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 2 with ColumnReader

use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.

the class TestMemColumn method testMemColumnSeveralPages.

/**
 * Writes 2000 copies of the same required int64 value through the in-memory
 * column write store, flushes, and reads every entry back through a
 * {@code ColumnReader}, checking levels and value for each one.
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
    String[] col = new String[] { "foo", "bar" };
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    // The descriptor is looked up once and used for both writing and reading.
    ColumnDescriptor path = mt.getColumnDescription(col);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    for (int i = 0; i < 2000; i++) {
        // Upper-case L suffix: a lower-case 'l' is easily misread as the digit 1.
        columnWriter.write(42L, 0, 0);
    }
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // assertEquals takes (expected, actual); keeping that order makes a
        // failure message report the two values the right way round.
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals(42L, columnReader.getLong());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 3 with ColumnReader

use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.

the class TestMemColumn method testMemColumnSeveralPagesRepeated.

/**
 * Writes 837 entries to a repeated int64 column, cycling through a fixed
 * table of (repetition level, definition level) pairs, then reads them back
 * and checks that each entry carries the levels it was written with. Only
 * entries written at definition level 2 carry a value; the others are
 * written as nulls and only their levels are checked.
 */
@Test
public void testMemColumnSeveralPagesRepeated() throws Exception {
    MessageType mt = MessageTypeParser.parseMessageType("message msg { repeated group foo { repeated int64 bar; } }");
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnDescriptor path = mt.getColumnDescription(new String[] { "foo", "bar" });
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    // Level pairs applied cyclically; entry n uses index n % 9 of each table.
    final int[] repetitionLevels = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    final int[] definitionLevels = { 0, 1, 2, 0, 1, 2, 0, 1, 2 };
    for (int written = 0; written < 837; written++) {
        final int r = repetitionLevels[written % repetitionLevels.length];
        final int d = definitionLevels[written % definitionLevels.length];
        LOG.debug("write i: {}", written);
        if (d != 2) {
            columnWriter.writeNull(r, d);
        } else {
            columnWriter.write((long) written, r, d);
        }
    }
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
    for (int row = 0; row < columnReader.getTotalValueCount(); row++) {
        final int expectedR = repetitionLevels[row % repetitionLevels.length];
        final int expectedD = definitionLevels[row % definitionLevels.length];
        LOG.debug("read i: {}", row);
        assertEquals("r row " + row, expectedR, columnReader.getCurrentRepetitionLevel());
        assertEquals("d row " + row, expectedD, columnReader.getCurrentDefinitionLevel());
        if (expectedD == 2) {
            // The value written for this entry was its own index.
            assertEquals("data row " + row, (long) row, columnReader.getLong());
        }
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 4 with ColumnReader

use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.

the class DumpCommand method dump.

/**
 * Prints every entry of one column, preceded by a header line.
 *
 * <p>For each entry the repetition level, definition level and value are
 * printed on one line. An entry whose definition level differs from the
 * column's maximum definition level is printed as {@code <null>}; otherwise
 * the value is read with the accessor matching the column's primitive type
 * and rendered through the column's {@code PrimitiveStringifier}.
 *
 * @param out     writer that receives the output
 * @param crstore store from which the column's reader is obtained
 * @param column  descriptor of the column to print
 * @param page    first number shown in the header line
 * @param total   second number shown in the header line
 * @param offset  number added to each entry's index when it is printed
 * @throws IOException declared by the signature
 */
public static void dump(PrettyPrintWriter out, ColumnReadStoreImpl crstore, ColumnDescriptor column, long page, long total, long offset) throws IOException {
    final int maxDefinitionLevel = column.getMaxDefinitionLevel();
    final ColumnReader reader = crstore.getColumnReader(column);
    final long lastValueIndex = offset + reader.getTotalValueCount() - 1;
    out.format("*** row group %d of %d, values %d to %d ***%n", page, total, offset, lastValueIndex);
    final long valueCount = reader.getTotalValueCount();
    for (long index = 0; index < valueCount; ++index) {
        final int repetitionLevel = reader.getCurrentRepetitionLevel();
        final int definitionLevel = reader.getCurrentDefinitionLevel();
        out.format("value %d: R:%d D:%d V:", offset + index, repetitionLevel, definitionLevel);
        if (definitionLevel != maxDefinitionLevel) {
            out.format("<null>");
        } else {
            final PrimitiveStringifier stringifier = column.getPrimitiveType().stringifier();
            // Pick the reader accessor that matches the column's primitive type.
            switch (column.getType()) {
                case BOOLEAN:
                    out.print(stringifier.stringify(reader.getBoolean()));
                    break;
                case INT32:
                    out.print(stringifier.stringify(reader.getInteger()));
                    break;
                case INT64:
                    out.print(stringifier.stringify(reader.getLong()));
                    break;
                case FLOAT:
                    out.print(stringifier.stringify(reader.getFloat()));
                    break;
                case DOUBLE:
                    out.print(stringifier.stringify(reader.getDouble()));
                    break;
                case BINARY:
                case INT96:
                case FIXED_LEN_BYTE_ARRAY:
                    // All three are read through the binary accessor.
                    out.print(stringifier.stringify(reader.getBinary()));
                    break;
            }
        }
        out.println();
        // Move the reader on to the next entry.
        reader.consume();
    }
}
Also used : PrimitiveStringifier(org.apache.parquet.schema.PrimitiveStringifier) ColumnReader(org.apache.parquet.column.ColumnReader)

Example 5 with ColumnReader

use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.

the class RecordReaderImplementation method read.

/**
 * Assembles one record by walking the column states, starting at
 * {@code states[0]} and following {@code nextState} until it yields
 * {@code null}.
 *
 * <p>For each visited state the method starts the group converters needed
 * down to the depth given by the current definition level, hands the value
 * to its converter when the definition level reaches the column's maximum,
 * consumes the entry, then ends group converters down to the level chosen
 * by the repetition level read after consuming.
 *
 * @return the record produced by the record materializer; when it is
 *         {@code null}, {@code shouldSkipCurrentRecord} is set and the
 *         materializer's {@code skipCurrentRecord()} is called before
 *         returning
 * @see org.apache.parquet.io.RecordReader#read()
 */
@Override
public T read() {
    // number of entries of groupConverterPath that are started and not yet ended
    int currentLevel = 0;
    recordRootConverter.start();
    State currentState = states[0];
    do {
        ColumnReader columnReader = currentState.column;
        int d = columnReader.getCurrentDefinitionLevel();
        // creating needed nested groups until the current field (opening tags)
        int depth = currentState.definitionLevelToDepth[d];
        for (; currentLevel <= depth; ++currentLevel) {
            currentState.groupConverterPath[currentLevel].start();
        }
        // set the current value
        if (d >= currentState.maxDefinitionLevel) {
            // not null
            columnReader.writeCurrentValueToConverter();
        }
        columnReader.consume();
        // read after consume(), so presumably this is the repetition level of the
        // following entry (confirm against ColumnReader); columns whose
        // maxRepetitionLevel is 0 skip the read and use 0
        int nextR = currentState.maxRepetitionLevel == 0 ? 0 : columnReader.getCurrentRepetitionLevel();
        // level to go to close current groups
        int next = currentState.nextLevel[nextR];
        for (; currentLevel > next; currentLevel--) {
            currentState.groupConverterPath[currentLevel - 1].end();
        }
        // a null next state ends the loop
        currentState = currentState.nextState[nextR];
    } while (currentState != null);
    recordRootConverter.end();
    T record = recordMaterializer.getCurrentRecord();
    // a null record makes this read count as skipped
    shouldSkipCurrentRecord = record == null;
    if (shouldSkipCurrentRecord) {
        recordMaterializer.skipCurrentRecord();
    }
    return record;
}
Also used : ColumnReader(org.apache.parquet.column.ColumnReader)

Aggregations

ColumnReader (org.apache.parquet.column.ColumnReader): 10, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 6, MessageType (org.apache.parquet.schema.MessageType): 6, Test (org.junit.Test): 6, ColumnWriter (org.apache.parquet.column.ColumnWriter): 4, ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1): 4, MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 4, DataPage (org.apache.parquet.column.page.DataPage): 2, MemPageReader (org.apache.parquet.column.page.mem.MemPageReader): 2, MemPageWriter (org.apache.parquet.column.page.mem.MemPageWriter): 2, PrimitiveStringifier (org.apache.parquet.schema.PrimitiveStringifier): 1