Search in sources :

Example 56 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.

the class TestColumnReaderImpl method test.

/**
 * Writes {@code rows} binary values through a {@link ColumnWriterV2} into an in-memory page
 * writer, checks the row and value counts summed over the produced pages, then reads every
 * value back through a {@link ColumnReaderImpl} and checks the converter saw all of them.
 */
@Test
public void test() throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
    ColumnDescriptor col = schema.getColumns().get(0);
    MemPageWriter pageWriter = new MemPageWriter();
    ParquetProperties writerProperties = ParquetProperties.builder()
            .withDictionaryPageSize(1024)
            .withWriterVersion(PARQUET_2_0)
            .withPageSize(2048)
            .build();
    ColumnWriterV2 writer = new ColumnWriterV2(col, pageWriter, writerProperties);

    // Write phase: one value per row, cutting a page after every 1000th value.
    int written = 0;
    while (written < rows) {
        writer.write(Binary.fromString("bar" + (written % 10)), 0, 0);
        written++;
        if (written % 1000 == 0) {
            // Same argument as the zero-based index of the value just written.
            writer.writePage(written - 1);
        }
    }
    writer.writePage(rows);
    writer.finalizeColumnChunk();

    // Sum the per-page counters and compare them with the number of rows written.
    List<DataPage> pages = pageWriter.getPages();
    int totalValues = 0;
    int totalRows = 0;
    for (DataPage page : pages) {
        totalValues += page.getValueCount();
        DataPageV2 pageV2 = (DataPageV2) page;
        totalRows += pageV2.getRowCount();
    }
    assertEquals(rows, totalRows);
    assertEquals(rows, totalValues);

    // Read phase: every value must come back with repetition and definition level 0.
    MemPageReader pageReader = new MemPageReader((long) rows, pages.iterator(), pageWriter.getDictionaryPage());
    ValidatingConverter converter = new ValidatingConverter();
    ColumnReader reader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
    for (int read = 0; read < rows; read++) {
        assertEquals(0, reader.getCurrentRepetitionLevel());
        assertEquals(0, reader.getCurrentDefinitionLevel());
        reader.writeCurrentValueToConverter();
        reader.consume();
    }
    assertEquals(rows, converter.count);
}
Also used : DataPage(org.apache.parquet.column.page.DataPage) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) MemPageWriter(org.apache.parquet.column.page.mem.MemPageWriter) ColumnReader(org.apache.parquet.column.ColumnReader) MessageType(org.apache.parquet.schema.MessageType) MemPageReader(org.apache.parquet.column.page.mem.MemPageReader) Test(org.junit.Test)

Example 57 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.

the class TestMemPageStore method test.

/**
 * Writes four pages of 209 values each into a {@link MemPageStore}, then reads pages back
 * until the number of values read reaches the reader's reported total, printing the total
 * and each page along the way.
 */
@Test
public void test() throws IOException {
    MemPageStore store = new MemPageStore(10);
    ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
    LongStatistics stats = new LongStatistics();
    PageWriter writer = store.getPageWriter(col);

    // Byte sizes of the four zero-filled page payloads, in write order.
    int[] payloadSizes = { 735, 743, 743, 735 };
    for (int payloadSize : payloadSizes) {
        writer.writePage(BytesInput.from(new byte[payloadSize]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
    }

    PageReader reader = store.getPageReader(col);
    long expectedValues = reader.getTotalValueCount();
    System.out.println(expectedValues);

    int valuesSeen = 0;
    do {
        DataPage page = reader.readPage();
        valuesSeen += page.getValueCount();
        System.out.println(page);
        // TODO: assert
    } while (valuesSeen < expectedValues);
}
Also used : LongStatistics(org.apache.parquet.column.statistics.LongStatistics) DataPage(org.apache.parquet.column.page.DataPage) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PageReader(org.apache.parquet.column.page.PageReader) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) PageWriter(org.apache.parquet.column.page.PageWriter) Test(org.junit.Test)

Example 58 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.

the class TestMemColumn method testMemColumnBinary.

/**
 * Writes the single binary value {@code "42"} to column {@code foo.bar} through a
 * {@link ColumnWriteStoreV1} backed by a {@link MemPageStore}, flushes, and then checks
 * that every value read back has repetition level 0, definition level 0 and the
 * UTF-8 string {@code "42"}.
 */
@Test
public void testMemColumnBinary() throws Exception {
    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
    String[] col = new String[] { "foo", "bar" };
    MemPageStore memPageStore = new MemPageStore(10);
    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
    ColumnDescriptor path = mt.getColumnDescription(col);
    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
    columnWriter.write(Binary.fromString("42"), 0, 0);
    memColumnsStore.flush();
    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
        // assertEquals takes (expected, actual); keeping that order makes a failure
        // report the expected and actual values the right way round.
        assertEquals(0, columnReader.getCurrentRepetitionLevel());
        assertEquals(0, columnReader.getCurrentDefinitionLevel());
        assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
        columnReader.consume();
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) ColumnReader(org.apache.parquet.column.ColumnReader) ColumnWriter(org.apache.parquet.column.ColumnWriter) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 59 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.

the class TestDictionary method initDicReader.

/**
 * Builds a {@link DictionaryValuesReader} from the dictionary page produced by the given writer.
 *
 * @param cw   writer whose dictionary page is taken via {@code toDictPageAndClose()} and copied
 * @param type primitive type used for the single-element {@code "foo"} column descriptor
 * @return a reader over the dictionary initialised from that page with the PLAIN encoding
 * @throws IOException declared by the method; may be thrown by the calls it makes
 */
private DictionaryValuesReader initDicReader(ValuesWriter cw, PrimitiveTypeName type) throws IOException {
    DictionaryPage pageCopy = cw.toDictPageAndClose().copy();
    ColumnDescriptor fooColumn = new ColumnDescriptor(new String[] { "foo" }, type, 0, 0);
    return new DictionaryValuesReader(PLAIN.initDictionary(fooColumn, pageCopy));
}
Also used : Dictionary(org.apache.parquet.column.Dictionary) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) DictionaryPage(org.apache.parquet.column.page.DictionaryPage)

Example 60 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project parquet-mr by apache.

the class PrintFooter method add.

/**
 * Folds one file footer into the running totals: increments {@code blockCount} and adds the
 * row count to {@code recordCount} for each block, and passes every column chunk's descriptor,
 * value count, sizes, encodings and statistics to the per-column {@code add} overload.
 *
 * @param footer footer metadata whose blocks and column chunks are accumulated
 */
private static void add(ParquetMetadata footer) {
    for (BlockMetaData block : footer.getBlocks()) {
        ++blockCount;
        // Looked up per block, after the block counter is bumped, as in the original flow.
        MessageType fileSchema = footer.getFileMetaData().getSchema();
        recordCount += block.getRowCount();
        for (ColumnChunkMetaData chunk : block.getColumns()) {
            ColumnDescriptor descriptor = fileSchema.getColumnDescription(chunk.getPath().toArray());
            add(
                    descriptor,
                    chunk.getValueCount(),
                    chunk.getTotalSize(),
                    chunk.getTotalUncompressedSize(),
                    chunk.getEncodings(),
                    chunk.getStatistics());
        }
    }
}
Also used : BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)88 MessageType (org.apache.parquet.schema.MessageType)33 PrimitiveType (org.apache.parquet.schema.PrimitiveType)18 Test (org.testng.annotations.Test)18 RichColumnDescriptor (com.facebook.presto.parquet.RichColumnDescriptor)16 ArrayList (java.util.ArrayList)16 GroupType (org.apache.parquet.schema.GroupType)14 BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData)12 Test (org.junit.Test)12 Domain (com.facebook.presto.common.predicate.Domain)11 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)11 Path (org.apache.hadoop.fs.Path)11 ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData)11 List (java.util.List)10 ImmutableList (com.google.common.collect.ImmutableList)9 HashMap (java.util.HashMap)9 Configuration (org.apache.hadoop.conf.Configuration)9 Type (org.apache.parquet.schema.Type)9 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)8 IOException (java.io.IOException)7