Search in sources :

Example 1 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

the class OrcRecordReader method createColumnReaders.

/**
 * Creates one {@link ColumnReader} per entry of {@code columns}, in list order.
 *
 * <p>The reader at position {@code i} is created from {@code readTypes.get(i)} and
 * {@code columns.get(i)}. It receives a nested block factory whose callback forwards every
 * loaded block to {@code blockLoaded(i, block)}. If
 * {@code orcCacheProperties.isRowDataCacheEnabled()} is true, the reader is wrapped with
 * {@code ColumnReaders.wrapWithCachingStreamReader} using the row data cache taken from
 * {@code orcCacheStore}.
 *
 * @param columns the columns to create readers for
 * @param readTypes the type used for each column; indexed in parallel with {@code columns}
 * @param systemMemoryContext memory context handed to every created reader
 * @param blockFactory factory from which the per-column nested block factory is derived
 * @param orcCacheStore source of the row data cache; only consulted when row data caching is enabled
 * @param orcCacheProperties tells whether row data caching is enabled
 * @return an array of readers with the same length and ordering as {@code columns}
 * @throws OrcCorruptionException declared by the reader creation call (exact conditions are not
 *         visible here — confirm against {@code ColumnReaders.createColumnReader})
 */
private ColumnReader[] createColumnReaders(List<OrcColumn> columns, List<Type> readTypes, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties) throws OrcCorruptionException {
    final int columnCount = columns.size();
    ColumnReader[] readers = new ColumnReader[columnCount];
    for (int position = 0; position < columnCount; position++) {
        // The callback lambda can only capture an effectively final copy of the loop counter.
        final int columnIndex = position;
        Type readType = readTypes.get(columnIndex);
        OrcColumn column = columns.get(columnIndex);
        ColumnReader plainReader = createColumnReader(readType, column, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
        // Wrap with the caching reader only when row data caching is switched on.
        readers[columnIndex] = orcCacheProperties.isRowDataCacheEnabled()
                ? ColumnReaders.wrapWithCachingStreamReader(plainReader, column, orcCacheStore.getRowDataCache())
                : plainReader;
    }
    return readers;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) DATASOURCE_FILE_PATH(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_PATH) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DATASOURCE_STRIPE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_NUMBER) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) Slices(io.airlift.slice.Slices) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) DATASOURCE_FILE_MODIFICATION(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_MODIFICATION) OrcType(io.prestosql.orc.metadata.OrcType) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) DATASOURCE_INDEX_LEVEL(io.prestosql.spi.HetuConstant.DATASOURCE_INDEX_LEVEL) ColumnReader(io.prestosql.orc.reader.ColumnReader) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Domain(io.prestosql.spi.predicate.Domain) 
DATASOURCE_STRIPE_LENGTH(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_LENGTH) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) Type(io.prestosql.spi.type.Type) OrcType(io.prestosql.orc.metadata.OrcType) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader)

Example 2 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

the class OrcRecordReader method validateWritePageChecksum.

/**
 * Reads one block from every column reader, assembles the blocks into a {@link Page} of
 * {@code batchSize} positions and passes that page to the page-based
 * {@code validateWritePageChecksum} overload.
 *
 * <p>Returns immediately without reading anything when {@code writeChecksumBuilder} is not present.
 * Each block that is read is also reported through {@code blockLoaded}.
 *
 * @param batchSize position count used for the page that is validated
 * @throws IOException declared by the reads and the validation call
 */
private void validateWritePageChecksum(int batchSize) throws IOException {
    if (!writeChecksumBuilder.isPresent()) {
        return;
    }

    ColumnReader[] readers = getColumnReaders();
    Block[] loadedBlocks = new Block[readers.length];
    for (int idx = 0; idx < readers.length; idx++) {
        loadedBlocks[idx] = readers[idx].readBlock();
        blockLoaded(idx, loadedBlocks[idx]);
    }
    validateWritePageChecksum(new Page(batchSize, loadedBlocks));
}
Also used : Block(io.prestosql.spi.block.Block) Page(io.prestosql.spi.Page) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader)

Example 3 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

the class OrcRecordReader method nextPage.

/**
 * Produces the next {@link Page} of this reader.
 *
 * <p>Steps, in order: fetch the column readers, prepare the next batch, tell every non-null
 * reader how many rows to read, run {@code batchRead}, reset {@code matchingRowsInBatchArray},
 * run the write checksum validation, then build one block per column through
 * {@code blockFactory.createBlock}. Each block is given a loader that reads from the matching
 * column reader and passes the result through {@code filterRows}, plus a callback that reports
 * the loaded block via {@code blockLoaded}.
 *
 * <p>When {@code pageMetadataEnabled} is set, the returned page additionally carries a
 * {@link Properties} object describing the page number, the current stripe and, if
 * {@code splitMetadata} is non-null, the file path and modification time.
 *
 * @return the next page, or {@code null} when {@code prepareNextBatch()} returns a negative size
 * @throws IOException declared by the batch preparation, read and validation calls
 */
public Page nextPage() throws IOException {
    ColumnReader[] readers = getColumnReaders();
    int rowsInBatch = prepareNextBatch();
    if (rowsInBatch < 0) {
        return null;
    }

    // Entries of the reader array may be null; those are skipped here.
    for (int idx = 0; idx < readers.length; idx++) {
        ColumnReader reader = readers[idx];
        if (reader == null) {
            continue;
        }
        reader.prepareNextRead(rowsInBatch);
    }

    batchRead(rowsInBatch);
    matchingRowsInBatchArray = null;
    validateWritePageChecksum(rowsInBatch);

    // Build the page out of lazily loaded blocks.
    blockFactory.nextPage();
    Arrays.fill(currentBytesPerCell, 0);
    Block[] lazyBlocks = new Block[readers.length];
    for (int idx = 0; idx < readers.length; idx++) {
        // The lambdas need an effectively final copy of the loop counter.
        final int columnIndex = idx;
        lazyBlocks[columnIndex] = blockFactory.createBlock(rowsInBatch, () -> filterRows(readers[columnIndex].readBlock()), block -> blockLoaded(columnIndex, block));
    }

    // Page metadata is attached only when the feature is enabled.
    if (!pageMetadataEnabled) {
        return new Page(rowsInBatch, lazyBlocks);
    }

    Properties metadata = new Properties();
    pageCount++;
    metadata.setProperty(DATASOURCE_PAGE_NUMBER, String.valueOf(pageCount));
    if (isCurrentStripeFinished()) {
        // The total page count is written only once the current stripe has finished, so the
        // presence of this property marks the last page of the stripe. The counter is then
        // reset for the following stripe.
        metadata.setProperty(DATASOURCE_TOTAL_PAGES, String.valueOf(pageCount));
        pageCount = 0;
    }
    metadata.setProperty(DATASOURCE_STRIPE_NUMBER, String.valueOf(currentStripe));
    metadata.setProperty(DATASOURCE_STRIPE_OFFSET, String.valueOf(stripes.get(currentStripe).getOffset()));
    metadata.setProperty(DATASOURCE_STRIPE_LENGTH, String.valueOf(stripes.get(currentStripe).getTotalLength()));
    if (splitMetadata != null) {
        // splitMetadata is null in tests; the file-level properties are omitted in that case.
        metadata.setProperty(DATASOURCE_FILE_PATH, splitMetadata.getSplitIdentity());
        metadata.setProperty(DATASOURCE_FILE_MODIFICATION, String.valueOf(splitMetadata.getLastModifiedTime()));
    }
    metadata.setProperty(DATASOURCE_INDEX_LEVEL, "STRIPE");
    return new Page(rowsInBatch, metadata, lazyBlocks);
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) DATASOURCE_FILE_PATH(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_PATH) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DATASOURCE_STRIPE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_NUMBER) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) Slices(io.airlift.slice.Slices) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) DATASOURCE_FILE_MODIFICATION(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_MODIFICATION) OrcType(io.prestosql.orc.metadata.OrcType) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) DATASOURCE_INDEX_LEVEL(io.prestosql.spi.HetuConstant.DATASOURCE_INDEX_LEVEL) ColumnReader(io.prestosql.orc.reader.ColumnReader) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Domain(io.prestosql.spi.predicate.Domain) 
DATASOURCE_STRIPE_LENGTH(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_LENGTH) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) Block(io.prestosql.spi.block.Block) Page(io.prestosql.spi.Page) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) Properties(java.util.Properties)

Example 4 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

the class TestAbstractNumbericColumnReader method testTypeCoercionDate.

/**
 * Verifies that {@code ColumnReaders.createColumnReader}, given an ORC column of kind
 * {@code DATE}, returns a reader whose {@code toString()} equals that of a directly
 * constructed {@link DateColumnReader} for the same type and column.
 */
@Test
public void testTypeCoercionDate() throws OrcCorruptionException {
    // The same location string serves as both the column path and the data source id.
    String orcFilePath = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn dateColumn = new OrcColumn(orcFilePath, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.DATE, new OrcDataSourceId(orcFilePath), ImmutableList.of());

    ColumnReader createdReader = ColumnReaders.createColumnReader(type, dateColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext(), null);
    DateColumnReader referenceReader = new DateColumnReader(type, dateColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext().newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    String createdDescription = createdReader.toString();
    String referenceDescription = referenceReader.toString();
    assertEquals(createdDescription, referenceDescription);
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) DateColumnReader(io.prestosql.orc.reader.DateColumnReader) ShortColumnReader(io.prestosql.orc.reader.ShortColumnReader) DateColumnReader(io.prestosql.orc.reader.DateColumnReader) LongColumnReader(io.prestosql.orc.reader.LongColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) IntegerColumnReader(io.prestosql.orc.reader.IntegerColumnReader) Test(org.testng.annotations.Test)

Example 5 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

the class TestAbstractNumbericColumnReader method testTypeCoercionBigInt.

/**
 * Verifies that {@code ColumnReaders.createColumnReader}, given an ORC column of kind
 * {@code LONG}, returns a reader whose {@code toString()} equals that of a directly
 * constructed {@link LongColumnReader} for the same type and column.
 */
@Test
public void testTypeCoercionBigInt() throws OrcCorruptionException {
    // The same location string serves as both the column path and the data source id.
    String orcFilePath = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn longColumn = new OrcColumn(orcFilePath, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.LONG, new OrcDataSourceId(orcFilePath), ImmutableList.of());

    ColumnReader createdReader = ColumnReaders.createColumnReader(type, longColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext(), null);
    LongColumnReader referenceReader = new LongColumnReader(type, longColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext().newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    String createdDescription = createdReader.toString();
    String referenceDescription = referenceReader.toString();
    assertEquals(createdDescription, referenceDescription);
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) LongColumnReader(io.prestosql.orc.reader.LongColumnReader) ShortColumnReader(io.prestosql.orc.reader.ShortColumnReader) DateColumnReader(io.prestosql.orc.reader.DateColumnReader) LongColumnReader(io.prestosql.orc.reader.LongColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) IntegerColumnReader(io.prestosql.orc.reader.IntegerColumnReader) Test(org.testng.annotations.Test)

Aggregations

ColumnReader (io.prestosql.orc.reader.ColumnReader)15 Block (io.prestosql.spi.block.Block)11 Test (org.testng.annotations.Test)11 CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader)7 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)6 InputStreamSources (io.prestosql.orc.stream.InputStreamSources)5 IOException (java.io.IOException)5 ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)4 OrcType (io.prestosql.orc.metadata.OrcType)4 DateColumnReader (io.prestosql.orc.reader.DateColumnReader)4 IntegerColumnReader (io.prestosql.orc.reader.IntegerColumnReader)4 LongColumnReader (io.prestosql.orc.reader.LongColumnReader)4 ShortColumnReader (io.prestosql.orc.reader.ShortColumnReader)4 StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta)4 Page (io.prestosql.spi.Page)4 PeekingIterator (com.google.common.collect.PeekingIterator)3 Logger (io.airlift.log.Logger)3 Slice (io.airlift.slice.Slice)3 DataSize (io.airlift.units.DataSize)3 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)3