use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.
the class OrcSelectiveRecordReader method createColumnReaders.
/**
 * Creates a selective reader for every top-level field index that appears in
 * {@code includedColumns} and records, on this instance, which column indexes carry a
 * conjunctive filter, a disjunctive (OR) filter, or no filter at all.
 *
 * <p>Slots of the returned array whose index is not a key of {@code includedColumns} stay
 * {@code null}. Included keys that never matched a field index, and the entries of
 * {@code missingColumns}, get no reader but are still added to the filter sets when a filter
 * exists for them.
 *
 * @param fileColumns columns of the file, looked up by field index
 * @param systemMemoryContext memory context handed to the created readers
 * @param blockFactory source of the nested block factory used on the data-cache path
 * @param orcCacheStore provides the row data cache used to wrap readers
 * @param orcCacheProperties tells whether the row data cache is enabled
 * @param predicate predicate passed to the result-caching wrapper
 * @param filters conjunctive filters keyed by column index
 * @param hiveStorageTimeZone time zone forwarded to the selective reader factory
 * @param outputColumns indexes of columns whose values must be produced
 * @param includedColumns types of the columns taking part in projection or filtering
 * @param orcTypes ORC type metadata; the root entry supplies the field count
 * @param useDataCache when true and the row data cache is enabled, a caching stream reader is used
 * @return array sized to the root field count, holding a reader at each included index
 * @throws OrcCorruptionException declared by this method; presumably raised by reader creation
 */
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache) throws OrcCorruptionException {
    final int rootFieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    final SelectiveColumnReader[] readers = new SelectiveColumnReader[rootFieldCount];

    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();

    // Starts as every included key; whatever is left after the field loop had no file column.
    IntArraySet unresolved = new IntArraySet();
    unresolved.addAll(includedColumns.keySet());

    for (int field = 0; field < rootFieldCount; field++) {
        // Readers are only built for columns taking part in projection or filtering.
        if (!includedColumns.containsKey(field)) {
            continue;
        }

        // Final copy so the block-loaded callback below can capture it.
        final int columnIndex = field;
        OrcColumn orcColumn = fileColumns.get(columnIndex);
        boolean isOutput = outputColumns.contains(field);

        SelectiveColumnReader reader;
        if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
            ColumnReader plainReader = ColumnReaders.createColumnReader(
                    includedColumns.get(field),
                    orcColumn,
                    systemMemoryContext,
                    blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
            reader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(plainReader, orcColumn, orcCacheStore.getRowDataCache());
        } else {
            reader = createColumnReader(
                    orcTypes.get(orcColumn.getColumnId()),
                    orcColumn,
                    Optional.ofNullable(filters.get(field)),
                    isOutput ? Optional.of(includedColumns.get(field)) : Optional.empty(),
                    hiveStorageTimeZone,
                    systemMemoryContext);
            if (orcCacheProperties.isRowDataCacheEnabled()) {
                reader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(reader, orcColumn, predicate, orcCacheStore.getRowDataCache());
            }
        }
        readers[columnIndex] = reader;

        if (!classifyFilteredColumn(filters, columnIndex)) {
            colReaderWithoutFilter.add(columnIndex);
        }
        unresolved.remove(columnIndex);
    }

    // Leftover included keys: drop the known-missing ones, then classify the negative indexes.
    unresolved.removeAll(missingColumns);
    for (int leftover : unresolved) {
        if (leftover >= 0) {
            continue;
        }
        classifyFilteredColumn(filters, leftover);
    }

    // Columns absent from the file (the "alter add column" case) can still carry filters.
    for (int absent : missingColumns) {
        classifyFilteredColumn(filters, absent);
    }

    return readers;
}

/**
 * Adds {@code column} to the AND-filter set when {@code filters} has an entry for it, otherwise
 * to the OR-filter set when a non-empty disjunctive filter entry exists.
 *
 * @return {@code true} if the column was added to either set, {@code false} if it has no filter
 */
private boolean classifyFilteredColumn(Map<Integer, TupleDomainFilter> filters, int column) {
    if (filters.get(column) != null) {
        colReaderWithFilter.add(column);
        return true;
    }
    if (disjuctFilters.get(column) != null && disjuctFilters.get(column).size() > 0) {
        colReaderWithORFilter.add(column);
        return true;
    }
    return false;
}
use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.
the class TestAbstractNumbericColumnReader method testTypeCoercionInteger.
/**
 * Checks that the reader {@code ColumnReaders.createColumnReader} returns for an INT column has
 * the same string form as a directly constructed {@code IntegerColumnReader}.
 */
@Test
public void testTypeCoercionInteger() throws OrcCorruptionException {
    // The same path serves as both the column path and the data source id.
    String filePath = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn intColumn = new OrcColumn(filePath, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.INT, new OrcDataSourceId(filePath), ImmutableList.of());

    ColumnReader createdReader = ColumnReaders.createColumnReader(type, intColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext(), null);

    AggregatedMemoryContext referenceContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    IntegerColumnReader referenceReader = new IntegerColumnReader(type, intColumn, referenceContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    String createdText = createdReader.toString();
    String referenceText = referenceReader.toString();
    assertEquals(createdText, referenceText);
}
use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.
the class TestAbstractNumbericColumnReader method testTypeCoercionShort.
/**
 * Checks that the reader {@code ColumnReaders.createColumnReader} returns for a SHORT column has
 * the same string form as a directly constructed {@code ShortColumnReader}.
 */
@Test
public void testTypeCoercionShort() throws OrcCorruptionException {
    // The same path serves as both the column path and the data source id.
    String filePath = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn shortColumn = new OrcColumn(filePath, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.SHORT, new OrcDataSourceId(filePath), ImmutableList.of());

    ColumnReader createdReader = ColumnReaders.createColumnReader(type, shortColumn, AggregatedMemoryContext.newSimpleAggregatedMemoryContext(), null);

    AggregatedMemoryContext referenceContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    ShortColumnReader referenceReader = new ShortColumnReader(type, shortColumn, referenceContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    String createdText = createdReader.toString();
    String referenceText = referenceReader.toString();
    assertEquals(createdText, referenceText);
}
use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.
the class TestCachingColumnReader method testBlockCachedOnStartStripe.
/**
 * Checks that {@code CachingColumnReader.startStripe} forwards its three arguments to the
 * wrapped reader at least once.
 */
@Test
public void testBlockCachedOnStartStripe() throws IOException {
    // Collaborators are all mocks; the cache is a spied, freshly built instance.
    Stripe mockedStripe = mock(Stripe.class);
    InputStreamSources sources = mock(InputStreamSources.class);
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Values come straight from the unstubbed mock, so they are whatever it returns by default.
    ZoneId zone = mockedStripe.getFileTimeZone();
    ColumnMetadata<ColumnEncoding> encodings = mockedStripe.getColumnEncodings();

    readerUnderTest.startStripe(zone, sources, encodings);

    verify(delegate, atLeastOnce()).startStripe(eq(zone), eq(sources), eq(encodings));
}
use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.
the class TestCachingColumnReader method testCacheLoaderThrowsInterruptedException.
/**
 * Checks the failure path of {@code CachingColumnReader.startRowGroup} when the wrapped reader
 * interrupts the current thread and throws a {@link PrestoException} from {@code readBlock}.
 *
 * <p>The test passes only if a {@code PrestoException} whose message matches
 * {@code ".*Read interrupted.*"} leaves this method. Before rethrowing, it verifies that the
 * wrapped reader was started for the row group, that {@code readBlock} ran exactly once, and
 * that nothing was stored in the cache.
 *
 * @throws IOException declared because {@code startRowGroup} declares it; not expected here
 */
@Test(expectedExceptions = PrestoException.class, expectedExceptionsMessageRegExp = ".*Read interrupted.*")
public void testCacheLoaderThrowsInterruptedException() throws IOException {
    ColumnReader columnReader = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> cache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader cachingColumnReader = new CachingColumnReader(columnReader, column, cache);
    InputStreamSources inputStreamSources = mock(InputStreamSources.class);
    StreamSourceMeta streamSourceMeta = new StreamSourceMeta();
    OrcDataSourceId orcDataSourceId = new OrcDataSourceId("2");
    streamSourceMeta.setDataSourceId(orcDataSourceId);
    when(inputStreamSources.getStreamSourceMeta()).thenReturn(streamSourceMeta);
    // Simulate an interrupted read: set the interrupt flag, then fail with the expected error.
    when(columnReader.readBlock()).then((Answer<Block>) invocationOnMock -> {
        Thread.currentThread().interrupt();
        throw new PrestoException(StandardErrorCode.GENERIC_INTERNAL_ERROR, "Read interrupted");
    });
    try {
        cachingColumnReader.startRowGroup(inputStreamSources);
    } catch (IOException | PrestoException ex) {
        // The expected failure is a PrestoException; catching only IOException would leave
        // these checks unreachable whenever the test passes.
        verify(columnReader, atLeastOnce()).startRowGroup(eq(inputStreamSources));
        verify(columnReader, times(1)).readBlock();
        assertEquals(cache.size(), 0);
        throw ex;
    } finally {
        // clear interrupted flag status so it does not leak into later tests on this thread
        Thread.interrupted();
    }
}
Aggregations