Use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.
From class OrcPageSourceFactory, method createOrcPageSource:
public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, boolean orcBloomFiltersEnabled)
{
    OrcDataSource orcDataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize, streamBufferSize, inputStream);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);

        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }

        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled);

        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage);

        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    }
    catch (Exception e) {
        try {
            orcDataSource.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
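Every allocation the record reader makes through this shared context rolls up into the OrcPageSource, which is how the engine accounts for the split's system memory. Below is a minimal sketch of that roll-up behavior, assuming the pre-0.19x API of com.facebook.presto.orc.memory (newLocalMemoryContext, setBytes, getBytes); the variable names are illustrative, not from the original:

AggregatedMemoryContext root = new AggregatedMemoryContext();
// Each reader component registers its own child context (an assumption
// about how the reader uses the API, consistent with the examples here)...
LocalMemoryContext streamBuffers = root.newLocalMemoryContext();
LocalMemoryContext dictionaries = root.newLocalMemoryContext();
// ...and reports its current allocation through setBytes.
streamBuffers.setBytes(64 * 1024);
dictionaries.setBytes(16 * 1024);
// The parent reports the sum of its children: 80 KB here.
assert root.getBytes() == 80 * 1024;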
Use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.
From class OrcStorageManager, method getPageSource:
@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId)
{
    OrcDataSource dataSource = openShard(shardUuid, readerAttributes);

    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(dataSource, new OrcMetadataReader(), readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize());

        Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
        for (int i = 0; i < columnIds.size(); i++) {
            long columnId = columnIds.get(i);
            if (isHiddenColumn(columnId)) {
                columnIndexes.add(toSpecialIndex(columnId));
                continue;
            }

            Integer index = indexMap.get(columnId);
            if (index == null) {
                columnIndexes.add(NULL_COLUMN);
            }
            else {
                columnIndexes.add(index);
                includedColumns.put(index, columnTypes.get(i));
            }
        }

        OrcPredicate predicate = getPredicate(effectivePredicate, indexMap);

        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, UTC, systemMemoryUsage);

        Optional<ShardRewriter> shardRewriter = Optional.empty();
        if (transactionId.isPresent()) {
            shardRewriter = Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid));
        }

        return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage);
    }
    catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    }
    catch (Throwable t) {
        closeQuietly(dataSource);
        throw t;
    }
}
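Raptor writes each column's numeric ID as its ORC column name, so columnIdIndex can map IDs directly to ordinal positions in the file. A plausible sketch of that helper under this assumption (the real implementation lives elsewhere in OrcStorageManager):

private static Map<Long, Integer> columnIdIndex(List<String> columnNames)
{
    // Column names are assumed to be stringified Raptor column IDs.
    ImmutableMap.Builder<Long, Integer> map = ImmutableMap.builder();
    for (int i = 0; i < columnNames.size(); i++) {
        map.put(Long.parseLong(columnNames.get(i)), i);
    }
    return map.build();
}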
Use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.
From class ShardStats, method doComputeColumnStats:
private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type)
        throws IOException
{
    int columnIndex = columnIndex(orcReader.getColumnNames(), columnId);
    OrcRecordReader reader = orcReader.createRecordReader(ImmutableMap.of(columnIndex, type), OrcPredicate.TRUE, UTC, new AggregatedMemoryContext());

    if (type.equals(BooleanType.BOOLEAN)) {
        return indexBoolean(type, reader, columnIndex, columnId);
    }
    if (type.equals(BigintType.BIGINT) || type.equals(DateType.DATE) || type.equals(TimestampType.TIMESTAMP)) {
        return indexLong(type, reader, columnIndex, columnId);
    }
    if (type.equals(DoubleType.DOUBLE)) {
        return indexDouble(type, reader, columnIndex, columnId);
    }
    if (type instanceof VarcharType) {
        return indexString(type, reader, columnIndex, columnId);
    }
    return null;
}
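Each index* helper scans the single included column in batches and folds the values into a ColumnStats. A hedged sketch of what an indexLong-style scan plausibly looks like; the method name, the min/max folding, and the ColumnStats(columnId, min, max) constructor shape are assumptions, not the original code:

private static ColumnStats indexLongSketch(Type type, OrcRecordReader reader, int columnIndex, long columnId)
        throws IOException
{
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    boolean seen = false;
    while (true) {
        int batchSize = reader.nextBatch();
        if (batchSize <= 0) {
            break;
        }
        Block block = reader.readBlock(type, columnIndex);
        for (int position = 0; position < block.getPositionCount(); position++) {
            if (block.isNull(position)) {
                continue;
            }
            long value = type.getLong(block, position);
            min = Math.min(min, value);
            max = Math.max(max, value);
            seen = true;
        }
    }
    // Null bounds signal an all-null (or empty) column.
    return new ColumnStats(columnId, seen ? (Object) min : null, seen ? (Object) max : null);
}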
Use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.
From class OrcTester, method createCustomOrcRecordReader:
static OrcRecordReader createCustomOrcRecordReader(TempFile tempFile, MetadataReader metadataReader, OrcPredicate predicate, Type type)
        throws IOException
{
    OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE));
    OrcReader orcReader = new OrcReader(orcDataSource, metadataReader, new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE));

    assertEquals(orcReader.getColumnNames(), ImmutableList.of("test"));
    assertEquals(orcReader.getFooter().getRowsInRowGroup(), 10_000);

    return orcReader.createRecordReader(ImmutableMap.of(0, type), predicate, HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext());
}
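A hedged sketch of how a test might consume the returned reader; the assertion and the explicit close in a finally block are illustrative, not taken from OrcTester:

OrcRecordReader reader = createCustomOrcRecordReader(tempFile, new OrcMetadataReader(), OrcPredicate.TRUE, VARCHAR);
try {
    // Read the single included column (index 0) batch by batch.
    for (int batchSize = reader.nextBatch(); batchSize > 0; batchSize = reader.nextBatch()) {
        Block block = reader.readBlock(VARCHAR, 0);
        assertEquals(block.getPositionCount(), batchSize);
    }
}
finally {
    reader.close();
}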
Use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.
From class TestCachingOrcDataSource, method doIntegration:
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize)
        throws IOException
{
    OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);
    // 1 read for the file footer
    assertEquals(orcDataSource.getReadCount(), 1);
    List<StripeInformation> stripes = orcReader.getFooter().getStripes();
    // Sanity-check the number of stripes. This can be three or higher because of the ORC writer's low-memory mode.
    assertGreaterThanOrEqual(stripes.size(), 3);
    // Verify the data source gets wrapped by CachingOrcDataSource
    assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class);

    OrcRecordReader orcRecordReader = orcReader.createRecordReader(ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext());
    int positionCount = 0;
    while (true) {
        int batchSize = orcRecordReader.nextBatch();
        if (batchSize <= 0) {
            break;
        }
        Block block = orcRecordReader.readBlock(VARCHAR, 0);
        positionCount += block.getPositionCount();
    }
    assertEquals(positionCount, POSITION_COUNT);
}
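A hedged sketch of how this integration method is plausibly invoked; the tempFile setup and the TestingOrcDataSource/FileOrcDataSource constructor shapes are assumptions carried over from the createCustomOrcRecordReader example above:

DataSize maxMergeDistance = new DataSize(1, Unit.MEGABYTE);
DataSize maxReadSize = new DataSize(1, Unit.MEGABYTE);
// TestingOrcDataSource is assumed to wrap a real data source and count
// reads, which is what the getReadCount() assertion above relies on.
TestingOrcDataSource orcDataSource = new TestingOrcDataSource(
        new FileOrcDataSource(tempFile.getFile(), maxMergeDistance, maxReadSize, maxReadSize));
doIntegration(orcDataSource, maxMergeDistance, maxReadSize);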