Use of com.facebook.presto.memory.context.AggregatedMemoryContext in project presto by prestodb.
From the class TestMergeSortedPages, method mergeSortedPages:
private static MaterializedResult mergeSortedPages(List<Type> types, List<Integer> sortChannels, List<SortOrder> sortOrder, List<List<Page>> sortedPages)
        throws Exception
{
    List<WorkProcessor<Page>> pageProducers = sortedPages.stream()
            .map(WorkProcessor::fromIterable)
            .collect(toImmutableList());
    PageWithPositionComparator comparator = new SimplePageWithPositionComparator(types, sortChannels, sortOrder);

    // child context of a fresh root, so the test can assert that the merge releases all of its memory
    AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext().newAggregatedMemoryContext();
    WorkProcessor<Page> mergedPages = MergeSortedPages.mergeSortedPages(pageProducers, comparator, types, memoryContext, new DriverYieldSignal());

    assertTrue(mergedPages.process());
    if (mergedPages.isFinished()) {
        return toMaterializedResult(TEST_SESSION, types, ImmutableList.of());
    }
    Page page = mergedPages.getResult();
    assertTrue(mergedPages.process());
    assertTrue(mergedPages.isFinished());
    assertEquals(memoryContext.getBytes(), 0L);
    return toMaterializedResult(TEST_SESSION, types, ImmutableList.of(page));
}
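Here the context handed to MergeSortedPages is a child of a fresh root (newSimpleAggregatedMemoryContext().newAggregatedMemoryContext()), so the closing assertEquals(memoryContext.getBytes(), 0L) checks that the merge released every byte it reserved. A minimal sketch of how reservations roll up through that hierarchy; the local-context calls (newLocalMemoryContext, setBytes), the method name, and the tag are assumptions for illustration, not taken from the test:

// A sketch, not Presto code: leaf reservations are visible at every aggregated level.
// Assumes the static import of AggregatedMemoryContext.newSimpleAggregatedMemoryContext, as in the snippet above.
private static void memoryContextHierarchySketch()
{
    AggregatedMemoryContext root = newSimpleAggregatedMemoryContext();
    AggregatedMemoryContext child = root.newAggregatedMemoryContext();
    LocalMemoryContext leaf = child.newLocalMemoryContext("merge-buffers"); // assumed accounting call

    leaf.setBytes(1024);
    assert child.getBytes() == 1024;
    assert root.getBytes() == 1024;

    // releasing the leaf returns both aggregated counters to zero,
    // which is the condition the test asserts after the merge finishes
    leaf.setBytes(0);
    assert child.getBytes() == 0;
}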
Use of com.facebook.presto.memory.context.AggregatedMemoryContext in project presto by prestodb.
From the class ParquetPageSourceFactory, method createParquetPageSource:
public static ConnectorPageSource createParquetPageSource(
        HdfsEnvironment hdfsEnvironment,
        String user,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<HiveColumnHandle> columns,
        SchemaTableName tableName,
        boolean useParquetColumnNames,
        DataSize maxReadBlockSize,
        boolean batchReaderEnabled,
        boolean verificationEnabled,
        TypeManager typeManager,
        StandardFunctionResolution functionResolution,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        FileFormatDataSourceStats stats,
        HiveFileContext hiveFileContext,
        ParquetMetadataSource parquetMetadataSource,
        boolean columnIndexFilterEnabled)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();

        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }

        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column))
                .map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .map(type -> new MessageType(fileSchema.getName(), type))
                .reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));

        // keep only the row groups whose first data page falls inside this split
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }

        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;

        // drop row groups that cannot match the predicate, using statistics and (optionally) column indexes
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            }
            else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }

        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);

        // resolve each requested column to a parquet field (or empty if it is absent from the file)
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                }
                else {
                    fieldsBuilder.add(Optional.empty());
                }
            }
            else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            }
            else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    }
    catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
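createParquetPageSource builds its own root context (newSimpleAggregatedMemoryContext()) and hands it to ParquetReader, so whatever the reader retains is tracked under this page source rather than going unaccounted. A hedged sketch of that accounting pattern; the method name, the tag, and the buffer are illustrative, and newLocalMemoryContext/setBytes are assumed calls from the same memory-context package, not code from this factory:

// A sketch, not the actual ParquetReader: a reader component charging the buffers
// it retains to the AggregatedMemoryContext created at the top of the method.
private static void readerMemoryAccountingSketch()
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    LocalMemoryContext decodeBufferContext = systemMemoryContext.newLocalMemoryContext("ParquetReader"); // assumed accounting call

    byte[] decodeBuffer = new byte[64 * 1024];
    decodeBufferContext.setBytes(decodeBuffer.length); // report the retained size while decoding

    // ... decode column chunks into decodeBuffer ...

    decodeBufferContext.setBytes(0);                    // release the accounting when the reader closes
}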
Use of com.facebook.presto.memory.context.AggregatedMemoryContext in project presto by prestodb.
From the class TestGenericPartitioningSpiller, method testWriteManyPartitions:
@Test
public void testWriteManyPartitions()
        throws Exception
{
    List<Type> types = ImmutableList.of(BIGINT);
    int partitionCount = 4;
    AggregatedMemoryContext memoryContext = mockMemoryContext(scheduledExecutor);

    try (GenericPartitioningSpiller spiller = (GenericPartitioningSpiller) factory.create(types, new ModuloPartitionFunction(0, partitionCount), mockSpillContext(), memoryContext)) {
        for (int i = 0; i < 50_000; i++) {
            Page page = SequencePageBuilder.createSequencePage(types, partitionCount, 0);
            PartitioningSpillResult spillResult = spiller.partitionAndSpill(page, partition -> true);
            assertEquals(spillResult.getRetained().getPositionCount(), 0);
            getFutureValue(spillResult.getSpillingFuture());
            getFutureValue(spiller.flush());
        }
    }
    assertEquals(memoryContext.getBytes(), 0, "Reserved bytes should be zeroed after spiller is closed");
}
Use of com.facebook.presto.memory.context.AggregatedMemoryContext in project presto by prestodb.
From the class TestBroadcastOutputBuffer, method testSharedBufferBlockingNoBlockOnFull:
@Test
public void testSharedBufferBlockingNoBlockOnFull()
{
    SettableFuture<?> blockedFuture = SettableFuture.create();
    MockMemoryReservationHandler reservationHandler = new MockMemoryReservationHandler(blockedFuture);
    AggregatedMemoryContext memoryContext = newRootAggregatedMemoryContext(reservationHandler, 0L);

    Page page = createPage(1);
    long pageSize = PAGES_SERDE.serialize(page).getRetainedSizeInBytes();

    // create a buffer that can only hold two pages
    BroadcastOutputBuffer buffer = createBroadcastBuffer(createInitialEmptyOutputBuffers(BROADCAST), new DataSize(pageSize * 2, BYTE), memoryContext, directExecutor());
    OutputBufferMemoryManager memoryManager = buffer.getMemoryManager();
    memoryManager.setNoBlockOnFull();

    // even if setNoBlockOnFull() is called the buffer should block on memory when we add the first page
    // as no memory is available (MockMemoryReservationHandler will return a future that is not done)
    enqueuePage(buffer, page);

    // more memory is available
    blockedFuture.set(null);
    memoryManager.onMemoryAvailable();
    assertTrue(memoryManager.getBufferBlockedFuture().isDone(), "buffer shouldn't be blocked");

    // we should be able to add one more page after more memory is available
    addPage(buffer, page);

    // the buffer is full now, but setNoBlockOnFull() is called so the buffer shouldn't block
    addPage(buffer, page);
}
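The root context here is wired to a MockMemoryReservationHandler (a test double defined elsewhere in the class) that answers reservations with blockedFuture, so memory only "becomes available" once the test completes that future and calls onMemoryAvailable(). A minimal sketch of the gate itself, using nothing but Guava's SettableFuture; the method name is made up and none of the buffer classes are involved:

// A sketch of the gate the test builds around memory availability: the reservation
// future starts pending (memory unavailable) and is completed later to let blocked
// callers proceed.
private static void reservationGateSketch()
{
    SettableFuture<?> reservationFuture = SettableFuture.create();

    // while pending, anything waiting on this future (e.g. a producer adding pages) stays blocked
    assert !reservationFuture.isDone();

    // the test simulates memory becoming available by completing the future,
    // then nudges the memory manager with onMemoryAvailable()
    reservationFuture.set(null);
    assert reservationFuture.isDone();
}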