Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class TestHivePageSourceProvider, method testUseRecordReaderWithInputFormatAnnotationAndCustomSplit.
@Test
public void testUseRecordReaderWithInputFormatAnnotationAndCustomSplit() {
    StorageFormat storageFormat = StorageFormat.create(ParquetHiveSerDe.class.getName(), HoodieParquetInputFormat.class.getName(), "");
    Storage storage = new Storage(storageFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    Map<String, String> customSplitInfo = ImmutableMap.of(
            CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeFileSplit.class.getName(),
            HUDI_BASEPATH_KEY, "/test/file.parquet",
            HUDI_DELTA_FILEPATHS_KEY, "/test/.file_100.log",
            HUDI_MAX_COMMIT_TIME_KEY, "100");
    HiveRecordCursorProvider recordCursorProvider = new MockHiveRecordCursorProvider();
    HiveBatchPageSourceFactory hiveBatchPageSourceFactory = new MockHiveBatchPageSourceFactory();
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(recordCursorProvider),
            ImmutableSet.of(hiveBatchPageSourceFactory),
            new Configuration(),
            new TestingConnectorSession(new HiveSessionProperties(
                    new HiveClientConfig().setUseRecordPageSourceForCustomSplit(true),
                    new OrcFileWriterConfig(),
                    new ParquetFileWriterConfig(),
                    new CacheConfig()).getSessionProperties()),
            new Path("/test/"),
            OptionalInt.empty(), 0, 100, 200,
            Instant.now().toEpochMilli(),
            storage,
            TupleDomain.none(),
            ImmutableList.of(), ImmutableMap.of(), ImmutableList.of(),
            DateTimeZone.UTC,
            new TestingTypeManager(),
            new SchemaTableName("test", "test"),
            ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(),
            0,
            TableToPartitionMapping.empty(),
            Optional.empty(),
            false, null, null, false, null,
            Optional.empty(),
            customSplitInfo);
    assertTrue(pageSource.isPresent());
    assertTrue(pageSource.get() instanceof RecordPageSource);
}
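MockHiveRecordCursorProvider and MockHiveBatchPageSourceFactory are test doubles defined elsewhere in TestHivePageSourceProvider and are not shown here. As a hedged illustration of what the customSplitInfo entries encode, the hypothetical helper below (not part of Presto) checks the CUSTOM_FILE_SPLIT_CLASS_KEY entry to recognize a Hudi realtime split; it reuses only names that appear in the test above.

// Hypothetical helper, shown for illustration only: identifies a Hudi realtime split
// by inspecting the CUSTOM_FILE_SPLIT_CLASS_KEY entry of the customSplitInfo map.
private static boolean isHudiRealtimeSplit(Map<String, String> customSplitInfo) {
    return HoodieRealtimeFileSplit.class.getName()
            .equals(customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY));
}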
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class TestOrcBatchPageSourceMemoryTracking, method testPageSource.
@Test
public void testPageSource() throws Exception {
    // Numbers used in the assertions of this test may change when the implementation is modified;
    // feel free to change them if they break in the future.
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    ConnectorPageSource pageSource = testPreparer.newPageSource(stats);
    assertEquals(pageSource.getSystemMemoryUsage(), 0);
    long memoryUsage = -1;
    int totalRows = 0;
    while (totalRows < 20000) {
        assertFalse(pageSource.isFinished());
        Page page = pageSource.getNextPage();
        assertNotNull(page);
        Block block = page.getBlock(1);
        if (memoryUsage == -1) {
            // Memory usage before lazy-loading the block
            assertBetweenInclusive(pageSource.getSystemMemoryUsage(), 180000L, 189999L);
        }
        // Trigger loading of the lazy block
        createUnboundedVarcharType().getSlice(block, block.getPositionCount() - 1);
        memoryUsage = pageSource.getSystemMemoryUsage();
        // Memory usage after lazy-loading the actual block
        assertBetweenInclusive(memoryUsage, 390000L, 619999L);
        totalRows += page.getPositionCount();
    }
    memoryUsage = -1;
    while (totalRows < 40000) {
        assertFalse(pageSource.isFinished());
        Page page = pageSource.getNextPage();
        assertNotNull(page);
        Block block = page.getBlock(1);
        if (memoryUsage == -1) {
            // Memory usage before lazy-loading the block
            assertBetweenInclusive(pageSource.getSystemMemoryUsage(), 180000L, 189999L);
        }
        // Trigger loading of the lazy block
        createUnboundedVarcharType().getSlice(block, block.getPositionCount() - 1);
        memoryUsage = pageSource.getSystemMemoryUsage();
        // Memory usage after lazy-loading the actual block
        assertBetweenInclusive(memoryUsage, 390000L, 619999L);
        totalRows += page.getPositionCount();
    }
    memoryUsage = -1;
    while (totalRows < NUM_ROWS) {
        assertFalse(pageSource.isFinished());
        Page page = pageSource.getNextPage();
        assertNotNull(page);
        Block block = page.getBlock(1);
        if (memoryUsage == -1) {
            // Memory usage before lazy-loading the block
            assertBetweenInclusive(pageSource.getSystemMemoryUsage(), 90000L, 99999L);
        }
        // Trigger loading of the lazy block
        createUnboundedVarcharType().getSlice(block, block.getPositionCount() - 1);
        memoryUsage = pageSource.getSystemMemoryUsage();
        // Memory usage after lazy-loading the actual block
        assertBetweenInclusive(memoryUsage, 430000L, 459999L);
        totalRows += page.getPositionCount();
    }
    assertFalse(pageSource.isFinished());
    assertNull(pageSource.getNextPage());
    assertTrue(pageSource.isFinished());
    assertEquals(pageSource.getSystemMemoryUsage(), 0);
    pageSource.close();
}
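assertBetweenInclusive is not a TestNG assertion; in Presto's tests it comes from Airlift's testing Assertions. A minimal sketch of the equivalent check, written here for long values only and assuming TestNG's assertTrue(condition, message), is:

// Sketch only: the real helper is generic over Comparable and lives in Airlift's test utilities.
private static void assertBetweenInclusive(long actual, long lowerBound, long upperBound) {
    assertTrue(actual >= lowerBound && actual <= upperBound,
            String.format("expected a value in [%d, %d] but was %d", lowerBound, upperBound, actual));
}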
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class HiveFileFormatBenchmark, method read.
@Benchmark
public List<Page> read(CompressionCounter counter) throws IOException {
    if (!fileFormat.supports(data)) {
        throw new RuntimeException(fileFormat + " does not support data set " + dataSet);
    }
    List<Page> pages = new ArrayList<>(100);
    try (ConnectorPageSource pageSource = fileFormat.createFileFormatReader(SESSION, HDFS_ENVIRONMENT, dataFile, data.getColumnNames(), data.getColumnTypes())) {
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page != null) {
                pages.add(page.getLoadedPage());
            }
        }
    }
    counter.inputSize += data.getSize();
    counter.outputSize += dataFile.length();
    return pages;
}
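The CompressionCounter parameter is a JMH state object that accumulates input and output byte counts across benchmark invocations. A minimal sketch of a compatible state class, assuming JMH's @AuxCounters mechanism (the actual class in HiveFileFormatBenchmark may differ), is:

// Sketch: a thread-scoped JMH auxiliary-counter state compatible with the read() benchmark above.
@AuxCounters
@State(Scope.Thread)
public static class CompressionCounter {
    public long inputSize;
    public long outputSize;
}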
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class DeltaPageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableLayoutHandle layout, List<ColumnHandle> columns, SplitContext splitContext) {
    DeltaSplit deltaSplit = (DeltaSplit) split;
    DeltaTableLayoutHandle deltaTableLayoutHandle = (DeltaTableLayoutHandle) layout;
    DeltaTableHandle deltaTableHandle = deltaTableLayoutHandle.getTable();
    HdfsContext hdfsContext = new HdfsContext(session, deltaSplit.getSchema(), deltaSplit.getTable(), deltaSplit.getFilePath(), false);
    Path filePath = new Path(deltaSplit.getFilePath());
    List<DeltaColumnHandle> deltaColumnHandles = columns.stream()
            .map(DeltaColumnHandle.class::cast)
            .collect(Collectors.toList());
    List<DeltaColumnHandle> regularColumnHandles = deltaColumnHandles.stream()
            .filter(columnHandle -> columnHandle.getColumnType() != PARTITION)
            .collect(Collectors.toList());
    ConnectorPageSource dataPageSource = createParquetPageSource(
            hdfsEnvironment, session.getUser(), hdfsEnvironment.getConfiguration(hdfsContext, filePath),
            filePath, deltaSplit.getStart(), deltaSplit.getLength(), deltaSplit.getFileSize(),
            regularColumnHandles, deltaTableHandle.toSchemaTableName(),
            getParquetMaxReadBlockSize(session), isParquetBatchReadsEnabled(session), isParquetBatchReaderVerificationEnabled(session),
            typeManager, deltaTableLayoutHandle.getPredicate(), fileFormatDataSourceStats, false);
    return new DeltaPageSource(deltaColumnHandles, convertPartitionValues(deltaColumnHandles, deltaSplit.getPartitionValues()), dataPageSource);
}
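The returned DeltaPageSource is consumed by the engine through the plain ConnectorPageSource contract, the same methods exercised in the tests above. A minimal drain loop is sketched below; the provider variable and the call site are assumptions, and the real driver logic is more involved:

// Sketch: how a ConnectorPageSource produced by createPageSource is ultimately read and closed.
try (ConnectorPageSource source = provider.createPageSource(transactionHandle, session, split, layout, columns, splitContext)) {
    while (!source.isFinished()) {
        Page page = source.getNextPage();
        if (page != null) {
            // hand the page to downstream operators
        }
    }
}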
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class HivePageSourceProvider, method getPageSourceFromCursorProvider.
private static Optional<ConnectorPageSource> getPageSourceFromCursorProvider(
        Set<HiveRecordCursorProvider> cursorProviders,
        Configuration configuration, ConnectorSession session, Path path,
        long start, long length, long fileSize,
        Storage storage, TupleDomain<HiveColumnHandle> effectivePredicate,
        List<HiveColumnHandle> hiveColumns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager,
        SchemaTableName tableName, List<HiveColumnHandle> partitionKeyColumnHandles,
        List<Column> tableDataColumns, Map<String, String> tableParameters, int partitionDataColumnCount,
        TableToPartitionMapping tableToPartitionMapping, boolean s3SelectPushdownEnabled,
        RowExpression remainingPredicate, boolean isPushdownFilterEnabled, RowExpressionService rowExpressionService,
        Map<String, String> customSplitInfo, List<HiveColumnHandle> allColumns,
        List<ColumnMapping> columnMappings, Set<Integer> outputIndices,
        List<ColumnMapping> regularAndInterimColumnMappings, Optional<BucketAdaptation> bucketAdaptation) {
    if (!hiveColumns.isEmpty() && hiveColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " +
                "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() +
                ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<Column> partitionDataColumns = reconstructPartitionSchema(tableDataColumns, partitionDataColumnCount, tableToPartitionMapping.getPartitionSchemaDifference(), tableToPartitionMapping.getTableToPartitionColumns());
        Properties schema = getHiveSchema(
                storage, partitionDataColumns, tableDataColumns, tableParameters,
                tableName.getSchemaName(), tableName.getTableName(),
                partitionKeyColumnHandles.stream().map(column -> column.getName()).collect(toImmutableList()),
                partitionKeyColumnHandles.stream().map(column -> column.getHiveType()).collect(toImmutableList()));
        Optional<RecordCursor> cursor = provider.createRecordCursor(configuration, session, path, start, length, fileSize, schema, toColumnHandles(regularAndInterimColumnMappings, doCoercion), effectivePredicate, hiveStorageTimeZone, typeManager, s3SelectPushdownEnabled, customSplitInfo);
        if (cursor.isPresent()) {
            RecordCursor delegate = cursor.get();
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(bucketAdaptation.get().getBucketColumnIndices(), bucketAdaptation.get().getBucketColumnHiveTypes(), bucketAdaptation.get().getTableBucketCount(), bucketAdaptation.get().getPartitionBucketCount(), bucketAdaptation.get().getBucketToKeep(), typeManager, delegate);
            }
            // RcText and RcBinary need to be wrapped in a coercing cursor that handles mismatched columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, hiveStorageTimeZone, typeManager, delegate);
            List<Type> columnTypes = allColumns.stream().map(input -> typeManager.getType(input.getTypeSignature())).collect(toList());
            RecordPageSource recordPageSource = new RecordPageSource(columnTypes, hiveRecordCursor);
            if (isPushdownFilterEnabled) {
                return Optional.of(new FilteringPageSource(columnMappings, effectivePredicate, remainingPredicate, typeManager, rowExpressionService, session, outputIndices, recordPageSource));
            }
            return Optional.of(recordPageSource);
        }
    }
    return Optional.empty();
}
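Each provider returns an Optional<RecordCursor>, which RecordPageSource then adapts into Pages. For orientation, here is a hedged sketch of consuming a RecordCursor directly, assuming field 0 is a VARCHAR column; it is illustrative only and is not how HivePageSourceProvider uses the cursor:

// Sketch: row-by-row consumption of a RecordCursor, totalling the bytes of a VARCHAR column at field 0.
private static long countBytes(RecordCursor cursor) {
    long totalBytes = 0;
    while (cursor.advanceNextPosition()) {
        if (!cursor.isNull(0)) {
            totalBytes += cursor.getSlice(0).length();
        }
    }
    cursor.close();
    return totalBytes;
}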