Use of com.facebook.presto.hive.FileFormatDataSourceStats in project presto by prestodb.
In class DeltaPageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableLayoutHandle layout, List<ColumnHandle> columns, SplitContext splitContext)
{
    DeltaSplit deltaSplit = (DeltaSplit) split;
    DeltaTableLayoutHandle deltaTableLayoutHandle = (DeltaTableLayoutHandle) layout;
    DeltaTableHandle deltaTableHandle = deltaTableLayoutHandle.getTable();
    HdfsContext hdfsContext = new HdfsContext(session, deltaSplit.getSchema(), deltaSplit.getTable(), deltaSplit.getFilePath(), false);
    Path filePath = new Path(deltaSplit.getFilePath());

    // Narrow the generic handles to Delta-specific handles and drop partition columns:
    // partition values are supplied by the split, not read from the Parquet data file.
    List<DeltaColumnHandle> deltaColumnHandles = columns.stream().map(DeltaColumnHandle.class::cast).collect(Collectors.toList());
    List<DeltaColumnHandle> regularColumnHandles = deltaColumnHandles.stream().filter(columnHandle -> columnHandle.getColumnType() != PARTITION).collect(Collectors.toList());

    // Read the regular columns from the underlying Parquet file; fileFormatDataSourceStats collects read metrics.
    ConnectorPageSource dataPageSource = createParquetPageSource(hdfsEnvironment, session.getUser(), hdfsEnvironment.getConfiguration(hdfsContext, filePath), filePath, deltaSplit.getStart(), deltaSplit.getLength(), deltaSplit.getFileSize(), regularColumnHandles, deltaTableHandle.toSchemaTableName(), getParquetMaxReadBlockSize(session), isParquetBatchReadsEnabled(session), isParquetBatchReaderVerificationEnabled(session), typeManager, deltaTableLayoutHandle.getPredicate(), fileFormatDataSourceStats, false);

    // Wrap the data page source so partition columns are filled in from the split's partition values.
    return new DeltaPageSource(deltaColumnHandles, convertPartitionValues(deltaColumnHandles, deltaSplit.getPartitionValues()), dataPageSource);
}
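The fileFormatDataSourceStats field referenced above is a dependency of the provider rather than something created per split. A minimal sketch of how that wiring could look, assuming Guice-style constructor injection and a reduced constructor (the real DeltaPageSourceProvider constructor takes additional dependencies):

import javax.inject.Inject;

import static java.util.Objects.requireNonNull;

public class DeltaPageSourceProvider
        implements ConnectorPageSourceProvider
{
    private final HdfsEnvironment hdfsEnvironment;
    private final TypeManager typeManager;
    private final FileFormatDataSourceStats fileFormatDataSourceStats;

    // Reduced constructor for illustration; the actual class takes more dependencies.
    @Inject
    public DeltaPageSourceProvider(HdfsEnvironment hdfsEnvironment, TypeManager typeManager, FileFormatDataSourceStats fileFormatDataSourceStats)
    {
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null");
    }

    // createPageSource (shown above) forwards fileFormatDataSourceStats to createParquetPageSource,
    // which uses it to record read metrics for each Parquet data source it opens.
}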
Use of com.facebook.presto.hive.FileFormatDataSourceStats in project presto by prestodb.
In class ParquetTester, method testSingleRead:
public static void testSingleRead(Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes, ParquetMetadataSource parquetMetadataSource, File dataFile)
{
    // Force small Parquet read blocks so even a single read exercises multiple batches.
    HiveClientConfig config = new HiveClientConfig()
            .setHiveStorageFormat(HiveStorageFormat.PARQUET)
            .setUseParquetColumnNames(false)
            .setParquetMaxReadBlockSize(new DataSize(1_000, DataSize.Unit.BYTE));
    ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());

    // The page source factory is built with a fresh FileFormatDataSourceStats to record read metrics.
    HiveBatchPageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, new FileFormatDataSourceStats(), parquetMetadataSource);
    ConnectorPageSource connectorPageSource = createPageSource(pageSourceFactory, session, dataFile, columnNames, columnTypes, HiveStorageFormat.PARQUET);

    // Compare what was read against the expected values, column by column.
    Iterator<?>[] expectedValues = stream(readValues).map(Iterable::iterator).toArray(size -> new Iterator<?>[size]);
    if (connectorPageSource instanceof RecordPageSource) {
        assertRecordCursor(columnTypes, expectedValues, ((RecordPageSource) connectorPageSource).getCursor());
    }
    else {
        assertPageSource(columnTypes, expectedValues, connectorPageSource);
    }
    // After reading, not every expected iterator may still have remaining elements.
    assertFalse(stream(expectedValues).allMatch(Iterator::hasNext));
}
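A hypothetical invocation of testSingleRead for a single BIGINT column; the file path, column name, sample values, and the use of MetadataReader as the ParquetMetadataSource are assumptions for illustration, not part of the original snippet:

// Assumes dataFile was written elsewhere and contains one BIGINT column named "id"
// with the values 1, 2, 3; MetadataReader is assumed to be usable here as a
// no-argument ParquetMetadataSource implementation.
File dataFile = new File("/tmp/test-data.parquet");
Iterable<?>[] readValues = new Iterable<?>[] {ImmutableList.of(1L, 2L, 3L)};
ParquetTester.testSingleRead(
        readValues,
        ImmutableList.of("id"),
        ImmutableList.of(BIGINT),
        new MetadataReader(),
        dataFile);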