Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class DeltaLakePageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorSplit connectorSplit,
        ConnectorTableHandle connectorTable,
        List<ColumnHandle> columns,
        DynamicFilter dynamicFilter)
{
    DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;

    // We reach here when we could not prune the split using file-level stats, the table predicate,
    // and the dynamic filter in the coordinator during split generation. The file-level stats in
    // DeltaLakeSplit#statisticsPredicate can still prune this split now that a more selective dynamic
    // filter may be available, without having to access the Parquet file footer for row-group stats.
    // We avoid sending the coordinator-side split predicate to workers by using
    // table.getNonPartitionConstraint() here.
    TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(
            table.getNonPartitionConstraint(),
            split.getStatisticsPredicate(),
            dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (filteredSplitPredicate.isNone()) {
        return new EmptyPageSource();
    }

    List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream()
            .map(DeltaLakeColumnHandle.class::cast)
            .collect(toImmutableList());
    Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream()
            .filter(column -> column.getColumnType() == REGULAR)
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream()
            .map(DeltaLakeColumnHandle::toHiveColumnHandle)
            .collect(toImmutableList());

    Path path = new Path(split.getPath());
    HdfsContext hdfsContext = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));

    if (table.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(
                table, deltaLakeColumns, partitionKeys,
                split.getPath(), split.getFileSize(), split.getFileModifiedTime(),
                session, executorService, hdfsEnvironment, hdfsContext,
                parquetDateTimeZone, parquetReaderOptions, parquetPredicate,
                typeManager, updateResultJsonCodec);
    }

    ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(
            path, split.getStart(), split.getLength(), split.getFileSize(),
            hiveColumnHandles, parquetPredicate, true,
            hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path),
            session.getIdentity(), parquetDateTimeZone, fileFormatDataSourceStats,
            parquetReaderOptions
                    .withMaxReadBlockSize(getParquetMaxReadBlockSize(session))
                    .withUseColumnIndex(isParquetUseColumnIndex(session)));
    verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");

    return new DeltaLakePageSource(
            deltaLakeColumns, partitionKeys, pageSource.get(),
            split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
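The early return on filteredSplitPredicate.isNone() is the point of re-evaluating the predicate on the worker: if the intersection of the table constraint, the split's file statistics, and the now-current dynamic filter is empty, the split is answered without any file I/O. A minimal, self-contained sketch of that intersection using the Trino SPI predicate types (the String column keys are a simplification for illustration; the real code keys on DeltaLakeColumnHandle):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;

import static io.trino.spi.type.BigintType.BIGINT;

public class SplitPruningSketch
{
    public static void main(String[] args)
    {
        // File-level statistics recorded for this split: column "x" lies in [0, 100]
        TupleDomain<String> statisticsPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                "x", Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 0L, true, 100L, true)), false)));

        // A dynamic filter that became available after split generation: x = 200
        TupleDomain<String> dynamicFilterPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                "x", Domain.singleValue(BIGINT, 200L)));

        // Same intersection the page source provider performs; an empty result means
        // the split can be answered with an EmptyPageSource, skipping all file access.
        TupleDomain<String> combined = TupleDomain.intersect(ImmutableList.of(statisticsPredicate, dynamicFilterPredicate));
        System.out.println(combined.isNone()); // true
    }
}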
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class AbstractFileFormat, method createPageSource.
static ConnectorPageSource createPageSource(
        HiveRecordCursorProvider cursorProvider,
        ConnectorSession session,
        File targetFile,
        List<String> columnNames,
        List<Type> columnTypes,
        HiveStorageFormat format)
{
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);

    Optional<ReaderRecordCursorWithProjections> recordCursorWithProjections = cursorProvider.createRecordCursor(
            conf, session, new Path(targetFile.getAbsolutePath()),
            0, targetFile.length(), targetFile.length(),
            createSchema(format, columnNames, columnTypes),
            readColumns, TupleDomain.all(), TESTING_TYPE_MANAGER, false);

    checkState(recordCursorWithProjections.isPresent(), "recordCursorWithProjections is not present");
    checkState(recordCursorWithProjections.get().getProjectedReaderColumns().isEmpty(), "projection should not be required");
    return new RecordPageSource(columnTypes, recordCursorWithProjections.get().getRecordCursor());
}
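The getBaseColumns helper is not shown on this page. A plausible sketch of its shape, consistent with the createBaseColumn factory used elsewhere on this page (positional column indexes, no projection, no comment), is:

import java.util.List;
import java.util.Optional;

import com.google.common.collect.ImmutableList;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.spi.type.Type;

import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
import static io.trino.plugin.hive.HiveType.toHiveType;

final class BaseColumnsSketch
{
    // Zip the parallel name/type lists into positional base-column handles.
    static List<HiveColumnHandle> getBaseColumns(List<String> columnNames, List<Type> columnTypes)
    {
        ImmutableList.Builder<HiveColumnHandle> columns = ImmutableList.builder();
        for (int i = 0; i < columnNames.size(); i++) {
            columns.add(createBaseColumn(
                    columnNames.get(i), i, toHiveType(columnTypes.get(i)), columnTypes.get(i), REGULAR, Optional.empty()));
        }
        return columns.build();
    }
}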
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class TestConnectorPushdownRulesWithHive, method testPushdownWithDuplicateExpressions.
@Test
public void testPushdownWithDuplicateExpressions()
{
    String tableName = "duplicate_expressions";
    tester().getQueryRunner().execute(format(
            "CREATE TABLE %s (struct_of_bigint, just_bigint) AS " +
                    "SELECT cast(row(5, 6) AS row(a bigint, b bigint)) AS struct_of_bigint, cast(5 AS bigint) AS just_bigint WHERE false",
            tableName));

    PushProjectionIntoTableScan pushProjectionIntoTableScan = new PushProjectionIntoTableScan(
            tester().getPlannerContext(),
            tester().getTypeAnalyzer(),
            new ScalarStatsCalculator(tester().getPlannerContext(), tester().getTypeAnalyzer()));

    HiveTableHandle hiveTable = new HiveTableHandle(SCHEMA_NAME, tableName, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    TableHandle table = new TableHandle(new CatalogName(HIVE_CATALOG_NAME), hiveTable, new HiveTransactionHandle(false));

    HiveColumnHandle bigintColumn = createBaseColumn("just_bigint", 1, toHiveType(BIGINT), BIGINT, REGULAR, Optional.empty());
    HiveColumnHandle partialColumn = new HiveColumnHandle(
            "struct_of_bigint",
            0,
            toHiveType(ROW_TYPE),
            ROW_TYPE,
            Optional.of(new HiveColumnProjectionInfo(ImmutableList.of(0), ImmutableList.of("a"), toHiveType(BIGINT), BIGINT)),
            REGULAR,
            Optional.empty());

    // Test projection pushdown with duplicate column references
    tester().assertThat(pushProjectionIntoTableScan)
            .on(p -> {
                SymbolReference column = p.symbol("just_bigint", BIGINT).toSymbolReference();
                Expression negation = new ArithmeticUnaryExpression(MINUS, column);
                // The column reference is part of both assignments
                return p.project(
                        Assignments.of(
                                p.symbol("column_ref", BIGINT), column,
                                p.symbol("negated_column_ref", BIGINT), negation),
                        p.tableScan(
                                table,
                                ImmutableList.of(p.symbol("just_bigint", BIGINT)),
                                ImmutableMap.of(p.symbol("just_bigint", BIGINT), bigintColumn)));
            })
            .matches(project(
                    ImmutableMap.of(
                            "column_ref", expression("just_bigint_0"),
                            "negated_column_ref", expression("- just_bigint_0")),
                    tableScan(
                            hiveTable.withProjectedColumns(ImmutableSet.of(bigintColumn))::equals,
                            TupleDomain.all(),
                            ImmutableMap.of("just_bigint_0", bigintColumn::equals))));

    // Test dereference pushdown
    tester().assertThat(pushProjectionIntoTableScan)
            .on(p -> {
                SubscriptExpression subscript = new SubscriptExpression(
                        p.symbol("struct_of_bigint", ROW_TYPE).toSymbolReference(),
                        new LongLiteral("1"));
                Expression sum = new ArithmeticBinaryExpression(ADD, subscript, new LongLiteral("2"));
                // The subscript expression instance is part of both assignments
                return p.project(
                        Assignments.of(
                                p.symbol("expr_deref", BIGINT), subscript,
                                p.symbol("expr_deref_2", BIGINT), sum),
                        p.tableScan(
                                table,
                                ImmutableList.of(p.symbol("struct_of_bigint", ROW_TYPE)),
                                ImmutableMap.of(p.symbol("struct_of_bigint", ROW_TYPE), partialColumn.getBaseColumn())));
            })
            .matches(project(
                    ImmutableMap.of(
                            "expr_deref", expression(new SymbolReference("struct_of_bigint#a")),
                            "expr_deref_2", expression(new ArithmeticBinaryExpression(ADD, new SymbolReference("struct_of_bigint#a"), new LongLiteral("2")))),
                    tableScan(
                            hiveTable.withProjectedColumns(ImmutableSet.of(partialColumn))::equals,
                            TupleDomain.all(),
                            ImmutableMap.of("struct_of_bigint#a", partialColumn::equals))));

    metastore.dropTable(SCHEMA_NAME, tableName, true);
}
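The symbol name struct_of_bigint#a in the matchers comes from how a projected HiveColumnHandle derives its name from the base column name plus the dereferenced field path. A hedged sketch of that relationship, assuming (as the matcher symbols suggest) that getName() and getType() reflect the projection when one is present:

// Using the handles built in the test above
HiveColumnHandle base = partialColumn.getBaseColumn();

// The projected handle names itself after the dereferenced field, which is why the
// plan matcher binds the symbol "struct_of_bigint#a" to partialColumn, while the
// table scan assignments are keyed on the full struct column.
System.out.println(partialColumn.getName()); // struct_of_bigint#a (base name + "#" + field path)
System.out.println(base.getName());          // struct_of_bigint
System.out.println(partialColumn.getType()); // bigint (the projected field's type, not the row type)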
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class TestConnectorPushdownRulesWithHive, method testProjectionPushdown.
@Test
public void testProjectionPushdown()
{
    String tableName = "projection_test";
    PushProjectionIntoTableScan pushProjectionIntoTableScan = new PushProjectionIntoTableScan(
            tester().getPlannerContext(),
            tester().getTypeAnalyzer(),
            new ScalarStatsCalculator(tester().getPlannerContext(), tester().getTypeAnalyzer()));
    tester().getQueryRunner().execute(format(
            "CREATE TABLE %s (struct_of_int) AS " +
                    "SELECT cast(row(5, 6) AS row(a bigint, b bigint)) AS struct_of_int WHERE false",
            tableName));

    Type baseType = ROW_TYPE;
    HiveColumnHandle partialColumn = new HiveColumnHandle(
            "struct_of_int",
            0,
            toHiveType(baseType),
            baseType,
            Optional.of(new HiveColumnProjectionInfo(ImmutableList.of(0), ImmutableList.of("a"), toHiveType(BIGINT), BIGINT)),
            REGULAR,
            Optional.empty());

    HiveTableHandle hiveTable = new HiveTableHandle(SCHEMA_NAME, tableName, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    TableHandle table = new TableHandle(new CatalogName(HIVE_CATALOG_NAME), hiveTable, new HiveTransactionHandle(false));
    HiveColumnHandle fullColumn = partialColumn.getBaseColumn();

    // Test projected-columns pushdown to HiveTableHandle in case of full column references
    tester().assertThat(pushProjectionIntoTableScan)
            .on(p -> p.project(
                    Assignments.of(p.symbol("struct_of_int", baseType), p.symbol("struct_of_int", baseType).toSymbolReference()),
                    p.tableScan(
                            table,
                            ImmutableList.of(p.symbol("struct_of_int", baseType)),
                            ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn))))
            .matches(project(
                    ImmutableMap.of("expr", expression("col")),
                    tableScan(
                            hiveTable.withProjectedColumns(ImmutableSet.of(fullColumn))::equals,
                            TupleDomain.all(),
                            ImmutableMap.of("col", fullColumn::equals))));

    // The rule should return Optional.empty after projected ColumnHandles have been added to the HiveTableHandle
    tester().assertThat(pushProjectionIntoTableScan)
            .on(p -> p.project(
                    Assignments.of(p.symbol("struct_of_int", baseType), p.symbol("struct_of_int", baseType).toSymbolReference()),
                    p.tableScan(
                            new TableHandle(
                                    new CatalogName(HIVE_CATALOG_NAME),
                                    hiveTable.withProjectedColumns(ImmutableSet.of(fullColumn)),
                                    new HiveTransactionHandle(false)),
                            ImmutableList.of(p.symbol("struct_of_int", baseType)),
                            ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn))))
            .doesNotFire();

    // Test dereference pushdown
    tester().assertThat(pushProjectionIntoTableScan)
            .on(p -> p.project(
                    Assignments.of(
                            p.symbol("expr_deref", BIGINT),
                            new SubscriptExpression(p.symbol("struct_of_int", baseType).toSymbolReference(), new LongLiteral("1"))),
                    p.tableScan(
                            table,
                            ImmutableList.of(p.symbol("struct_of_int", baseType)),
                            ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn))))
            .matches(project(
                    ImmutableMap.of("expr_deref", expression(new SymbolReference("struct_of_int#a"))),
                    tableScan(
                            hiveTable.withProjectedColumns(ImmutableSet.of(partialColumn))::equals,
                            TupleDomain.all(),
                            ImmutableMap.of("struct_of_int#a", partialColumn::equals))));

    metastore.dropTable(SCHEMA_NAME, tableName, true);
}
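The doesNotFire() case works because withProjectedColumns records the projection on the handle itself, so a second application of the rule finds nothing new to push down. A small sketch of why that re-application is a no-op, assuming (as the ::equals matchers in the test imply) that HiveTableHandle equality covers the projected column set:

// Using the handles built in the test above
HiveTableHandle projected = hiveTable.withProjectedColumns(ImmutableSet.of(fullColumn));
HiveTableHandle projectedAgain = projected.withProjectedColumns(ImmutableSet.of(fullColumn));

// Recording the same projection twice yields an equal handle, which mirrors why
// PushProjectionIntoTableScan returns Optional.empty() on the second attempt.
System.out.println(projected.equals(projectedAgain)); // true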