Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
From the class DecorrelateInnerUnnestWithGlobalAggregation, the apply method:
@Override
public Result apply(CorrelatedJoinNode correlatedJoinNode, Captures captures, Context context) {
// find global aggregation in subquery
List<PlanNode> globalAggregations = PlanNodeSearcher.searchFrom(correlatedJoinNode.getSubquery(), context.getLookup())
        .where(DecorrelateInnerUnnestWithGlobalAggregation::isGlobalAggregation)
        .recurseOnlyWhen(node -> node instanceof ProjectNode || isGlobalAggregation(node))
        .findAll();
if (globalAggregations.isEmpty()) {
return Result.empty();
}
// if there are multiple global aggregations, the one that is closest to the source is the "reducing" aggregation, because it reduces multiple input rows to single output row
AggregationNode reducingAggregation = (AggregationNode) globalAggregations.get(globalAggregations.size() - 1);
// find unnest in subquery
Optional<UnnestNode> subqueryUnnest = PlanNodeSearcher.searchFrom(reducingAggregation.getSource(), context.getLookup())
        .where(node -> isSupportedUnnest(node, correlatedJoinNode.getCorrelation(), context.getLookup()))
        .recurseOnlyWhen(node -> node instanceof ProjectNode || isGroupedAggregation(node))
        .findFirst();
if (subqueryUnnest.isEmpty()) {
return Result.empty();
}
UnnestNode unnestNode = subqueryUnnest.get();
// assign unique id to input rows to restore semantics of aggregations after rewrite
PlanNode input = new AssignUniqueId(context.getIdAllocator().getNextId(), correlatedJoinNode.getInput(), context.getSymbolAllocator().newSymbol("unique", BIGINT));
// pre-project unnest symbols if they were pre-projected in subquery
// The correlated UnnestNode either unnests correlation symbols directly, or unnests symbols produced by a projection that uses only correlation symbols.
// Here, any underlying projection that was a source of the correlated UnnestNode, is appended as a source of the rewritten UnnestNode.
// If the projection is not necessary for UnnestNode (i.e. it does not produce any unnest symbols), it should be pruned afterwards.
PlanNode unnestSource = context.getLookup().resolve(unnestNode.getSource());
if (unnestSource instanceof ProjectNode) {
ProjectNode sourceProjection = (ProjectNode) unnestSource;
input = new ProjectNode(sourceProjection.getId(), input, Assignments.builder().putIdentities(input.getOutputSymbols()).putAll(sourceProjection.getAssignments()).build());
}
// rewrite correlated join to UnnestNode
Symbol ordinalitySymbol = unnestNode.getOrdinalitySymbol().orElseGet(() -> context.getSymbolAllocator().newSymbol("ordinality", BIGINT));
UnnestNode rewrittenUnnest = new UnnestNode(context.getIdAllocator().getNextId(), input, input.getOutputSymbols(), unnestNode.getMappings(), Optional.of(ordinalitySymbol), LEFT, Optional.empty());
// append mask symbol based on ordinality to distinguish between the unnested rows and synthetic null rows
Symbol mask = context.getSymbolAllocator().newSymbol("mask", BOOLEAN);
ProjectNode sourceWithMask = new ProjectNode(
        context.getIdAllocator().getNextId(),
        rewrittenUnnest,
        Assignments.builder()
                .putIdentities(rewrittenUnnest.getOutputSymbols())
                .put(mask, new IsNotNullPredicate(ordinalitySymbol.toSymbolReference()))
                .build());
// restore all projections, grouped aggregations and global aggregations from the subquery
PlanNode result = rewriteNodeSequence(
        context.getLookup().resolve(correlatedJoinNode.getSubquery()),
        input.getOutputSymbols(),
        mask,
        sourceWithMask,
        reducingAggregation.getId(),
        unnestNode.getId(),
        context.getSymbolAllocator(),
        context.getIdAllocator(),
        context.getLookup());
// restrict outputs
return Result.ofPlanNode(restrictOutputs(context.getIdAllocator(), result, ImmutableSet.copyOf(correlatedJoinNode.getOutputSymbols())).orElse(result));
}
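The searcher above relies on two predicates, isGlobalAggregation and isGroupedAggregation, whose bodies are not part of this excerpt. A minimal sketch of what they plausibly check, inferred from how they are used here (the actual Trino implementations may test additional properties, such as the aggregation step):
private static boolean isGlobalAggregation(PlanNode node)
{
    // assumption: "global" means the aggregation has no grouping keys, so it reduces all input rows to one
    return node instanceof AggregationNode
            && ((AggregationNode) node).getGroupingKeys().isEmpty();
}
private static boolean isGroupedAggregation(PlanNode node)
{
    // assumption: the grouped case is the complement, with at least one grouping key
    return node instanceof AggregationNode
            && !((AggregationNode) node).getGroupingKeys().isEmpty();
}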
Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
From the class OrcPageSourceFactory, the createOrcPageSource method:
private ConnectorPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long estimatedFileSize,
        List<HiveColumnHandle> columns,
        List<HiveColumnHandle> projections,
        boolean useOrcColumnNames,
        boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone legacyFileTimeZone,
        OrcReaderOptions options,
        Optional<AcidInfo> acidInfo,
        OptionalInt bucketNumber,
        boolean originalFile,
        AcidTransaction transaction,
        FileFormatDataSourceStats stats) {
for (HiveColumnHandle column : columns) {
checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
}
checkArgument(!effectivePredicate.isNone());
OrcDataSource orcDataSource;
boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
} catch (Exception e) {
if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
}
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
}
AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
try {
Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
if (optionalOrcReader.isEmpty()) {
return new EmptyPageSource();
}
OrcReader reader = optionalOrcReader.get();
if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
validateOrcAcidVersion(path, reader);
}
List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
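// Full ACID files carry synthetic metadata columns next to the user data; three of them
// (originalTransaction, bucket, rowId) are read below, hence the three extra slots.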
int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
if (isFullAcid && !originalFilesPresent) {
verifyAcidSchema(reader, path);
Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
fileReadTypes.add(BIGINT);
fileReadLayouts.add(fullyProjectedLayout());
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
fileReadTypes.add(INTEGER);
fileReadLayouts.add(fullyProjectedLayout());
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
fileReadTypes.add(BIGINT);
fileReadLayouts.add(fullyProjectedLayout());
}
Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
if (useOrcColumnNames || isFullAcid) {
verifyFileHasColumnNames(fileColumns, path);
// Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
}
Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
if (useOrcColumnNames || isFullAcid) {
projectionsByColumnName = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseColumnName, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
} else {
projectionsByColumnIndex = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseHiveColumnIndex, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
}
TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled()).setDomainCompactionThreshold(domainCompactionThreshold);
Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
for (HiveColumnHandle column : columns) {
OrcColumn orcColumn = null;
OrcReader.ProjectedLayout projectedLayout = null;
Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
if (useOrcColumnNames || isFullAcid) {
String columnName = column.getName().toLowerCase(ENGLISH);
orcColumn = fileColumnsByName.get(columnName);
if (orcColumn != null) {
projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
columnDomains = effectivePredicateDomains.entrySet().stream()
        .filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName))
        .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
}
} else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
if (orcColumn != null) {
projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
columnDomains = effectivePredicateDomains.entrySet().stream()
        .filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex())
        .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
}
}
Type readType = column.getType();
if (orcColumn != null) {
int sourceIndex = fileReadColumns.size();
columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
fileReadColumns.add(orcColumn);
fileReadTypes.add(readType);
fileReadLayouts.add(projectedLayout);
// Add predicates on top-level and nested columns
for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
if (nestedColumn != null) {
predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
}
}
} else {
columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
}
}
OrcRecordReader recordReader = reader.createRecordReader(
        fileReadColumns,
        fileReadTypes,
        fileReadLayouts,
        predicateBuilder.build(),
        start,
        length,
        legacyFileTimeZone,
        memoryUsage,
        INITIAL_BATCH_SIZE,
        exception -> handleException(orcDataSource.getId(), exception),
        NameBasedFieldMapper::create);
Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(
        path.getName(),
        new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats),
        identity,
        configuration,
        hdfsEnvironment,
        info,
        bucketNumber,
        memoryUsage));
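// For original (pre-ACID) files, row IDs are synthesized: this split's IDs start after the
// combined row count of the original files that precede it, which getPrecedingRowCount computes below.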
Optional<Long> originalFileRowId = acidInfo.filter(OrcPageSourceFactory::hasOriginalFiles)
        .map(info -> OriginalFilesUtils.getPrecedingRowCount(acidInfo.get().getOriginalFiles(), path, hdfsEnvironment, identity, options, configuration, stats));
if (transaction.isDelete()) {
if (originalFile) {
int bucket = bucketNumber.orElse(0);
long startingRowId = originalFileRowId.orElse(0L);
columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
} else {
columnAdaptations.add(ColumnAdaptation.rowIdColumn());
}
} else if (transaction.isUpdate()) {
HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
if (originalFile) {
int bucket = bucketNumber.orElse(0);
long startingRowId = originalFileRowId.orElse(0L);
columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
} else {
columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
}
}
return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
} catch (Exception e) {
try {
orcDataSource.close();
} catch (IOException ignored) {
}
if (e instanceof TrinoException) {
throw (TrinoException) e;
}
String message = splitError(e, path, start, length);
if (e instanceof BlockMissingException) {
throw new TrinoException(HIVE_MISSING_DATA, message, e);
}
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
}
}
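Both catch blocks wrap low-level failures with splitError, a helper that is not shown in this excerpt. A hedged sketch of what such a helper might look like, based only on how it is called here (path, start, and length identify the split; the real message format in Trino may differ):
private static String splitError(Throwable t, Path path, long start, long length)
{
    // identify the failing file region (the split) alongside the root cause
    return String.format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, t.getMessage());
}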
Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
From the class AbstractTestHive, the testApplyProjection method:
@Test
public void testApplyProjection() throws Exception {
ColumnMetadata bigIntColumn0 = new ColumnMetadata("int0", BIGINT);
ColumnMetadata bigIntColumn1 = new ColumnMetadata("int1", BIGINT);
RowType oneLevelRowType = toRowType(ImmutableList.of(bigIntColumn0, bigIntColumn1));
ColumnMetadata oneLevelRow0 = new ColumnMetadata("onelevelrow0", oneLevelRowType);
RowType twoLevelRowType = toRowType(ImmutableList.of(oneLevelRow0, bigIntColumn0, bigIntColumn1));
ColumnMetadata twoLevelRow0 = new ColumnMetadata("twolevelrow0", twoLevelRowType);
List<ColumnMetadata> columnsForApplyProjectionTest = ImmutableList.of(bigIntColumn0, bigIntColumn1, oneLevelRow0, twoLevelRow0);
SchemaTableName tableName = temporaryTable("apply_projection_tester");
doCreateEmptyTable(tableName, ORC, columnsForApplyProjectionTest);
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
assertEquals(columnHandles.size(), columnsForApplyProjectionTest.size());
Map<String, ColumnHandle> columnHandleMap = columnHandles.stream().collect(toImmutableMap(handle -> ((HiveColumnHandle) handle).getBaseColumnName(), Function.identity()));
// Emulate symbols coming from the query plan and map them to column handles
Map<String, ColumnHandle> columnHandlesWithSymbols = ImmutableMap.of(
        "symbol_0", columnHandleMap.get("int0"),
        "symbol_1", columnHandleMap.get("int1"),
        "symbol_2", columnHandleMap.get("onelevelrow0"),
        "symbol_3", columnHandleMap.get("twolevelrow0"));
// Create variables for the emulated symbols
Map<String, Variable> symbolVariableMapping = columnHandlesWithSymbols.entrySet().stream()
        .collect(toImmutableMap(
                Map.Entry::getKey,
                e -> new Variable(e.getKey(), ((HiveColumnHandle) e.getValue()).getBaseType())));
// Create dereference expressions for testing
FieldDereference symbol2Field0 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_2"), 0);
FieldDereference symbol3Field0 = new FieldDereference(oneLevelRowType, symbolVariableMapping.get("symbol_3"), 0);
FieldDereference symbol3Field0Field0 = new FieldDereference(BIGINT, symbol3Field0, 0);
FieldDereference symbol3Field1 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_3"), 1);
Map<String, ColumnHandle> inputAssignments;
List<ConnectorExpression> inputProjections;
Optional<ProjectionApplicationResult<ConnectorTableHandle>> projectionResult;
List<ConnectorExpression> expectedProjections;
Map<String, Type> expectedAssignments;
// Test projected columns pushdown to HiveTableHandle in case of all variable references
inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_0", "symbol_1"));
inputProjections = ImmutableList.of(symbolVariableMapping.get("symbol_0"), symbolVariableMapping.get("symbol_1"));
expectedAssignments = ImmutableMap.of("symbol_0", BIGINT, "symbol_1", BIGINT);
projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);
// Empty result when projected column handles are same as those present in table handle
projectionResult = metadata.applyProjection(session, projectionResult.get().getHandle(), inputProjections, inputAssignments);
assertProjectionResult(projectionResult, true, ImmutableList.of(), ImmutableMap.of());
// Extra columns handles in HiveTableHandle should get pruned
projectionResult = metadata.applyProjection(session, ((HiveTableHandle) tableHandle).withProjectedColumns(ImmutableSet.copyOf(columnHandles)), inputProjections, inputAssignments);
assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);
// Test projection pushdown for dereferences
inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2", "symbol_3"));
inputProjections = ImmutableList.of(symbol2Field0, symbol3Field0Field0, symbol3Field1);
expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "twolevelrow0#f_onelevelrow0#f_int0", BIGINT, "twolevelrow0#f_int0", BIGINT);
expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("twolevelrow0#f_onelevelrow0#f_int0", BIGINT), new Variable("twolevelrow0#f_int0", BIGINT));
projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
// Test reuse of virtual column handles
// Round-1: input projections [symbol_2, symbol_2.int0]. virtual handle is created for symbol_2.int0.
inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"));
inputProjections = ImmutableList.of(symbol2Field0, symbolVariableMapping.get("symbol_2"));
projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), symbolVariableMapping.get("symbol_2"));
expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "symbol_2", oneLevelRowType);
assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
// Round-2: input projections [symbol_2.int0 and onelevelrow0#f_int0]. Virtual handle is reused.
Assignment newlyCreatedColumn = getOnlyElement(projectionResult.get().getAssignments().stream()
        .filter(handle -> handle.getVariable().equals("onelevelrow0#f_int0"))
        .collect(toList()));
inputAssignments = ImmutableMap.<String, ColumnHandle>builder()
        .putAll(getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2")))
        .put(newlyCreatedColumn.getVariable(), newlyCreatedColumn.getColumn())
        .buildOrThrow();
inputProjections = ImmutableList.of(symbol2Field0, new Variable("onelevelrow0#f_int0", BIGINT));
projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("onelevelrow0#f_int0", BIGINT));
expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT);
assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
} finally {
dropTable(tableName);
}
}
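The test uses a few helpers (toRowType, getColumnHandlesFor, assertProjectionResult) that are defined elsewhere in AbstractTestHive. As an illustration, a sketch of what getColumnHandlesFor presumably does given how it is called, namely restricting the symbol-to-handle map to the requested symbols (the actual helper may be written differently):
private static Map<String, ColumnHandle> getColumnHandlesFor(Map<String, ColumnHandle> columnHandles, List<String> symbols)
{
    // keep only the entries whose symbol name is in the requested list
    return columnHandles.entrySet().stream()
            .filter(entry -> symbols.contains(entry.getKey()))
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
}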
Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
From the class TestLogicalPlanner, the testOffset method:
@Test
public void testOffset() {
assertPlan("SELECT name FROM nation OFFSET 2 ROWS", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name")), filter("row_num > BIGINT '2'", rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()), any(tableScan("nation", ImmutableMap.of("name", "name")))).withAlias("row_num", new RowNumberSymbolMatcher())))));
assertPlan("SELECT name FROM nation ORDER BY regionkey OFFSET 2 ROWS", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name")), filter("row_num > BIGINT '2'", rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()), anyTree(sort(ImmutableList.of(sort("regionkey", ASCENDING, LAST)), any(tableScan("nation", ImmutableMap.of("name", "name", "regionkey", "regionkey")))))).withAlias("row_num", new RowNumberSymbolMatcher())))));
assertPlan("SELECT name FROM nation ORDER BY regionkey OFFSET 2 ROWS FETCH NEXT 5 ROWS ONLY", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name")), filter("row_num > BIGINT '2'", rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()), any(topN(7, ImmutableList.of(sort("regionkey", ASCENDING, LAST)), TopNNode.Step.FINAL, anyTree(tableScan("nation", ImmutableMap.of("name", "name", "regionkey", "regionkey")))))).withAlias("row_num", new RowNumberSymbolMatcher())))));
assertPlan("SELECT name FROM nation OFFSET 2 ROWS FETCH NEXT 5 ROWS ONLY", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name")), filter("row_num > BIGINT '2'", rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()), limit(7, any(tableScan("nation", ImmutableMap.of("name", "name"))))).withAlias("row_num", new RowNumberSymbolMatcher())))));
}
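In the last two assertions the TopN and Limit nodes request 7 rows because the engine must keep OFFSET + FETCH rows (2 + 5 = 7) before the row_num > 2 filter discards the skipped prefix. Assuming the planner keeps the same shape, one more case could be asserted in the same style (this assertion is illustrative and not part of the original test):
assertPlan("SELECT name FROM nation OFFSET 3 ROWS FETCH NEXT 4 ROWS ONLY",
        any(strictProject(
                ImmutableMap.of("name", new ExpressionMatcher("name")),
                filter("row_num > BIGINT '3'",
                        rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()),
                                // 3 skipped + 4 returned = 7 rows needed from the Limit node
                                limit(7, any(tableScan("nation", ImmutableMap.of("name", "name")))))
                                .withAlias("row_num", new RowNumberSymbolMatcher())))));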
Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
From the class TestPushPredicateIntoTableScan, the doesNotFireOnNonDeterministicPredicate method:
@Test
public void doesNotFireOnNonDeterministicPredicate() {
ColumnHandle columnHandle = new TpchColumnHandle("nationkey", BIGINT);
tester().assertThat(pushPredicateIntoTableScan)
        .on(p -> p.filter(
                new ComparisonExpression(
                        EQUAL,
                        functionResolution.functionCallBuilder(QualifiedName.of("rand")).build(),
                        new LongLiteral("42")),
                p.tableScan(
                        nationTableHandle,
                        ImmutableList.of(p.symbol("nationkey", BIGINT)),
                        ImmutableMap.of(p.symbol("nationkey", BIGINT), columnHandle),
                        TupleDomain.all())))
        .doesNotFire();
}
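The rule is expected to reject non-deterministic predicates such as rand() = 42, since pushing them into the scan could change how many times, and over which rows, the expression is evaluated. A hedged sketch of such a guard (the helper name pushIntoScan, the metadata field, and the exact place where Trino performs this check are assumptions, not the rule's actual code):
private Result applyIfDeterministic(FilterNode filter, TableScanNode scan, Context context)
{
    // illustrative only: DeterminismEvaluator usage and pushIntoScan are assumptions
    if (!DeterminismEvaluator.isDeterministic(filter.getPredicate(), metadata)) {
        // rand() = 42 lands here, so the test's doesNotFire() assertion holds
        return Result.empty();
    }
    return pushIntoScan(filter, scan, context); // hypothetical helper
}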