Use of java.util.Optional in project presto by prestodb.
Class QueryPlanner, method aggregate.
private PlanBuilder aggregate(PlanBuilder subPlan, QuerySpecification node) {
List<List<Expression>> groupingSets = analysis.getGroupingSets(node);
if (groupingSets.isEmpty()) {
return subPlan;
}
// 1. Pre-project all scalar inputs (arguments and non-trivial group by expressions)
Set<Expression> distinctGroupingColumns = groupingSets.stream().flatMap(Collection::stream).collect(toImmutableSet());
ImmutableList.Builder<Expression> arguments = ImmutableList.builder();
analysis.getAggregates(node).stream().map(FunctionCall::getArguments).flatMap(List::stream).forEach(arguments::add);
// filter expressions need to be projected first
analysis.getAggregates(node).stream().map(FunctionCall::getFilter).filter(Optional::isPresent).map(Optional::get).forEach(arguments::add);
Iterable<Expression> inputs = Iterables.concat(distinctGroupingColumns, arguments.build());
subPlan = handleSubqueries(subPlan, node, inputs);
if (!Iterables.isEmpty(inputs)) {
// avoid an empty projection if the only aggregation is COUNT (which has no arguments)
subPlan = project(subPlan, inputs);
}
// 2. Aggregate
// 2.a. Rewrite aggregate arguments
TranslationMap argumentTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
ImmutableMap.Builder<Symbol, Symbol> argumentMappingBuilder = ImmutableMap.builder();
for (Expression argument : arguments.build()) {
Expression parametersReplaced = ExpressionTreeRewriter.rewriteWith(new ParameterRewriter(analysis.getParameters(), analysis), argument);
argumentTranslations.addIntermediateMapping(argument, parametersReplaced);
Symbol input = subPlan.translate(parametersReplaced);
if (!argumentTranslations.containsSymbol(parametersReplaced)) {
Symbol output = symbolAllocator.newSymbol(parametersReplaced, analysis.getTypeWithCoercions(parametersReplaced), "arg");
argumentMappingBuilder.put(output, input);
argumentTranslations.put(parametersReplaced, output);
}
}
Map<Symbol, Symbol> argumentMappings = argumentMappingBuilder.build();
// 2.b. Rewrite grouping columns
TranslationMap groupingTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
Map<Symbol, Symbol> groupingSetMappings = new HashMap<>();
List<List<Symbol>> groupingSymbols = new ArrayList<>();
for (List<Expression> groupingSet : groupingSets) {
ImmutableList.Builder<Symbol> symbols = ImmutableList.builder();
for (Expression expression : groupingSet) {
Expression parametersReplaced = ExpressionTreeRewriter.rewriteWith(new ParameterRewriter(analysis.getParameters(), analysis), expression);
groupingTranslations.addIntermediateMapping(expression, parametersReplaced);
Symbol input = subPlan.translate(expression);
Symbol output;
if (!groupingTranslations.containsSymbol(parametersReplaced)) {
output = symbolAllocator.newSymbol(parametersReplaced, analysis.getTypeWithCoercions(expression), "gid");
groupingTranslations.put(parametersReplaced, output);
} else {
output = groupingTranslations.get(parametersReplaced);
}
groupingSetMappings.put(output, input);
symbols.add(output);
}
groupingSymbols.add(symbols.build());
}
// 2.c. Generate GroupIdNode (multiple grouping sets) or ProjectNode (single grouping set)
Optional<Symbol> groupIdSymbol = Optional.empty();
if (groupingSets.size() > 1) {
groupIdSymbol = Optional.of(symbolAllocator.newSymbol("groupId", BIGINT));
GroupIdNode groupId = new GroupIdNode(idAllocator.getNextId(), subPlan.getRoot(), groupingSymbols, groupingSetMappings, argumentMappings, groupIdSymbol.get());
subPlan = new PlanBuilder(groupingTranslations, groupId, analysis.getParameters());
} else {
Assignments.Builder assignments = Assignments.builder();
for (Symbol output : argumentMappings.keySet()) {
assignments.put(output, argumentMappings.get(output).toSymbolReference());
}
for (Symbol output : groupingSetMappings.keySet()) {
assignments.put(output, groupingSetMappings.get(output).toSymbolReference());
}
ProjectNode project = new ProjectNode(idAllocator.getNextId(), subPlan.getRoot(), assignments.build());
subPlan = new PlanBuilder(groupingTranslations, project, analysis.getParameters());
}
TranslationMap aggregationTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
aggregationTranslations.copyMappingsFrom(groupingTranslations);
// 2.d. Rewrite aggregates
ImmutableMap.Builder<Symbol, FunctionCall> aggregationAssignments = ImmutableMap.builder();
ImmutableMap.Builder<Symbol, Signature> functions = ImmutableMap.builder();
boolean needPostProjectionCoercion = false;
for (FunctionCall aggregate : analysis.getAggregates(node)) {
Expression parametersReplaced = ExpressionTreeRewriter.rewriteWith(new ParameterRewriter(analysis.getParameters(), analysis), aggregate);
aggregationTranslations.addIntermediateMapping(aggregate, parametersReplaced);
Expression rewritten = argumentTranslations.rewrite(parametersReplaced);
Symbol newSymbol = symbolAllocator.newSymbol(rewritten, analysis.getType(aggregate));
// coercions are applied to the output of an expression rather than to its arguments (see the TODO below),
// so we can end up with an implicit cast here and have to move it into a post-projection
if (rewritten instanceof Cast) {
rewritten = ((Cast) rewritten).getExpression();
needPostProjectionCoercion = true;
}
aggregationAssignments.put(newSymbol, (FunctionCall) rewritten);
aggregationTranslations.put(parametersReplaced, newSymbol);
functions.put(newSymbol, analysis.getFunctionSignature(aggregate));
}
// 2.e. Mark distinct rows for each aggregate that has DISTINCT
// Map from aggregate function arguments to marker symbols, so that we can reuse the markers, if two aggregates have the same argument
Map<Set<Expression>, Symbol> argumentMarkers = new HashMap<>();
// Map from aggregate functions to marker symbols
Map<Symbol, Symbol> masks = new HashMap<>();
for (FunctionCall aggregate : Iterables.filter(analysis.getAggregates(node), FunctionCall::isDistinct)) {
Set<Expression> args = ImmutableSet.copyOf(aggregate.getArguments());
Symbol marker = argumentMarkers.get(args);
Symbol aggregateSymbol = aggregationTranslations.get(aggregate);
if (marker == null) {
if (args.size() == 1) {
marker = symbolAllocator.newSymbol(getOnlyElement(args), BOOLEAN, "distinct");
} else {
marker = symbolAllocator.newSymbol(aggregateSymbol.getName(), BOOLEAN, "distinct");
}
argumentMarkers.put(args, marker);
}
masks.put(aggregateSymbol, marker);
}
for (Map.Entry<Set<Expression>, Symbol> entry : argumentMarkers.entrySet()) {
ImmutableList.Builder<Symbol> builder = ImmutableList.builder();
builder.addAll(groupingSymbols.stream().flatMap(Collection::stream).distinct().collect(Collectors.toList()));
groupIdSymbol.ifPresent(builder::add);
for (Expression expression : entry.getKey()) {
builder.add(argumentTranslations.get(expression));
}
subPlan = subPlan.withNewRoot(new MarkDistinctNode(idAllocator.getNextId(), subPlan.getRoot(), entry.getValue(), builder.build(), Optional.empty()));
}
AggregationNode aggregationNode = new AggregationNode(idAllocator.getNextId(), subPlan.getRoot(), aggregationAssignments.build(), functions.build(), masks, groupingSymbols, AggregationNode.Step.SINGLE, Optional.empty(), groupIdSymbol);
subPlan = new PlanBuilder(aggregationTranslations, aggregationNode, analysis.getParameters());
// TODO: this is a hack, we should change type coercions to coerce the inputs to functions/operators instead of coercing the output
if (needPostProjectionCoercion) {
return explicitCoercionFields(subPlan, distinctGroupingColumns, analysis.getAggregates(node));
}
return subPlan;
}
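The Optional usage above follows a common shape: groupIdSymbol starts as Optional.empty(), is populated only when more than one grouping set is present, and downstream code consumes it with get() and ifPresent(builder::add). The following is a minimal, standalone sketch of that same pattern; the class name GroupIdSketch, the String stand-in for Symbol, and the sample values are illustrative, not Presto APIs.

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

class GroupIdSketch
{
    public static void main(String[] args)
    {
        int groupingSetCount = 3;

        // present only when a group-id column is actually needed
        Optional<String> groupIdSymbol = Optional.empty();
        if (groupingSetCount > 1) {
            groupIdSymbol = Optional.of("groupId");
        }

        // later consumers add the symbol only if it exists
        List<String> outputs = new ArrayList<>(List.of("orderkey", "totalprice"));
        groupIdSymbol.ifPresent(outputs::add);

        System.out.println(outputs); // [orderkey, totalprice, groupId]
    }
}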
Use of java.util.Optional in project presto by prestodb.
Class SingleMarkDistinctToGroupBy, method apply.
@Override
public Optional<PlanNode> apply(PlanNode node, Lookup lookup, PlanNodeIdAllocator idAllocator, SymbolAllocator symbolAllocator) {
if (!(node instanceof AggregationNode)) {
return Optional.empty();
}
AggregationNode parent = (AggregationNode) node;
PlanNode source = lookup.resolve(parent.getSource());
if (!(source instanceof MarkDistinctNode)) {
return Optional.empty();
}
MarkDistinctNode child = (MarkDistinctNode) source;
boolean hasFilters = parent.getAggregations().values().stream().map(FunctionCall::getFilter).anyMatch(Optional::isPresent);
if (hasFilters) {
return Optional.empty();
}
// optimize if and only if
// all aggregation functions have a single common distinct mask symbol
// AND all aggregation functions have mask
Set<Symbol> masks = ImmutableSet.copyOf(parent.getMasks().values());
if (masks.size() != 1 || parent.getMasks().size() != parent.getAggregations().size()) {
return Optional.empty();
}
Symbol mask = Iterables.getOnlyElement(masks);
if (!child.getMarkerSymbol().equals(mask)) {
return Optional.empty();
}
return Optional.of(new AggregationNode(
        idAllocator.getNextId(),
        new AggregationNode(idAllocator.getNextId(), child.getSource(), Collections.emptyMap(), ImmutableList.of(child.getDistinctSymbols()), SINGLE, child.getHashSymbol(), Optional.empty()),
        // remove DISTINCT flag from function calls
        parent.getAssignments().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> removeDistinct(e.getValue()))),
        parent.getGroupingSets(),
        parent.getStep(),
        parent.getHashSymbol(),
        parent.getGroupIdSymbol()));
}
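Here the rule's return type uses Optional to signal applicability: every guard clause returns Optional.empty() ("the rule does not fire"), and only the final statement wraps a rewritten plan in Optional.of. A stripped-down sketch of that shape is below; the OptionalRuleSketch class and the toy string rewrite are illustrative only and stand in for Presto's plan-node rewrite.

import java.util.Optional;

class OptionalRuleSketch
{
    // returns Optional.empty() when the rewrite does not apply,
    // mirroring the guard-clause style of SingleMarkDistinctToGroupBy.apply
    static Optional<String> simplify(String expression)
    {
        if (expression == null) {
            return Optional.empty();
        }
        if (!expression.endsWith(" + 0")) {
            return Optional.empty();
        }
        return Optional.of(expression.substring(0, expression.length() - " + 0".length()));
    }

    public static void main(String[] args)
    {
        System.out.println(simplify("x + 0")); // Optional[x]
        System.out.println(simplify("x + 1")); // Optional.empty
    }
}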
Use of java.util.Optional in project presto by prestodb.
Class TestHashJoinOperator, method buildHash.
private static LookupSourceFactory buildHash(boolean parallelBuild, TaskContext taskContext, List<Integer> hashChannels, RowPagesBuilder buildPages, Optional<InternalJoinFilterFunction> filterFunction) {
Optional<JoinFilterFunctionFactory> filterFunctionFactory = filterFunction.map(function -> ((session, addresses, channels) -> new StandardJoinFilterFunction(function, addresses, channels)));
int partitionCount = parallelBuild ? PARTITION_COUNT : 1;
LocalExchange localExchange = new LocalExchange(FIXED_HASH_DISTRIBUTION, partitionCount, buildPages.getTypes(), hashChannels, buildPages.getHashChannel());
LocalExchangeSinkFactory sinkFactory = localExchange.createSinkFactory();
sinkFactory.noMoreSinkFactories();
// collect input data into the partitioned exchange
DriverContext collectDriverContext = taskContext.addPipelineContext(0, true, true).addDriverContext();
ValuesOperatorFactory valuesOperatorFactory = new ValuesOperatorFactory(0, new PlanNodeId("values"), buildPages.getTypes(), buildPages.build());
LocalExchangeSinkOperatorFactory sinkOperatorFactory = new LocalExchangeSinkOperatorFactory(1, new PlanNodeId("sink"), sinkFactory, Function.identity());
Driver driver = new Driver(collectDriverContext, valuesOperatorFactory.createOperator(collectDriverContext), sinkOperatorFactory.createOperator(collectDriverContext));
valuesOperatorFactory.close();
sinkOperatorFactory.close();
while (!driver.isFinished()) {
driver.process();
}
// build hash tables
LocalExchangeSourceOperatorFactory sourceOperatorFactory = new LocalExchangeSourceOperatorFactory(0, new PlanNodeId("source"), localExchange);
HashBuilderOperatorFactory buildOperatorFactory = new HashBuilderOperatorFactory(1, new PlanNodeId("build"), buildPages.getTypes(), rangeList(buildPages.getTypes().size()), ImmutableMap.of(), hashChannels, buildPages.getHashChannel(), false, filterFunctionFactory, 100, partitionCount, new PagesIndex.TestingFactory());
PipelineContext buildPipeline = taskContext.addPipelineContext(1, true, true);
Driver[] buildDrivers = new Driver[partitionCount];
for (int i = 0; i < partitionCount; i++) {
DriverContext buildDriverContext = buildPipeline.addDriverContext();
buildDrivers[i] = new Driver(buildDriverContext, sourceOperatorFactory.createOperator(buildDriverContext), buildOperatorFactory.createOperator(buildDriverContext));
}
while (!buildOperatorFactory.getLookupSourceFactory().createLookupSource().isDone()) {
for (Driver buildDriver : buildDrivers) {
buildDriver.process();
}
}
return buildOperatorFactory.getLookupSourceFactory();
}
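The first line of buildHash adapts an Optional<InternalJoinFilterFunction> into an Optional<JoinFilterFunctionFactory> with Optional.map, so an absent filter simply stays absent and no unwrapping is needed. A small sketch of that adaptation pattern, using plain JDK functional interfaces in place of the Presto types (OptionalMapSketch and the sample predicate are assumptions for illustration):

import java.util.Optional;
import java.util.function.Predicate;
import java.util.function.Supplier;

class OptionalMapSketch
{
    public static void main(String[] args)
    {
        // an optional "filter function" supplied by the caller
        Optional<Predicate<Integer>> filter = Optional.of(n -> n % 2 == 0);

        // adapt it to a different interface without unwrapping; absent stays absent
        Optional<Supplier<String>> description = filter.map(f -> () -> "filters with " + f);
        System.out.println(description.isPresent()); // true

        Optional<Predicate<Integer>> absent = Optional.empty();
        Optional<Supplier<String>> absentDescription = absent.map(f -> () -> "filters with " + f);
        System.out.println(absentDescription.isPresent()); // false
    }
}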
Use of java.util.Optional in project presto by prestodb.
Class AbstractTestHiveClient, method doInsertIntoNewPartition.
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
// insert the data
String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
// verify partitions were created
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new PrestoException(HIVE_METASTORE_ERROR, "Partition metadata not available"));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
// verify the node versions in partitions
Map<String, Optional<Partition>> partitions = getMetastoreClient(tableName.getSchemaName()).getPartitionsByNames(tableName.getSchemaName(), tableName.getTableName(), partitionNames);
assertEquals(partitions.size(), partitionNames.size());
for (String partitionName : partitionNames) {
Partition partition = partitions.get(partitionName).get();
assertEquals(partition.getParameters().get(HiveMetadata.PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
assertEquals(partition.getParameters().get(HiveMetadata.PRESTO_QUERY_ID_NAME), queryId);
}
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// test rollback
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
stagingPathRoot = getStagingPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
getFutureValue(sink.finish());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify all temp files start with the unique prefix
Set<String> tempFiles = listAllDataFiles(getStagingPathRoot(insertTableHandle));
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertTrue(new Path(filePath).getName().startsWith(getFilePrefix(insertTableHandle)));
}
// rollback insert
transaction.rollback();
}
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, newSession(), TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify temp directory is empty
assertTrue(listAllDataFiles(stagingPathRoot).isEmpty());
}
}
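Two Optional idioms appear in this test: orElseThrow converts an absent metastore result into a descriptive exception, and getPartitionsByNames returns a Map whose values are Optional<Partition>, unwrapped with get() where presence is expected. A minimal sketch of the same lookup pattern, with plain Strings standing in for Partition and invented sample data (OptionalLookupSketch is illustrative, not a Hive API):

import java.util.Map;
import java.util.Optional;

class OptionalLookupSketch
{
    public static void main(String[] args)
    {
        // metastore-style lookup: every requested name maps to an Optional value
        Map<String, Optional<String>> partitions = Map.of(
                "ds=2016-01-01", Optional.of("location-a"),
                "ds=2016-01-02", Optional.empty());

        // unwrap when presence is expected; orElseThrow gives a clearer failure than get()
        String location = partitions.get("ds=2016-01-01")
                .orElseThrow(() -> new IllegalStateException("partition metadata not available"));
        System.out.println(location); // location-a

        System.out.println(partitions.get("ds=2016-01-02").isPresent()); // false
    }
}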
Use of java.util.Optional in project presto by prestodb.
Class AbstractTestHiveClient, method readTable.
private MaterializedResult readTable(Transaction transaction, ConnectorTableHandle tableHandle, List<ColumnHandle> columnHandles, ConnectorSession session, TupleDomain<ColumnHandle> tupleDomain, OptionalInt expectedSplitCount, Optional<HiveStorageFormat> expectedStorageFormat) throws Exception {
List<ConnectorTableLayoutResult> tableLayoutResults = transaction.getMetadata().getTableLayouts(session, tableHandle, new Constraint<>(tupleDomain, bindings -> true), Optional.empty());
ConnectorTableLayoutHandle layoutHandle = getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle));
if (expectedSplitCount.isPresent()) {
assertEquals(splits.size(), expectedSplitCount.getAsInt());
}
ImmutableList.Builder<MaterializedRow> allRows = ImmutableList.builder();
for (ConnectorSplit split : splits) {
try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, columnHandles)) {
if (expectedStorageFormat.isPresent()) {
assertPageSourceType(pageSource, expectedStorageFormat.get());
}
MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
allRows.addAll(result.getMaterializedRows());
}
}
return new MaterializedResult(allRows.build(), getTypes(columnHandles));
}
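readTable takes its expectations as OptionalInt and Optional parameters, checking them only when present (isPresent()/getAsInt() for the split count, isPresent()/get() for the storage format), so callers that don't care simply pass empty(). A short sketch of this "optional expectation" parameter style; the OptionalParameterSketch class, the verify method, and the sample values are assumptions for illustration.

import java.util.Optional;
import java.util.OptionalInt;

class OptionalParameterSketch
{
    // optional expectations: callers pass empty() when they don't want a check performed
    static void verify(int actualCount, OptionalInt expectedCount, Optional<String> expectedFormat, String actualFormat)
    {
        if (expectedCount.isPresent() && actualCount != expectedCount.getAsInt()) {
            throw new AssertionError("split count mismatch");
        }
        expectedFormat.ifPresent(format -> {
            if (!format.equals(actualFormat)) {
                throw new AssertionError("storage format mismatch");
            }
        });
    }

    public static void main(String[] args)
    {
        verify(3, OptionalInt.of(3), Optional.of("RCBINARY"), "RCBINARY"); // passes
        verify(3, OptionalInt.empty(), Optional.empty(), "ORC");           // nothing to check
        System.out.println("all checks passed");
    }
}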