use of com.facebook.presto.spi.plan.TableScanNode in project presto by prestodb.
the class TestPinotQueryGeneratorSql method testDefaultNoTopNPushdown.
@Test
public void testDefaultNoTopNPushdown()
{
    PlanBuilder planBuilder = createPlanBuilder(defaultSessionHolder);
    TableScanNode tableScanNode = tableScan(planBuilder, pinotTable, city, fare);
    AggregationNode aggregationNode = planBuilder.aggregation(aggregationNodeBuilder -> aggregationNodeBuilder
            .source(tableScanNode)
            .singleGroupingSet(variable("city"))
            .addAggregation(planBuilder.variable("sum_fare"), getRowExpression("sum(fare)", defaultSessionHolder)));
    pinotConfig.setPushdownTopNBrokerQueries(false);
    TopNNode topN = new TopNNode(Optional.empty(), planBuilder.getIdAllocator().getNextId(), aggregationNode, 1000L,
            new OrderingScheme(ImmutableList.of(new Ordering(variable("sum_fare"), SortOrder.ASC_NULLS_FIRST))), TopNNode.Step.SINGLE);
    Optional<PinotQueryGenerator.PinotQueryGeneratorResult> generatedQuery =
            new PinotQueryGenerator(pinotConfig, functionAndTypeManager, functionAndTypeManager, standardFunctionResolution)
                    .generate(topN, defaultSessionHolder.getConnectorSession());
    assertFalse(generatedQuery.isPresent());
    SessionHolder sessionHolder = new SessionHolder(pinotConfig);
    testPinotQuery(pinotConfig, aggregationNode, "SELECT city, sum(fare) FROM realtimeOnly GROUP BY city LIMIT 10000", sessionHolder, ImmutableMap.of());
}
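The pattern here: with pushdownTopNBrokerQueries disabled, the generator returns Optional.empty() for the TopNNode, while the underlying AggregationNode still converts with Pinot's default group-by limit of 10000. The testPinotQuery helper comes from the test base class; a hypothetical sketch of what it plausibly does, inferred from the direct PinotQueryGenerator invocation above (the accessor names on the result object are assumptions, not verified API):

private void assertGeneratedQuery(PinotConfig config, PlanNode plan, String expectedQuery, SessionHolder sessionHolder)
{
    // Run the generator the same way the test above does and compare the produced query text.
    // getGeneratedPinotQuery()/getQuery() are assumed accessor names for this sketch.
    PinotQueryGenerator.PinotQueryGeneratorResult result =
            new PinotQueryGenerator(config, functionAndTypeManager, functionAndTypeManager, standardFunctionResolution)
                    .generate(plan, sessionHolder.getConnectorSession())
                    .orElseThrow(() -> new AssertionError("expected the plan to be convertible to a Pinot query"));
    assertEquals(result.getGeneratedPinotQuery().getQuery(), expectedQuery);
}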
use of com.facebook.presto.spi.plan.TableScanNode in project presto by prestodb.
the class TestPinotQueryGeneratorSql method testAggregationWithOrderByPushDownInTopN.
@Override
@Test
public void testAggregationWithOrderByPushDownInTopN()
{
    pinotConfig.setPushdownTopNBrokerQueries(true);
    SessionHolder sessionHolder = new SessionHolder(pinotConfig);
    PlanBuilder planBuilder = createPlanBuilder(defaultSessionHolder);
    TableScanNode tableScanNode = tableScan(planBuilder, pinotTable, city, fare);
    AggregationNode aggregationNode = planBuilder.aggregation(aggregationNodeBuilder -> aggregationNodeBuilder
            .source(tableScanNode)
            .singleGroupingSet(variable("city"))
            .addAggregation(planBuilder.variable("sum_fare"), getRowExpression("sum(fare)", defaultSessionHolder)));
    testPinotQuery(pinotConfig, aggregationNode, "SELECT city, sum(fare) FROM realtimeOnly GROUP BY city LIMIT 10000", sessionHolder, ImmutableMap.of());
    TopNNode topN = new TopNNode(Optional.empty(), planBuilder.getIdAllocator().getNextId(), aggregationNode, 50L,
            new OrderingScheme(ImmutableList.of(new Ordering(variable("city"), SortOrder.DESC_NULLS_FIRST))), TopNNode.Step.SINGLE);
    testPinotQuery(pinotConfig, topN, "SELECT city, sum(fare) FROM realtimeOnly GROUP BY city ORDER BY city DESC LIMIT 50", sessionHolder, ImmutableMap.of());
    topN = new TopNNode(Optional.empty(), planBuilder.getIdAllocator().getNextId(), aggregationNode, 1000L,
            new OrderingScheme(ImmutableList.of(new Ordering(variable("sum_fare"), SortOrder.ASC_NULLS_FIRST))), TopNNode.Step.SINGLE);
    testPinotQuery(pinotConfig, topN, "SELECT city, sum(fare) FROM realtimeOnly GROUP BY city ORDER BY sum(fare) LIMIT 1000", sessionHolder, ImmutableMap.of());
}
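Note how the SortOrder maps to the generated ORDER BY clause: DESC_NULLS_FIRST renders as an explicit DESC, while ASC_NULLS_FIRST produces a bare ORDER BY sum(fare), since ascending is the default direction. A minimal sketch of that mapping (illustrative only, not the connector's actual rendering code, which lives inside PinotQueryGenerator):

static String toOrderByTerm(String expression, SortOrder sortOrder)
{
    // Ascending is the default, so only DESC is spelled out explicitly, which is
    // why the expected query above reads "ORDER BY sum(fare)" with no keyword.
    return sortOrder.isAscending() ? expression : expression + " DESC";
}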
use of com.facebook.presto.spi.plan.TableScanNode in project presto by prestodb.
the class TestPinotQueryGeneratorSql method testSelectionWithOrderBy.
@Test
public void testSelectionWithOrderBy()
{
    pinotConfig.setPushdownTopNBrokerQueries(true);
    PlanBuilder planBuilder = createPlanBuilder(defaultSessionHolder);
    TableScanNode tableScanNode = tableScan(planBuilder, pinotTable, regionId, city, fare);
    SessionHolder sessionHolder = new SessionHolder(pinotConfig);
    testPinotQuery(pinotConfig, topN(planBuilder, 50L, ImmutableList.of("fare"), ImmutableList.of(false), tableScanNode),
            "SELECT regionId, city, fare FROM realtimeOnly ORDER BY fare DESC LIMIT 50", sessionHolder, ImmutableMap.of());
    testPinotQuery(pinotConfig, topN(planBuilder, 50L, ImmutableList.of("fare", "city"), ImmutableList.of(true, false), tableScanNode),
            "SELECT regionId, city, fare FROM realtimeOnly ORDER BY fare, city DESC LIMIT 50", sessionHolder, ImmutableMap.of());
    testPinotQuery(pinotConfig, topN(planBuilder, 50L, ImmutableList.of("city", "fare"), ImmutableList.of(false, true), tableScanNode),
            "SELECT regionId, city, fare FROM realtimeOnly ORDER BY city DESC, fare LIMIT 50", sessionHolder, ImmutableMap.of());
    TopNNode topNNode = topN(planBuilder, 50L, ImmutableList.of("fare", "city"), ImmutableList.of(true, false), tableScanNode);
    testPinotQuery(pinotConfig, project(planBuilder, topNNode, ImmutableList.of("regionid", "city")),
            "SELECT regionId, city FROM realtimeOnly ORDER BY fare, city DESC LIMIT 50", sessionHolder, ImmutableMap.of());
    tableScanNode = tableScan(planBuilder, pinotTable, fare, city, regionId);
    testPinotQuery(pinotConfig, topN(planBuilder, 500L, ImmutableList.of("fare"), ImmutableList.of(false), tableScanNode),
            "SELECT fare, city, regionId FROM realtimeOnly ORDER BY fare DESC LIMIT 500", sessionHolder, ImmutableMap.of());
    testPinotQuery(pinotConfig, topN(planBuilder, 5000L, ImmutableList.of("fare", "city"), ImmutableList.of(true, false), tableScanNode),
            "SELECT fare, city, regionId FROM realtimeOnly ORDER BY fare, city DESC LIMIT 5000", sessionHolder, ImmutableMap.of());
}
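The topN(...) helper takes a row limit, the ordering columns, and a parallel list of ascending flags (true for ascending, false for descending). It is defined in the test base class; a hypothetical reconstruction, inferred from the explicit TopNNode constructions in the earlier tests, might look like this:

private TopNNode topN(PlanBuilder planBuilder, long count, List<String> orderBy, List<Boolean> ascending, PlanNode source)
{
    // true -> ASC_NULLS_FIRST, false -> DESC_NULLS_FIRST, matching the SortOrder
    // values used by the tests above; the real helper may differ in detail.
    ImmutableList.Builder<Ordering> orderings = ImmutableList.builder();
    for (int i = 0; i < orderBy.size(); i++) {
        SortOrder sortOrder = ascending.get(i) ? SortOrder.ASC_NULLS_FIRST : SortOrder.DESC_NULLS_FIRST;
        orderings.add(new Ordering(variable(orderBy.get(i)), sortOrder));
    }
    return new TopNNode(Optional.empty(), planBuilder.getIdAllocator().getNextId(), source, count,
            new OrderingScheme(orderings.build()), TopNNode.Step.SINGLE);
}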
use of com.facebook.presto.spi.plan.TableScanNode in project presto by prestodb.
the class PrestoSparkRddFactory method createRdd.
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(
        JavaSparkContext sparkContext,
        Session session,
        PlanFragment fragment,
        PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider,
        CollectionAccumulator<SerializedTaskInfo> taskInfoCollector,
        CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector,
        TableWriteInfo tableWriteInfo,
        Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs,
        Map<PlanFragmentId, Broadcast<?>> broadcastInputs,
        Class<T> outputType)
{
    checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);
    PrestoSparkTaskDescriptor taskDescriptor = new PrestoSparkTaskDescriptor(session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, tableWriteInfo);
    SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor = new SerializedPrestoSparkTaskDescriptor(taskDescriptorJsonCodec.toJsonBytes(taskDescriptor));
    Optional<Integer> numberOfShufflePartitions = Optional.empty();
    Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleInputRddMap = new HashMap<>();
    for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> input : rddInputs.entrySet()) {
        RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rdd = input.getValue().rdd();
        shuffleInputRddMap.put(input.getKey().toString(), rdd);
        if (!numberOfShufflePartitions.isPresent()) {
            numberOfShufflePartitions = Optional.of(rdd.getNumPartitions());
        }
        else {
            checkArgument(numberOfShufflePartitions.get() == rdd.getNumPartitions(), "Incompatible number of input partitions: %s != %s", numberOfShufflePartitions.get(), rdd.getNumPartitions());
        }
    }
    PrestoSparkTaskProcessor<T> taskProcessor = new PrestoSparkTaskProcessor<>(executorFactoryProvider, serializedTaskDescriptor, taskInfoCollector, shuffleStatsCollector, toTaskProcessorBroadcastInputs(broadcastInputs), outputType);
    Optional<PrestoSparkTaskSourceRdd> taskSourceRdd;
    List<TableScanNode> tableScans = findTableScanNodes(fragment.getRoot());
    if (!tableScans.isEmpty()) {
        try (CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
            SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, WarningCollector.NOOP);
            Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(fragment, session, tableWriteInfo);
            taskSourceRdd = Optional.of(createTaskSourcesRdd(fragment.getId(), sparkContext, session, fragment.getPartitioning(), tableScans, splitSources, numberOfShufflePartitions));
        }
    }
    else if (rddInputs.size() == 0) {
        checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION), "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
        // In case of no inputs we still need to schedule a task.
        // A task with no inputs may still produce results (e.g.: ValuesNode).
        // To force the task to be scheduled we create a PrestoSparkTaskSourceRdd that contains exactly one partition.
        // Since there are also no table scans in the fragment, the list of TaskSources for this partition is empty.
        taskSourceRdd = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
    }
    else {
        taskSourceRdd = Optional.empty();
    }
    return JavaPairRDD.fromRDD(PrestoSparkTaskRdd.create(sparkContext.sc(), taskSourceRdd, shuffleInputRddMap, taskProcessor), classTag(MutablePartitionId.class), classTag(outputType));
}
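The loop over rddInputs enforces that every shuffle input RDD reports the same partition count, since PrestoSparkTaskRdd combines the inputs partition by partition. A standalone sketch of that accumulate-and-check idiom (names here are illustrative, not part of the Presto on Spark API):

import java.util.List;
import java.util.OptionalInt;

final class PartitionCountCheck
{
    // Returns the common partition count, or empty for no inputs; throws on a mismatch.
    static OptionalInt requireConsistentPartitionCount(List<Integer> partitionCounts)
    {
        OptionalInt expected = OptionalInt.empty();
        for (int count : partitionCounts) {
            if (!expected.isPresent()) {
                // The first input seeds the expected value
                expected = OptionalInt.of(count);
            }
            else if (expected.getAsInt() != count) {
                // Every subsequent input must agree with it
                throw new IllegalArgumentException(
                        "Incompatible number of input partitions: " + expected.getAsInt() + " != " + count);
            }
        }
        return expected;
    }
}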
use of com.facebook.presto.spi.plan.TableScanNode in project presto by prestodb.
the class PrestoSparkRddFactory method createTaskSourcesRdd.
private PrestoSparkTaskSourceRdd createTaskSourcesRdd(
        PlanFragmentId fragmentId,
        JavaSparkContext sparkContext,
        Session session,
        PartitioningHandle partitioning,
        List<TableScanNode> tableScans,
        Map<PlanNodeId, SplitSource> splitSources,
        Optional<Integer> numberOfShufflePartitions)
{
    ListMultimap<Integer, SerializedPrestoSparkTaskSource> taskSourcesMap = ArrayListMultimap.create();
    for (TableScanNode tableScan : tableScans) {
        int totalNumberOfSplits = 0;
        SplitSource splitSource = requireNonNull(splitSources.get(tableScan.getId()), "split source is missing for table scan node with id: " + tableScan.getId());
        try (PrestoSparkSplitAssigner splitAssigner = createSplitAssigner(session, tableScan.getId(), splitSource, partitioning)) {
            while (true) {
                Optional<SetMultimap<Integer, ScheduledSplit>> batch = splitAssigner.getNextBatch();
                if (!batch.isPresent()) {
                    break;
                }
                int numberOfSplitsInCurrentBatch = batch.get().size();
                log.info("Found %s splits for table scan node with id %s", numberOfSplitsInCurrentBatch, tableScan.getId());
                totalNumberOfSplits += numberOfSplitsInCurrentBatch;
                taskSourcesMap.putAll(createTaskSources(tableScan.getId(), batch.get()));
            }
        }
        log.info("Total number of splits for table scan node with id %s: %s", tableScan.getId(), totalNumberOfSplits);
    }
    long allTaskSourcesSerializedSizeInBytes = taskSourcesMap.values().stream()
            .mapToLong(serializedTaskSource -> serializedTaskSource.getBytes().length)
            .sum();
    log.info("Total serialized size of all task sources for fragment %s: %s", fragmentId, DataSize.succinctBytes(allTaskSourcesSerializedSizeInBytes));
    List<List<SerializedPrestoSparkTaskSource>> taskSourcesByPartitionId = new ArrayList<>();
    // If the fragment contains any shuffle inputs, this value will be present
    if (numberOfShufflePartitions.isPresent()) {
        // The task source partitioning must match the shuffle partitioning, so an empty
        // list is inserted for every partition id that received no task sources.
        for (int partitionId = 0; partitionId < numberOfShufflePartitions.get(); partitionId++) {
            // Eagerly remove task sources from the map to let GC reclaim the memory.
            // If task sources are missing for a partition, removeAll returns an empty list.
            taskSourcesByPartitionId.add(requireNonNull(taskSourcesMap.removeAll(partitionId), "taskSources is null"));
        }
    }
    else {
        taskSourcesByPartitionId.addAll(Multimaps.asMap(taskSourcesMap).values());
    }
    return new PrestoSparkTaskSourceRdd(sparkContext.sc(), taskSourcesByPartitionId);
}
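The partition-id loop relies on a convenient Guava property: ListMultimap.removeAll returns an empty list for an absent key, so every partition id in [0, numberOfShufflePartitions) gets an entry whether or not it received splits. A runnable, standalone illustration (the names and values are made up for the example):

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import java.util.ArrayList;
import java.util.List;

public class PartitionDrainExample
{
    public static void main(String[] args)
    {
        ListMultimap<Integer, String> taskSourcesMap = ArrayListMultimap.create();
        taskSourcesMap.put(0, "split-a");
        taskSourcesMap.put(2, "split-b");
        List<List<String>> taskSourcesByPartitionId = new ArrayList<>();
        for (int partitionId = 0; partitionId < 4; partitionId++) {
            // removeAll drains the key, freeing the entries for GC, and yields an
            // empty list for partition ids that never received any task sources
            taskSourcesByPartitionId.add(taskSourcesMap.removeAll(partitionId));
        }
        // Prints: [[split-a], [], [split-b], []]
        System.out.println(taskSourcesByPartitionId);
    }
}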