Search in sources :

Example 1 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

In the class PrestoSparkTaskExecutorFactory, the method computeAllSplitsSize:

/**
 * Adds up the byte sizes reported by every split of every given task source.
 *
 * @param taskSources task sources whose splits are inspected
 * @return the sum of all split sizes in bytes, or an empty {@code OptionalLong}
 *         as soon as one split does not report a size
 */
private static OptionalLong computeAllSplitsSize(List<TaskSource> taskSources) {
    long totalBytes = 0;
    for (TaskSource source : taskSources) {
        for (ScheduledSplit split : source.getSplits()) {
            OptionalLong splitBytes = split.getSplit().getConnectorSplit().getSplitSizeInBytes();
            if (splitBytes.isPresent()) {
                totalBytes += splitBytes.getAsLong();
                continue;
            }
            // A single split of unknown size makes the overall total unknown
            return OptionalLong.empty();
        }
    }
    return OptionalLong.of(totalBytes);
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) SerializedPrestoSparkTaskSource(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskSource) TaskSource(com.facebook.presto.execution.TaskSource)

Example 2 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

In the class PrestoSparkRddFactory, the method createTaskSourcesRdd:

/**
 * Collects the serialized task sources of all given table scans, grouped by
 * partition id, and wraps them into a {@code PrestoSparkTaskSourceRdd}.
 *
 * @param fragmentId id of the fragment, used for logging only
 * @param sparkContext context whose underlying {@code sc()} backs the created RDD
 * @param session session passed on to the split assigner
 * @param partitioning partitioning handle passed on to the split assigner
 * @param tableScans table scan nodes to collect splits for
 * @param splitSources split source per table scan node id; must contain every scan
 * @param numberOfShufflePartitions when present, the exact number of partitions to emit
 * @return the RDD holding one list of task sources per partition
 * @throws NullPointerException if a table scan has no entry in {@code splitSources}
 */
private PrestoSparkTaskSourceRdd createTaskSourcesRdd(PlanFragmentId fragmentId, JavaSparkContext sparkContext, Session session, PartitioningHandle partitioning, List<TableScanNode> tableScans, Map<PlanNodeId, SplitSource> splitSources, Optional<Integer> numberOfShufflePartitions) {
    ListMultimap<Integer, SerializedPrestoSparkTaskSource> sourcesByPartition = ArrayListMultimap.create();
    for (TableScanNode scan : tableScans) {
        PlanNodeId scanId = scan.getId();
        SplitSource scanSplitSource = requireNonNull(splitSources.get(scanId), "split source is missing for table scan node with id: " + scanId);
        int splitsSeen = 0;
        try (PrestoSparkSplitAssigner assigner = createSplitAssigner(session, scanId, scanSplitSource, partitioning)) {
            // Drain the assigner; an absent batch signals that there is nothing left
            Optional<SetMultimap<Integer, ScheduledSplit>> next = assigner.getNextBatch();
            while (next.isPresent()) {
                SetMultimap<Integer, ScheduledSplit> assignment = next.get();
                int splitsInBatch = assignment.size();
                log.info("Found %s splits for table scan node with id %s", splitsInBatch, scanId);
                splitsSeen += splitsInBatch;
                sourcesByPartition.putAll(createTaskSources(scanId, assignment));
                next = assigner.getNextBatch();
            }
        }
        log.info("Total number of splits for table scan node with id %s: %s", scanId, splitsSeen);
    }

    long serializedBytes = 0;
    for (SerializedPrestoSparkTaskSource serialized : sourcesByPartition.values()) {
        serializedBytes += serialized.getBytes().length;
    }
    log.info("Total serialized size of all task sources for fragment %s: %s", fragmentId, DataSize.succinctBytes(serializedBytes));

    List<List<SerializedPrestoSparkTaskSource>> partitions = new ArrayList<>();
    if (!numberOfShufflePartitions.isPresent()) {
        // No fixed partition count: emit one partition per key that actually has task sources
        partitions.addAll(Multimaps.asMap(sourcesByPartition).values());
    } else {
        // The count is present when the fragment contains shuffle inputs. Every partition id
        // below the count gets an entry, so an empty partition is inserted wherever no task
        // sources were assigned (e.g. a missing bucket).
        int partitionCount = numberOfShufflePartitions.get();
        for (int id = 0; id < partitionCount; id++) {
            // removeAll hands back an empty list for a missing key and drops the entry
            // eagerly, which lets GC reclaim the memory
            partitions.add(requireNonNull(sourcesByPartition.removeAll(id), "taskSources is null"));
        }
    }
    return new PrestoSparkTaskSourceRdd(sparkContext.sc(), partitions);
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) WarningCollector(com.facebook.presto.spi.WarningCollector) JsonCodec(com.facebook.airlift.json.JsonCodec) ListMultimap(com.google.common.collect.ListMultimap) RemoteSourceNode(com.facebook.presto.sql.planner.plan.RemoteSourceNode) PrestoSparkTaskRdd(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskRdd) SplitSourceFactory(com.facebook.presto.sql.planner.SplitSourceFactory) PrestoSparkUtils.serializeZstdCompressed(com.facebook.presto.spark.util.PrestoSparkUtils.serializeZstdCompressed) TableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Sets.difference(com.google.common.collect.Sets.difference) PlanFragment(com.facebook.presto.sql.planner.PlanFragment) MutablePartitionId(com.facebook.presto.spark.classloader_interface.MutablePartitionId) PrestoSparkShuffleStats(com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats) Map(java.util.Map) Sets.union(com.google.common.collect.Sets.union) FIXED_BROADCAST_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION) SplitSource(com.facebook.presto.split.SplitSource) Broadcast(org.apache.spark.broadcast.Broadcast) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) SplitManager(com.facebook.presto.split.SplitManager) Tuple2(scala.Tuple2) SOURCE_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION) Codec(com.facebook.airlift.json.Codec) String.format(java.lang.String.format) PrestoSparkTaskProcessor(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskProcessor) DataSize(io.airlift.units.DataSize) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) 
PrestoSparkTaskExecutorFactoryProvider(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskExecutorFactoryProvider) SerializedPrestoSparkTaskDescriptor(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor) SerializedTaskInfo(com.facebook.presto.spark.classloader_interface.SerializedTaskInfo) Optional(java.util.Optional) FIXED_HASH_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION) RDD(org.apache.spark.rdd.RDD) PrestoSparkUtils.classTag(com.facebook.presto.spark.util.PrestoSparkUtils.classTag) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ARBITRARY_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.ARBITRARY_DISTRIBUTION) Logger(com.facebook.airlift.log.Logger) FIXED_ARBITRARY_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION) SINGLE_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) Multimaps(com.google.common.collect.Multimaps) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) PrestoSparkTaskSourceRdd(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd) PrestoSparkTaskOutput(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskOutput) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) CloseableSplitSourceProvider(com.facebook.presto.split.CloseableSplitSourceProvider) FIXED_PASSTHROUGH_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION) 
PlanNodeSearcher.searchFrom(com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom) PrestoSparkTaskDescriptor(com.facebook.presto.spark.PrestoSparkTaskDescriptor) SerializedPrestoSparkTaskSource(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskSource) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow) Session(com.facebook.presto.Session) TaskSource(com.facebook.presto.execution.TaskSource) SCALED_WRITER_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.SCALED_WRITER_DISTRIBUTION) CollectionAccumulator(org.apache.spark.util.CollectionAccumulator) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SetMultimap(com.google.common.collect.SetMultimap) PlanNode(com.facebook.presto.spi.plan.PlanNode) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) PartitioningHandle(com.facebook.presto.sql.planner.PartitioningHandle) PartitioningProviderManager(com.facebook.presto.sql.planner.PartitioningProviderManager) COORDINATOR_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.COORDINATOR_DISTRIBUTION) ArrayList(java.util.ArrayList) SerializedPrestoSparkTaskSource(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskSource) SetMultimap(com.google.common.collect.SetMultimap) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) SplitSource(com.facebook.presto.split.SplitSource) PrestoSparkTaskSourceRdd(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd)

Example 3 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

In the class PrestoSparkTaskExecution, the method scheduleTableScanSource:

/**
 * Announces the given splits to the factory, creates one driver runner per
 * split, enqueues all runners, and then tells the factory that no further
 * driver runners will be created.
 *
 * @param factory factory that is notified about the splits and creates the runners
 * @param splits splits to schedule; one driver runner is created for each
 */
private synchronized void scheduleTableScanSource(DriverSplitRunnerFactory factory, List<ScheduledSplit> splits) {
    // Report the split count together with the summed raw split weight
    factory.splitsAdded(
            splits.size(),
            SplitWeight.rawValueSum(splits, weighted -> weighted.getSplit().getSplitWeight()));

    // Each split gets its own, newly created driver runner
    ImmutableList.Builder<DriverSplitRunner> driverRunners = ImmutableList.builder();
    for (ScheduledSplit split : splits) {
        driverRunners.add(factory.createDriverRunner(split));
    }
    enqueueDriverSplitRunner(false, driverRunners.build());

    factory.noMoreDriverRunner();
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) SystemSessionProperties.getSplitConcurrencyAdjustmentInterval(com.facebook.presto.SystemSessionProperties.getSplitConcurrencyAdjustmentInterval) ListMultimap(com.google.common.collect.ListMultimap) SplitRunner(com.facebook.presto.execution.SplitRunner) Duration(io.airlift.units.Duration) TaskHandle(com.facebook.presto.execution.executor.TaskHandle) SystemSessionProperties.getInitialSplitsPerNode(com.facebook.presto.SystemSessionProperties.getInitialSplitsPerNode) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) PipelineContext(com.facebook.presto.operator.PipelineContext) Lifespan(com.facebook.presto.execution.Lifespan) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) GuardedBy(javax.annotation.concurrent.GuardedBy) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) LocalExecutionPlan(com.facebook.presto.sql.planner.LocalExecutionPlanner.LocalExecutionPlan) TaskExecutor(com.facebook.presto.execution.executor.TaskExecutor) Optional(java.util.Optional) TaskStateMachine(com.facebook.presto.execution.TaskStateMachine) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) DriverStats(com.facebook.presto.operator.DriverStats) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) UNGROUPED_EXECUTION(com.facebook.presto.operator.PipelineExecutionStrategy.UNGROUPED_EXECUTION) DriverFactory(com.facebook.presto.operator.DriverFactory) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) 
Objects.requireNonNull(java.util.Objects.requireNonNull) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Nullable(javax.annotation.Nullable) TaskContext(com.facebook.presto.operator.TaskContext) SetThreadName(com.facebook.airlift.concurrent.SetThreadName) SplitMonitor(com.facebook.presto.event.SplitMonitor) Executor(java.util.concurrent.Executor) TaskSource(com.facebook.presto.execution.TaskSource) Driver(com.facebook.presto.operator.Driver) FutureCallback(com.google.common.util.concurrent.FutureCallback) SystemSessionProperties.getMaxDriversPerTask(com.facebook.presto.SystemSessionProperties.getMaxDriversPerTask) Futures(com.google.common.util.concurrent.Futures) TaskId(com.facebook.presto.execution.TaskId) DriverContext(com.facebook.presto.operator.DriverContext) SplitWeight(com.facebook.presto.spi.SplitWeight) SECONDS(java.util.concurrent.TimeUnit.SECONDS) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) ImmutableList(com.google.common.collect.ImmutableList)

Example 4 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

In the class PrestoSparkPartitionedSplitAssigner, the method assignSplitsToTasks:

/**
 * Groups the given splits by the partition id that {@code splitBucketFunction}
 * computes from each split's connector split.
 *
 * @param splits splits to distribute
 * @return a newly created multimap from partition id to the splits assigned to it
 */
private SetMultimap<Integer, ScheduledSplit> assignSplitsToTasks(List<ScheduledSplit> splits) {
    // expected to be mutable for efficiency reasons
    SetMultimap<Integer, ScheduledSplit> splitsByPartition = HashMultimap.create();
    splits.forEach(split -> splitsByPartition.put(
            splitBucketFunction.applyAsInt(split.getSplit().getConnectorSplit()),
            split));
    return splitsByPartition;
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit)

Example 5 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

In the class PrestoSparkPartitionedSplitAssigner, the method getNextBatch:

/**
 * Pulls splits from the split source until {@code maxBatchSize} splits have
 * been collected or the source reports that it is done, then assigns the
 * collected splits to partitions.
 *
 * @return the partition assignment for this batch, or an empty {@code Optional}
 *         if the split source was already finished when this method was called
 */
@Override
public Optional<SetMultimap<Integer, ScheduledSplit>> getNextBatch() {
    if (splitSource.isFinished()) {
        return Optional.empty();
    }

    List<ScheduledSplit> collected = new ArrayList<>();
    // Nothing has been collected yet, so the whole batch capacity is still open
    int remaining = maxBatchSize;
    while (remaining > 0) {
        // A single request to the source asks for at most 1000 splits
        SplitBatch fetched = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), min(remaining, 1000)));
        for (Split split : fetched.getSplits()) {
            collected.add(new ScheduledSplit(sequenceId++, tableScanNodeId, split));
        }
        boolean sourceDrained = fetched.isLastBatch() || splitSource.isFinished();
        if (sourceDrained) {
            break;
        }
        remaining = maxBatchSize - collected.size();
    }
    return Optional.of(assignSplitsToTasks(collected));
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) ArrayList(java.util.ArrayList) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) Split(com.facebook.presto.metadata.Split) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch)

Aggregations

ScheduledSplit (com.facebook.presto.execution.ScheduledSplit): 21; TaskSource (com.facebook.presto.execution.TaskSource): 14; PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId): 14; Split (com.facebook.presto.metadata.Split): 8; ArrayList (java.util.ArrayList): 8; Duration (io.airlift.units.Duration): 5; Optional (java.util.Optional): 5; Type (com.facebook.presto.common.type.Type): 4; Lifespan (com.facebook.presto.execution.Lifespan): 4; Driver (com.facebook.presto.operator.Driver): 4; ConnectorSplit (com.facebook.presto.spi.ConnectorSplit): 4; FixedPageSource (com.facebook.presto.spi.FixedPageSource): 4; Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 4; ImmutableList (com.google.common.collect.ImmutableList): 4; List (java.util.List): 4; Map (java.util.Map): 4; Objects.requireNonNull (java.util.Objects.requireNonNull): 4; DriverFactory (com.facebook.presto.operator.DriverFactory): 3; SerializedPrestoSparkTaskSource (com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskSource): 3; ConnectorTransactionHandle (com.facebook.presto.spi.connector.ConnectorTransactionHandle): 3