use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
the class PrestoSparkTaskExecutorFactory method computeAllSplitsSize.
/**
 * Sums the sizes, in bytes, reported by every split of the given task sources.
 *
 * @param taskSources task sources whose splits are inspected
 * @return the total of all reported split sizes; {@link OptionalLong#empty()} as soon as a
 *         split that does not report a size is encountered; zero when there are no splits
 */
private static OptionalLong computeAllSplitsSize(List<TaskSource> taskSources) {
    long totalSizeInBytes = 0;
    for (TaskSource taskSource : taskSources) {
        for (ScheduledSplit scheduledSplit : taskSource.getSplits()) {
            // Read the optional size once so the presence check and the value
            // are taken from the same object rather than from two separate calls.
            OptionalLong splitSizeInBytes = scheduledSplit.getSplit().getConnectorSplit().getSplitSizeInBytes();
            if (!splitSizeInBytes.isPresent()) {
                // A single split of unknown size makes the total unknown.
                return OptionalLong.empty();
            }
            totalSizeInBytes += splitSizeInBytes.getAsLong();
        }
    }
    return OptionalLong.of(totalSizeInBytes);
}
use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
the class PrestoSparkRddFactory method createTaskSourcesRdd.
/**
 * Builds the task source RDD for a plan fragment.
 *
 * <p>For every table scan, all split batches are pulled from a split assigner created for
 * that scan, converted with {@code createTaskSources}, and accumulated in a multimap keyed by
 * an integer id. The accumulated task sources are then arranged into a list of lists, one
 * inner list per partition, and handed to a new {@link PrestoSparkTaskSourceRdd}.
 *
 * @param fragmentId id of the fragment, used only for logging here
 * @param sparkContext Spark context whose underlying {@code sc()} backs the RDD
 * @param session session passed through to the split assigner
 * @param partitioning partitioning handle passed through to the split assigner
 * @param tableScans table scan nodes to collect splits for
 * @param splitSources split source per table scan node id; must contain every scan in {@code tableScans}
 * @param numberOfShufflePartitions when present, the exact number of partitions to emit
 * @return an RDD wrapping the per-partition task source lists
 * @throws NullPointerException if a table scan has no entry in {@code splitSources}
 */
private PrestoSparkTaskSourceRdd createTaskSourcesRdd(PlanFragmentId fragmentId, JavaSparkContext sparkContext, Session session, PartitioningHandle partitioning, List<TableScanNode> tableScans, Map<PlanNodeId, SplitSource> splitSources, Optional<Integer> numberOfShufflePartitions) {
    ListMultimap<Integer, SerializedPrestoSparkTaskSource> taskSourcesMap = ArrayListMultimap.create();

    for (TableScanNode tableScan : tableScans) {
        SplitSource splitSource = requireNonNull(splitSources.get(tableScan.getId()), "split source is missing for table scan node with id: " + tableScan.getId());
        int splitCount = 0;
        try (PrestoSparkSplitAssigner splitAssigner = createSplitAssigner(session, tableScan.getId(), splitSource, partitioning)) {
            // Drain the assigner: an absent batch marks the end of the splits for this scan
            for (Optional<SetMultimap<Integer, ScheduledSplit>> batch = splitAssigner.getNextBatch(); batch.isPresent(); batch = splitAssigner.getNextBatch()) {
                SetMultimap<Integer, ScheduledSplit> batchSplits = batch.get();
                int batchSplitCount = batchSplits.size();
                log.info("Found %s splits for table scan node with id %s", batchSplitCount, tableScan.getId());
                splitCount += batchSplitCount;
                taskSourcesMap.putAll(createTaskSources(tableScan.getId(), batchSplits));
            }
        }
        log.info("Total number of splits for table scan node with id %s: %s", tableScan.getId(), splitCount);
    }

    long serializedSizeInBytes = 0;
    for (SerializedPrestoSparkTaskSource serializedTaskSource : taskSourcesMap.values()) {
        serializedSizeInBytes += serializedTaskSource.getBytes().length;
    }
    log.info("Total serialized size of all task sources for fragment %s: %s", fragmentId, DataSize.succinctBytes(serializedSizeInBytes));

    List<List<SerializedPrestoSparkTaskSource>> taskSourcesByPartitionId = new ArrayList<>();
    if (!numberOfShufflePartitions.isPresent()) {
        // No fixed partition count: one partition per key that actually received task sources
        taskSourcesByPartitionId.addAll(Multimaps.asMap(taskSourcesMap).values());
    }
    else {
        // The value is present when the fragment contains shuffle inputs. Emit exactly one
        // entry for every partition id in [0, partitionCount), in order, so that a partition
        // without task sources still occupies its slot as an empty list.
        int partitionCount = numberOfShufflePartitions.get();
        for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
            // removeAll detaches the entries eagerly so GC can reclaim them, and yields
            // an empty list when nothing was recorded for this partition id
            List<SerializedPrestoSparkTaskSource> partitionTaskSources = taskSourcesMap.removeAll(partitionId);
            taskSourcesByPartitionId.add(requireNonNull(partitionTaskSources, "taskSources is null"));
        }
    }
    return new PrestoSparkTaskSourceRdd(sparkContext.sc(), taskSourcesByPartitionId);
}
use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
the class PrestoSparkTaskExecution method scheduleTableScanSource.
/**
 * Schedules the given table scan splits on the supplied factory.
 *
 * <p>The factory is first told how many splits were added and their summed raw weight. One
 * driver runner is then created per split, the runners are enqueued together, and finally
 * {@code noMoreDriverRunner()} is called on the factory.
 *
 * @param factory factory that creates a driver runner for each split
 * @param splits splits to schedule
 */
private synchronized void scheduleTableScanSource(DriverSplitRunnerFactory factory, List<ScheduledSplit> splits) {
    factory.splitsAdded(
            splits.size(),
            SplitWeight.rawValueSum(splits, split -> split.getSplit().getSplitWeight()));

    // Every split gets its own freshly created driver runner
    ImmutableList.Builder<DriverSplitRunner> driverRunners = ImmutableList.builder();
    splits.forEach(split -> driverRunners.add(factory.createDriverRunner(split)));

    enqueueDriverSplitRunner(false, driverRunners.build());
    factory.noMoreDriverRunner();
}
use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
the class PrestoSparkPartitionedSplitAssigner method assignSplitsToTasks.
/**
 * Groups the given splits by the id that {@code splitBucketFunction} computes from each
 * split's connector split.
 *
 * @param splits splits to group
 * @return a mutable multimap from computed id to the splits that mapped to it
 */
private SetMultimap<Integer, ScheduledSplit> assignSplitsToTasks(List<ScheduledSplit> splits) {
    // Deliberately returned as a mutable multimap for efficiency reasons
    SetMultimap<Integer, ScheduledSplit> splitsByPartition = HashMultimap.create();
    splits.forEach(split -> splitsByPartition.put(
            splitBucketFunction.applyAsInt(split.getSplit().getConnectorSplit()),
            split));
    return splitsByPartition;
}
use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
the class PrestoSparkPartitionedSplitAssigner method getNextBatch.
/**
 * Fetches the next batch of splits and groups them with {@code assignSplitsToTasks}.
 *
 * <p>Splits are requested from the split source until {@code maxBatchSize} splits have been
 * collected, the source reports its last batch, or the source is finished. Each single
 * request asks for at most 1000 splits. Every fetched split is wrapped in a
 * {@link ScheduledSplit} carrying the next sequence id.
 *
 * @return the grouped splits, or {@link Optional#empty()} if the split source was already
 *         finished when this method was called
 */
@Override
public Optional<SetMultimap<Integer, ScheduledSplit>> getNextBatch() {
    if (splitSource.isFinished()) {
        return Optional.empty();
    }

    List<ScheduledSplit> collected = new ArrayList<>();
    boolean exhausted = false;
    while (!exhausted && collected.size() < maxBatchSize) {
        int requestSize = min(maxBatchSize - collected.size(), 1000);
        SplitBatch fetched = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), requestSize));
        for (Split split : fetched.getSplits()) {
            collected.add(new ScheduledSplit(sequenceId++, tableScanNodeId, split));
        }
        // Stop once the source signals that there is nothing more to fetch
        exhausted = fetched.isLastBatch() || splitSource.isFinished();
    }
    return Optional.of(assignSplitsToTasks(collected));
}
Aggregations