Use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
Class PrestoSparkSourceDistributionSplitAssigner, method getNextBatch.
/**
 * Pulls the next batch of splits from the split source and assigns them to partitions.
 *
 * <p>Splits are requested in chunks of at most 1000 until either {@code maxBatchSize}
 * splits have been collected or the source reports its last batch / that it is finished.
 * Every split is wrapped in a {@link ScheduledSplit} carrying the next sequence id.
 *
 * @return the partition-id to splits assignment for this batch, or {@link Optional#empty()}
 *         when the split source is already finished on entry
 */
@Override
public Optional<SetMultimap<Integer, ScheduledSplit>> getNextBatch() {
    if (splitSource.isFinished()) {
        return Optional.empty();
    }

    List<ScheduledSplit> collected = new ArrayList<>();
    // Keep fetching while the batch still has room; the capacity is recomputed after every fetch.
    for (int capacity = maxBatchSize - collected.size(); capacity > 0; capacity = maxBatchSize - collected.size()) {
        SplitSource.SplitBatch fetched = getFutureValue(
                splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), min(capacity, 1000)));
        for (Split split : fetched.getSplits()) {
            collected.add(new ScheduledSplit(sequenceId++, tableScanNodeId, split));
        }
        // Nothing more to read: stop even if the batch is not full.
        if (fetched.isLastBatch() || splitSource.isFinished()) {
            break;
        }
    }
    return Optional.of(assignSplitsToTasks(collected));
}
Use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
Class PrestoSparkSourceDistributionSplitAssigner, method assignSplitsToTasks.
/**
 * Distributes the given splits across partitions and returns the resulting
 * partition-id to splits mapping.
 *
 * <p>If any split does not report its size, splits are spread round-robin over
 * {@code initialPartitionCount} partitions by their position in {@code splits}.
 * Otherwise {@code splits} is sorted in place by descending size and each split is
 * placed either into the partition at the head of {@code queue} or into a newly
 * created partition, depending on whether partition-count auto tuning is enabled.
 *
 * <p>NOTE(review): {@code queue} presumably orders partitions by assigned bytes,
 * smallest first - confirm against the field declaration.
 *
 * @param splits splits to assign; the list is re-ordered by this method
 * @return a mutable multimap from partition id to the splits assigned to it
 */
private SetMultimap<Integer, ScheduledSplit> assignSplitsToTasks(List<ScheduledSplit> splits) {
    // expected to be mutable for efficiency reasons
    HashMultimap<Integer, ScheduledSplit> assignment = HashMultimap.create();

    boolean anySizeUnknown = splits.stream()
            .anyMatch(candidate -> !candidate.getSplit().getConnectorSplit().getSplitSizeInBytes().isPresent());
    if (anySizeUnknown) {
        // Without sizes for every split, fall back to positional round-robin.
        int position = 0;
        for (ScheduledSplit scheduledSplit : splits) {
            assignment.put(position % initialPartitionCount, scheduledSplit);
            position++;
        }
        return assignment;
    }

    // Largest splits first.
    splits.sort((ScheduledSplit left, ScheduledSplit right) -> {
        long leftBytes = left.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();
        long rightBytes = right.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();
        return Long.compare(rightBytes, leftBytes);
    });

    for (ScheduledSplit scheduledSplit : splits) {
        long bytes = scheduledSplit.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();

        boolean reuseExistingPartition;
        if (partitionCountAutoTuneEnabled) {
            // Reuse when the minimum partition count is reached and the head partition still has room,
            // or when no further partitions may be created.
            reuseExistingPartition = (partitionCount >= minSparkInputPartitionCountForAutoTune
                    && queue.peek().getSplitsInBytes() + bytes <= maxSplitsSizePerPartitionInBytes)
                    || partitionCount == maxSparkInputPartitionCountForAutoTune;
        }
        else {
            // partition count is fixed
            reuseExistingPartition = partitionCount >= initialPartitionCount;
        }

        int partitionId;
        Partition target;
        if (reuseExistingPartition) {
            target = queue.poll();
            partitionId = target.getPartitionId();
        }
        else {
            partitionId = partitionCount++;
            target = new Partition(partitionId);
        }
        target.assignSplitWithSize(bytes);
        // (Re-)insert so the queue reflects the partition's updated size.
        queue.add(target);
        assignment.put(partitionId, scheduledSplit);
    }
    return assignment;
}
Use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
Class TestPrestoSparkSourceDistributionSplitAssigner, method assertSplitAssignment.
/**
 * Verifies that a {@code PrestoSparkSourceDistributionSplitAssigner} configured with the given
 * parameters produces {@code expectedAssignment} for {@code splitSizes}.
 *
 * <p>The check is performed twice: once with all splits requested in a single batch, and then
 * repeatedly with batch sizes 1, 2, 4, ... (while smaller than the number of splits), feeding
 * the splits pre-sorted in descending order and accumulating the per-batch assignments.
 */
private static void assertSplitAssignment(boolean autoTuneEnabled, DataSize maxSplitsDataSizePerSparkPartition, int initialPartitionCount, int minSparkInputPartitionCountForAutoTune, int maxSparkInputPartitionCountForAutoTune, List<Long> splitSizes, Map<Integer, List<Long>> expectedAssignment) {
    long maxBytesPerPartition = maxSplitsDataSizePerSparkPartition.toBytes();

    // assign splits in one shot
    PrestoSparkSplitAssigner oneShotAssigner = new PrestoSparkSourceDistributionSplitAssigner(
            new PlanNodeId("test"),
            createSplitSource(splitSizes),
            Integer.MAX_VALUE,
            maxBytesPerPartition,
            initialPartitionCount,
            autoTuneEnabled,
            minSparkInputPartitionCountForAutoTune,
            maxSparkInputPartitionCountForAutoTune);
    Optional<SetMultimap<Integer, ScheduledSplit>> oneShotResult = oneShotAssigner.getNextBatch();
    if (splitSizes.isEmpty()) {
        assertThat(oneShotResult).isNotPresent();
    }
    else {
        assertThat(oneShotResult).isPresent();
        assertAssignedSplits(oneShotResult.get(), expectedAssignment);
    }

    // assign splits iteratively
    for (int batchSize = 1; batchSize < splitSizes.size(); batchSize *= 2) {
        // sort splits to make assignment match the assignment done in one shot
        List<Long> descendingSizes = new ArrayList<>(splitSizes);
        descendingSizes.sort(Comparator.reverseOrder());

        PrestoSparkSplitAssigner batchedAssigner = new PrestoSparkSourceDistributionSplitAssigner(
                new PlanNodeId("test"),
                createSplitSource(descendingSizes),
                batchSize,
                maxBytesPerPartition,
                initialPartitionCount,
                autoTuneEnabled,
                minSparkInputPartitionCountForAutoTune,
                maxSparkInputPartitionCountForAutoTune);

        // Drain the assigner batch by batch until it reports no more batches.
        HashMultimap<Integer, ScheduledSplit> accumulated = HashMultimap.create();
        for (Optional<SetMultimap<Integer, ScheduledSplit>> batch = batchedAssigner.getNextBatch();
                batch.isPresent();
                batch = batchedAssigner.getNextBatch()) {
            accumulated.putAll(batch.get());
        }
        assertAssignedSplits(accumulated, expectedAssignment);
    }
}
Use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
Class HttpRemoteTask, method getSource.
/**
 * Builds the {@link TaskSource} update for the given plan node from the currently pending state.
 *
 * @param planNodeId the plan node whose pending splits and no-more-splits markers are read
 * @return a {@code TaskSource} when there are pending splits, pending per-lifespan
 *         no-more-splits markers, or the node's {@code noMoreSplits} entry is {@code TRUE};
 *         otherwise {@code null}
 */
private synchronized TaskSource getSource(PlanNodeId planNodeId) {
    Set<ScheduledSplit> splitsForNode = pendingSplits.get(planNodeId);
    // TRUE entry => the no-more-splits signal is itself a reason to emit an update.
    boolean noMoreSplitsPending = Boolean.TRUE.equals(noMoreSplits.get(planNodeId));
    // Any entry (regardless of value) is reported in the TaskSource as "no more splits".
    boolean noMoreSplitsRecorded = noMoreSplits.containsKey(planNodeId);
    Set<Lifespan> finishedLifespans = pendingNoMoreSplitsForLifespan.get(planNodeId);

    boolean nothingToSend = splitsForNode.isEmpty() && finishedLifespans.isEmpty() && !noMoreSplitsPending;
    if (nothingToSend) {
        return null;
    }
    return new TaskSource(planNodeId, splitsForNode, finishedLifespans, noMoreSplitsRecorded);
}
Use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.
Class HttpRemoteTask, method addSplits.
/**
 * Queues the given splits as pending for their source plan nodes and schedules a task update.
 *
 * <p>Does nothing when the task is already in a done state. For table scan sources the pending
 * split count and pending split weight are increased by the splits that were newly added.
 *
 * @param splitsBySource splits to add, keyed by the plan node they belong to; must not be null
 * @throws NullPointerException if {@code splitsBySource} is null
 * @throws IllegalStateException if {@code noMoreSplits} has already been set for one of the sources
 */
@Override
public synchronized void addSplits(Multimap<PlanNodeId, Split> splitsBySource) {
    requireNonNull(splitsBySource, "splitsBySource is null");

    // only add pending split if not done
    if (getTaskStatus().getState().isDone()) {
        return;
    }

    boolean anySourceProcessed = false;
    for (Entry<PlanNodeId, Collection<Split>> sourceEntry : splitsBySource.asMap().entrySet()) {
        PlanNodeId sourceId = sourceEntry.getKey();
        boolean tableScanSource = tableScanPlanNodeIds.contains(sourceId);
        checkState(!noMoreSplits.containsKey(sourceId), "noMoreSplits has already been set for %s", sourceId);

        int newlyAddedCount = 0;
        long newlyAddedWeight = 0;
        for (Split split : sourceEntry.getValue()) {
            boolean inserted = pendingSplits.put(sourceId, new ScheduledSplit(nextSplitId.getAndIncrement(), sourceId, split));
            // Only splits that were actually inserted for a table scan source count towards the stats.
            if (inserted && tableScanSource) {
                newlyAddedCount++;
                newlyAddedWeight = addExact(newlyAddedWeight, split.getSplitWeight().getRawValue());
            }
        }

        if (tableScanSource) {
            pendingSourceSplitCount += newlyAddedCount;
            pendingSourceSplitsWeight = addExact(pendingSourceSplitsWeight, newlyAddedWeight);
            updateTaskStats();
        }
        anySourceProcessed = true;
    }

    updateSplitQueueSpace();

    if (anySourceProcessed) {
        needsUpdate.set(true);
        scheduleUpdate();
    }
}
Aggregations