Search in sources :

Example 6 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

the class PrestoSparkSourceDistributionSplitAssigner method getNextBatch.

/**
 * Fetches the next group of splits from the split source and assigns them to partitions.
 *
 * <p>Splits are requested from the source in chunks of at most 1000 until either
 * {@code maxBatchSize} splits have been collected, the source reports its last batch,
 * or the source reports that it is finished.
 *
 * @return {@link Optional#empty()} if the split source is already finished when this method is
 *         entered; otherwise the result of {@code assignSplitsToTasks} for the collected splits
 */
@Override
public Optional<SetMultimap<Integer, ScheduledSplit>> getNextBatch() {
    if (splitSource.isFinished()) {
        return Optional.empty();
    }
    List<ScheduledSplit> collected = new ArrayList<>();
    boolean sourceDrained = false;
    while (!sourceDrained && collected.size() < maxBatchSize) {
        // never ask for more than what still fits into this batch, and never more than 1000 at once
        int capacityLeft = maxBatchSize - collected.size();
        SplitSource.SplitBatch fetched = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), min(capacityLeft, 1000)));
        for (Split split : fetched.getSplits()) {
            // every split gets the next sequence id and is tagged with the table scan node
            collected.add(new ScheduledSplit(sequenceId++, tableScanNodeId, split));
        }
        sourceDrained = fetched.isLastBatch() || splitSource.isFinished();
    }
    return Optional.of(assignSplitsToTasks(collected));
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) ArrayList(java.util.ArrayList) SplitSource(com.facebook.presto.split.SplitSource) Split(com.facebook.presto.metadata.Split) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit)

Example 7 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

the class PrestoSparkSourceDistributionSplitAssigner method assignSplitsToTasks.

/**
 * Maps each of the given splits to a partition id.
 *
 * <p>If at least one split does not report its size, splits are spread by their position in
 * {@code splits} modulo {@code initialPartitionCount}. Otherwise {@code splits} is sorted in place
 * by descending size and each split either goes to the partition at the head of {@code queue}
 * or opens a new partition:
 * <ul>
 * <li>with auto-tune enabled, the head partition is reused when the minimum partition count has
 * been reached and the split still fits under {@code maxSplitsSizePerPartitionInBytes}, or when
 * the partition count equals {@code maxSparkInputPartitionCountForAutoTune};</li>
 * <li>with auto-tune disabled, new partitions are opened until {@code initialPartitionCount}
 * exist, after which the head partition is reused.</li>
 * </ul>
 *
 * @param splits the splits to assign; reordered in place when all sizes are known
 * @return the partition id to split assignment for this call
 */
private SetMultimap<Integer, ScheduledSplit> assignSplitsToTasks(List<ScheduledSplit> splits) {
    // expected to be mutable for efficiency reasons
    HashMultimap<Integer, ScheduledSplit> assignment = HashMultimap.create();

    boolean someSizeUnknown = splits.stream()
            .anyMatch(candidate -> !candidate.getSplit().getConnectorSplit().getSplitSizeInBytes().isPresent());
    if (someSizeUnknown) {
        // sizes are unavailable: distribute by position within this call
        int position = 0;
        for (ScheduledSplit split : splits) {
            assignment.put(position % initialPartitionCount, split);
            position++;
        }
        return assignment;
    }

    // largest splits first
    splits.sort((left, right) -> {
        long leftBytes = left.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();
        long rightBytes = right.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();
        return Long.compare(rightBytes, leftBytes);
    });

    boolean autoTune = partitionCountAutoTuneEnabled;
    for (ScheduledSplit split : splits) {
        long splitBytes = split.getSplit().getConnectorSplit().getSplitSizeInBytes().getAsLong();

        boolean reuseQueuedPartition;
        if (autoTune) {
            reuseQueuedPartition = (partitionCount >= minSparkInputPartitionCountForAutoTune && queue.peek().getSplitsInBytes() + splitBytes <= maxSplitsSizePerPartitionInBytes) || partitionCount == maxSparkInputPartitionCountForAutoTune;
        } else {
            // partition count is fixed
            reuseQueuedPartition = partitionCount >= initialPartitionCount;
        }

        int partitionId;
        Partition target;
        if (reuseQueuedPartition) {
            // take the head of the queue; it is re-added below after its size is updated
            target = queue.poll();
            partitionId = target.getPartitionId();
        } else {
            partitionId = partitionCount++;
            target = new Partition(partitionId);
        }
        target.assignSplitWithSize(splitBytes);
        queue.add(target);
        assignment.put(partitionId, split);
    }
    return assignment;
}
Also used : PrestoSparkSessionProperties.getMaxSplitsDataSizePerSparkPartition(com.facebook.presto.spark.PrestoSparkSessionProperties.getMaxSplitsDataSizePerSparkPartition) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit)

Example 8 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

the class TestPrestoSparkSourceDistributionSplitAssigner method assertSplitAssignment.

/**
 * Verifies that the assigner produces {@code expectedAssignment} for {@code splitSizes}, both when
 * all splits are requested in a single batch and when they are requested in batches of
 * 1, 2, 4, ... splits (each batch size strictly smaller than the number of splits).
 *
 * @param autoTuneEnabled passed to the assigner as its auto-tune flag
 * @param maxSplitsDataSizePerSparkPartition per-partition size limit, passed to the assigner in bytes
 * @param initialPartitionCount passed through to the assigner
 * @param minSparkInputPartitionCountForAutoTune passed through to the assigner
 * @param maxSparkInputPartitionCountForAutoTune passed through to the assigner
 * @param splitSizes sizes of the splits served by the test split source
 * @param expectedAssignment expected split sizes per partition id
 */
private static void assertSplitAssignment(boolean autoTuneEnabled, DataSize maxSplitsDataSizePerSparkPartition, int initialPartitionCount, int minSparkInputPartitionCountForAutoTune, int maxSparkInputPartitionCountForAutoTune, List<Long> splitSizes, Map<Integer, List<Long>> expectedAssignment) {
    // assign splits in one shot
    PrestoSparkSplitAssigner oneShotAssigner = new PrestoSparkSourceDistributionSplitAssigner(
            new PlanNodeId("test"),
            createSplitSource(splitSizes),
            Integer.MAX_VALUE,
            maxSplitsDataSizePerSparkPartition.toBytes(),
            initialPartitionCount,
            autoTuneEnabled,
            minSparkInputPartitionCountForAutoTune,
            maxSparkInputPartitionCountForAutoTune);
    Optional<SetMultimap<Integer, ScheduledSplit>> oneShotResult = oneShotAssigner.getNextBatch();
    if (splitSizes.isEmpty()) {
        assertThat(oneShotResult).isNotPresent();
    } else {
        assertThat(oneShotResult).isPresent();
        assertAssignedSplits(oneShotResult.get(), expectedAssignment);
    }

    // assign splits iteratively
    for (int batchSize = 1; batchSize < splitSizes.size(); batchSize *= 2) {
        HashMultimap<Integer, ScheduledSplit> merged = HashMultimap.create();
        // sort splits to make assignment match the assignment done in one shot
        List<Long> descendingSizes = new ArrayList<>(splitSizes);
        descendingSizes.sort(Comparator.reverseOrder());
        PrestoSparkSplitAssigner batchedAssigner = new PrestoSparkSourceDistributionSplitAssigner(
                new PlanNodeId("test"),
                createSplitSource(descendingSizes),
                batchSize,
                maxSplitsDataSizePerSparkPartition.toBytes(),
                initialPartitionCount,
                autoTuneEnabled,
                minSparkInputPartitionCountForAutoTune,
                maxSparkInputPartitionCountForAutoTune);
        // drain the assigner until it signals that there is nothing more to hand out
        for (Optional<SetMultimap<Integer, ScheduledSplit>> batch = batchedAssigner.getNextBatch(); batch.isPresent(); batch = batchedAssigner.getNextBatch()) {
            merged.putAll(batch.get());
        }
        assertAssignedSplits(merged, expectedAssignment);
    }
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Optional(java.util.Optional) ArrayList(java.util.ArrayList) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SetMultimap(com.google.common.collect.SetMultimap) OptionalLong(java.util.OptionalLong)

Example 9 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

the class HttpRemoteTask method getSource.

/**
 * Builds the {@link TaskSource} describing what is currently pending for the given plan node.
 *
 * @param planNodeId the plan node whose pending state is looked up
 * @return a task source carrying the pending splits, the pending no-more-splits lifespans and
 *         whether a no-more-splits entry exists for the node; {@code null} when there are no
 *         pending splits, no pending lifespans, and the node's no-more-splits entry is not
 *         {@code TRUE}
 */
private synchronized TaskSource getSource(PlanNodeId planNodeId) {
    Set<ScheduledSplit> splits = pendingSplits.get(planNodeId);
    // the map value is TRUE only while the flag is still pending; the key's mere presence
    // means the flag was set at some point
    boolean pendingNoMoreSplits = Boolean.TRUE.equals(this.noMoreSplits.get(planNodeId));
    boolean noMoreSplits = this.noMoreSplits.containsKey(planNodeId);
    Set<Lifespan> noMoreSplitsForLifespan = pendingNoMoreSplitsForLifespan.get(planNodeId);

    boolean nothingPending = splits.isEmpty() && noMoreSplitsForLifespan.isEmpty() && !pendingNoMoreSplits;
    if (nothingPending) {
        return null;
    }
    return new TaskSource(planNodeId, splits, noMoreSplitsForLifespan, noMoreSplits);
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Lifespan(com.facebook.presto.execution.Lifespan) TaskSource(com.facebook.presto.execution.TaskSource)

Example 10 with ScheduledSplit

use of com.facebook.presto.execution.ScheduledSplit in project presto by prestodb.

the class HttpRemoteTask method addSplits.

/**
 * Queues the given splits as pending for this task and triggers an update.
 *
 * <p>Does nothing if the task is already in a done state. For sources contained in
 * {@code tableScanPlanNodeIds}, the pending split count and pending split weight are increased
 * by the splits that were actually inserted, and task stats are refreshed.
 *
 * @param splitsBySource the splits to add, keyed by the plan node they belong to
 * @throws NullPointerException if {@code splitsBySource} is null
 * @throws IllegalStateException if no-more-splits has already been set for one of the sources
 * @throws ArithmeticException if accumulating split weights overflows a {@code long}
 */
@Override
public synchronized void addSplits(Multimap<PlanNodeId, Split> splitsBySource) {
    requireNonNull(splitsBySource, "splitsBySource is null");
    // only add pending split if not done
    if (getTaskStatus().getState().isDone()) {
        return;
    }

    boolean sawAnySource = false;
    for (Entry<PlanNodeId, Collection<Split>> sourceEntry : splitsBySource.asMap().entrySet()) {
        PlanNodeId sourceId = sourceEntry.getKey();
        boolean isTableScanSource = tableScanPlanNodeIds.contains(sourceId);
        checkState(!noMoreSplits.containsKey(sourceId), "noMoreSplits has already been set for %s", sourceId);

        int insertedCount = 0;
        long insertedWeight = 0;
        for (Split split : sourceEntry.getValue()) {
            // the split is always offered to pendingSplits; only successful inserts are counted
            boolean inserted = pendingSplits.put(sourceId, new ScheduledSplit(nextSplitId.getAndIncrement(), sourceId, split));
            if (inserted && isTableScanSource) {
                insertedCount++;
                insertedWeight = addExact(insertedWeight, split.getSplitWeight().getRawValue());
            }
        }

        if (isTableScanSource) {
            pendingSourceSplitCount += insertedCount;
            pendingSourceSplitsWeight = addExact(pendingSourceSplitsWeight, insertedWeight);
            updateTaskStats();
        }
        // an update is requested as soon as any source entry was processed
        sawAnySource = true;
    }

    updateSplitQueueSpace();
    if (sawAnySource) {
        this.needsUpdate.set(true);
        scheduleUpdate();
    }
}
Also used : PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Collection(java.util.Collection) Split(com.facebook.presto.metadata.Split) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit)

Aggregations

ScheduledSplit (com.facebook.presto.execution.ScheduledSplit)21 TaskSource (com.facebook.presto.execution.TaskSource)14 PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId)14 Split (com.facebook.presto.metadata.Split)8 ArrayList (java.util.ArrayList)8 Duration (io.airlift.units.Duration)5 Optional (java.util.Optional)5 Type (com.facebook.presto.common.type.Type)4 Lifespan (com.facebook.presto.execution.Lifespan)4 Driver (com.facebook.presto.operator.Driver)4 ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)4 FixedPageSource (com.facebook.presto.spi.FixedPageSource)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)4 ImmutableList (com.google.common.collect.ImmutableList)4 List (java.util.List)4 Map (java.util.Map)4 Objects.requireNonNull (java.util.Objects.requireNonNull)4 DriverFactory (com.facebook.presto.operator.DriverFactory)3 SerializedPrestoSparkTaskSource (com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskSource)3 ConnectorTransactionHandle (com.facebook.presto.spi.connector.ConnectorTransactionHandle)3