Examples with Split - io.prestosql.metadata.Split

Example 26 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From class NodeScheduler, method selectDistributionNodes:

/**
 * Places every split on the node dictated by the bucket-to-node map, skipping splits whose
 * target node currently has no room.
 *
 * <p>A node is treated as full only when its total split count has reached
 * {@code maxSplitsPerNode} and its queued split count for the stage has reached
 * {@code maxPendingSplitsPerTask}. Splits aimed at a full node are left unassigned and the
 * node is recorded as blocked.
 *
 * @param nodeMap node map handed to the assignment statistics
 * @param nodeTaskMap node/task map handed to the assignment statistics
 * @param maxSplitsPerNode limit compared against a node's total split count
 * @param maxPendingSplitsPerTask limit compared against a node's queued split count for the stage
 * @param splits splits to place
 * @param existingTasks tasks already running for the stage
 * @param bucketNodeMap supplies the node each split must run on
 * @return the assignments made, plus a future built from the blocked nodes
 */
public static SplitPlacementResult selectDistributionNodes(NodeMap nodeMap, NodeTaskMap nodeTaskMap, int maxSplitsPerNode, int maxPendingSplitsPerTask, Set<Split> splits, List<RemoteTask> existingTasks, BucketNodeMap bucketNodeMap) {
    Multimap<InternalNode, Split> placed = HashMultimap.create();
    NodeAssignmentStats stats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    Set<InternalNode> saturatedNodes = new HashSet<>();

    for (Split candidate : splits) {
        // The bucket-to-node map leaves no freedom in choosing the target node.
        InternalNode target = bucketNodeMap.getAssignedNode(candidate).get();

        // Both limits must be exhausted before the node counts as full; holding the split back
        // pushes back on split scheduling.
        if (stats.getTotalSplitCount(target) >= maxSplitsPerNode && stats.getQueuedSplitCountForStage(target) >= maxPendingSplitsPerTask) {
            saturatedNodes.add(target);
            continue;
        }

        placed.put(target, candidate);
        stats.addAssignedSplit(target);
    }

    return new SplitPlacementResult(
            toWhenHasSplitQueueSpaceFuture(saturatedNodes, existingTasks, calculateLowWatermark(maxPendingSplitsPerTask)),
            ImmutableMultimap.copyOf(placed));
}

Also used : InternalNode(io.prestosql.metadata.InternalNode) Split(io.prestosql.metadata.Split) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 27 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From class SimpleFixedNodeSelector, method computeAssignments:

/**
 * Assigns splits of a consumer stage, round-robin, to the nodes on which its feeder has been
 * scheduled.
 *
 * <p>Nothing is assigned while the feeder identified by the stage fragment's feeder CTE parent id
 * has no entry in {@code feederScheduledNodes}. Otherwise at most
 * (feeder split count - consumed split count) splits are taken from {@code splits} and spread
 * over the candidate nodes. The candidates are all consumed nodes when the feeder and consumer
 * node sets are equal; otherwise they are the feeder nodes not yet recorded in
 * {@code consumedNodes}, which are recorded as a side effect.
 *
 * @param splits splits available for scheduling in this cycle
 * @param existingTasks tasks handed to the assignment statistics
 * @param stage the stage being scheduled; must be present
 * @return the assignments made, with an already-completed blocked future
 * @throws PrestoException with {@code GENERIC_INTERNAL_ERROR} when {@code stage} is empty
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    List<InternalNode> candidateNodes = new ArrayList<>();
    if (!stage.isPresent()) {
        log.error("Cant schedule as stage missing");
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "stage is empty");
    }
    PlanNodeId planNodeId = stage.get().getFragment().getFeederCTEParentId().get();
    // if still feeder has not been scheduled then no point in scheduling this also
    if (!feederScheduledNodes.containsKey(planNodeId)) {
        return new SplitPlacementResult(immediateFuture(null), assignment);
    }
    // Find max number of splits consumer can schedule in current cycle.
    int maxSplitsToSchedule = feederScheduledNodes.get(planNodeId).getSplitCount() - consumedNodes.getSplitCount();
    // find list of nodes where still consumer has not been scheduled.
    if (feederScheduledNodes.get(planNodeId).getAssignedNodes().equals(consumedNodes.getAssignedNodes())) {
        candidateNodes = new ArrayList<>(consumedNodes.getAssignedNodes());
    } else {
        for (InternalNode node : feederScheduledNodes.get(planNodeId).getAssignedNodes()) {
            if (!consumedNodes.getAssignedNodes().contains(node)) {
                candidateNodes.add(node);
                consumedNodes.getAssignedNodes().add(node);
            }
        }
    }
    // schedule derived number of splits on derived list of nodes.
    // It is expected that splits count should be at-least equal to number of nodes so that each node gets at-least
    // one split.
    int index = 0;
    int totalNodes = candidateNodes.size();
    // With no candidate node, the round-robin below would compute "index % 0" and fail with an
    // ArithmeticException as soon as one split is iterated. Leave the splits unassigned and the
    // consumed split count untouched instead, exactly like the "feeder not scheduled" case above.
    // The condition matches only the path that used to throw.
    if (totalNodes == 0 && maxSplitsToSchedule > 0 && !splits.isEmpty()) {
        return new SplitPlacementResult(immediateFuture(null), assignment);
    }
    for (Split split : Iterables.limit(splits, maxSplitsToSchedule)) {
        InternalNode chosenNode = candidateNodes.get(index % totalNodes);
        assignment.put(chosenNode, split);
        assignmentStats.addAssignedSplit(chosenNode);
        index++;
    }
    // The consumed count advances by the full budget, regardless of how many splits were iterated.
    consumedNodes.updateSplitCount(maxSplitsToSchedule);
    return new SplitPlacementResult(immediateFuture(null), assignment);
}

Also used : PlanNodeId(io.prestosql.spi.plan.PlanNodeId) ArrayList(java.util.ArrayList) PrestoException(io.prestosql.spi.PrestoException) InternalNode(io.prestosql.metadata.InternalNode) Split(io.prestosql.metadata.Split)

Example 28 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From class IndexLoader, method streamIndexDataForSingleKey:

/**
 * Builds streaming indexed data for the key taken from region (0, 1) of the request's page.
 *
 * <p>A streaming driver is created that writes into a fresh page buffer, and it is given a
 * single index split wrapping the key as its only source.
 *
 * @param updateRequest request whose page supplies the lookup key
 * @return streaming indexed data backed by the new page buffer and driver
 */
public IndexedData streamIndexDataForSingleKey(UpdateRequest updateRequest) {
    Page singleKey = updateRequest.getPage().getRegion(0, 1);

    // Driver that streams index results into the buffer.
    PageBuffer outputBuffer = new PageBuffer(100);
    DriverFactory streamingFactory = indexBuildDriverFactoryProvider.createStreaming(outputBuffer, singleKey);
    Driver streamingDriver = streamingFactory.createDriver(pipelineContext.addDriverContext());

    // Wrap the key as the one and only split for the driver's source node.
    PageRecordSet keyRecords = new PageRecordSet(keyTypes, singleKey);
    PlanNodeId sourceId = streamingFactory.getSourceId().get();
    IndexSplit indexSplit = new IndexSplit(keyRecords);
    Split keySplit = new Split(INDEX_CONNECTOR_ID, indexSplit, Lifespan.taskWide());
    ScheduledSplit scheduledKeySplit = new ScheduledSplit(0, sourceId, keySplit);
    TaskSource keyTaskSource = new TaskSource(sourceId, ImmutableSet.of(scheduledKeySplit), true);
    streamingDriver.updateSource(keyTaskSource);

    return new StreamingIndexedData(outputTypes, keyTypes, singleKey, outputBuffer, streamingDriver);
}

Also used : PlanNodeId(io.prestosql.spi.plan.PlanNodeId) ScheduledSplit(io.prestosql.execution.ScheduledSplit) DriverFactory(io.prestosql.operator.DriverFactory) Driver(io.prestosql.operator.Driver) Page(io.prestosql.spi.Page) Split(io.prestosql.metadata.Split) ScheduledSplit(io.prestosql.execution.ScheduledSplit) TaskSource(io.prestosql.execution.TaskSource)

Example 29 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From class ExtractSpatialJoins, method loadKdbTree:

/**
 * Loads the KDB tree stored as a JSON string in a single-row, single-visible-column table.
 *
 * <p>The table name is resolved against the session's catalog and schema. Every split of the
 * table is read, and the first position of the first block of each non-empty page is decoded as
 * the tree's JSON. The table's shape (one visible column, exactly one row, valid JSON) is
 * validated through {@code checkSpatialPartitioningTable}.
 *
 * @param tableName name of the spatial partitioning table
 * @param session session supplying the catalog and schema; both must be set
 * @param metadata used to resolve the table handle and its columns
 * @param splitManager used to enumerate the table's splits
 * @param pageSourceManager used to read pages from each split
 * @param nodeId plan node id passed through to the split manager
 * @return the KDB tree parsed from the table's only row
 * @throws PrestoException with {@code INVALID_SPATIAL_PARTITIONING} when the table is not found
 * @throws UncheckedIOException when closing a page source fails with an {@code IOException}
 */
private static KdbTree loadKdbTree(String tableName, Session session, Metadata metadata, SplitManager splitManager, PageSourceManager pageSourceManager, PlanNodeId nodeId) {
    QualifiedObjectName name = toQualifiedObjectName(tableName, session.getCatalog().get(), session.getSchema().get());
    TableHandle tableHandle = metadata.getTableHandle(session, name).orElseThrow(() -> new PrestoException(INVALID_SPATIAL_PARTITIONING, format("Table not found: %s", name)));
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    List<ColumnHandle> visibleColumnHandles = columnHandles.values().stream().filter(handle -> !metadata.getColumnMetadata(session, tableHandle, handle).isHidden()).collect(toImmutableList());
    // Report the number of visible columns: that is what the condition tests. Reporting the total
    // (hidden columns included) could produce messages such as "Expected single column ... but found 1 columns".
    checkSpatialPartitioningTable(visibleColumnHandles.size() == 1, "Expected single column for table %s, but found %s columns", name, visibleColumnHandles.size());
    ColumnHandle kdbTreeColumn = Iterables.getOnlyElement(visibleColumnHandles);
    Optional<KdbTree> kdbTree = Optional.empty();
    try (SplitSource splitSource = splitManager.getSplits(session, tableHandle, UNGROUPED_SCHEDULING, null, Optional.empty(), Collections.emptyMap(), ImmutableSet.of(), false, nodeId)) {
        // NOTE(review): if the thread is interrupted the loop simply stops, and the final check
        // below may then report "got none" even though the table was not fully read.
        while (!Thread.currentThread().isInterrupted()) {
            SplitBatch splitBatch = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), 1000));
            List<Split> splits = splitBatch.getSplits();
            for (Split split : splits) {
                try (ConnectorPageSource pageSource = pageSourceManager.createPageSource(session, split, tableHandle, ImmutableList.of(kdbTreeColumn), Optional.empty())) {
                    do {
                        // Wait until the page source is no longer blocked before asking for a page.
                        getFutureValue(pageSource.isBlocked());
                        Page page = pageSource.getNextPage();
                        if (page != null && page.getPositionCount() > 0) {
                            // A tree found earlier means this page carries a second row.
                            checkSpatialPartitioningTable(!kdbTree.isPresent(), "Expected exactly one row for table %s, but found more", name);
                            checkSpatialPartitioningTable(page.getPositionCount() == 1, "Expected exactly one row for table %s, but found %s rows", name, page.getPositionCount());
                            String kdbTreeJson = VARCHAR.getSlice(page.getBlock(0), 0).toStringUtf8();
                            try {
                                kdbTree = Optional.of(KdbTreeUtils.fromJson(kdbTreeJson));
                            } catch (IllegalArgumentException e) {
                                checkSpatialPartitioningTable(false, "Invalid JSON string for KDB tree: %s", e.getMessage());
                            }
                        }
                    } while (!pageSource.isFinished());
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            if (splitBatch.isLastBatch()) {
                break;
            }
        }
    }
    checkSpatialPartitioningTable(kdbTree.isPresent(), "Expected exactly one row for table %s, but got none", name);
    return kdbTree.get();
}

Also used : ConstantExpression(io.prestosql.spi.relation.ConstantExpression) SymbolsExtractor.extractUnique(io.prestosql.sql.planner.SymbolsExtractor.extractUnique) SystemSessionProperties.getSpatialPartitioningTableName(io.prestosql.SystemSessionProperties.getSpatialPartitioningTableName) INVALID_SPATIAL_PARTITIONING(io.prestosql.spi.StandardErrorCode.INVALID_SPATIAL_PARTITIONING) TypeProvider(io.prestosql.sql.planner.TypeProvider) Result(io.prestosql.sql.planner.iterative.Rule.Result) KdbTree(io.prestosql.geospatial.KdbTree) TypeAnalyzer(io.prestosql.sql.planner.TypeAnalyzer) CallExpression(io.prestosql.spi.relation.CallExpression) TypeSignature.parseTypeSignature(io.prestosql.spi.type.TypeSignature.parseTypeSignature) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) KdbTreeUtils(io.prestosql.geospatial.KdbTreeUtils) Capture.newCapture(io.prestosql.matching.Capture.newCapture) FilterNode(io.prestosql.spi.plan.FilterNode) OperatorType(io.prestosql.spi.function.OperatorType) Map(java.util.Map) FunctionMetadata(io.prestosql.spi.function.FunctionMetadata) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Type(io.prestosql.spi.type.Type) RowExpressionNodeInliner.replaceExpression(io.prestosql.expressions.RowExpressionNodeInliner.replaceExpression) Splitter(com.google.common.base.Splitter) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) UNGROUPED_SCHEDULING(io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ImmutableMap(com.google.common.collect.ImmutableMap) CastType(io.prestosql.metadata.CastType) ArrayType(io.prestosql.spi.type.ArrayType) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) 
SpatialJoinUtils.extractSupportedSpatialComparisons(io.prestosql.util.SpatialJoinUtils.extractSupportedSpatialComparisons) PlanNode(io.prestosql.spi.plan.PlanNode) ProjectNode(io.prestosql.spi.plan.ProjectNode) Metadata(io.prestosql.metadata.Metadata) String.format(java.lang.String.format) FunctionHandle(io.prestosql.spi.function.FunctionHandle) UncheckedIOException(java.io.UncheckedIOException) Captures(io.prestosql.matching.Captures) SpatialJoinNode(io.prestosql.sql.planner.plan.SpatialJoinNode) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Capture(io.prestosql.matching.Capture) INNER(io.prestosql.spi.plan.JoinNode.Type.INNER) Optional(java.util.Optional) TypeSignature(io.prestosql.spi.type.TypeSignature) SystemSessionProperties.isSpatialJoinEnabled(io.prestosql.SystemSessionProperties.isSpatialJoinEnabled) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Iterables(com.google.common.collect.Iterables) Patterns.source(io.prestosql.sql.planner.plan.Patterns.source) Patterns.join(io.prestosql.sql.planner.plan.Patterns.join) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) Pattern(io.prestosql.matching.Pattern) Split(io.prestosql.metadata.Split) TableHandle(io.prestosql.spi.metadata.TableHandle) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) UnnestNode(io.prestosql.sql.planner.plan.UnnestNode) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Session(io.prestosql.Session) SpatialJoinUtils.extractSupportedSpatialFunctions(io.prestosql.util.SpatialJoinUtils.extractSupportedSpatialFunctions) PageSourceManager(io.prestosql.split.PageSourceManager) SplitSource(io.prestosql.split.SplitSource) SpatialJoinUtils(io.prestosql.util.SpatialJoinUtils) JoinNode(io.prestosql.spi.plan.JoinNode) 
Lifespan(io.prestosql.execution.Lifespan) Symbol(io.prestosql.spi.plan.Symbol) SpatialJoinUtils.getFlippedFunctionHandle(io.prestosql.util.SpatialJoinUtils.getFlippedFunctionHandle) TypeSignatureProvider.fromTypes(io.prestosql.sql.analyzer.TypeSignatureProvider.fromTypes) SplitBatch(io.prestosql.split.SplitSource.SplitBatch) Assignments(io.prestosql.spi.plan.Assignments) Rule(io.prestosql.sql.planner.iterative.Rule) Patterns.filter(io.prestosql.sql.planner.plan.Patterns.filter) Context(io.prestosql.sql.planner.iterative.Rule.Context) Page(io.prestosql.spi.Page) IOException(java.io.IOException) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) Expressions(io.prestosql.sql.relational.Expressions) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) SplitManager(io.prestosql.split.SplitManager) RowExpression(io.prestosql.spi.relation.RowExpression) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) LEFT(io.prestosql.spi.plan.JoinNode.Type.LEFT) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) KdbTree(io.prestosql.geospatial.KdbTree) PrestoException(io.prestosql.spi.PrestoException) Page(io.prestosql.spi.Page) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) SplitBatch(io.prestosql.split.SplitSource.SplitBatch) TableHandle(io.prestosql.spi.metadata.TableHandle) SplitSource(io.prestosql.split.SplitSource) Split(io.prestosql.metadata.Split)

Example 30 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

From class ConnectorAwareSplitSource, method groupSmallSplits:

/**
 * Delegates small-split grouping to the underlying source and re-wraps the outcome.
 *
 * <p>The connector splits are unwrapped from {@code pendingSplits}, passed to the source's
 * {@code groupSmallSplits}, and each connector split it returns is wrapped in a new
 * {@link Split} carrying this source's catalog name and the given lifespan.
 *
 * @param pendingSplits splits whose connector splits are offered for grouping
 * @param lifespan lifespan attached to every returned split
 * @param maxGroupSize limit forwarded unchanged to the underlying source
 * @return immutable list of the re-wrapped splits, in the order the source returned them
 */
public List<Split> groupSmallSplits(List<Split> pendingSplits, Lifespan lifespan, int maxGroupSize) {
    List<ConnectorSplit> unwrapped = new ArrayList<>(pendingSplits.size());
    pendingSplits.forEach(pending -> unwrapped.add(pending.getConnectorSplit()));

    List<ConnectorSplit> grouped = source.groupSmallSplits(unwrapped, maxGroupSize);

    ImmutableList.Builder<Split> rewrapped = ImmutableList.builder();
    grouped.forEach(groupedSplit -> rewrapped.add(new Split(catalogName, groupedSplit, lifespan)));
    return rewrapped.build();
}

Also used : ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit)

Aggregations

Split (io.prestosql.metadata.Split)69 Test (org.testng.annotations.Test)35 InternalNode (io.prestosql.metadata.InternalNode)34 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)33 TestingSplit (io.prestosql.testing.TestingSplit)32 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)31 MockSplit (io.prestosql.MockSplit)20 CatalogName (io.prestosql.spi.connector.CatalogName)18 ArrayList (java.util.ArrayList)18 ImmutableList (com.google.common.collect.ImmutableList)17 UUID (java.util.UUID)17 HashMap (java.util.HashMap)15 MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory)14 NodeTaskMap (io.prestosql.execution.NodeTaskMap)13 RemoteTask (io.prestosql.execution.RemoteTask)13 QualifiedObjectName (io.prestosql.spi.connector.QualifiedObjectName)13 SqlStageExecution (io.prestosql.execution.SqlStageExecution)12 HashSet (java.util.HashSet)12 Optional (java.util.Optional)12 Lifespan (io.prestosql.execution.Lifespan)11