Search in sources:

Example 1 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

the class TestScanFilterAndProjectOperator method testRecordCursorYield.

/**
 * Checks that the cursor-based scan path yields once per input row when the projection
 * forces a yield, and produces the complete page only after every row has been processed.
 */
@Test
public void testRecordCursorYield() {
    // the projection forces a yield on every row, so a 15-row table must yield 15 times in total
    int rowCount = 15;
    Page sourcePage = SequencePageBuilder.createSequencePage(ImmutableList.of(BIGINT), rowCount, 0);
    DriverContext context = newDriverContext();

    // register a generic long function whose callback forces a yield each time it is invoked
    Metadata metadata = functionAssertions.getMetadata();
    metadata.getFunctionAndTypeManager().registerBuiltInFunctions(ImmutableList.of(new GenericLongFunction("record_cursor", value -> {
        context.getYieldSignal().forceYieldForTesting();
        return value;
    })));

    // build a single projection that applies the yielding function to column 0
    PageFunctionCompiler pageFunctionCompiler = new PageFunctionCompiler(metadata, 0);
    ExpressionCompiler expressionCompiler = new ExpressionCompiler(metadata, pageFunctionCompiler);
    String functionName = QualifiedObjectName.valueOfDefaultFunction("generic_long_record_cursor").toString();
    BuiltInFunctionHandle functionHandle = new BuiltInFunctionHandle(internalScalarFunction(
            QualifiedObjectName.valueOfDefaultFunction("generic_long_record_cursor"),
            BIGINT.getTypeSignature(),
            ImmutableList.of(BIGINT.getTypeSignature())));
    RowExpression yieldingProjection = call(functionName, functionHandle, BIGINT, field(0, BIGINT));
    List<RowExpression> projections = ImmutableList.of(yieldingProjection);
    Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(Optional.empty(), projections, "key");
    Supplier<PageProcessor> pageProcessor = expressionCompiler.compilePageProcessor(Optional.empty(), projections);

    // the page source wraps the input page in a record set
    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory operatorFactory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0,
            new PlanNodeId("test"),
            new PlanNodeId("0"),
            (session, split, table, columns, dynamicFilter) -> new RecordPageSource(new PageRecordSet(ImmutableList.of(BIGINT), sourcePage)),
            cursorProcessor,
            pageProcessor,
            TEST_TABLE_HANDLE,
            ImmutableList.of(),
            null,
            ImmutableList.of(BIGINT),
            new DataSize(0, BYTE),
            0,
            ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_DEFAULT,
            new UUID(0, 0),
            false,
            Optional.empty(),
            0,
            0);
    SourceOperator scanOperator = operatorFactory.createOperator(context);
    scanOperator.addSplit(new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), Lifespan.taskWide()));
    scanOperator.noMoreSplits();

    // each of the first 15 getOutput() calls returns null because the projection yields
    long yieldDelayNanos = SECONDS.toNanos(1000);
    int pendingYields = rowCount;
    while (pendingYields > 0) {
        context.getYieldSignal().setWithDelay(yieldDelayNanos, context.getYieldExecutor());
        assertNull(scanOperator.getOutput());
        context.getYieldSignal().reset();
        pendingYields--;
    }

    // the 16th call is no longer held back by a yield, so the operator produces its page
    context.getYieldSignal().setWithDelay(yieldDelayNanos, context.getYieldExecutor());
    Page result = scanOperator.getOutput();
    context.getYieldSignal().reset();
    assertNotNull(result);
    assertEquals(toValues(BIGINT, result.getBlock(0)), toValues(BIGINT, sourcePage.getBlock(0)));
}
Also used : PageFunctionCompiler(io.prestosql.sql.gen.PageFunctionCompiler) CursorProcessor(io.prestosql.operator.project.CursorProcessor) Metadata(io.prestosql.metadata.Metadata) Page(io.prestosql.spi.Page) PageRecordSet(io.prestosql.operator.index.PageRecordSet) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) PageProcessor(io.prestosql.operator.project.PageProcessor) DataSize(io.airlift.units.DataSize) UUID(java.util.UUID) RowExpression(io.prestosql.spi.relation.RowExpression) BuiltInFunctionHandle(io.prestosql.spi.function.BuiltInFunctionHandle) ExpressionCompiler(io.prestosql.sql.gen.ExpressionCompiler) CatalogName(io.prestosql.spi.connector.CatalogName) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) Test(org.testng.annotations.Test)

Example 2 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

the class SqlStageExecution method scheduleTask.

/**
 * Creates the remote task identified by {@code taskId} on {@code node}, records it in this
 * stage's bookkeeping, and then starts it, or aborts it if the stage is already done.
 *
 * @param node the node the task is created on
 * @param taskId id of the new task; must not already be known to this stage
 * @param instanceId passed through to the remote task factory
 * @param sourceSplits splits the task is created with, keyed by plan node
 * @param totalPartitions passed through to the remote task factory
 * @return the newly created task
 * @throws IllegalArgumentException if a task with {@code taskId} already exists
 * @throws IllegalStateException if the output buffers have not been set yet
 */
private synchronized RemoteTask scheduleTask(InternalNode node, TaskId taskId, String instanceId, Multimap<PlanNodeId, Split> sourceSplits, OptionalInt totalPartitions) {
    checkArgument(!allTasks.contains(taskId), "A task with id %s already exists", taskId);

    if (SystemSessionProperties.isSnapshotEnabled(stateMachine.getSession())) {
        // Snapshot: the snapshot manager has to know about every task
        // so it can decide when a snapshot is complete across all of them.
        snapshotManager.addNewTask(taskId);
    }

    // start from the caller's splits, then connect every source task that has not finished yet
    ImmutableMultimap.Builder<PlanNodeId, Split> splitsForTask = ImmutableMultimap.<PlanNodeId, Split>builder().putAll(sourceSplits);
    sourceTasks.forEach((sourceNodeId, sourceTask) -> {
        if (sourceTask.getTaskStatus().getState() == TaskState.FINISHED) {
            return;
        }
        splitsForTask.put(sourceNodeId, newConnectSplit(taskId, sourceTask));
    });

    OutputBuffers buffers = this.outputBuffers.get();
    checkState(buffers != null, "Initial output buffers must be set before a task can be scheduled");

    RemoteTask remoteTask = remoteTaskFactory.createRemoteTask(
            stateMachine.getSession(),
            taskId,
            instanceId,
            node,
            stateMachine.getFragment(),
            splitsForTask.build(),
            totalPartitions,
            buffers,
            nodeTaskMap.createPartitionedSplitCountTracker(node, taskId),
            summarizeTaskInfo,
            Optional.ofNullable(parentId),
            snapshotManager);

    // sources that are already complete get their no-more-splits signal right away
    completeSources.forEach(remoteTask::noMoreSplits);

    allTasks.add(taskId);
    tasks.computeIfAbsent(node, ignored -> newConcurrentHashSet()).add(remoteTask);
    nodeTaskMap.addTask(node, remoteTask);

    remoteTask.addStateChangeListener(new StageTaskListener());
    remoteTask.addFinalTaskInfoListener(this::updateFinalTaskInfo);

    if (stateMachine.getState().isDone()) {
        // stage finished while we were scheduling this task
        remoteTask.abort();
    } else {
        remoteTask.start();
    }
    return remoteTask;
}
Also used : PlanNodeId(io.prestosql.spi.plan.PlanNodeId) REMOTE_HOST_GONE(io.prestosql.spi.StandardErrorCode.REMOTE_HOST_GONE) SystemSessionProperties(io.prestosql.SystemSessionProperties) SystemSessionProperties.isSnapshotEnabled(io.prestosql.SystemSessionProperties.isSnapshotEnabled) PlanFragmentId(io.prestosql.sql.planner.plan.PlanFragmentId) Duration(io.airlift.units.Duration) HttpPageBufferClient(io.prestosql.operator.HttpPageBufferClient) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HashMultimap(com.google.common.collect.HashMultimap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) URI(java.net.URI) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) GONE(io.prestosql.failuredetector.FailureDetector.State.GONE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TableScanNode(io.prestosql.spi.plan.TableScanNode) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) PlanNode(io.prestosql.spi.plan.PlanNode) UUID(java.util.UUID) GuardedBy(javax.annotation.concurrent.GuardedBy) REMOTE_CONNECTOR_ID(io.prestosql.operator.ExchangeOperator.REMOTE_CONNECTOR_ID) Sets(com.google.common.collect.Sets) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) SplitSchedulerStats(io.prestosql.execution.scheduler.SplitSchedulerStats) Entry(java.util.Map.Entry) HttpUriBuilder.uriBuilderFrom(io.airlift.http.client.HttpUriBuilder.uriBuilderFrom) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Optional(java.util.Optional) StateChangeListener(io.prestosql.execution.StateMachine.StateChangeListener) Logger(io.airlift.log.Logger) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RemoteSourceNode(io.prestosql.sql.planner.plan.RemoteSourceNode) Split(io.prestosql.metadata.Split) Multimap(com.google.common.collect.Multimap) OutputBuffers(io.prestosql.execution.buffer.OutputBuffers) OptionalInt(java.util.OptionalInt) AtomicReference(java.util.concurrent.atomic.AtomicReference) SemiJoinNode(io.prestosql.sql.planner.plan.SemiJoinNode) ArrayList(java.util.ArrayList) SystemSessionProperties.isEnableDynamicFiltering(io.prestosql.SystemSessionProperties.isEnableDynamicFiltering) HashSet(java.util.HashSet) RemoteSplit(io.prestosql.split.RemoteSplit) ImmutableList(com.google.common.collect.ImmutableList) SystemSessionProperties.isReuseTableScanEnabled(io.prestosql.SystemSessionProperties.isReuseTableScanEnabled) Objects.requireNonNull(java.util.Objects.requireNonNull) Session(io.prestosql.Session) QueryId(io.prestosql.spi.QueryId) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ExecutorService(java.util.concurrent.ExecutorService) JoinNode(io.prestosql.spi.plan.JoinNode) PlanFragment(io.prestosql.sql.planner.PlanFragment) SimpleHttpResponseHandler(io.prestosql.server.remotetask.SimpleHttpResponseHandler) Executor(java.util.concurrent.Executor) InternalNode(io.prestosql.metadata.InternalNode) Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) HttpStatus(io.airlift.http.client.HttpStatus) FailureDetector(io.prestosql.failuredetector.FailureDetector) Collections(java.util.Collections) OutputBuffers(io.prestosql.execution.buffer.OutputBuffers) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) Split(io.prestosql.metadata.Split) RemoteSplit(io.prestosql.split.RemoteSplit)

Example 3 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

the class TestSystemMemoryBlocking method testTableScanSystemMemoryBlocking.

/**
 * Checks that a table scan operator reports itself as waiting for memory, that the driver
 * stays blocked while it does, and that both are released once the pool's memory is freed.
 */
@Test
public void testTableScanSystemMemoryBlocking() {
    PlanNodeId scanNodeId = new PlanNodeId("source");
    final List<Type> columnTypes = ImmutableList.of(VARCHAR);

    // the page source serves five sequence pages of ten rows each
    TableScanOperator scanOperator = new TableScanOperator(
            driverContext.addOperatorContext(1, new PlanNodeId("test"), "values"),
            scanNodeId,
            (session, split, table, columns, dynamicFilter) -> new FixedPageSource(rowPagesBuilder(columnTypes)
                    .addSequencePage(10, 1)
                    .addSequencePage(10, 1)
                    .addSequencePage(10, 1)
                    .addSequencePage(10, 1)
                    .addSequencePage(10, 1)
                    .build()),
            TEST_TABLE_HANDLE,
            ImmutableList.of(),
            ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_DEFAULT,
            new UUID(0, 0),
            columnTypes,
            false,
            Optional.empty(),
            0,
            0);
    PageConsumerOperator sinkOperator = createSinkOperator(columnTypes);
    Driver driver = Driver.createDriver(driverContext, scanOperator, sinkOperator);
    assertSame(driver.getDriverContext(), driverContext);
    assertFalse(driver.isFinished());

    // hand the driver a single split and mark the source as having no more splits
    Split split = new Split(new CatalogName("test"), new TestSplit(), Lifespan.taskWide());
    ScheduledSplit scheduledSplit = new ScheduledSplit(0, scanNodeId, split);
    TaskSource taskSource = new TaskSource(scanNodeId, ImmutableSet.of(scheduledSplit), true);
    driver.updateSource(taskSource);

    // the first call can still move a page from source to sink, so the driver itself is not blocked,
    // but the scan operator must already be waiting for memory
    ListenableFuture<?> driverBlocked = driver.processFor(new Duration(1, NANOSECONDS));
    assertTrue(driverBlocked.isDone());
    assertFalse(scanOperator.getOperatorContext().isWaitingForMemory().isDone());

    // driver and operator both remain blocked until more memory becomes available
    int remainingAttempts = 10;
    while (remainingAttempts > 0) {
        driverBlocked = driver.processFor(new Duration(1, NANOSECONDS));
        assertFalse(driverBlocked.isDone());
        assertFalse(scanOperator.getOperatorContext().isWaitingForMemory().isDone());
        remainingAttempts--;
    }

    // release everything the pool has reserved
    memoryPool.free(QUERY_ID, "test", memoryPool.getReservedBytes());

    // the operator is no longer waiting for memory
    assertTrue(scanOperator.getOperatorContext().isWaitingForMemory().isDone());

    // and the driver can make progress again
    driverBlocked = driver.processFor(new Duration(1, NANOSECONDS));
    assertTrue(driverBlocked.isDone());
}
Also used : TableScanOperator(io.prestosql.operator.TableScanOperator) ScheduledSplit(io.prestosql.execution.ScheduledSplit) Driver(io.prestosql.operator.Driver) Duration(io.airlift.units.Duration) FixedPageSource(io.prestosql.spi.connector.FixedPageSource) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) PageConsumerOperator(io.prestosql.testing.PageConsumerOperator) Type(io.prestosql.spi.type.Type) CatalogName(io.prestosql.spi.connector.CatalogName) UUID(java.util.UUID) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) ScheduledSplit(io.prestosql.execution.ScheduledSplit) TaskSource(io.prestosql.execution.TaskSource) Test(org.testng.annotations.Test)

Example 4 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

the class TestHttpRemoteTask method testRegular.

/**
 * Walks a remote task through its regular lifecycle: start, add one split, signal
 * no-more-splits for a single lifespan and then for the whole source, and finally cancel.
 * After each step the test polls until the expected state is observable.
 *
 * @throws Exception if any step of the remote task interaction fails
 */
@Test(timeOut = 30000)
public void testRegular() throws Exception {
    AtomicLong lastActivityNanos = new AtomicLong(System.nanoTime());
    TestingTaskResource testingTaskResource = new TestingTaskResource(lastActivityNanos, FailureScenario.NO_FAILURE);
    HttpRemoteTaskFactory httpRemoteTaskFactory = createHttpRemoteTaskFactory(testingTaskResource);
    // Stop the factory in a finally block: without it, any failing poll or assertion
    // would leave the factory running for the remainder of the test run.
    try {
        RemoteTask remoteTask = createRemoteTask(httpRemoteTaskFactory);
        testingTaskResource.setInitialTaskInfo(remoteTask.getTaskInfo());
        remoteTask.start();

        // add a single split that belongs to driver group 3
        Lifespan lifespan = Lifespan.driverGroup(3);
        remoteTask.addSplits(ImmutableMultimap.of(TABLE_SCAN_NODE_ID, new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), lifespan)));
        poll(() -> testingTaskResource.getTaskSource(TABLE_SCAN_NODE_ID) != null);
        poll(() -> testingTaskResource.getTaskSource(TABLE_SCAN_NODE_ID).getSplits().size() == 1);

        // no more splits for that lifespan only
        remoteTask.noMoreSplits(TABLE_SCAN_NODE_ID, lifespan);
        poll(() -> testingTaskResource.getTaskSource(TABLE_SCAN_NODE_ID).getNoMoreSplitsForLifespan().size() == 1);

        // no more splits for the whole source
        remoteTask.noMoreSplits(TABLE_SCAN_NODE_ID);
        poll(() -> testingTaskResource.getTaskSource(TABLE_SCAN_NODE_ID).isNoMoreSplits());

        // after cancel, both the task status and the task info must report a done state
        remoteTask.cancel();
        poll(() -> remoteTask.getTaskStatus().getState().isDone());
        poll(() -> remoteTask.getTaskInfo().getTaskStatus().getState().isDone());
    } finally {
        httpRemoteTaskFactory.stop();
    }
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) HttpRemoteTaskFactory(io.prestosql.server.HttpRemoteTaskFactory) RemoteTask(io.prestosql.execution.RemoteTask) CatalogName(io.prestosql.spi.connector.CatalogName) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) Lifespan(io.prestosql.execution.Lifespan) Test(org.testng.annotations.Test)

Example 5 with Split

use of io.prestosql.metadata.Split in project hetu-core by openlookeng.

the class MarkerSplitSource method getNextBatchImpl.

/**
 * Produces the next {@link SplitBatch} for this source, choosing between snapshot marker
 * batches and regular "data" split batches.
 * <p>
 * The branches are evaluated in this order, and the first one that applies returns:
 * <ol>
 * <li>a resuming snapshot id is pending: return the batch built by {@code recordSnapshot} for it and clear the id;</li>
 * <li>the initial marker has not been sent: return a batch holding one marker split with snapshot id 0;</li>
 * <li>the source is exhausted and the buffer is fully consumed: return a marker batch or an empty batch (see inline comments);</li>
 * <li>the announcer reports that a marker is due: return that marker batch;</li>
 * <li>dependencies remain: return an empty batch that is not the last batch;</li>
 * <li>otherwise: fetch data splits via {@code prepareNextBatch} and post-process the result.</li>
 * </ol>
 *
 * @param partitionHandle forwarded to {@code prepareNextBatch}
 * @param lifespan lifespan attached to generated markers and forwarded to {@code prepareNextBatch}
 * @param maxSize maximum batch size, forwarded to {@code prepareNextBatch}; must be greater than zero
 * @return a future holding the next batch
 * @throws IllegalArgumentException if {@code maxSize} is not greater than zero
 */
private ListenableFuture<SplitBatch> getNextBatchImpl(ConnectorPartitionHandle partitionHandle, Lifespan lifespan, int maxSize) {
    checkArgument(maxSize > 0, "Cannot fetch a batch of zero size");
    if (resumingSnapshotId.isPresent()) {
        // The resume path also counts as having sent the initial marker, so the next branch is skipped on later calls.
        sentInitialMarker = true;
        boolean lastBatch = sourceExhausted && bufferPosition == splitBuffer.size();
        // NOTE(review): the second argument is true only on this resume path; presumably it flags a resume marker. Confirm against recordSnapshot.
        SplitBatch batch = recordSnapshot(lifespan, true, resumingSnapshotId.getAsLong(), lastBatch);
        // Clear the pending id so this branch is taken at most once per resume.
        resumingSnapshotId = OptionalLong.empty();
        return Futures.immediateFuture(batch);
    }
    if (!sentInitialMarker) {
        sentInitialMarker = true;
        // Send initial empty marker, to trigger creation of tasks. This marker is ignored by SqlTaskExecution.
        Split marker = new Split(getCatalogName(), MarkerSplit.snapshotSplit(getCatalogName(), 0), lifespan);
        SplitBatch batch = new SplitBatch(Collections.singletonList(marker), false);
        return Futures.immediateFuture(batch);
    }
    // No more data splits: the underlying source is exhausted and every buffered split has been handed out.
    if (sourceExhausted && bufferPosition == splitBuffer.size()) {
        // This source takes part in a union and is no longer listed in remainingUnionSources.
        if (!unionSources.isEmpty() && !remainingUnionSources.contains(this)) {
            // This is the last batch only once no union sources remain.
            boolean lastBatch = remainingUnionSources.isEmpty();
            OptionalLong snapshotId = announcer.shouldGenerateMarker(this);
            // Emit the marker only if one is due and its id does not exceed lastMarkerForUnion (when that is set).
            if (snapshotId.isPresent() && (!lastMarkerForUnion.isPresent() || snapshotId.getAsLong() <= lastMarkerForUnion.getAsLong())) {
                SplitBatch batch = recordSnapshot(lifespan, false, snapshotId.getAsLong(), lastBatch);
                return Futures.immediateFuture(batch);
            }
            if (lastBatch) {
                // No marker is emitted on this path; the source is flagged as having sent its final marker and is deactivated.
                sentFinalMarker = true;
                deactivate();
            }
            // Empty batch; it carries the last-batch flag computed above.
            SplitBatch batch = new SplitBatch(ImmutableList.of(), lastBatch);
            return Futures.immediateFuture(batch);
        }
        // Force send last-batch marker
        long sid = announcer.forceGenerateMarker(this);
        SplitBatch batch = recordSnapshot(lifespan, false, sid, true);
        return Futures.immediateFuture(batch);
    }
    // Data splits may still be available; a marker takes priority if the announcer says one is due.
    OptionalLong snapshotId = announcer.shouldGenerateMarker(this);
    if (snapshotId.isPresent()) {
        SplitBatch batch = recordSnapshot(lifespan, false, snapshotId.getAsLong(), false);
        return Futures.immediateFuture(batch);
    }
    if (!remainingDependencies.isEmpty()) {
        // NOTE(review): the first line of this comment appears to be missing in the original. Presumably: while
        // dependencies remain, no data splits are sent here (an empty, non-last batch is returned); only markers are sent,
        // which will pass through join operators, and can be completed.
        return Futures.immediateFuture(new SplitBatch(Collections.emptyList(), false));
    }
    // Get next batch of "data" splits, then determine if marker should be added.
    ListenableFuture<SplitBatch> result = prepareNextBatch(partitionHandle, lifespan, maxSize);
    // The callback is registered with directExecutor(); a null batch is passed through unchanged.
    result = Futures.transform(result, batch -> {
        if (batch != null) {
            List<Split> splits = batch.getSplits();
            incrementSplitCount(splits.size());
            if (batch.isLastBatch()) {
                if (splits.size() == 0) {
                    // Force generate a marker for last batch. Marker can't be mixed with data splits.
                    long sid = announcer.forceGenerateMarker(this);
                    batch = recordSnapshot(lifespan, false, sid, true);
                } else {
                    // Don't send last-batch signal yet. Next call will generate a marker with last-batch.
                    batch = new SplitBatch(splits, false);
                }
            }
        }
        return batch;
    }, directExecutor());
    return result;
}
Also used : Lifespan(io.prestosql.execution.Lifespan) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Logger(io.airlift.log.Logger) Semaphore(java.util.concurrent.Semaphore) Collection(java.util.Collection) CatalogName(io.prestosql.spi.connector.CatalogName) Set(java.util.Set) HashMap(java.util.HashMap) Split(io.prestosql.metadata.Split) ArrayList(java.util.ArrayList) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) HashSet(java.util.HashSet) OptionalLong(java.util.OptionalLong) Futures(com.google.common.util.concurrent.Futures) List(java.util.List) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) ConnectorPartitionHandle(io.prestosql.spi.connector.ConnectorPartitionHandle) Map(java.util.Map) SplitSource(io.prestosql.split.SplitSource) Collections(java.util.Collections) OptionalLong(java.util.OptionalLong) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Split(io.prestosql.metadata.Split)

Aggregations

Split (io.prestosql.metadata.Split)65 Test (org.testng.annotations.Test)32 InternalNode (io.prestosql.metadata.InternalNode)31 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)29 TestingSplit (io.prestosql.testing.TestingSplit)29 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)28 MockSplit (io.prestosql.MockSplit)20 CatalogName (io.prestosql.spi.connector.CatalogName)17 ImmutableList (com.google.common.collect.ImmutableList)16 ArrayList (java.util.ArrayList)15 UUID (java.util.UUID)14 HashMap (java.util.HashMap)13 RemoteTask (io.prestosql.execution.RemoteTask)12 HashSet (java.util.HashSet)12 MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory)11 Optional (java.util.Optional)11 Lifespan (io.prestosql.execution.Lifespan)10 NodeTaskMap (io.prestosql.execution.NodeTaskMap)10 Page (io.prestosql.spi.Page)10 RowExpression (io.prestosql.spi.relation.RowExpression)10