Search in sources:

Example 1 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

Method drain of the class CompactorOperatorStateHandler.

/**
 * Blocks until every in-flight compacting request has finished, then emits a rebuilt summary
 * followed by all held committables, and finally drops the state this handler was holding.
 *
 * <p>The results of the compacting requests are appended to {@code holdingMessages} before the
 * summary is built, so the emitted summary counts both the originally held messages and the
 * compacted ones. The failed-committable count is carried over from {@code holdingSummary}.
 *
 * <p>Note that {@link CompletableFuture#join()} reports a failed request as an unchecked
 * {@code CompletionException}, so a failure may surface there before {@code get()} is reached.
 *
 * @throws ExecutionException if {@code get()} on a compacting future reports a failure
 * @throws InterruptedException if {@code get()} on a compacting future is interrupted
 * @throws IllegalStateException if no summary is being held or its counters do not match the
 *     number of held plus compacting messages
 */
private void drain() throws ExecutionException, InterruptedException {
    checkState(holdingSummary != null);
    // Every committable of the held summary must still be pending ...
    checkState(holdingSummary.getNumberOfPendingCommittables() == holdingSummary.getNumberOfCommittables());
    // ... and must be accounted for either as a held or as a compacting message.
    checkState(holdingSummary.getNumberOfCommittables() == holdingMessages.size() + compactingMessages.size());

    final Long checkpointId;
    if (holdingSummary.getCheckpointId().isPresent()) {
        checkpointId = holdingSummary.getCheckpointId().getAsLong();
    } else {
        checkpointId = null;
    }
    final int subtaskId = holdingSummary.getSubtaskId();

    if (!compactingRequests.isEmpty()) {
        // Wait for all outstanding compactions at once before collecting their results.
        CompletableFuture<?>[] inFlight = compactingRequests.stream().map(request -> request.f1).toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(inFlight).join();

        for (Tuple2<CompactorRequest, CompletableFuture<Iterable<FileSinkCommittable>>> request : compactingRequests) {
            CompletableFuture<Iterable<FileSinkCommittable>> result = request.f1;
            checkState(result.isDone());
            // get() rethrows the failure of an exceptionally completed request
            for (FileSinkCommittable compacted : result.get()) {
                holdingMessages.add(new CommittableWithLineage<>(compacted, checkpointId, subtaskId));
            }
        }
    }

    // The new summary covers the held messages plus the compacted results appended above.
    final int emittedCount = holdingMessages.size();
    CommittableSummary<FileSinkCommittable> drainedSummary = new CommittableSummary<>(
            subtaskId,
            holdingSummary.getNumberOfSubtasks(),
            checkpointId,
            emittedCount,
            emittedCount,
            holdingSummary.getNumberOfFailedCommittables());
    output.collect(new StreamRecord<>(drainedSummary));
    for (CommittableMessage<FileSinkCommittable> message : holdingMessages) {
        output.collect(new StreamRecord<>(message));
    }

    // All remaining requests are done and their results emitted,
    // so from here on the operator keeps no state.
    remainingRequestsState.clear();
    compactingRequests.clear();
    compactingMessages.clear();
    holdingSummary = null;
    holdingMessages = null;

    if (writerStateDrained) {
        // With the writer state drained as well, everything can simply be passed through.
        stateDrained = true;
        compactService.close();
        compactService = null;
    }
}
Also used : CommittableWithLineage(org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Either(org.apache.flink.types.Either) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) Map(java.util.Map) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) RemainingRequestsSerializer(org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator.RemainingRequestsSerializer) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) BucketWriter(org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ExecutionException(java.util.concurrent.ExecutionException) CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) CommittableSummary(org.apache.flink.streaming.api.connector.sink2.CommittableSummary) List(java.util.List) REMAINING_REQUESTS_RAW_STATES_DESC(org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator.REMAINING_REQUESTS_RAW_STATES_DESC) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) SimpleVersionedListState(org.apache.flink.streaming.api.operators.util.SimpleVersionedListState) Internal(org.apache.flink.annotation.Internal) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) FileCompactor(org.apache.flink.connector.file.sink.compactor.FileCompactor) 
IdenticalFileCompactor(org.apache.flink.connector.file.sink.compactor.IdenticalFileCompactor) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) CommittableSummary(org.apache.flink.streaming.api.connector.sink2.CommittableSummary) CompletableFuture(java.util.concurrent.CompletableFuture) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 2 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

Method emitCompacted of the class CompactorOperator.

/**
 * Emits the results of all compacting requests that have already finished.
 *
 * <p>Finished requests are removed from {@code compactingRequests}; unfinished ones are left in
 * place. If no finished request produced a committable, nothing is emitted. Otherwise a
 * {@link CommittableSummary} is sent first, followed by one {@link CommittableWithLineage} per
 * compacted committable.
 *
 * @param checkpointId checkpoint id attached to the summary and to each emitted committable;
 *     may be {@code null}
 * @throws Exception if a finished request completed exceptionally (rethrown by {@code get()})
 */
private void emitCompacted(@Nullable Long checkpointId) throws Exception {
    List<FileSinkCommittable> finished = new ArrayList<>();
    for (Iterator<Tuple2<CompactorRequest, CompletableFuture<Iterable<FileSinkCommittable>>>> it = compactingRequests.iterator(); it.hasNext(); ) {
        CompletableFuture<Iterable<FileSinkCommittable>> result = it.next().f1;
        if (!result.isDone()) {
            // still compacting, keep the request for a later round
            continue;
        }
        it.remove();
        // get() rethrows the failure of an exceptionally completed request
        result.get().forEach(finished::add);
    }

    if (finished.isEmpty()) {
        return;
    }

    final int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    final int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
    final int count = finished.size();

    // The summary has to precede all results emitted for this checkpoint.
    CommittableSummary<FileSinkCommittable> summary = new CommittableSummary<>(subtaskIndex, parallelism, checkpointId, count, count, 0);
    output.collect(new StreamRecord<>(summary));

    for (FileSinkCommittable committable : finished) {
        CommittableWithLineage<FileSinkCommittable> withLineage = new CommittableWithLineage<>(committable, checkpointId, subtaskIndex);
        output.collect(new StreamRecord<>(withLineage));
    }
}
Also used : CommittableSummary(org.apache.flink.streaming.api.connector.sink2.CommittableSummary) CommittableWithLineage(org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) Tuple2(org.apache.flink.api.java.tuple.Tuple2) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 3 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

Method initializeState of the class CompactCoordinator.

/**
 * Sets up {@code remainingCommittableState} and replays every committable found in it.
 *
 * <p>Each restored committable is handed to {@code packAndTrigger}; when that call returns
 * {@code true}, {@code fireAndPurge} is invoked for the committable's bucket.
 *
 * @param context context providing access to the operator state store
 * @throws Exception if the parent initialization or reading the state fails
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    remainingCommittableState = new SimpleVersionedListState<>(
            context.getOperatorStateStore().getListState(REMAINING_COMMITTABLE_RAW_STATES_DESC),
            committableSerializer);

    Iterable<FileSinkCommittable> restored = remainingCommittableState.get();
    if (restored == null) {
        return;
    }

    // restore and redistribute
    for (FileSinkCommittable pending : restored) {
        boolean shouldFire = packAndTrigger(pending);
        if (shouldFire) {
            fireAndPurge(pending.getBucketId());
        }
    }
}
Also used : FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 4 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

Method initializeState of the class CompactCoordinatorStateHandler.

/**
 * Reads the remaining committables from state, forwards each one downstream wrapped in its own
 * {@code CompactorRequest}, and then clears the state.
 *
 * @param context context providing access to the operator state store
 * @throws Exception if the parent initialization or accessing the state fails
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    ListState<FileSinkCommittable> remainingCommittableState = new SimpleVersionedListState<>(
            context.getOperatorStateStore().getListState(REMAINING_COMMITTABLE_RAW_STATES_DESC),
            committableSerializer);

    Iterable<FileSinkCommittable> restored = remainingCommittableState.get();
    if (restored != null) {
        for (FileSinkCommittable committable : restored) {
            // One request per committable: compacting multiple files together
            // is not available now.
            CompactorRequest singleFileRequest = new CompactorRequest(committable.getBucketId());
            singleFileRequest.addToCompact(committable);
            output.collect(new StreamRecord<>(Either.Right(singleFileRequest)));
        }
    }

    // Everything that remained has been emitted, so the state can be dropped. The operator is
    // stateless from now on and does not need snapshotState.
    remainingCommittableState.clear();
}
Also used : SimpleVersionedListState(org.apache.flink.streaming.api.operators.util.SimpleVersionedListState) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 5 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

Method getCompactingPath of the class CompactService.

/**
 * Commits the pending file of every committable in the request and returns the paths to
 * compact.
 *
 * <p>For each committable the pending file must be present and its path name must start with
 * {@code "."}; otherwise the precondition check fails. The pending file is then recovered and
 * committed through {@code bucketWriter}, and its path is added to the result in iteration
 * order.
 *
 * @param request the compaction request whose committables are processed
 * @return the paths of the pending files of all committables in the request
 * @throws IOException presumably raised by recovering or committing a pending file
 * @throws IllegalStateException if a committable has no pending file or an unexpected path
 */
private List<Path> getCompactingPath(CompactorRequest request) throws IOException {
    List<Path> paths = new ArrayList<>();

    for (FileSinkCommittable committable : request.getCommittableToCompact()) {
        PendingFileRecoverable recoverable = committable.getPendingFile();
        checkState(recoverable != null, "Illegal committable to compact, pending file is null.");

        Path path = recoverable.getPath();
        boolean hiddenFile = path != null && path.getName().startsWith(".");
        checkState(hiddenFile, "Illegal pending file to compact, path should start with . but is " + path);

        // commit the pending file first; the committed file is what gets compacted
        bucketWriter.recoverPendingFile(recoverable).commitAfterRecovery();
        paths.add(path);
    }

    return paths;
}
Also used : Path(org.apache.flink.core.fs.Path) ArrayList(java.util.ArrayList) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Aggregations

FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable)32 Test (org.junit.Test)22 CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage)12 Path (org.apache.flink.core.fs.Path)11 CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest)10 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)10 File (java.io.File)8 CompactCoordinator (org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator)8 ArrayList (java.util.ArrayList)7 CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage)6 CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary)5 List (java.util.List)4 MockCommitRequest (org.apache.flink.api.connector.sink2.mocks.MockCommitRequest)4 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)4 PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable)4 Map (java.util.Map)3 IOException (java.io.IOException)2 Collection (java.util.Collection)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 Collectors (java.util.stream.Collectors)2