Search in sources :

Example 31 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class CompactorOperatorStateHandler method processElement.

/**
 * Routes one incoming element while this handler is still draining state, or passes it straight
 * through once {@code stateDrained} has been set.
 *
 * <ul>
 *   <li>After {@code stateDrained} is set, the left value of every element is forwarded.
 *   <li>A {@link CompactorRequest} (right value) is handed to {@code submit}.
 *   <li>A {@link CommittableSummary} is stored as the holding summary and a fresh holding list
 *       is allocated for it.
 *   <li>A committable whose pending file name starts with {@code "."} is wrapped in a new
 *       {@link CompactorRequest} and submitted; the message is tracked in
 *       {@code compactingMessages} instead of the holding list.
 *   <li>Any other pending file marks the writer state as drained; if nothing is being
 *       compacted, the holding summary and this message are emitted immediately.
 *   <li>Everything else is appended to the holding list.
 * </ul>
 *
 * @param element the stream record carrying either a committable message or a compactor request
 * @throws Exception if submitting a request fails or a state check is violated
 */
@Override
public void processElement(StreamRecord<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> element) throws Exception {
    final Either<CommittableMessage<FileSinkCommittable>, CompactorRequest> input = element.getValue();

    // Pass-through mode: by now every input is expected to be a committable message.
    if (stateDrained) {
        output.collect(new StreamRecord<>(input.left()));
        return;
    }

    // Compaction requests are submitted as they are.
    if (input.isRight()) {
        submit(input.right());
        return;
    }

    final CommittableMessage<FileSinkCommittable> message = input.left();

    if (message instanceof CommittableSummary) {
        checkState(holdingSummary == null, "Duplicate summary before the first checkpoint.");
        holdingSummary = (CommittableSummary<FileSinkCommittable>) message;
        // Pre-size the buffer with the committable count announced by the summary.
        holdingMessages = new ArrayList<>(holdingSummary.getNumberOfCommittables());
        return;
    }

    final CommittableWithLineage<FileSinkCommittable> lineageMessage = (CommittableWithLineage<FileSinkCommittable>) message;
    final FileSinkCommittable committable = lineageMessage.getCommittable();

    if (committable.hasPendingFile()) {
        final PendingFileRecoverable recoverable = committable.getPendingFile();
        final boolean hiddenFile = recoverable.getPath() != null && recoverable.getPath().getName().startsWith(".");

        if (hiddenFile) {
            // A dot-prefixed pending file is an in-progress file left over from the previous
            // run; it has to be committed and compacted before it reaches the committer.
            final CompactorRequest request = new CompactorRequest(committable.getBucketId());
            request.addToCompact(committable);
            submit(request);
            // Messages under compaction are tracked separately and never buffered.
            compactingMessages.add(message);
            return;
        }

        // A regular file arrived, which signals that the writer state has been drained.
        writerStateDrained = true;
        final boolean nothingInFlight = compactingMessages.isEmpty() && compactingRequests.isEmpty();
        if (nothingInFlight) {
            // No remaining state to handle: release the summary and this message eagerly.
            // NOTE(review): messages already buffered in holdingMessages are not emitted on
            // this path -- confirm they are flushed elsewhere or that the buffer is empty here.
            checkState(holdingSummary != null);
            output.collect(new StreamRecord<>(holdingSummary));
            holdingSummary = null;
            stateDrained = true;
            output.collect(new StreamRecord<>(lineageMessage));
            return;
        }
    }

    // Still draining: keep the message until the held summary can be released.
    if (!stateDrained) {
        holdingMessages.add(message);
    }
}
Also used : CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) CommittableSummary(org.apache.flink.streaming.api.connector.sink2.CommittableSummary) CommittableWithLineage(org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 32 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class CompactService method compact.

/**
 * Compacts the files referenced by {@code request} into a single new pending file.
 *
 * <p>The returned committables are, in order: the pass-through committables of the request, one
 * committable for the newly written compacted pending file, and one committable per source
 * path (used to clean up the files that were compacted). When the request has no files to
 * compact, only the pass-through committables are returned.
 *
 * @param request the compaction request to execute
 * @return the committables produced by this compaction
 * @throws UnsupportedOperationException if the configured compacting writer type is neither
 *     {@code RECORD_WISE} nor {@code OUTPUT_STREAM}
 * @throws Exception if opening the writer, compacting, or closing the writer fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private Iterable<FileSinkCommittable> compact(CompactorRequest request) throws Exception {
    List<FileSinkCommittable> results = new ArrayList<>(request.getCommittableToPassthrough());
    List<Path> compactingFiles = getCompactingPath(request);
    if (compactingFiles.isEmpty()) {
        return results;
    }

    // Validate the writer type before any file is opened. Without this guard an unrecognized
    // (or null) type would skip both compaction branches, commit an empty compacted file and
    // still schedule every source file for cleanup.
    final boolean recordWise = compactingWriterType == CompactingFileWriter.Type.RECORD_WISE;
    final boolean streamBased = compactingWriterType == CompactingFileWriter.Type.OUTPUT_STREAM;
    if (!recordWise && !streamBased) {
        throw new UnsupportedOperationException("Unsupported compacting writer type: " + compactingWriterType);
    }

    // The compacted file path is derived from the first file being compacted.
    Path targetPath = assembleCompactedFilePath(compactingFiles.get(0));
    CompactingFileWriter compactingFileWriter = bucketWriter.openNewCompactingFile(compactingWriterType, request.getBucketId(), targetPath, System.currentTimeMillis());
    if (recordWise) {
        ((RecordWiseFileCompactor) fileCompactor).compact(compactingFiles, ((RecordWiseCompactingFileWriter) compactingFileWriter)::write);
    } else {
        ((OutputStreamBasedFileCompactor) fileCompactor).compact(compactingFiles, ((OutputStreamBasedCompactingFileWriter) compactingFileWriter).asOutputStream());
    }

    PendingFileRecoverable compactedPendingFile = compactingFileWriter.closeForCommit();
    results.add(new FileSinkCommittable(request.getBucketId(), compactedPendingFile));
    for (Path f : compactingFiles) {
        // cleanup compacted files
        results.add(new FileSinkCommittable(request.getBucketId(), f));
    }
    return results;
}
Also used : Path(org.apache.flink.core.fs.Path) OutputStreamBasedCompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedCompactingFileWriter) RecordWiseFileCompactor(org.apache.flink.connector.file.sink.compactor.RecordWiseFileCompactor) ArrayList(java.util.ArrayList) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) CompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.CompactingFileWriter) OutputStreamBasedCompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedCompactingFileWriter) RecordWiseCompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.RecordWiseCompactingFileWriter)

Aggregations

FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable)32 Test (org.junit.Test)22 CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage)12 Path (org.apache.flink.core.fs.Path)11 CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest)10 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)10 File (java.io.File)8 CompactCoordinator (org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator)8 ArrayList (java.util.ArrayList)7 CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage)6 CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary)5 List (java.util.List)4 MockCommitRequest (org.apache.flink.api.connector.sink2.mocks.MockCommitRequest)4 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)4 PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable)4 Map (java.util.Map)3 IOException (java.io.IOException)2 Collection (java.util.Collection)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 Collectors (java.util.stream.Collectors)2