Search in sources:

Example 1 with PendingFileRecoverable

use of org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable in project flink by apache.

the class CompactService method getCompactingPath.

/**
 * Commits every pending file referenced by the given request and collects the paths
 * reported by those pending files.
 *
 * <p>Each committable must carry a pending file whose path is non-null and whose file name
 * starts with {@code "."}; otherwise the corresponding {@code checkState} call fails.
 *
 * @param request the compaction request whose committables are processed
 * @return the paths of the pending files, in the order the committables were iterated
 * @throws IOException declared for recovering and committing the pending files
 */
private List<Path> getCompactingPath(CompactorRequest request) throws IOException {
    List<Path> committedPaths = new ArrayList<>();
    for (FileSinkCommittable candidate : request.getCommittableToCompact()) {
        PendingFileRecoverable recoverable = candidate.getPendingFile();
        checkState(recoverable != null, "Illegal committable to compact, pending file is null.");

        Path recoverablePath = recoverable.getPath();
        boolean dotPrefixed = recoverablePath != null && recoverablePath.getName().startsWith(".");
        checkState(dotPrefixed, "Illegal pending file to compact, path should start with . but is " + recoverablePath);

        // The pending file is committed first; the committed file is what gets compacted.
        bucketWriter.recoverPendingFile(recoverable).commitAfterRecovery();
        committedPaths.add(recoverablePath);
    }
    return committedPaths;
}
Also used : Path(org.apache.flink.core.fs.Path) ArrayList(java.util.ArrayList) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 2 with PendingFileRecoverable

use of org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable in project flink by apache.

the class OutputStreamBasedPartFileRecoverableMigrationTest method testSerializationPending.

/**
 * Deserializes the stored "pending" recoverable of {@code previousVersion} and checks that
 * it is an {@link OutputStreamBasedPendingFileRecoverable} whose commit recoverable is
 * accepted by a local {@link RecoverableWriter}.
 */
@Test
public void testSerializationPending() throws IOException {
    final java.nio.file.Path versionDir = resolveVersionPath(previousVersion, "pending");
    final RecoverableWriter localWriter = FileSystem.getLocalFileSystem().createRecoverableWriter();
    final OutputStreamBasedPendingFileRecoverableSerializer pendingSerializer =
            new OutputStreamBasedPendingFileRecoverableSerializer(localWriter.getCommitRecoverableSerializer());

    final byte[] storedBytes = Files.readAllBytes(versionDir.resolve("recoverable"));
    final PendingFileRecoverable restored = pendingSerializer.deserialize(previousVersion, storedBytes);
    Assert.assertTrue(restored instanceof OutputStreamBasedPendingFileRecoverable);

    // recoverForCommit is invoked only to verify that the CommitRecoverable is valid
    final OutputStreamBasedPendingFileRecoverable streamBased = (OutputStreamBasedPendingFileRecoverable) restored;
    localWriter.recoverForCommit(streamBased.getCommitRecoverable());
}
Also used : RecoverableWriter(org.apache.flink.core.fs.RecoverableWriter) OutputStreamBasedPendingFileRecoverableSerializer(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedPartFileWriter.OutputStreamBasedPendingFileRecoverableSerializer) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) OutputStreamBasedPendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedPartFileWriter.OutputStreamBasedPendingFileRecoverable) OutputStreamBasedPendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedPartFileWriter.OutputStreamBasedPendingFileRecoverable) Test(org.junit.Test)

Example 3 with PendingFileRecoverable

use of org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable in project flink by apache.

the class CompactorOperatorTest method createTestBucketWriter.

/**
 * Creates a {@link BucketWriter} stub backed by plain {@code java.io} streams.
 *
 * <p>Record-wise writers lazily open a {@link BufferedWriter} on the target path and count
 * one unit of size per written element. Output-stream-based compacting writers expose a
 * {@link FileOutputStream} on the target path. In both cases the underlying stream is closed
 * when the writer is closed for commit, so buffered data reaches the file and no file handle
 * is leaked.
 *
 * @return the stub bucket writer
 */
private BucketWriter<?, String> createTestBucketWriter() {
    return new BucketWriter<Integer, String>() {

        @Override
        public InProgressFileWriter<Integer, String> openNewInProgressFile(String bucketId, Path path, long creationTime) throws IOException {
            return new InProgressFileWriter<Integer, String>() {

                // Opened lazily on the first write; stays null if nothing is ever written.
                BufferedWriter writer;

                // Number of elements written so far (not a byte count).
                long size = 0L;

                @Override
                public void write(Integer element, long currentTime) throws IOException {
                    if (writer == null) {
                        writer = new BufferedWriter(new FileWriter(path.toString()));
                    }
                    // The Integer is unboxed, so this resolves to Writer.write(int) and emits
                    // a single character rather than the decimal text of the number.
                    writer.write(element);
                    size += 1;
                }

                @Override
                public InProgressFileRecoverable persist() throws IOException {
                    return new TestInProgressFileRecoverable(path, size);
                }

                @Override
                public PendingFileRecoverable closeForCommit() throws IOException {
                    // Flush and release the file before handing out the recoverable.
                    closeWriter();
                    return new TestPendingFileRecoverable(path, size);
                }

                @Override
                public void dispose() {
                    try {
                        closeWriter();
                    } catch (IOException ignored) {
                        // dispose() cannot propagate checked exceptions; closing is best effort.
                    }
                }

                @Override
                public String getBucketId() {
                    return bucketId;
                }

                @Override
                public long getCreationTime() {
                    return 0;
                }

                @Override
                public long getSize() throws IOException {
                    return size;
                }

                @Override
                public long getLastUpdateTime() {
                    return 0;
                }

                /** Closes the lazily created writer, if any; closing twice is harmless. */
                private void closeWriter() throws IOException {
                    if (writer != null) {
                        writer.close();
                    }
                }
            };
        }

        @Override
        public InProgressFileWriter<Integer, String> resumeInProgressFileFrom(String s, InProgressFileRecoverable inProgressFileSnapshot, long creationTime) throws IOException {
            // Resuming is not implemented by this stub.
            return null;
        }

        @Override
        public WriterProperties getProperties() {
            // No writer properties are provided by this stub.
            return null;
        }

        @Override
        public PendingFile recoverPendingFile(PendingFileRecoverable pendingFileRecoverable) throws IOException {
            return new PendingFile() {

                @Override
                public void commit() throws IOException {
                    TestPendingFileRecoverable testRecoverable = (TestPendingFileRecoverable) pendingFileRecoverable;
                    if (testRecoverable.getPath() != null) {
                        // Only rename when the uncommitted location differs from the final path.
                        if (!testRecoverable.getPath().equals(testRecoverable.getUncommittedPath())) {
                            testRecoverable.getPath().getFileSystem().rename(testRecoverable.getUncommittedPath(), testRecoverable.getPath());
                        }
                    }
                }

                @Override
                public void commitAfterRecovery() throws IOException {
                    commit();
                }
            };
        }

        @Override
        public boolean cleanupInProgressFileRecoverable(InProgressFileRecoverable inProgressFileRecoverable) throws IOException {
            return false;
        }

        @Override
        public CompactingFileWriter openNewCompactingFile(CompactingFileWriter.Type type, String bucketId, Path path, long creationTime) throws IOException {
            if (type == CompactingFileWriter.Type.RECORD_WISE) {
                return openNewInProgressFile(bucketId, path, creationTime);
            } else {
                FileOutputStream fileOutputStream = new FileOutputStream(path.toString());
                return new OutputStreamBasedCompactingFileWriter() {

                    @Override
                    public OutputStream asOutputStream() throws IOException {
                        return fileOutputStream;
                    }

                    @Override
                    public PendingFileRecoverable closeForCommit() throws IOException {
                        try {
                            fileOutputStream.flush();
                            // The position must be read before the stream (and its channel) is closed.
                            return new TestPendingFileRecoverable(path, fileOutputStream.getChannel().position());
                        } finally {
                            fileOutputStream.close();
                        }
                    }
                };
            }
        }
    };
}
Also used : Path(org.apache.flink.core.fs.Path) TestPendingFileRecoverable(org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestPendingFileRecoverable) OutputStreamBasedCompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedCompactingFileWriter) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) OutputStreamBasedCompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedCompactingFileWriter) FileWriter(java.io.FileWriter) CompactingFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.CompactingFileWriter) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) TestInProgressFileRecoverable(org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestInProgressFileRecoverable) BufferedWriter(java.io.BufferedWriter) TestInProgressFileRecoverable(org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestInProgressFileRecoverable) InProgressFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.InProgressFileRecoverable) BucketWriter(org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter) FileOutputStream(java.io.FileOutputStream) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) TestPendingFileRecoverable(org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestPendingFileRecoverable)

Example 4 with PendingFileRecoverable

use of org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable in project flink by apache.

the class CompactorOperatorStateHandler method processElement.

/**
 * Routes an incoming record while the restored state is being drained.
 *
 * <ul>
 *   <li>Once {@code stateDrained} is set, the left side of every record is forwarded as is.
 *   <li>A {@link CompactorRequest} (right side) is handed to {@code submit}.
 *   <li>A {@link CommittableSummary} is held back and a fresh holding list is allocated.
 *   <li>A committable whose pending file name starts with {@code "."} is wrapped into a new
 *       request and submitted; any other pending file marks the writer state as drained.
 * </ul>
 *
 * @param element the record carrying either a committable message or a compactor request
 */
@Override
public void processElement(StreamRecord<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> element) throws Exception {
    final Either<CommittableMessage<FileSinkCommittable>, CompactorRequest> input = element.getValue();

    if (stateDrained) {
        // After draining, only committable messages are expected; pass them through.
        output.collect(new StreamRecord<>(input.left()));
        return;
    }

    if (input.isRight()) {
        submit(input.right());
        return;
    }

    final CommittableMessage<FileSinkCommittable> incoming = input.left();
    if (incoming instanceof CommittableSummary) {
        checkState(holdingSummary == null, "Duplicate summary before the first checkpoint.");
        holdingSummary = (CommittableSummary<FileSinkCommittable>) incoming;
        holdingMessages = new ArrayList<>(holdingSummary.getNumberOfCommittables());
        return;
    }

    final CommittableWithLineage<FileSinkCommittable> withLineage = (CommittableWithLineage<FileSinkCommittable>) incoming;
    final FileSinkCommittable payload = withLineage.getCommittable();
    boolean sentToCompaction = false;

    if (payload.hasPendingFile()) {
        final PendingFileRecoverable pending = payload.getPendingFile();
        final boolean dotPrefixed = pending.getPath() != null && pending.getPath().getName().startsWith(".");
        if (dotPrefixed) {
            // An in-progress file left over from the previous run: it has to be
            // committed and compacted before it may reach the committer.
            final CompactorRequest leftoverRequest = new CompactorRequest(payload.getBucketId());
            leftoverRequest.addToCompact(payload);
            submit(leftoverRequest);
            sentToCompaction = true;
            compactingMessages.add(incoming);
        } else {
            // Receiving a normal file means the writer state is drained.
            writerStateDrained = true;
            final boolean nothingPending = compactingMessages.isEmpty() && compactingRequests.isEmpty();
            if (nothingPending) {
                // No state left to handle: emit the held summary and this committable eagerly.
                checkState(holdingSummary != null);
                output.collect(new StreamRecord<>(holdingSummary));
                holdingSummary = null;
                this.stateDrained = true;
                output.collect(new StreamRecord<>(withLineage));
            }
        }
    }

    if (sentToCompaction || stateDrained) {
        // Messages under compaction are tracked separately, and nothing more
        // needs to be held once the state is drained.
        return;
    }
    holdingMessages.add(incoming);
}
Also used : CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) CommittableSummary(org.apache.flink.streaming.api.connector.sink2.CommittableSummary) CommittableWithLineage(org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable)

Example 5 with PendingFileRecoverable

use of org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable in project flink by apache.

the class FileWriterBucketStateSerializer method internalDeserialize.

/**
 * Reads a {@link FileWriterBucketState} from the given view.
 *
 * <p>Fields are consumed in this order: versioned bucket id, bucket path (UTF), creation
 * time, a boolean flag followed by the optional in-progress file, and - only when
 * {@code pendingFileParser} is non-null - the pending-file serializer version, the number
 * of checkpoints, and per checkpoint its id plus a length-prefixed list of pending files.
 *
 * @param dataInputView the source of the serialized state
 * @param inProgressFileParser reads the in-progress file recoverable when the flag is set
 * @param pendingFileParser parses one pending file from (serializer version, bytes); when
 *     {@code null} the pending-file section is not read at all
 * @return the restored bucket state
 * @throws IOException if reading or parsing fails
 */
private FileWriterBucketState internalDeserialize(DataInputView dataInputView, FunctionWithException<DataInputView, InProgressFileRecoverable, IOException> inProgressFileParser, @Nullable BiFunctionWithException<Integer, byte[], PendingFileRecoverable, IOException> pendingFileParser) throws IOException {
    final String bucketId = SimpleVersionedSerialization.readVersionAndDeSerialize(SimpleVersionedStringSerializer.INSTANCE, dataInputView);
    final String serializedBucketPath = dataInputView.readUTF();
    final long creationTime = dataInputView.readLong();

    // The in-progress (resumable) file is present only when the flag says so.
    final boolean hasInProgressFile = dataInputView.readBoolean();
    final InProgressFileRecoverable inProgress = hasInProgressFile ? inProgressFileParser.apply(dataInputView) : null;

    final HashMap<Long, List<InProgressFileWriter.PendingFileRecoverable>> pendingByCheckpoint = new HashMap<>();
    if (pendingFileParser != null) {
        final int serializerVersion = dataInputView.readInt();
        final int checkpointCount = dataInputView.readInt();
        for (int checkpointIdx = 0; checkpointIdx < checkpointCount; checkpointIdx++) {
            final long checkpointId = dataInputView.readLong();
            final int pendingCount = dataInputView.readInt();
            final List<InProgressFileWriter.PendingFileRecoverable> pendingFiles = new ArrayList<>(pendingCount);
            for (int fileIdx = 0; fileIdx < pendingCount; fileIdx++) {
                // Each pending file is stored as a length prefix followed by its bytes.
                final int length = dataInputView.readInt();
                final byte[] payload = new byte[length];
                dataInputView.readFully(payload);
                pendingFiles.add(pendingFileParser.apply(serializerVersion, payload));
            }
            pendingByCheckpoint.put(checkpointId, pendingFiles);
        }
    }

    return new FileWriterBucketState(bucketId, new Path(serializedBucketPath), creationTime, inProgress, pendingByCheckpoint);
}
Also used : Path(org.apache.flink.core.fs.Path) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InProgressFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.InProgressFileRecoverable) PendingFileRecoverable(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable): 6; Path (org.apache.flink.core.fs.Path): 4; ArrayList (java.util.ArrayList): 3; FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable): 3; CompactingFileWriter (org.apache.flink.streaming.api.functions.sink.filesystem.CompactingFileWriter): 2; InProgressFileWriter (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter): 2; InProgressFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.InProgressFileRecoverable): 2; OutputStreamBasedCompactingFileWriter (org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedCompactingFileWriter): 2; BufferedWriter (java.io.BufferedWriter): 1; FileOutputStream (java.io.FileOutputStream): 1; FileWriter (java.io.FileWriter): 1; HashMap (java.util.HashMap): 1; List (java.util.List): 1; RecordWiseFileCompactor (org.apache.flink.connector.file.sink.compactor.RecordWiseFileCompactor): 1; TestInProgressFileRecoverable (org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestInProgressFileRecoverable): 1; TestPendingFileRecoverable (org.apache.flink.connector.file.sink.utils.FileSinkTestUtils.TestPendingFileRecoverable): 1; RecoverableWriter (org.apache.flink.core.fs.RecoverableWriter): 1; CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage): 1; CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary): 1; CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage): 1