Search in sources :

Example 1 with FileCommitter

Use of org.apache.flink.connector.file.sink.committer.FileCommitter in the Apache Flink project.

Method testDeserializationFull of the class FileWriterBucketStateSerializerMigrationTest.

/**
 * Restores a bucket from the stored state template of the given scenario, commits the
 * recovered pending files and verifies the file names found in the bucket directory before
 * and after the commit.
 *
 * <p>The scenario's resource directory is deleted afterwards, whether or not the checks pass.
 *
 * @param withInProgress whether exactly one in-progress file is expected to remain in the
 *     bucket directory after the pending files have been committed
 * @param scenarioName name of the scenario used to resolve the output/resource paths and to
 *     read the bucket state template
 * @throws IOException if listing the bucket directory fails, or if one of the invoked helpers
 *     reports an I/O failure
 * @throws InterruptedException propagated from the invoked helpers (presumably the commit)
 */
private void testDeserializationFull(final boolean withInProgress, final String scenarioName) throws IOException, InterruptedException {
    final BucketStatePathResolver pathResolver = new BucketStatePathResolver(BASE_PATH, previousVersion);
    try {
        final java.nio.file.Path outputPath = pathResolver.getOutputPath(scenarioName);
        final java.nio.file.Path bucketDir = outputPath.resolve(BUCKET_ID);
        final Path testBucketPath = new Path(bucketDir.toString());
        // restore the state
        final FileWriterBucketState recoveredState = readBucketStateFromTemplate(scenarioName, previousVersion);
        final int noOfPendingCheckpoints = 5;
        // the recovered state is expected to hold pending files for 5 checkpoints that did not complete
        final Map<Long, List<InProgressFileWriter.PendingFileRecoverable>> pendingFileRecoverables = recoveredState.getPendingFileRecoverablesPerCheckpoint();
        Assert.assertEquals(noOfPendingCheckpoints, pendingFileRecoverables.size());
        // Files.list holds an open directory handle, so the stream must be closed after use
        final Set<String> beforeRestorePaths;
        try (java.util.stream.Stream<java.nio.file.Path> files = Files.list(bucketDir)) {
            beforeRestorePaths = files.map(file -> file.getFileName().toString()).collect(Collectors.toSet());
        }
        // before restoring, every pending file still carries the "inprogress" marker
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = ".part-0-" + i + ".inprogress";
            assertThat(beforeRestorePaths, hasItem(startsWith(part)));
        }
        final FileWriterBucket<String> bucket = restoreBucket(recoveredState);
        Assert.assertEquals(testBucketPath, bucket.getBucketPath());
        Assert.assertEquals(noOfPendingCheckpoints, bucket.getPendingFiles().size());
        // simulates that we commit the recovered pending files on the first checkpoint
        bucket.snapshotState();
        Collection<CommitRequest<FileSinkCommittable>> committables = bucket.prepareCommit(false).stream().map(MockCommitRequest::new).collect(Collectors.toList());
        FileCommitter committer = new FileCommitter(createBucketWriter());
        committer.commit(committables);
        // collected into an explicit HashSet because entries are removed below and
        // Collectors.toSet() gives no guarantee that the returned set is mutable
        final Set<String> afterRestorePaths;
        try (java.util.stream.Stream<java.nio.file.Path> files = Files.list(bucketDir)) {
            afterRestorePaths = files.map(file -> file.getFileName().toString()).collect(Collectors.toCollection(java.util.HashSet::new));
        }
        // committed files no longer have "inprogress" in their names
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = "part-0-" + i;
            assertThat(afterRestorePaths, hasItem(part));
            afterRestorePaths.remove(part);
        }
        if (withInProgress) {
            // only the in-progress file must be left
            assertThat(afterRestorePaths, iterableWithSize(1));
            // verify that the in-progress file is still there
            assertThat(afterRestorePaths, hasItem(startsWith(".part-0-" + noOfPendingCheckpoints + ".inprogress")));
        } else {
            assertThat(afterRestorePaths, empty());
        }
    } finally {
        FileUtils.deleteDirectory(pathResolver.getResourcePath(scenarioName).toFile());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) RowWiseBucketWriter(org.apache.flink.streaming.api.functions.sink.filesystem.RowWiseBucketWriter) CoreMatchers.is(org.hamcrest.CoreMatchers.is) Arrays(java.util.Arrays) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) FileUtils(org.apache.flink.util.FileUtils) RunWith(org.junit.runner.RunWith) CoreMatchers.startsWith(org.hamcrest.CoreMatchers.startsWith) MemorySize(org.apache.flink.configuration.MemorySize) Assert.assertThat(org.junit.Assert.assertThat) BucketStatePathResolver(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStatePathResolver) Path(org.apache.flink.core.fs.Path) SimpleVersionedSerialization(org.apache.flink.core.io.SimpleVersionedSerialization) Map(java.util.Map) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) StreamingFileSink(org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink) BucketStateGenerator(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStateGenerator) ClassRule(org.junit.ClassRule) Parameterized(org.junit.runners.Parameterized) CommitRequest(org.apache.flink.api.connector.sink2.Committer.CommitRequest) Matchers.empty(org.hamcrest.Matchers.empty) Files(java.nio.file.Files) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) Collection(java.util.Collection) Set(java.util.Set) Test(org.junit.Test) IOException(java.io.IOException) MockCommitRequest(org.apache.flink.api.connector.sink2.mocks.MockCommitRequest) Collectors(java.util.stream.Collectors) List(java.util.List) FileCommitter(org.apache.flink.connector.file.sink.committer.FileCommitter) FileSystem(org.apache.flink.core.fs.FileSystem) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) SimpleStringEncoder(org.apache.flink.api.common.serialization.SimpleStringEncoder) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) OutputFileConfig(org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) 
DefaultRollingPolicy(org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy) Assert(org.junit.Assert) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) TemporaryFolder(org.junit.rules.TemporaryFolder) CommitRequest(org.apache.flink.api.connector.sink2.Committer.CommitRequest) MockCommitRequest(org.apache.flink.api.connector.sink2.mocks.MockCommitRequest) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) BucketStatePathResolver(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStatePathResolver) FileCommitter(org.apache.flink.connector.file.sink.committer.FileCommitter) List(java.util.List)

Aggregations

IOException (java.io.IOException)1 Files (java.nio.file.Files)1 Paths (java.nio.file.Paths)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 SimpleStringEncoder (org.apache.flink.api.common.serialization.SimpleStringEncoder)1 CommitRequest (org.apache.flink.api.connector.sink2.Committer.CommitRequest)1 MockCommitRequest (org.apache.flink.api.connector.sink2.mocks.MockCommitRequest)1 MemorySize (org.apache.flink.configuration.MemorySize)1 FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable)1 FileCommitter (org.apache.flink.connector.file.sink.committer.FileCommitter)1 FileSystem (org.apache.flink.core.fs.FileSystem)1 Path (org.apache.flink.core.fs.Path)1 SimpleVersionedSerialization (org.apache.flink.core.io.SimpleVersionedSerialization)1 SimpleVersionedSerializer (org.apache.flink.core.io.SimpleVersionedSerializer)1 BucketStateGenerator (org.apache.flink.streaming.api.functions.sink.filesystem.BucketStateGenerator)1