Search in sources :

Example 11 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class CompactCoordinatorTest method testCompactOnEndOfInput.

/**
 * Checks that a single buffered committable produces no compaction request while it is
 * processed or when checkpoint 1 is snapshotted, and that exactly one request containing it
 * is emitted once the input ends.
 */
@Test
public void testCompactOnEndOfInput() throws Exception {
    // Strategy is configured with a size threshold of 10 only.
    CompactCoordinator coordinator =
            new CompactCoordinator(
                    Builder.newBuilder().setSizeThreshold(10).build(),
                    getTestCommittableSerializer());

    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest> testHarness =
            new OneInputStreamOperatorTestHarness<>(coordinator)) {
        testHarness.setup();
        testHarness.open();

        FileSinkCommittable pending = committable("0", ".0", 5);
        testHarness.processElement(message(pending));

        // Nothing is emitted right after the element is processed.
        int emittedAfterElement = testHarness.extractOutputValues().size();
        Assert.assertEquals(0, emittedAfterElement);

        // Nothing is emitted by the first checkpoint either.
        testHarness.prepareSnapshotPreBarrier(1);
        testHarness.snapshot(1, 1);
        int emittedAfterCheckpoint = testHarness.extractOutputValues().size();
        Assert.assertEquals(0, emittedAfterCheckpoint);

        // Ending the input flushes the buffered committable as one request.
        testHarness.endInput();
        List<CompactorRequest> requests = testHarness.extractOutputValues();
        Assert.assertEquals(1, requests.size());
        assertToCompact(requests.get(0), pending);
    }
}
Also used : CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) CompactorRequest(org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CompactCoordinator(org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator) Test(org.junit.Test)

Example 12 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class CompactCoordinatorTest method testStateHandler.

/**
 * Checks that state snapshotted by a {@link CompactCoordinator} can be restored by a
 * {@link CompactCoordinatorStateHandler}: the restored committable is emitted as a compaction
 * request (right side), and committables processed afterwards are passed through (left side)
 * regardless of whether their name carries a {@code .} prefix.
 */
@Test
public void testStateHandler() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator = new CompactCoordinator(strategy, getTestCommittableSerializer());
    // with . prefix
    FileSinkCommittable committable0 = committable("0", ".0", 5);
    FileSinkCommittable committable1 = committable("0", ".1", 6);
    // without . prefix
    FileSinkCommittable committable2 = committable("0", "2", 6);
    OperatorSubtaskState state;
    // Phase 1: let the coordinator buffer one committable and snapshot its state.
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest> harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();
        harness.processElement(message(committable0));
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(1);
        state = harness.snapshot(1, 1);
    }
    // Phase 2: restore that state into the state handler and feed it further committables.
    CompactCoordinatorStateHandler handler = new CompactCoordinatorStateHandler(getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> harness = new OneInputStreamOperatorTestHarness<>(handler)) {
        harness.setup(new EitherSerializer<>(new SimpleVersionedSerializerTypeSerializerProxy<>(() -> new CommittableMessageSerializer<>(getTestCommittableSerializer())), new SimpleVersionedSerializerTypeSerializerProxy<>(() -> new CompactorRequestSerializer(getTestCommittableSerializer()))));
        harness.initializeState(state);
        harness.open();
        // The restored committable is emitted as soon as the handler is opened.
        Assert.assertEquals(1, harness.extractOutputValues().size());
        harness.processElement(message(committable1));
        harness.processElement(message(committable2));
        List<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> results = harness.extractOutputValues();
        Assert.assertEquals(3, results.size());
        // restored request
        Assert.assertTrue(results.get(0).isRight());
        assertToCompact(results.get(0).right(), committable0);
        // committable with . prefix should also be passed through
        Assert.assertTrue(results.get(1).isLeft() && results.get(1).left() instanceof CommittableWithLineage);
        // JUnit's contract is assertEquals(expected, actual); keep that order so failure
        // messages report the right side as "expected".
        Assert.assertEquals(committable1, ((CommittableWithLineage<FileSinkCommittable>) results.get(1).left()).getCommittable());
        // committable without . prefix should be passed through normally
        Assert.assertTrue(results.get(2).isLeft() && results.get(2).left() instanceof CommittableWithLineage);
        Assert.assertEquals(committable2, ((CommittableWithLineage<FileSinkCommittable>) results.get(2).left()).getCommittable());
    }
}
Also used : CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) CommittableWithLineage(org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage) CompactCoordinatorStateHandler(org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinatorStateHandler) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CompactCoordinator(org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) SimpleVersionedSerializerTypeSerializerProxy(org.apache.flink.core.io.SimpleVersionedSerializerTypeSerializerProxy) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) Either(org.apache.flink.types.Either) CompactorRequest(org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest) CompactorRequestSerializer(org.apache.flink.connector.file.sink.compactor.operator.CompactorRequestSerializer) Test(org.junit.Test)

Example 13 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class CompactCoordinatorTest method testCompactOnCheckpoint.

/**
 * Checks compaction triggered by checkpoints: with compaction-on-checkpoint enabled with
 * argument 1, two committables processed before checkpoint 1 yield one request at that
 * checkpoint, and two further committables yield two more requests at checkpoint 2.
 */
@Test
public void testCompactOnCheckpoint() throws Exception {
    CompactCoordinator coordinator =
            new CompactCoordinator(
                    Builder.newBuilder().enableCompactionOnCheckpoint(1).build(),
                    getTestCommittableSerializer());

    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest> testHarness =
            new OneInputStreamOperatorTestHarness<>(coordinator)) {
        testHarness.setup();
        testHarness.open();

        FileSinkCommittable first = committable("0", ".0", 5);
        FileSinkCommittable second = committable("0", ".1", 6);
        FileSinkCommittable third = committable("0", ".2", 5);
        FileSinkCommittable fourth = committable("1", ".0", 5);

        // First batch: nothing is emitted until checkpoint 1 is taken.
        testHarness.processElement(message(first));
        testHarness.processElement(message(second));
        int emittedBeforeFirstCheckpoint = testHarness.extractOutputValues().size();
        Assert.assertEquals(0, emittedBeforeFirstCheckpoint);

        testHarness.prepareSnapshotPreBarrier(1);
        testHarness.snapshot(1, 1);
        int emittedAfterFirstCheckpoint = testHarness.extractOutputValues().size();
        Assert.assertEquals(1, emittedAfterFirstCheckpoint);

        // Second batch: the output count stays at one until checkpoint 2 is taken.
        testHarness.processElement(message(third));
        testHarness.processElement(message(fourth));
        int emittedBeforeSecondCheckpoint = testHarness.extractOutputValues().size();
        Assert.assertEquals(1, emittedBeforeSecondCheckpoint);

        testHarness.prepareSnapshotPreBarrier(2);
        testHarness.snapshot(2, 2);

        List<CompactorRequest> requests = testHarness.extractOutputValues();
        Assert.assertEquals(3, requests.size());
        assertToCompact(requests.get(0), first, second);
        assertToCompact(requests.get(1), third);
        assertToCompact(requests.get(2), fourth);
    }
}
Also used : CommittableMessage(org.apache.flink.streaming.api.connector.sink2.CommittableMessage) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) CompactorRequest(org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CompactCoordinator(org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator) Test(org.junit.Test)

Example 14 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class FileWriterBucketStateSerializerMigrationTest method testDeserializationFull.

/**
 * Restores a bucket state written by {@code previousVersion} for the given scenario, commits
 * the recovered pending files, and verifies the file names present in the bucket directory
 * before and after the commit. The scenario's resource directory is deleted afterwards.
 *
 * @param withInProgress whether one in-progress file is expected to remain after the commit
 * @param scenarioName scenario used to resolve the output path, state template and resources
 * @throws IOException if listing the bucket directory or another I/O call in the body fails
 * @throws InterruptedException propagated from the calls in the body (presumably the commit;
 *     not verifiable from this method alone)
 */
private void testDeserializationFull(final boolean withInProgress, final String scenarioName) throws IOException, InterruptedException {
    final BucketStatePathResolver pathResolver = new BucketStatePathResolver(BASE_PATH, previousVersion);
    try {
        final java.nio.file.Path outputPath = pathResolver.getOutputPath(scenarioName);
        final java.nio.file.Path bucketDirectory = outputPath.resolve(BUCKET_ID);
        final Path testBucketPath = new Path(bucketDirectory.toString());
        // restore the state
        final FileWriterBucketState recoveredState = readBucketStateFromTemplate(scenarioName, previousVersion);
        final int noOfPendingCheckpoints = 5;
        // there are five checkpoints that have not completed yet
        final Map<Long, List<InProgressFileWriter.PendingFileRecoverable>> pendingFileRecoverables = recoveredState.getPendingFileRecoverablesPerCheckpoint();
        Assert.assertEquals(noOfPendingCheckpoints, pendingFileRecoverables.size());
        // Files.list holds an open directory handle; close it via try-with-resources.
        final Set<String> beforeRestorePaths;
        try (java.util.stream.Stream<java.nio.file.Path> files = Files.list(bucketDirectory)) {
            beforeRestorePaths = files.map(file -> file.getFileName().toString()).collect(Collectors.toSet());
        }
        // before restoring, every file has "inprogress" in its name
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = ".part-0-" + i + ".inprogress";
            assertThat(beforeRestorePaths, hasItem(startsWith(part)));
        }
        final FileWriterBucket<String> bucket = restoreBucket(recoveredState);
        Assert.assertEquals(testBucketPath, bucket.getBucketPath());
        Assert.assertEquals(noOfPendingCheckpoints, bucket.getPendingFiles().size());
        // simulates we commit the recovered pending files on the first checkpoint
        bucket.snapshotState();
        Collection<CommitRequest<FileSinkCommittable>> committables = bucket.prepareCommit(false).stream().map(MockCommitRequest::new).collect(Collectors.toList());
        FileCommitter committer = new FileCommitter(createBucketWriter());
        committer.commit(committables);
        final Set<String> afterRestorePaths;
        try (java.util.stream.Stream<java.nio.file.Path> files = Files.list(bucketDirectory)) {
            afterRestorePaths = files.map(file -> file.getFileName().toString()).collect(Collectors.toSet());
        }
        // there is no "inprogress" in the file name of the committed files
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = "part-0-" + i;
            assertThat(afterRestorePaths, hasItem(part));
            // drop the verified name so that only unexpected/in-progress files remain
            afterRestorePaths.remove(part);
        }
        if (withInProgress) {
            // only the in-progress must be left
            assertThat(afterRestorePaths, iterableWithSize(1));
            // verify that the in-progress file is still there
            assertThat(afterRestorePaths, hasItem(startsWith(".part-0-" + noOfPendingCheckpoints + ".inprogress")));
        } else {
            assertThat(afterRestorePaths, empty());
        }
    } finally {
        FileUtils.deleteDirectory(pathResolver.getResourcePath(scenarioName).toFile());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) RowWiseBucketWriter(org.apache.flink.streaming.api.functions.sink.filesystem.RowWiseBucketWriter) CoreMatchers.is(org.hamcrest.CoreMatchers.is) Arrays(java.util.Arrays) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) FileUtils(org.apache.flink.util.FileUtils) RunWith(org.junit.runner.RunWith) CoreMatchers.startsWith(org.hamcrest.CoreMatchers.startsWith) MemorySize(org.apache.flink.configuration.MemorySize) Assert.assertThat(org.junit.Assert.assertThat) BucketStatePathResolver(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStatePathResolver) Path(org.apache.flink.core.fs.Path) SimpleVersionedSerialization(org.apache.flink.core.io.SimpleVersionedSerialization) Map(java.util.Map) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) StreamingFileSink(org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink) BucketStateGenerator(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStateGenerator) ClassRule(org.junit.ClassRule) Parameterized(org.junit.runners.Parameterized) CommitRequest(org.apache.flink.api.connector.sink2.Committer.CommitRequest) Matchers.empty(org.hamcrest.Matchers.empty) Files(java.nio.file.Files) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) Collection(java.util.Collection) Set(java.util.Set) Test(org.junit.Test) IOException(java.io.IOException) MockCommitRequest(org.apache.flink.api.connector.sink2.mocks.MockCommitRequest) Collectors(java.util.stream.Collectors) List(java.util.List) FileCommitter(org.apache.flink.connector.file.sink.committer.FileCommitter) FileSystem(org.apache.flink.core.fs.FileSystem) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) SimpleStringEncoder(org.apache.flink.api.common.serialization.SimpleStringEncoder) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) OutputFileConfig(org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) 
DefaultRollingPolicy(org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy) Assert(org.junit.Assert) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) TemporaryFolder(org.junit.rules.TemporaryFolder) CommitRequest(org.apache.flink.api.connector.sink2.Committer.CommitRequest) MockCommitRequest(org.apache.flink.api.connector.sink2.mocks.MockCommitRequest) InProgressFileWriter(org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter) BucketStatePathResolver(org.apache.flink.streaming.api.functions.sink.filesystem.BucketStatePathResolver) FileCommitter(org.apache.flink.connector.file.sink.committer.FileCommitter) List(java.util.List)

Example 15 with FileSinkCommittable

use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.

the class FileWriterBucketTest method testOnCheckpointWithInProgressFileToCleanup.

/**
 * Writes one element, prepares a commit and snapshots the bucket, then repeats the
 * write/prepare/snapshot cycle and verifies the second cycle's committables and snapshot:
 * the committables are checked via {@code compareNumberOfPendingAndInProgress(..., 0, 1)},
 * and the snapshot must carry the bucket id, the bucket path and a non-null in-progress
 * file recoverable.
 */
@Test
public void testOnCheckpointWithInProgressFileToCleanup() throws IOException {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    TestRecoverableWriter recoverableWriter = getRecoverableWriter(path);
    FileWriterBucket<String> bucket = createBucket(recoverableWriter, path, DEFAULT_ROLLING_POLICY, OutputFileConfig.builder().build());
    bucket.write("test-element", 0);
    bucket.prepareCommit(false);
    bucket.snapshotState();
    // One more checkpoint
    bucket.write("test-element", 0);
    List<FileSinkCommittable> fileSinkCommittables = bucket.prepareCommit(false);
    FileWriterBucketState bucketState = bucket.snapshotState();
    compareNumberOfPendingAndInProgress(fileSinkCommittables, 0, 1);
    assertEquals(BUCKET_ID, bucketState.getBucketId());
    assertEquals(path, bucketState.getBucketPath());
    // The message is shown when the recoverable is null, so it must state the expectation
    // that a recoverable IS present.
    assertNotNull("The bucket should have in-progress recoverable", bucketState.getInProgressFileRecoverable());
}
Also used : Path(org.apache.flink.core.fs.Path) FileSinkCommittable(org.apache.flink.connector.file.sink.FileSinkCommittable) File(java.io.File) Test(org.junit.Test)

Aggregations

FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable)32 Test (org.junit.Test)22 CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage)12 Path (org.apache.flink.core.fs.Path)11 CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest)10 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)10 File (java.io.File)8 CompactCoordinator (org.apache.flink.connector.file.sink.compactor.operator.CompactCoordinator)8 ArrayList (java.util.ArrayList)7 CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage)6 CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary)5 List (java.util.List)4 MockCommitRequest (org.apache.flink.api.connector.sink2.mocks.MockCommitRequest)4 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)4 PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable)4 Map (java.util.Map)3 IOException (java.io.IOException)2 Collection (java.util.Collection)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 Collectors (java.util.stream.Collectors)2