Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
From the class CompactCoordinatorTest, method testCompactOnEndOfInput.
@Test
public void testCompactOnEndOfInput() throws Exception {
    // With a 10-byte size threshold, a single 5-byte committable stays buffered
    // until end of input forces a flush.
    FileCompactStrategy sizeThresholdStrategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(sizeThresholdStrategy, getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        FileSinkCommittable pendingCommittable = committable("0", ".0", 5);
        harness.processElement(message(pendingCommittable));
        // Below the size threshold: nothing may be emitted yet.
        Assert.assertEquals(0, harness.extractOutputValues().size());

        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1);
        // A checkpoint alone does not trigger compaction for this strategy.
        Assert.assertEquals(0, harness.extractOutputValues().size());

        harness.endInput();
        // End of input flushes the buffered committable as exactly one request.
        List<CompactorRequest> emittedRequests = harness.extractOutputValues();
        Assert.assertEquals(1, emittedRequests.size());
        assertToCompact(emittedRequests.get(0), pendingCommittable);
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
From the class CompactCoordinatorTest, method testStateHandler.
@Test
public void testStateHandler() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(strategy, getTestCommittableSerializer());

    // Committables whose file names start with '.' are compaction candidates;
    // the one without the '.' prefix must be passed through untouched.
    FileSinkCommittable committable0 = committable("0", ".0", 5);
    FileSinkCommittable committable1 = committable("0", ".1", 6);
    FileSinkCommittable committable2 = committable("0", "2", 6);

    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();
        harness.processElement(message(committable0));
        // Below the size threshold, so nothing is emitted before the snapshot.
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(1);
        state = harness.snapshot(1, 1);
    }

    // Restore the coordinator's snapshot into the state handler and verify that
    // the pending request is replayed and new committables are routed correctly.
    CompactCoordinatorStateHandler handler =
            new CompactCoordinatorStateHandler(getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<
                    CommittableMessage<FileSinkCommittable>,
                    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>>
            harness = new OneInputStreamOperatorTestHarness<>(handler)) {
        harness.setup(
                new EitherSerializer<>(
                        new SimpleVersionedSerializerTypeSerializerProxy<>(
                                () ->
                                        new CommittableMessageSerializer<>(
                                                getTestCommittableSerializer())),
                        new SimpleVersionedSerializerTypeSerializerProxy<>(
                                () -> new CompactorRequestSerializer(getTestCommittableSerializer()))));
        harness.initializeState(state);
        harness.open();
        Assert.assertEquals(1, harness.extractOutputValues().size());

        harness.processElement(message(committable1));
        harness.processElement(message(committable2));

        List<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> results =
                harness.extractOutputValues();
        Assert.assertEquals(3, results.size());

        // The request restored from state is replayed first.
        Assert.assertTrue(results.get(0).isRight());
        assertToCompact(results.get(0).right(), committable0);

        // Committable with '.' prefix should also be passed through.
        // Note: assertEquals takes (expected, actual) — order fixed here so
        // failure messages report the right values.
        Assert.assertTrue(
                results.get(1).isLeft() && results.get(1).left() instanceof CommittableWithLineage);
        Assert.assertEquals(
                committable1,
                ((CommittableWithLineage<FileSinkCommittable>) results.get(1).left())
                        .getCommittable());

        // Committable without '.' prefix should be passed through normally.
        Assert.assertTrue(
                results.get(2).isLeft() && results.get(2).left() instanceof CommittableWithLineage);
        Assert.assertEquals(
                committable2,
                ((CommittableWithLineage<FileSinkCommittable>) results.get(2).left())
                        .getCommittable());
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
From the class CompactCoordinatorTest, method testCompactOnCheckpoint.
@Test
public void testCompactOnCheckpoint() throws Exception {
    // Compaction is triggered on every checkpoint (interval = 1).
    FileCompactStrategy checkpointStrategy =
            Builder.newBuilder().enableCompactionOnCheckpoint(1).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(checkpointStrategy, getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        FileSinkCommittable committable0 = committable("0", ".0", 5);
        FileSinkCommittable committable1 = committable("0", ".1", 6);
        FileSinkCommittable committable2 = committable("0", ".2", 5);
        FileSinkCommittable committable3 = committable("1", ".0", 5);

        harness.processElement(message(committable0));
        harness.processElement(message(committable1));
        // Nothing is emitted until the first checkpoint barrier.
        Assert.assertEquals(0, harness.extractOutputValues().size());

        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1);
        // Checkpoint 1 flushes both committables of bucket "0" as one request.
        Assert.assertEquals(1, harness.extractOutputValues().size());

        harness.processElement(message(committable2));
        harness.processElement(message(committable3));
        Assert.assertEquals(1, harness.extractOutputValues().size());

        harness.prepareSnapshotPreBarrier(2);
        harness.snapshot(2, 2);

        // Checkpoint 2 adds one request per bucket ("0" and "1").
        List<CompactorRequest> emittedRequests = harness.extractOutputValues();
        Assert.assertEquals(3, emittedRequests.size());
        assertToCompact(emittedRequests.get(0), committable0, committable1);
        assertToCompact(emittedRequests.get(1), committable2);
        assertToCompact(emittedRequests.get(2), committable3);
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
From the class FileWriterBucketStateSerializerMigrationTest, method testDeserializationFull.
/**
 * Restores a bucket state written by a previous sink version, commits the recovered pending
 * files, and verifies the on-disk file names before and after the commit.
 *
 * @param withInProgress whether the scenario contains an additional in-progress file that must
 *     survive the commit
 * @param scenarioName name of the template scenario directory to restore from
 */
private void testDeserializationFull(final boolean withInProgress, final String scenarioName)
        throws IOException, InterruptedException {
    final BucketStatePathResolver pathResolver =
            new BucketStatePathResolver(BASE_PATH, previousVersion);
    try {
        final java.nio.file.Path outputPath = pathResolver.getOutputPath(scenarioName);
        final Path testBucketPath = new Path(outputPath.resolve(BUCKET_ID).toString());

        // Restore the state written by the previous sink version.
        final FileWriterBucketState recoveredState =
                readBucketStateFromTemplate(scenarioName, previousVersion);

        // The template contains 5 checkpoints that never completed.
        final int noOfPendingCheckpoints = 5;
        final Map<Long, List<InProgressFileWriter.PendingFileRecoverable>> pendingFileRecoverables =
                recoveredState.getPendingFileRecoverablesPerCheckpoint();
        Assert.assertEquals(noOfPendingCheckpoints, pendingFileRecoverables.size());

        // Before restoring, every pending part file still carries the
        // ".inprogress" marker in its name.
        final Set<String> beforeRestorePaths = listFileNames(outputPath.resolve(BUCKET_ID));
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = ".part-0-" + i + ".inprogress";
            assertThat(beforeRestorePaths, hasItem(startsWith(part)));
        }

        final FileWriterBucket<String> bucket = restoreBucket(recoveredState);
        Assert.assertEquals(testBucketPath, bucket.getBucketPath());
        Assert.assertEquals(noOfPendingCheckpoints, bucket.getPendingFiles().size());

        // Simulate committing the recovered pending files on the first checkpoint.
        bucket.snapshotState();
        Collection<CommitRequest<FileSinkCommittable>> committables =
                bucket.prepareCommit(false).stream()
                        .map(MockCommitRequest::new)
                        .collect(Collectors.toList());
        FileCommitter committer = new FileCommitter(createBucketWriter());
        committer.commit(committables);

        // Committed files must no longer carry the ".inprogress" marker.
        final Set<String> afterRestorePaths = listFileNames(outputPath.resolve(BUCKET_ID));
        for (int i = 0; i < noOfPendingCheckpoints; i++) {
            final String part = "part-0-" + i;
            assertThat(afterRestorePaths, hasItem(part));
            afterRestorePaths.remove(part);
        }

        if (withInProgress) {
            // Only the in-progress file of the next checkpoint may remain.
            assertThat(afterRestorePaths, iterableWithSize(1));
            assertThat(
                    afterRestorePaths,
                    hasItem(startsWith(".part-0-" + noOfPendingCheckpoints + ".inprogress")));
        } else {
            assertThat(afterRestorePaths, empty());
        }
    } finally {
        FileUtils.deleteDirectory(pathResolver.getResourcePath(scenarioName).toFile());
    }
}

/**
 * Lists the file names directly under {@code dir}. The stream returned by {@code Files.list}
 * holds an open directory handle and must be closed; the previous inline usage leaked it.
 */
private static Set<String> listFileNames(java.nio.file.Path dir) throws IOException {
    try (java.util.stream.Stream<java.nio.file.Path> files = Files.list(dir)) {
        return files.map(file -> file.getFileName().toString()).collect(Collectors.toSet());
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
From the class FileWriterBucketTest, method testOnCheckpointWithInProgressFileToCleanup.
@Test
public void testOnCheckpointWithInProgressFileToCleanup() throws IOException {
    File outDir = TEMP_FOLDER.newFolder();
    Path path = new Path(outDir.toURI());
    TestRecoverableWriter recoverableWriter = getRecoverableWriter(path);
    FileWriterBucket<String> bucket =
            createBucket(
                    recoverableWriter, path, DEFAULT_ROLLING_POLICY, OutputFileConfig.builder().build());
    bucket.write("test-element", 0);
    bucket.prepareCommit(false);
    bucket.snapshotState();

    // One more checkpoint: the element written here stays in-progress.
    bucket.write("test-element", 0);
    List<FileSinkCommittable> fileSinkCommittables = bucket.prepareCommit(false);
    FileWriterBucketState bucketState = bucket.snapshotState();

    compareNumberOfPendingAndInProgress(fileSinkCommittables, 0, 1);
    assertEquals(BUCKET_ID, bucketState.getBucketId());
    assertEquals(path, bucketState.getBucketPath());
    // Message fixed: assertNotNull fails when the recoverable is MISSING, so the
    // message must state that the bucket should HAVE an in-progress recoverable.
    assertNotNull(
            "The bucket should have an in-progress recoverable",
            bucketState.getInProgressFileRecoverable());
}
Aggregations