Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
Class CompactorOperatorStateHandler, method drain.
/**
 * Blocks until every outstanding compacting request has finished, folds the compacted
 * results into the held messages, and emits a fresh summary followed by all held
 * committables. Afterwards the request state and the holding buffers are released.
 *
 * <p>Preconditions, enforced through {@code checkState}: a summary is currently held, all of
 * its committables are still pending, and its committable count equals the number of held
 * messages plus compacting messages.
 *
 * @throws ExecutionException declared by {@code Future#get()} for a request that completed
 *     exceptionally
 * @throws InterruptedException declared by {@code Future#get()}
 */
private void drain() throws ExecutionException, InterruptedException {
    checkState(holdingSummary != null);
    final int expectedCommittables = holdingSummary.getNumberOfCommittables();
    checkState(holdingSummary.getNumberOfPendingCommittables() == expectedCommittables);
    checkState(expectedCommittables == holdingMessages.size() + compactingMessages.size());

    // The checkpoint id is optional on the summary; it stays null when absent.
    Long checkpointId = null;
    if (holdingSummary.getCheckpointId().isPresent()) {
        checkpointId = holdingSummary.getCheckpointId().getAsLong();
    }
    final int subtaskId = holdingSummary.getSubtaskId();

    if (!compactingRequests.isEmpty()) {
        // Wait for all requests at once. join() rethrows a failed request as an unchecked
        // CompletionException, so a failure surfaces here before any get() below.
        CompletableFuture<?>[] pendingFutures =
                compactingRequests.stream()
                        .map(request -> request.f1)
                        .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(pendingFutures).join();

        for (Tuple2<CompactorRequest, CompletableFuture<Iterable<FileSinkCommittable>>> request :
                compactingRequests) {
            CompletableFuture<Iterable<FileSinkCommittable>> result = request.f1;
            checkState(result.isDone());
            // Wrap every compacted committable with the lineage of the held summary.
            for (FileSinkCommittable compacted : result.get()) {
                holdingMessages.add(
                        new CommittableWithLineage<>(compacted, checkpointId, subtaskId));
            }
        }
    }

    // Rebuild the summary so its counts cover the held messages plus the compacted ones
    // appended above; the failed count is carried over from the held summary.
    final int emittedCount = holdingMessages.size();
    CommittableSummary<FileSinkCommittable> mergedSummary =
            new CommittableSummary<>(
                    subtaskId,
                    holdingSummary.getNumberOfSubtasks(),
                    checkpointId,
                    emittedCount,
                    emittedCount,
                    holdingSummary.getNumberOfFailedCommittables());
    output.collect(new StreamRecord<>(mergedSummary));
    for (CommittableMessage<FileSinkCommittable> message : holdingMessages) {
        output.collect(new StreamRecord<>(message));
    }

    // All remaining requests are done and their results emitted, so nothing has to be
    // kept any longer.
    remainingRequestsState.clear();
    compactingRequests.clear();
    compactingMessages.clear();
    holdingSummary = null;
    holdingMessages = null;

    if (writerStateDrained) {
        // Once the writer state is drained as well, mark this handler as fully drained
        // and shut the compact service down.
        stateDrained = true;
        compactService.close();
        compactService = null;
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
Class CompactorOperator, method emitCompacted.
/**
 * Collects the results of every compacting request that has already finished, removes
 * those requests from {@code compactingRequests}, and emits the results downstream as one
 * summary followed by one lineage-wrapped message per committable. Requests that are not
 * done yet are left in place. Nothing is emitted when no result is available.
 *
 * @param checkpointId checkpoint id attached to the summary and to every emitted
 *     committable; may be {@code null}
 * @throws Exception if retrieving the result of a finished request fails
 */
private void emitCompacted(@Nullable Long checkpointId) throws Exception {
    List<FileSinkCommittable> finished = new ArrayList<>();
    for (Iterator<Tuple2<CompactorRequest, CompletableFuture<Iterable<FileSinkCommittable>>>>
                    it = compactingRequests.iterator();
            it.hasNext(); ) {
        CompletableFuture<Iterable<FileSinkCommittable>> result = it.next().f1;
        if (!result.isDone()) {
            continue;
        }
        // The request is dropped before its result is read, so a request that completed
        // exceptionally is already removed when get() throws.
        it.remove();
        result.get().forEach(finished::add);
    }

    if (finished.isEmpty()) {
        return;
    }

    final int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    final int finishedCount = finished.size();

    // The summary has to go out ahead of the committables it announces.
    CommittableSummary<FileSinkCommittable> announcement =
            new CommittableSummary<>(
                    subtaskIndex,
                    getRuntimeContext().getNumberOfParallelSubtasks(),
                    checkpointId,
                    finishedCount,
                    finishedCount,
                    0);
    output.collect(new StreamRecord<>(announcement));

    for (FileSinkCommittable committable : finished) {
        CommittableWithLineage<FileSinkCommittable> wrapped =
                new CommittableWithLineage<>(committable, checkpointId, subtaskIndex);
        output.collect(new StreamRecord<>(wrapped));
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
Class CompactCoordinator, method initializeState.
/**
 * Binds {@code remainingCommittableState} to the operator list state and replays every
 * restored committable through {@code packAndTrigger}. Whenever that call returns
 * {@code true}, the bucket of the committable is fired and purged.
 *
 * @param context initialization context providing the operator state store
 * @throws Exception if the parent initialization or the state access fails
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    remainingCommittableState =
            new SimpleVersionedListState<>(
                    context.getOperatorStateStore()
                            .getListState(REMAINING_COMMITTABLE_RAW_STATES_DESC),
                    committableSerializer);

    Iterable<FileSinkCommittable> restored = remainingCommittableState.get();
    if (restored == null) {
        // Nothing was restored, so there is nothing to redistribute.
        return;
    }

    for (FileSinkCommittable restoredCommittable : restored) {
        // Re-pack each restored committable; a positive result means its bucket is ready.
        boolean bucketReady = packAndTrigger(restoredCommittable);
        if (bucketReady) {
            fireAndPurge(restoredCommittable.getBucketId());
        }
    }
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
Class CompactCoordinatorStateHandler, method initializeState.
/**
 * Reads the committables left in the operator list state, forwards each one downstream
 * wrapped in its own {@code CompactorRequest}, and then clears that state.
 *
 * @param context initialization context providing the operator state store
 * @throws Exception if the parent initialization or the state access fails
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    ListState<FileSinkCommittable> restoredState =
            new SimpleVersionedListState<>(
                    context.getOperatorStateStore()
                            .getListState(REMAINING_COMMITTABLE_RAW_STATES_DESC),
                    committableSerializer);

    Iterable<FileSinkCommittable> leftovers = restoredState.get();
    if (leftovers != null) {
        for (FileSinkCommittable leftover : leftovers) {
            // One request per committable: compacting several files together is not
            // available here, so every leftover travels alone.
            CompactorRequest singleRequest = new CompactorRequest(leftover.getBucketId());
            singleRequest.addToCompact(leftover);
            output.collect(new StreamRecord<>(Either.Right(singleRequest)));
        }
    }

    // Everything restored has been emitted above, so the state is dropped. From here on
    // the operator keeps no state and does not need snapshotState.
    restoredState.clear();
}
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in the Apache Flink project.
Class CompactService, method getCompactingPath.
/**
 * Commits the pending file of every committable in the request and returns the paths of
 * those files in request order.
 *
 * <p>Each committable must carry a pending file whose path is non-null and whose file name
 * starts with {@code "."}; otherwise {@code checkState} rejects it.
 *
 * @param request the compacting request whose committables are processed
 * @return the paths reported by the pending files, one per committable
 * @throws IOException if recovering or committing a pending file fails
 */
private List<Path> getCompactingPath(CompactorRequest request) throws IOException {
    List<Path> committedPaths = new ArrayList<>();

    for (FileSinkCommittable toCompact : request.getCommittableToCompact()) {
        PendingFileRecoverable recoverable = toCompact.getPendingFile();
        checkState(recoverable != null, "Illegal committable to compact, pending file is null.");

        Path filePath = recoverable.getPath();
        boolean hasDotPrefixedName = filePath != null && filePath.getName().startsWith(".");
        checkState(
                hasDotPrefixedName,
                "Illegal pending file to compact, path should start with . but is " + filePath);

        // The pending file is committed first; compaction then works on the committed file.
        bucketWriter.recoverPendingFile(recoverable).commitAfterRecovery();
        committedPaths.add(filePath);
    }

    return committedPaths;
}
Aggregations