Use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.
The class CompactorOperatorStateHandler, method processElement:
@Override
public void processElement(
        StreamRecord<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> element)
        throws Exception {
    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest> record = element.getValue();
    if (stateDrained) {
        // All input should be committable messages to pass through.
        output.collect(new StreamRecord<>(record.left()));
        return;
    }

    if (record.isRight()) {
        submit(element.getValue().right());
        return;
    }

    CommittableMessage<FileSinkCommittable> message = record.left();
    if (message instanceof CommittableSummary) {
        checkState(holdingSummary == null, "Duplicate summary before the first checkpoint.");
        holdingSummary = (CommittableSummary<FileSinkCommittable>) message;
        holdingMessages = new ArrayList<>(holdingSummary.getNumberOfCommittables());
    } else {
        boolean compacting = false;
        CommittableWithLineage<FileSinkCommittable> committableWithLineage =
                (CommittableWithLineage<FileSinkCommittable>) message;
        if (committableWithLineage.getCommittable().hasPendingFile()) {
            FileSinkCommittable committable = committableWithLineage.getCommittable();
            PendingFileRecoverable pendingFile = committable.getPendingFile();
            if (pendingFile.getPath() != null && pendingFile.getPath().getName().startsWith(".")) {
                // The pending file is the in-progress file of the previous run, which
                // should be committed and compacted before sending to the committer.
                CompactorRequest request = new CompactorRequest(committable.getBucketId());
                request.addToCompact(committable);
                submit(request);
                compacting = true;
                compactingMessages.add(message);
            } else {
                // A normal file is received, indicating the writer state is drained.
                writerStateDrained = true;
                if (compactingMessages.isEmpty() && compactingRequests.isEmpty()) {
                    // No state needs to be handled; the holding summary and all
                    // committable messages can be sent eagerly.
                    checkState(holdingSummary != null);
                    output.collect(new StreamRecord<>(holdingSummary));
                    holdingSummary = null;
                    this.stateDrained = true;
                    output.collect(new StreamRecord<>(committableWithLineage));
                }
            }
        }
        if (!compacting && !stateDrained) {
            // Compacting messages should not be added.
            // If the state is drained, no further messages need to be added.
            holdingMessages.add(message);
        }
    }
}
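The dot-prefix test above encodes a file-naming convention rather than an API: in-progress files left over from the previous run are written as hidden files, so the last path component starts with ".". A minimal, standalone sketch of that check using Flink's org.apache.flink.core.fs.Path (the file names here are made up for illustration):

import org.apache.flink.core.fs.Path;

public class HiddenFileCheckSketch {
    public static void main(String[] args) {
        // In-progress files from the previous run carry a leading dot, so
        // Path#getName() (the last path component) starting with "." marks a
        // file that must be committed and compacted before reaching the committer.
        Path finished = new Path("/bucket-0/part-0-0");
        Path inProgress = new Path("/bucket-0/.part-0-0.inprogress.uid");

        System.out.println(finished.getName().startsWith("."));   // false: pass through
        System.out.println(inProgress.getName().startsWith(".")); // true: compact first
    }
}

A committable whose pending file passes this test is wrapped in a CompactorRequest and submitted for compaction; a normal (visible) file signals that the writer state of the previous run is fully drained.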
Use of org.apache.flink.connector.file.sink.FileSinkCommittable in project flink by apache.
The class CompactService, method compact:
@SuppressWarnings({"rawtypes", "unchecked"})
private Iterable<FileSinkCommittable> compact(CompactorRequest request) throws Exception {
    List<FileSinkCommittable> results = new ArrayList<>(request.getCommittableToPassthrough());

    List<Path> compactingFiles = getCompactingPath(request);
    if (compactingFiles.isEmpty()) {
        return results;
    }

    Path targetPath = assembleCompactedFilePath(compactingFiles.get(0));
    CompactingFileWriter compactingFileWriter =
            bucketWriter.openNewCompactingFile(
                    compactingWriterType, request.getBucketId(), targetPath, System.currentTimeMillis());
    if (compactingWriterType == Type.RECORD_WISE) {
        ((RecordWiseFileCompactor) fileCompactor)
                .compact(compactingFiles, ((RecordWiseCompactingFileWriter) compactingFileWriter)::write);
    } else if (compactingWriterType == CompactingFileWriter.Type.OUTPUT_STREAM) {
        ((OutputStreamBasedFileCompactor) fileCompactor)
                .compact(
                        compactingFiles,
                        ((OutputStreamBasedCompactingFileWriter) compactingFileWriter).asOutputStream());
    }
    PendingFileRecoverable compactedPendingFile = compactingFileWriter.closeForCommit();

    FileSinkCommittable compacted = new FileSinkCommittable(request.getBucketId(), compactedPendingFile);
    results.add(compacted);
    for (Path f : compactingFiles) {
        // Clean up the compacted source files after the new file is committed.
        results.add(new FileSinkCommittable(request.getBucketId(), f));
    }
    return results;
}
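The target path for the compacted file is derived from the first input file by assembleCompactedFilePath, which is not shown above. A hedged sketch of how such a helper could work, assuming the leading dot is stripped and a visible prefix is prepended (the "compacted-" prefix and the helper body are illustrative assumptions, not copied from the Flink sources):

import org.apache.flink.core.fs.Path;

public class AssembleCompactedPathSketch {
    // Assumption: the compacted file lives next to the first input file, with the
    // hidden-file dot removed and a visible "compacted-" prefix prepended.
    static Path assembleCompactedFilePath(Path uncompactedPath) {
        String name = uncompactedPath.getName();
        if (name.startsWith(".")) {
            name = name.substring(1); // un-hide the in-progress file name
        }
        return new Path(uncompactedPath.getParent(), "compacted-" + name);
    }

    public static void main(String[] args) {
        // Prints: /bucket-0/compacted-part-0-0
        System.out.println(assembleCompactedFilePath(new Path("/bucket-0/.part-0-0")));
    }
}

Note also the shape of the returned list: the pass-through committables, then one committable carrying the compacted pending file, then one cleanup committable per input path, so the committer both commits the new file and removes the originals.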