use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit in project flink by apache.
the class CompactCoordinator method coordinate.
/**
 * Packs the files of every partition into compaction units and emits them downstream.
 *
 * <p>The files of each partition are sorted by path before packing, which keeps the packing
 * stable. Every unit is emitted with a running unit id; once all units are out, an
 * {@link EndCompaction} carrying {@code checkpointId} is emitted.
 *
 * @param checkpointId id of the checkpoint this coordination round belongs to
 * @param partFiles files to compact, keyed by partition; each list is sorted in place
 */
private void coordinate(long checkpointId, Map<String, List<Path>> partFiles) {
    // Length lookup used by the bin packing; I/O failures surface as unchecked exceptions.
    Function<Path, Long> fileLength =
            file -> {
                try {
                    return fileSystem.getFileStatus(file).getLen();
                } catch (IOException ioe) {
                    throw new UncheckedIOException(ioe);
                }
            };

    // The packing has to be stable, hence the files are ordered by path before packing.
    Map<String, List<List<Path>>> unitsByPartition = new HashMap<>();
    for (Map.Entry<String, List<Path>> partitionFiles : partFiles.entrySet()) {
        List<Path> files = partitionFiles.getValue();
        files.sort(Comparator.comparing(Path::getPath));
        unitsByPartition.put(
                partitionFiles.getKey(), BinPacking.pack(files, fileLength, targetFileSize));
    }

    // Hand the packed units over to the compactors.
    // NOTE: emission is broadcast (the checkpoint barrier below has to be emitted as well);
    // the operators pick their own units by unit id and task id.
    int nextUnitId = 0;
    for (Map.Entry<String, List<List<Path>>> partitionUnits : unitsByPartition.entrySet()) {
        for (List<Path> unitFiles : partitionUnits.getValue()) {
            CompactionUnit compactionUnit =
                    new CompactionUnit(nextUnitId, partitionUnits.getKey(), unitFiles);
            output.collect(new StreamRecord<>(compactionUnit));
            nextUnitId++;
        }
    }

    LOG.debug("Coordinate checkpoint-{}, compaction units are: {}", checkpointId, unitsByPartition);

    // Emit checkpoint barrier
    output.collect(new StreamRecord<>(new EndCompaction(checkpointId)));
}
use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit in project flink by apache.
the class CompactOperatorTest method testEndInput.
/**
 * Verifies that ending the input, without any snapshot having been taken, still produces the
 * compacted files and removes the uncompacted source files.
 */
@Test
public void testEndInput() throws Exception {
    Path first = newFile(".uncompacted-f0", 3);
    Path second = newFile(".uncompacted-f1", 4);
    Path third = newFile(".uncompacted-f2", 2);
    FileSystem localFs = first.getFileSystem();

    runCompact(
            harness -> {
                harness.setup();
                harness.open();

                harness.processElement(
                        new CompactionUnit(0, "p0", Arrays.asList(first, second)), 0);
                harness.processElement(
                        new CompactionUnit(1, "p0", Collections.singletonList(third)), 0);

                // No snapshot is taken before the input ends.
                harness.endInput();

                // Every unit must have produced its compacted file.
                for (String compactedName : Arrays.asList("compacted-f0", "compacted-f2")) {
                    Assert.assertTrue(localFs.exists(new Path(folder, compactedName)));
                }

                // All temporary (uncompacted) files must be gone.
                for (Path uncompacted : Arrays.asList(first, second, third)) {
                    Assert.assertFalse(localFs.exists(uncompacted));
                }
            });
}
use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit in project flink by apache.
the class CompactOperator method processElement.
/**
 * Handles one coordinator message.
 *
 * <p>A {@link CompactionUnit} is compacted only when {@code isTaskMessage} accepts it for this
 * subtask; its partition and paths are then recorded. An {@link EndCompaction} triggers
 * {@code endCompaction} with the carried checkpoint id. Any other message is ignored.
 *
 * @param element record wrapping the coordinator message
 * @throws Exception if compaction or ending the compaction fails
 */
@Override
public void processElement(StreamRecord<CoordinatorOutput> element) throws Exception {
    CoordinatorOutput message = element.getValue();

    if (message instanceof CompactionUnit) {
        CompactionUnit compactionUnit = (CompactionUnit) message;
        int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
        int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
        if (!compactionUnit.isTaskMessage(parallelism, subtaskIndex)) {
            // Units are broadcast; this one is not addressed to this subtask.
            return;
        }

        String unitPartition = compactionUnit.getPartition();
        List<Path> unitPaths = compactionUnit.getPaths();
        doCompact(unitPartition, unitPaths);
        this.partitions.add(unitPartition);

        // Expired files may only be deleted after the current checkpoint has been
        // executed successfully, which guarantees the files still exist until then.
        this.currentExpiredFiles.addAll(unitPaths);
        return;
    }

    if (message instanceof EndCompaction) {
        EndCompaction end = (EndCompaction) message;
        endCompaction(end.getCheckpointId());
    }
}
use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit in project flink by apache.
the class CompactOperatorTest method testCompactOperator.
/**
 * Runs the operator through a compaction round plus snapshot, then restores from that snapshot
 * and checks that completing the checkpoint removes the uncompacted files.
 */
@Test
public void testCompactOperator() throws Exception {
    AtomicReference<OperatorSubtaskState> snapshotHolder = new AtomicReference<>();
    Path f0 = newFile(".uncompacted-f0", 3);
    Path f1 = newFile(".uncompacted-f1", 2);
    Path f2 = newFile(".uncompacted-f2", 2);
    Path f3 = newFile(".uncompacted-f3", 5);
    Path f4 = newFile(".uncompacted-f4", 1);
    Path f5 = newFile(".uncompacted-f5", 5);
    Path f6 = newFile(".uncompacted-f6", 4);
    FileSystem localFs = f0.getFileSystem();

    // Phase 1: compact four units, end the compaction and take a snapshot.
    runCompact(
            harness -> {
                harness.setup();
                harness.open();

                harness.processElement(
                        new CompactionUnit(0, "p0", Arrays.asList(f0, f1, f4)), 0);
                harness.processElement(
                        new CompactionUnit(1, "p0", Collections.singletonList(f3)), 0);
                harness.processElement(new CompactionUnit(2, "p1", Arrays.asList(f2, f5)), 0);
                harness.processElement(
                        new CompactionUnit(3, "p0", Collections.singletonList(f6)), 0);
                harness.processElement(new EndCompaction(1), 0);

                snapshotHolder.set(harness.snapshot(2, 0));

                // Exactly one commit info is expected, for checkpoint 1 and both partitions.
                List<PartitionCommitInfo> committed = harness.extractOutputValues();
                Assert.assertEquals(1, committed.size());
                PartitionCommitInfo commitInfo = committed.get(0);
                Assert.assertEquals(1, commitInfo.getCheckpointId());
                Assert.assertEquals(Arrays.asList("p0", "p1"), commitInfo.getPartitions());

                // Every unit must have produced its compacted file.
                List<String> compactedNames =
                        Arrays.asList(
                                "compacted-f0", "compacted-f2", "compacted-f3", "compacted-f6");
                for (String compactedName : compactedNames) {
                    Assert.assertTrue(localFs.exists(new Path(folder, compactedName)));
                }

                // Spot-check the content of one compacted file.
                byte[] content =
                        FileUtils.readAllBytes(
                                new File(folder.getPath(), "compacted-f0").toPath());
                Arrays.sort(content);
                Assert.assertArrayEquals(new byte[] {0, 0, 0, 1, 1, 2}, content);
            });

    // Phase 2: restore from the snapshot and complete the checkpoint.
    runCompact(
            harness -> {
                harness.setup();
                harness.initializeState(snapshotHolder.get());
                harness.open();

                harness.notifyOfCompletedCheckpoint(2);

                // All temporary (uncompacted) files must be gone.
                for (Path uncompacted : Arrays.asList(f0, f1, f2, f3, f4, f5, f6)) {
                    Assert.assertFalse(localFs.exists(uncompacted));
                }
            });
}
use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit in project flink by apache.
the class CompactOperatorTest method testUnitSelection.
/**
 * Verifies that, out of the same broadcast messages, each of two parallel subtasks compacts
 * only the units assigned to it.
 *
 * <p>Both harnesses are managed by try-with-resources so they are closed even when an
 * assertion fails part-way through the test.
 */
@Test
public void testUnitSelection() throws Exception {
    try (OneInputStreamOperatorTestHarness<CoordinatorOutput, PartitionCommitInfo> harness0 =
            create(2, 0)) {
        harness0.setup();
        harness0.open();

        try (OneInputStreamOperatorTestHarness<CoordinatorOutput, PartitionCommitInfo>
                harness1 = create(2, 1)) {
            harness1.setup();
            harness1.open();

            Path f0 = newFile(".uncompacted-f0", 3);
            Path f1 = newFile(".uncompacted-f1", 2);
            Path f2 = newFile(".uncompacted-f2", 2);
            Path f3 = newFile(".uncompacted-f3", 5);
            Path f4 = newFile(".uncompacted-f4", 1);
            Path f5 = newFile(".uncompacted-f5", 5);
            Path f6 = newFile(".uncompacted-f6", 4);
            FileSystem fs = f0.getFileSystem();

            // The messages both subtasks receive: four units followed by the end marker.
            List<CoordinatorOutput> broadcast =
                    Arrays.asList(
                            new CompactionUnit(0, "p0", Arrays.asList(f0, f1, f4)),
                            new CompactionUnit(1, "p0", Collections.singletonList(f3)),
                            new CompactionUnit(2, "p0", Arrays.asList(f2, f5)),
                            new CompactionUnit(3, "p0", Collections.singletonList(f6)),
                            new EndCompaction(1));

            // Subtask 0 sees the whole broadcast.
            for (CoordinatorOutput message : broadcast) {
                harness0.processElement(message, 0);
            }

            // check compacted file generated
            Assert.assertTrue(fs.exists(new Path(folder, "compacted-f0")));
            Assert.assertTrue(fs.exists(new Path(folder, "compacted-f2")));

            // f3 and f6 are in the charge of another task
            Assert.assertFalse(fs.exists(new Path(folder, "compacted-f3")));
            Assert.assertFalse(fs.exists(new Path(folder, "compacted-f6")));

            // Subtask 1 sees the same broadcast.
            for (CoordinatorOutput message : broadcast) {
                harness1.processElement(message, 0);
            }

            // check compacted file generated
            Assert.assertTrue(fs.exists(new Path(folder, "compacted-f3")));
            Assert.assertTrue(fs.exists(new Path(folder, "compacted-f6")));
        }
    }
}
Aggregations