use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class TestIncrementalFSViewSync method areViewsConsistent.
/**
 * Checks that two file-system views are consistent with each other.
 *
 * @param view1 first view
 * @param view2 second view
 * @param expectedTotalFileSlices expected total number of file slices across all file groups
 */
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2, long expectedTotalFileSlices) {
  // Timeline check
  assertEquals(view1.getLastInstant(), view2.getLastInstant());
  // View checks
  Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 =
      partitions.stream().flatMap(view1::getAllFileGroups).collect(Collectors.toMap(HoodieFileGroup::getFileGroupId, fg -> fg));
  Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 =
      partitions.stream().flatMap(view2::getAllFileGroups).collect(Collectors.toMap(HoodieFileGroup::getFileGroupId, fg -> fg));
  assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());
  long gotSlicesCount = fileGroupsMap1.keySet().stream()
      .map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k)))
      .mapToLong(e -> {
        HoodieFileGroup fg1 = e.getKey();
        HoodieFileGroup fg2 = e.getValue();
        assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
        List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
        List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
        assertEquals(slices1.size(), slices2.size());
        IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx))).forEach(e2 -> {
          FileSlice slice1 = e2.getKey();
          FileSlice slice2 = e2.getValue();
          assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
          assertEquals(slice1.getFileId(), slice2.getFileId());
          assertEquals(slice1.getBaseFile().isPresent(), slice2.getBaseFile().isPresent());
          if (slice1.getBaseFile().isPresent()) {
            HoodieBaseFile df1 = slice1.getBaseFile().get();
            HoodieBaseFile df2 = slice2.getBaseFile().get();
            assertEquals(df1.getCommitTime(), df2.getCommitTime());
            assertEquals(df1.getFileId(), df2.getFileId());
            assertEquals(df1.getFileName(), df2.getFileName());
            assertEquals(Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
                Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
          }
          List<Path> logPaths1 = slice1.getLogFiles()
              .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
          List<Path> logPaths2 = slice2.getLogFiles()
              .map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
          assertEquals(logPaths1, logPaths2);
        });
        return slices1.size();
      }).sum();
  assertEquals(expectedTotalFileSlices, gotSlicesCount);
  // Pending compaction operations check
  Set<Pair<String, CompactionOperation>> ops1 = view1.getPendingCompactionOperations().collect(Collectors.toSet());
  Set<Pair<String, CompactionOperation>> ops2 = view2.getPendingCompactionOperations().collect(Collectors.toSet());
  assertEquals(ops1, ops2);
}
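The pending-compaction check above compares the two views' operations as a flat set. A minimal alternative sketch, assuming the same two SyncableFileSystemView instances (the helper name and the per-file-group keying are illustrative, not part of the test): indexing the operations by file group id makes a mismatch point at the offending file group.

private void assertPendingCompactionsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2) {
  // Key each pending compaction operation by its file group id (illustrative helper).
  Map<HoodieFileGroupId, CompactionOperation> pending1 = view1.getPendingCompactionOperations()
      .collect(Collectors.toMap(p -> p.getValue().getFileGroupId(), p -> p.getValue()));
  Map<HoodieFileGroupId, CompactionOperation> pending2 = view2.getPendingCompactionOperations()
      .collect(Collectors.toMap(p -> p.getValue().getFileGroupId(), p -> p.getValue()));
  assertEquals(pending1.keySet(), pending2.keySet());
  // Relies on CompactionOperation#equals, as the set comparison in the test already does.
  pending1.forEach((fgId, op) -> assertEquals(op, pending2.get(fgId)));
}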
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class ITTestCompactionCommand method testUnscheduleCompactFile.
/**
 * This test mainly exercises the workflow of the 'compaction unscheduleFileId' command.
 * The detailed behavior of {@link org.apache.hudi.client.CompactionAdminClient#unscheduleCompactionFileId}
 * is covered by {@link TestCompactionAdminClient#testUnscheduleCompactionFileId}.
 */
@Test
public void testUnscheduleCompactFile() throws IOException {
  int numEntriesPerInstant = 10;
  CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false,
      numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
  CompactionOperation op = CompactionOperation.convertFromAvroRecordInstance(
      CompactionUtils.getCompactionPlan(metaClient, "001").getOperations().stream().findFirst().get());
  CommandResult cr = getShell().executeCommand(String.format(
      "compaction unscheduleFileId --fileId %s --partitionPath %s --sparkMaster %s",
      op.getFileGroupId().getFileId(), op.getFileGroupId().getPartitionPath(), "local"));
  assertAll("Command run failed",
      () -> assertTrue(cr.isSuccess()),
      () -> assertTrue(removeNonWordAndStripSpace(cr.getResult().toString()).contains("true")),
      () -> assertFalse(removeNonWordAndStripSpace(cr.getResult().toString()).contains("false")));
}
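The file id passed to the command is taken from the first operation of the plan for instant "001". A minimal sketch, assuming the same metaClient and instant time as the test above, of how all pending file ids in that plan could be listed before choosing one to unschedule:

// Illustrative only: list the file ids of every operation in the scheduled plan.
List<String> pendingFileIds = CompactionUtils.getCompactionPlan(metaClient, "001").getOperations().stream()
    .map(CompactionOperation::convertFromAvroRecordInstance)
    .map(op -> op.getFileGroupId().getFileId())
    .collect(Collectors.toList());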
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class CompactionPlanOperator method scheduleCompaction.
private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
  // the first instant takes the highest priority.
  Option<HoodieInstant> firstRequested = table.getActiveTimeline().filterPendingCompactionTimeline()
      .filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED).firstInstant();
  if (!firstRequested.isPresent()) {
    // do nothing.
    LOG.info("No compaction plan for checkpoint " + checkpointId);
    return;
  }
  String compactionInstantTime = firstRequested.get().getTimestamp();
  // generate compaction plan
  // should support configurable commit metadata
  HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
  if (compactionPlan == null || (compactionPlan.getOperations() == null) || (compactionPlan.getOperations().isEmpty())) {
    // do nothing.
    LOG.info("Empty compaction plan for instant " + compactionInstantTime);
  } else {
    HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
    // Mark instant as compaction inflight
    table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
    table.getMetaClient().reloadActiveTimeline();
    List<CompactionOperation> operations = compactionPlan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
    LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
    for (CompactionOperation operation : operations) {
      output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation)));
    }
  }
}
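A minimal sketch of the plan-reading step factored into a hypothetical helper (not part of the operator above): it returns the plan's operations, or an empty list when the plan is missing or has no operations, mirroring the guard in scheduleCompaction.

// Illustrative helper, assumed signature; `metaClient` is a HoodieTableMetaClient.
private static List<CompactionOperation> readOperations(HoodieTableMetaClient metaClient, String compactionInstantTime) throws IOException {
  HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionInstantTime);
  if (plan == null || plan.getOperations() == null || plan.getOperations().isEmpty()) {
    // No plan or nothing to compact for this instant.
    return Collections.emptyList();
  }
  return plan.getOperations().stream()
      .map(CompactionOperation::convertFromAvroRecordInstance)
      .collect(Collectors.toList());
}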
use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.
the class CompactionPlanSourceFunction method run.
@Override
public void run(SourceContext sourceContext) throws Exception {
  List<CompactionOperation> operations = this.compactionPlan.getOperations().stream()
      .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
  LOG.info("CompactionPlanFunction compacting " + operations + " files");
  for (CompactionOperation operation : operations) {
    sourceContext.collect(new CompactionPlanEvent(compactionInstantTime, operation));
  }
}
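A minimal sketch (illustration only, not part of the source function): grouping the converted operations by partition path, for example to report how many file groups each partition contributes before the events are emitted.

// Illustrative only: count file groups per partition in the plan being compacted.
Map<String, Long> fileGroupsPerPartition = compactionPlan.getOperations().stream()
    .map(CompactionOperation::convertFromAvroRecordInstance)
    .collect(Collectors.groupingBy(op -> op.getFileGroupId().getPartitionPath(), Collectors.counting()));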