Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
The class CompactionAdminClient, method unscheduleCompactionFileId.
/**
 * Remove a fileId from pending compaction. Removes the associated compaction operation and renames delta-files that
 * were generated for that file-id after the compaction operation was scheduled.
 *
 * This operation MUST be executed with compactions and writers turned OFF.
 *
 * @param fgId FileGroupId to be unscheduled
 * @param skipValidation Skip validation
 * @param dryRun Dry Run Mode
 */
public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId, boolean skipValidation, boolean dryRun)
    throws Exception {
  HoodieTableMetaClient metaClient = createMetaClient(false);
  List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
      getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId, Option.empty(), skipValidation);
  List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, 1, dryRun);
  if (!dryRun && !res.isEmpty() && res.get(0).isExecuted() && res.get(0).isSuccess()) {
    // Ready to remove this file-id from the compaction request
    Pair<String, HoodieCompactionOperation> compactionOperationWithInstant =
        CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fgId);
    HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
    // Keep every operation except the one for this exact file group (same file-id AND same partition-path)
    List<HoodieCompactionOperation> newOps = plan.getOperations().stream()
        .filter(op -> !(op.getFileId().equals(fgId.getFileId())
            && op.getPartitionPath().equals(fgId.getPartitionPath())))
        .collect(Collectors.toList());
    HoodieCompactionPlan newPlan =
        HoodieCompactionPlan.newBuilder().setOperations(newOps).setExtraMetadata(plan.getExtraMetadata()).build();
    HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionOperationWithInstant.getLeft());
    Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
    if (metaClient.getFs().exists(inflightPath)) {
      // Revert to REQUESTED if the compaction is still in INFLIGHT state
      metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
    }
    // Overwrite the compaction plan with the updated operation list
    metaClient.getActiveTimeline().saveToCompactionRequested(
        new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()),
        TimelineMetadataUtils.serializeCompactionPlan(newPlan), true);
  }
  return res;
}
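A minimal usage sketch follows; the adminClient variable and the file-group ids are hypothetical (CompactionAdminClient construction varies across Hudi versions). Dry-run mode previews the rename operations without executing them, and per the javadoc, compactions and writers must be stopped first.

// Hypothetical setup: adminClient is a CompactionAdminClient for the target table.
HoodieFileGroupId fgId = new HoodieFileGroupId("2016/03/15", "file-group-1"); // made-up ids
// Preview the rename operations without executing them
List<RenameOpResult> preview = adminClient.unscheduleCompactionFileId(fgId, false, true);
preview.forEach(System.out::println);
// Execute the unscheduling for real
List<RenameOpResult> result = adminClient.unscheduleCompactionFileId(fgId, false, false);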
Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
The class TestCompactionUtils, method testFileSliceCompactionOpEquality.
/**
 * Validates that the generated compaction operation matches the input file slice and partition path.
 *
 * @param slice File Slice
 * @param op HoodieCompactionOperation
 * @param expPartitionPath Expected partition path
 * @param version Compaction metadata version
 */
private void testFileSliceCompactionOpEquality(FileSlice slice, HoodieCompactionOperation op, String expPartitionPath,
    int version) {
  assertEquals(expPartitionPath, op.getPartitionPath(), "Partition path is correct");
  assertEquals(slice.getBaseInstantTime(), op.getBaseInstantTime(), "Same base-instant");
  assertEquals(slice.getFileId(), op.getFileId(), "Same file-id");
  if (slice.getBaseFile().isPresent()) {
    HoodieBaseFile df = slice.getBaseFile().get();
    assertEquals(version == COMPACTION_METADATA_VERSION_1 ? df.getPath() : df.getFileName(), op.getDataFilePath(),
        "Same data-file");
  }
  List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList());
  IntStream.range(0, paths.size()).boxed().forEach(idx ->
      assertEquals(version == COMPACTION_METADATA_VERSION_1 ? paths.get(idx) : new Path(paths.get(idx)).getName(),
          op.getDeltaFilePaths().get(idx), "Log File Index " + idx));
  assertEquals(METRICS, op.getMetrics(), "Metrics set");
}
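The version switch above reflects that compaction metadata version 1 stored absolute file paths, while later versions store only file names. A small sketch of that distinction using Hadoop's Path (the path string is made up):

// Hypothetical absolute log-file path; getName() keeps only the final component,
// mirroring the getFileName()/getName() branch taken for post-V1 metadata.
String fullPath = "/tmp/2016/03/15/.someFileId_000.log.1_1-0-1";
String nameOnly = new org.apache.hadoop.fs.Path(fullPath).getName(); // ".someFileId_000.log.1_1-0-1"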
Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
The class BoundedIOCompactionStrategy, method orderAndFilter.
@Override
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
    List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
  // Iterate through the operations in order and accept operations as long as we are within the
  // IO limit. Preserves the original ordering of compactions.
  List<HoodieCompactionOperation> finalOperations = new ArrayList<>();
  long targetIORemaining = writeConfig.getTargetIOPerCompactionInMB();
  for (HoodieCompactionOperation op : operations) {
    long opIo = op.getMetrics().get(TOTAL_IO_MB).longValue();
    targetIORemaining -= opIo;
    finalOperations.add(op);
    if (targetIORemaining <= 0) {
      return finalOperations;
    }
  }
  return finalOperations;
}
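A self-contained sketch of the same greedy, order-preserving selection on plain data (the file-group names and IO numbers are made up) makes the budget behavior concrete: the operation that crosses the IO budget is still accepted, and selection stops after it.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class BoundedIoSketch {
  public static void main(String[] args) {
    // Hypothetical (fileId -> estimated total IO in MB) pairs, in plan order
    List<Map.Entry<String, Long>> ops = List.of(
        Map.entry("fg-1", 200L), Map.entry("fg-2", 300L), Map.entry("fg-3", 100L));
    long budgetMb = 450L; // stand-in for writeConfig.getTargetIOPerCompactionInMB()
    List<String> accepted = new ArrayList<>();
    for (Map.Entry<String, Long> op : ops) {
      budgetMb -= op.getValue();
      accepted.add(op.getKey()); // accepted even if this op overshoots the budget
      if (budgetMb <= 0) {
        break;
      }
    }
    System.out.println(accepted); // [fg-1, fg-2] -> fg-2 overshoots; fg-3 is never considered
  }
}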
Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
The class TestCompactionUtils, method testBuildFromFileSlice.
@Test
public void testBuildFromFileSlice() {
  String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
  // Empty file slice with neither base file nor log files
  FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
  HoodieCompactionOperation op =
      CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Option.of(metricsCaptureFn));
  testFileSliceCompactionOpEquality(emptyFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
  // File slice with a base file but no log files
  FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
  noLogFileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
  op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
  testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
  // File slice with log files but no base file
  FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
  noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
  noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
  op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Option.of(metricsCaptureFn));
  testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
  // File slice with both a base file and log files
  FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
  fileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
  fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
  fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
  op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], fileSlice, Option.of(metricsCaptureFn));
  testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
}
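The metricsCaptureFn passed above is a capture function from (partition path, file slice) pairs to a metrics map. A minimal sketch of its shape, with a made-up metric key and value (the real test fixture computes its own metrics):

// Hypothetical metrics-capture function; only the shape matters here.
Function<Pair<String, FileSlice>, Map<String, Double>> metricsCaptureFn = partitionFileSlicePair -> {
  Map<String, Double> metrics = new HashMap<>();
  metrics.put("TOTAL_IO_MB", 100.0); // made-up key and value
  return metrics;
};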
Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
The class TestCompactionUtils, method testCompactionTransformation.
@Test
public void testCompactionTransformation() {
  // Check the HoodieCompactionOperation <=> CompactionOperation transformation functions
  Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
  HoodieCompactionPlan plan = inputAndPlan.getRight();
  List<HoodieCompactionOperation> originalOps = plan.getOperations();
  // Convert to CompactionOperation, then back to HoodieCompactionOperation, and check for equality
  List<HoodieCompactionOperation> regeneratedOps = originalOps.stream()
      .map(CompactionUtils::buildCompactionOperation)
      .map(CompactionUtils::buildHoodieCompactionOperation)
      .collect(Collectors.toList());
  assertTrue(originalOps.size() > 0, "Transformation did get tested");
  assertEquals(originalOps, regeneratedOps, "All fields set correctly in transformations");
}
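The same round-trip can be written for one operation at a time; a minimal sketch, assuming an avroOp obtained elsewhere (for instance from CompactionUtils.buildFromFileSlice above):

// Round-trip a single operation: Avro model -> in-memory model -> Avro model.
// avroOp is assumed to come from elsewhere, e.g. CompactionUtils.buildFromFileSlice.
CompactionOperation inMemory = CompactionUtils.buildCompactionOperation(avroOp);
HoodieCompactionOperation roundTripped = CompactionUtils.buildHoodieCompactionOperation(inMemory);
assertEquals(avroOp, roundTripped, "Round-trip preserves all fields");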