use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.
the class TestIncrementalFSViewSync method scheduleCompaction.
/**
* Schedule a pending compaction and validate.
*
* @param view Hoodie View
* @param instantTime Compaction Instant Time
*/
private void scheduleCompaction(SyncableFileSystemView view, String instantTime) throws IOException {
  List<Pair<String, FileSlice>> slices = partitions.stream()
      .flatMap(p -> view.getLatestFileSlices(p).map(s -> Pair.of(p, s)))
      .collect(Collectors.toList());
  long initialExpTotalFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).sum();
  HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(slices, Option.empty(), Option.empty());
  HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
  metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant, TimelineMetadataUtils.serializeCompactionPlan(plan));
  view.sync();
  // After scheduling, the latest slice in every file group is a new, empty slice pinned to the compaction instant.
  partitions.forEach(p -> {
    view.getLatestFileSlices(p).forEach(fs -> {
      assertEquals(instantTime, fs.getBaseInstantTime());
      assertEquals(p, fs.getPartitionPath());
      assertFalse(fs.getBaseFile().isPresent());
    });
    view.getLatestMergedFileSlicesBeforeOrOn(p, instantTime).forEach(fs -> {
      assertTrue(HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.GREATER_THAN, fs.getBaseInstantTime()));
      assertEquals(p, fs.getPartitionPath());
    });
  });
  metaClient.reloadActiveTimeline();
  // A freshly built view must agree with the incrementally synced one.
  SyncableFileSystemView newView = getFileSystemView(metaClient);
  areViewsConsistent(view, newView, initialExpTotalFileSlices + partitions.size() * fileIdsPerPartition.size());
}
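A plan written with saveToCompactionRequested can be read back by instant time via CompactionUtils.getCompactionPlan, as the later snippets in this section do. A minimal round-trip check, assuming the metaClient, instantTime, and slices from the method above:

HoodieCompactionPlan readBack = CompactionUtils.getCompactionPlan(metaClient, instantTime);
// buildFromFileSlices emits one compaction operation per (partition, file slice) pair,
// so the read-back plan should mirror the input (an assumption worth asserting in a test).
assertEquals(slices.size(), readBack.getOperations().size());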
use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.
the class CompactionCommand method compactionShow.
@CliCommand(value = "compaction show", help = "Shows compaction details for a specific compaction instant")
public String compactionShow(@CliOption(key = "instant", mandatory = true, help = "Base path for the target hoodie table") final String compactionInstantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(activeTimeline.readCompactionPlanAsBytes(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly);
}
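readCompactionPlanAsBytes returns an Option, so the unguarded .get() above will throw if no requested compaction file exists for the given instant. A hedged, defensive variant of the same read path, using only APIs shown in this snippet:

Option<byte[]> planBytes = activeTimeline.readCompactionPlanAsBytes(
    HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime));
if (!planBytes.isPresent()) {
  return "No compaction plan found for instant " + compactionInstantTime;
}
HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(planBytes.get());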
use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.
the class ITTestHoodieFlinkCompactor method testHoodieFlinkCompactor.
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testHoodieFlinkCompactor(boolean enableChangelog) throws Exception {
  // Create the hoodie table and insert data.
  EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
  TableEnvironment tableEnv = TableEnvironmentImpl.create(settings);
  tableEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
  Map<String, String> options = new HashMap<>();
  options.put(FlinkOptions.COMPACTION_ASYNC_ENABLED.key(), "false");
  options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
  options.put(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");
  options.put(FlinkOptions.CHANGELOG_ENABLED.key(), enableChangelog + "");
  String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
  tableEnv.executeSql(hoodieTableDDL);
  tableEnv.executeSql(TestSQL.INSERT_T1).await();
  // Wait for the asynchronous commit to finish.
  TimeUnit.SECONDS.sleep(3);

  // Build the compaction configuration.
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  FlinkCompactionConfig cfg = new FlinkCompactionConfig();
  cfg.path = tempFile.getAbsolutePath();
  Configuration conf = FlinkCompactionConfig.toFlinkConfig(cfg);
  conf.setString(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");
  // Create the meta client, then propagate the table name, table schema and changelog mode into the config.
  HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(conf);
  conf.setString(FlinkOptions.TABLE_NAME, metaClient.getTableConfig().getTableName());
  CompactionUtil.setAvroSchema(conf, metaClient);
  CompactionUtil.inferChangelogMode(conf, metaClient);

  HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf);
  boolean scheduled = false;
  // Compute the compaction instant time and schedule the compaction.
  Option<String> compactionInstantTimeOption = CompactionUtil.getCompactionInstantTime(metaClient);
  if (compactionInstantTimeOption.isPresent()) {
    scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTimeOption.get(), Option.empty());
  }
  // Assert before unwrapping the Option so a failed schedule surfaces as an assertion, not a NoSuchElementException.
  assertTrue(scheduled, "The compaction plan should be scheduled");
  String compactionInstantTime = compactionInstantTimeOption.get();

  HoodieFlinkTable<?> table = writeClient.getHoodieTable();
  // generate compaction plan
  // should support configurable commit metadata
  HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
  HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  // Mark instant as compaction inflight
  table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);

  env.addSource(new CompactionPlanSourceFunction(compactionPlan, compactionInstantTime))
      .name("compaction_source")
      .uid("uid_compaction_source")
      .rebalance()
      .transform("compact_task", TypeInformation.of(CompactionCommitEvent.class), new ProcessOperator<>(new CompactFunction(conf)))
      .setParallelism(compactionPlan.getOperations().size())
      .addSink(new CompactionCommitSink(conf))
      .name("clean_commits")
      .uid("uid_clean_commits")
      .setParallelism(1);
  env.execute("flink_hudi_compaction");

  writeClient.close();
  TestData.checkWrittenFullData(tempFile, EXPECTED1);
}
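The transform parallelism above is taken straight from compactionPlan.getOperations().size(). As the CompactionPlanOperator snippet below shows, a plan's operations list can be null or empty, so a standalone pipeline may want a floor; a small sketch, assuming the Avro model class HoodieCompactionOperation:

List<HoodieCompactionOperation> ops = compactionPlan.getOperations();
// Default to 1 so the Flink operator chain stays valid when the plan is empty.
int parallelism = (ops == null || ops.isEmpty()) ? 1 : ops.size();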
use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.
the class CompactionPlanOperator method scheduleCompaction.
private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
  // the first instant takes the highest priority.
  Option<HoodieInstant> firstRequested = table.getActiveTimeline()
      .filterPendingCompactionTimeline()
      .filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED)
      .firstInstant();
  if (!firstRequested.isPresent()) {
    // do nothing.
    LOG.info("No compaction plan for checkpoint " + checkpointId);
    return;
  }
  String compactionInstantTime = firstRequested.get().getTimestamp();
  // generate compaction plan
  // should support configurable commit metadata
  HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
  if (compactionPlan == null || compactionPlan.getOperations() == null || compactionPlan.getOperations().isEmpty()) {
    // do nothing.
    LOG.info("Empty compaction plan for instant " + compactionInstantTime);
  } else {
    HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
    // Mark instant as compaction inflight
    table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
    table.getMetaClient().reloadActiveTimeline();
    List<CompactionOperation> operations = compactionPlan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance)
        .collect(toList());
    LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
    for (CompactionOperation operation : operations) {
      output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation)));
    }
  }
}
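For context, convertFromAvroRecordInstance maps the Avro-generated HoodieCompactionOperation onto the runtime CompactionOperation model. A minimal sketch of inspecting a plan directly on the Avro model, assuming compactionPlan was loaded as above:

// Each operation targets one file group in one partition.
for (HoodieCompactionOperation op : compactionPlan.getOperations()) {
  LOG.info("Pending compaction of file group {} in partition {}", op.getFileId(), op.getPartitionPath());
}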
use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.
the class TestHoodieRealtimeRecordReader method createCompactionFile.
private File createCompactionFile(java.nio.file.Path basePath, String commitTime) throws IOException {
  File file = basePath.resolve(".hoodie").resolve(HoodieTimeline.makeRequestedCompactionFileName(commitTime)).toFile();
  assertTrue(file.createNewFile());
  try (FileOutputStream os = new FileOutputStream(file)) {
    HoodieCompactionPlan compactionPlan = HoodieCompactionPlan.newBuilder().setVersion(2).build();
    // Write an empty compaction plan into the requested-compaction file.
    os.write(TimelineMetadataUtils.serializeCompactionPlan(compactionPlan).get());
    return file;
  }
}
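The helper above pairs with the deserialization path from the CompactionCommand snippet. A minimal round-trip sketch using only APIs already shown in this section:

// Serialize an empty versioned plan and read it back.
HoodieCompactionPlan plan = HoodieCompactionPlan.newBuilder().setVersion(2).build();
byte[] bytes = TimelineMetadataUtils.serializeCompactionPlan(plan).get();
HoodieCompactionPlan roundTripped = TimelineMetadataUtils.deserializeCompactionPlan(bytes);
assertEquals(plan.getVersion(), roundTripped.getVersion());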