Example 36 with HoodieCompactionPlan

Use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

From class TestIncrementalFSViewSync, method scheduleCompaction.

/**
 * Schedule a pending compaction and validate.
 *
 * @param view Hoodie View
 * @param instantTime Compaction instant time
 */
private void scheduleCompaction(SyncableFileSystemView view, String instantTime) throws IOException {
    List<Pair<String, FileSlice>> slices = partitions.stream()
            .flatMap(p -> view.getLatestFileSlices(p).map(s -> Pair.of(p, s)))
            .collect(Collectors.toList());
    long initialExpTotalFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).sum();
    HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(slices, Option.empty(), Option.empty());
    HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
    metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant, TimelineMetadataUtils.serializeCompactionPlan(plan));
    view.sync();
    partitions.forEach(p -> {
        view.getLatestFileSlices(p).forEach(fs -> {
            assertEquals(instantTime, fs.getBaseInstantTime());
            assertEquals(p, fs.getPartitionPath());
            assertFalse(fs.getBaseFile().isPresent());
        });
        view.getLatestMergedFileSlicesBeforeOrOn(p, instantTime).forEach(fs -> {
            assertTrue(HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.GREATER_THAN, fs.getBaseInstantTime()));
            assertEquals(p, fs.getPartitionPath());
        });
    });
    metaClient.reloadActiveTimeline();
    SyncableFileSystemView newView = getFileSystemView(metaClient);
    areViewsConsistent(view, newView, initialExpTotalFileSlices + partitions.size() * fileIdsPerPartition.size());
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
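
For orientation, here is a minimal hedged sketch of the plan-building step this test exercises; the partition path and variable names are illustrative (not from the test), and the imports are the same ones listed above. CompactionUtils.buildFromFileSlices should produce one compaction operation per input file slice:

// A minimal sketch, assuming the CompactionUtils API used in the test above.
// The partition path "2020/01/01" is illustrative, not taken from the test.
List<Pair<String, FileSlice>> slices = view.getLatestFileSlices("2020/01/01")
        .map(s -> Pair.of("2020/01/01", s))
        .collect(Collectors.toList());
HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(slices, Option.empty(), Option.empty());
// One HoodieCompactionOperation is expected per input file slice.
assertEquals(slices.size(), plan.getOperations().size());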

Example 37 with HoodieCompactionPlan

Use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

From class CompactionCommand, method compactionShow.

@CliCommand(value = "compaction show", help = "Shows compaction details for a specific compaction instant")
public String compactionShow(
        @CliOption(key = "instant", mandatory = true, help = "Instant time of the compaction to show") final String compactionInstantTime,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
    HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(
            activeTimeline.readCompactionPlanAsBytes(
                    HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
    return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand)
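
From the hudi-cli shell, the command above would typically be invoked along these lines (the instant time is illustrative; limit, sortBy, desc, and headeronly are optional and fall back to the unspecifiedDefaultValue given in the annotations):

compaction show --instant 20220101000000 --limit 10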

Example 38 with HoodieCompactionPlan

Use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

From class ITTestHoodieFlinkCompactor, method testHoodieFlinkCompactor.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testHoodieFlinkCompactor(boolean enableChangelog) throws Exception {
    // Create the hoodie table and insert data.
    EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
    TableEnvironment tableEnv = TableEnvironmentImpl.create(settings);
    tableEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    Map<String, String> options = new HashMap<>();
    options.put(FlinkOptions.COMPACTION_ASYNC_ENABLED.key(), "false");
    options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
    options.put(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");
    options.put(FlinkOptions.CHANGELOG_ENABLED.key(), enableChangelog + "");
    String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
    tableEnv.executeSql(hoodieTableDDL);
    tableEnv.executeSql(TestSQL.INSERT_T1).await();
    // wait for the asynchronous commit to finish
    TimeUnit.SECONDS.sleep(3);
    // Build the compaction configuration and set the Avro schema.
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    FlinkCompactionConfig cfg = new FlinkCompactionConfig();
    cfg.path = tempFile.getAbsolutePath();
    Configuration conf = FlinkCompactionConfig.toFlinkConfig(cfg);
    conf.setString(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");
    // create metaClient
    HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(conf);
    // set the table name
    conf.setString(FlinkOptions.TABLE_NAME, metaClient.getTableConfig().getTableName());
    // set table schema
    CompactionUtil.setAvroSchema(conf, metaClient);
    // infer changelog mode
    CompactionUtil.inferChangelogMode(conf, metaClient);
    HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf);
    boolean scheduled = false;
    // Compute the compaction instant time and schedule the compaction, if one is available.
    Option<String> compactionInstantTimeOption = CompactionUtil.getCompactionInstantTime(metaClient);
    if (compactionInstantTimeOption.isPresent()) {
        scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTimeOption.get(), Option.empty());
    }
    assertTrue(scheduled, "The compaction plan should be scheduled");
    String compactionInstantTime = compactionInstantTimeOption.get();
    HoodieFlinkTable<?> table = writeClient.getHoodieTable();
    // generate compaction plan
    // should support configurable commit metadata
    HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
    HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
    // Mark instant as compaction inflight
    table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
    env.addSource(new CompactionPlanSourceFunction(compactionPlan, compactionInstantTime))
            .name("compaction_source")
            .uid("uid_compaction_source")
            .rebalance()
            .transform("compact_task", TypeInformation.of(CompactionCommitEvent.class), new ProcessOperator<>(new CompactFunction(conf)))
            .setParallelism(compactionPlan.getOperations().size())
            .addSink(new CompactionCommitSink(conf))
            .name("clean_commits")
            .uid("uid_clean_commits")
            .setParallelism(1);
    env.execute("flink_hudi_compaction");
    writeClient.close();
    TestData.checkWrittenFullData(tempFile, EXPECTED1);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) ProcessOperator(org.apache.flink.streaming.api.operators.ProcessOperator) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) TableEnvironment(org.apache.flink.table.api.TableEnvironment) HoodieFlinkWriteClient(org.apache.hudi.client.HoodieFlinkWriteClient) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
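
Condensed into a hedged sketch (writeClient and metaClient configured as in the test, names as above), the schedule-then-plan handshake at the heart of this test is:

// Sketch of the scheduling handshake; proceed to the plan only if scheduling succeeded.
Option<String> instantOpt = CompactionUtil.getCompactionInstantTime(metaClient);
if (instantOpt.isPresent() && writeClient.scheduleCompactionAtInstant(instantOpt.get(), Option.empty())) {
    HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, instantOpt.get());
    // The test sizes the compact_task operator to one subtask per plan operation.
    int compactParallelism = plan.getOperations().size();
}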

Example 39 with HoodieCompactionPlan

Use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

From class CompactionPlanOperator, method scheduleCompaction.

private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
    // the first instant takes the highest priority.
    Option<HoodieInstant> firstRequested = table.getActiveTimeline().filterPendingCompactionTimeline()
            .filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED).firstInstant();
    if (!firstRequested.isPresent()) {
        // do nothing.
        LOG.info("No compaction plan for checkpoint " + checkpointId);
        return;
    }
    String compactionInstantTime = firstRequested.get().getTimestamp();
    // generate compaction plan
    // should support configurable commit metadata
    HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
    if (compactionPlan == null || (compactionPlan.getOperations() == null) || (compactionPlan.getOperations().isEmpty())) {
        // do nothing.
        LOG.info("Empty compaction plan for instant " + compactionInstantTime);
    } else {
        HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
        // Mark instant as compaction inflight
        table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
        table.getMetaClient().reloadActiveTimeline();
        List<CompactionOperation> operations = compactionPlan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
        LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
        for (CompactionOperation operation : operations) {
            output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation)));
        }
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Configuration(org.apache.flink.configuration.Configuration) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieFlinkTable(org.apache.hudi.table.HoodieFlinkTable) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkTables(org.apache.hudi.util.FlinkTables) Output(org.apache.flink.streaming.api.operators.Output) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) CompactionUtil(org.apache.hudi.util.CompactionUtil)

Example 40 with HoodieCompactionPlan

Use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

From class TestHoodieRealtimeRecordReader, method createCompactionFile.

private File createCompactionFile(java.nio.file.Path basePath, String commitTime) throws IOException {
    File file = basePath.resolve(".hoodie").resolve(HoodieTimeline.makeRequestedCompactionFileName(commitTime)).toFile();
    assertTrue(file.createNewFile());
    try (FileOutputStream os = new FileOutputStream(file)) {
        HoodieCompactionPlan compactionPlan = HoodieCompactionPlan.newBuilder().setVersion(2).build();
        // Write an empty compaction plan as the requested-compaction file contents
        os.write(TimelineMetadataUtils.serializeCompactionPlan(compactionPlan).get());
        return file;
    }
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) FileOutputStream(java.io.FileOutputStream) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File)
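
To round out the example, a minimal hedged sketch of reading such a file back; it relies only on the serialize/deserialize pair already shown in Examples 37 and 40 (call it from a method that declares throws IOException):

// Round-trip sketch: serialize an empty plan to Avro bytes, then deserialize it.
HoodieCompactionPlan plan = HoodieCompactionPlan.newBuilder().setVersion(2).build();
byte[] bytes = TimelineMetadataUtils.serializeCompactionPlan(plan).get();
HoodieCompactionPlan readBack = TimelineMetadataUtils.deserializeCompactionPlan(bytes);
// The version should survive the round trip; an empty plan carries no operations.
assertEquals(plan.getVersion(), readBack.getVersion());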

Aggregations

HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan): 41 usages
IOException (java.io.IOException): 20 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 18 usages
List (java.util.List): 17 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 16 usages
Pair (org.apache.hudi.common.util.collection.Pair): 16 usages
ArrayList (java.util.ArrayList): 15 usages
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 15 usages
Path (org.apache.hadoop.fs.Path): 14 usages
HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation): 13 usages
Option (org.apache.hudi.common.util.Option): 13 usages
LogManager (org.apache.log4j.LogManager): 13 usages
Logger (org.apache.log4j.Logger): 13 usages
Map (java.util.Map): 12 usages
Set (java.util.Set): 12 usages
Collectors (java.util.stream.Collectors): 12 usages
CompactionOperation (org.apache.hudi.common.model.CompactionOperation): 12 usages
CompactionUtils (org.apache.hudi.common.util.CompactionUtils): 12 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 11 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 11 usages