Search in sources :

Example 11 with HoodieArchivedTimeline

use of org.apache.hudi.common.table.timeline.HoodieArchivedTimeline in project hudi by apache.

the class CompactionCommand method compactionsShowArchived.

/**
 * CLI command that prints compaction details read from the archived timeline for a time window.
 *
 * <p>When {@code startTs} is null or empty it falls back to {@code CommitUtil.getTimeDaysAgo(10)};
 * when {@code endTs} is null or empty it falls back to {@code CommitUtil.getTimeDaysAgo(1)}.
 * Compaction details for the resulting window are loaded into the archived timeline before
 * printing and cleared again afterwards, whether or not printing succeeds.
 *
 * @param includeExtraMetadata forwarded to {@code printAllCompactions}
 * @param startTs              start of the window; may be null or empty to use the default
 * @param endTs                end of the window; may be null or empty to use the default
 * @param limit                forwarded to {@code printAllCompactions}
 * @param sortByField          forwarded to {@code printAllCompactions}
 * @param descending           forwarded to {@code printAllCompactions}
 * @param headerOnly           forwarded to {@code printAllCompactions}
 * @return the string produced by {@code printAllCompactions}
 */
@CliCommand(value = "compactions showarchived", help = "Shows compaction details for specified time window")
public String compactionsShowArchived(
        @CliOption(key = { "includeExtraMetadata" }, help = "Include extra metadata", unspecifiedDefaultValue = "false") final boolean includeExtraMetadata,
        @CliOption(key = { "startTs" }, mandatory = false, help = "start time for compactions, default: now - 10 days") String startTs,
        @CliOption(key = { "endTs" }, mandatory = false, help = "end time for compactions, default: now - 1 day") String endTs,
        @CliOption(key = { "limit" }, help = "Limit compactions", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) {
    // Resolve the effective window once; the same bounds are used for both load and clear.
    final String windowStart = StringUtils.isNullOrEmpty(startTs) ? CommitUtil.getTimeDaysAgo(10) : startTs;
    final String windowEnd = StringUtils.isNullOrEmpty(endTs) ? CommitUtil.getTimeDaysAgo(1) : endTs;

    final HoodieArchivedTimeline archived = checkAndGetMetaClient().getArchivedTimeline();
    archived.loadCompactionDetailsInMemory(windowStart, windowEnd);
    try {
        return printAllCompactions(
                archived,
                compactionPlanReader(this::readCompactionPlanForArchivedTimeline, archived),
                includeExtraMetadata,
                sortByField,
                descending,
                limit,
                headerOnly);
    } finally {
        // Release the details loaded above even if printing throws.
        archived.clearInstantDetailsFromMemory(windowStart, windowEnd);
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 12 with HoodieArchivedTimeline

use of org.apache.hudi.common.table.timeline.HoodieArchivedTimeline in project hudi by apache.

the class IncrementalInputSplits method getArchivedMetadata.

/**
 * Collects commit metadata from the archived timeline when the requested range starts
 * before the given commit timeline does.
 *
 * <p>The archived timeline is opened from the range's start instant, narrowed to completed
 * commit instants, passed through {@code maySkipCompaction}, and each remaining instant is
 * resolved to its metadata via {@code WriteProfiles.getCommitMetadata}.
 *
 * <p>Note: this should be improved to use the metadata table once the metadata table is
 * stable enough.
 *
 * @param metaClient     The meta client used to obtain the archived timeline
 * @param instantRange   The instant range whose start instant decides whether archives are read
 * @param commitTimeline The commit timeline the start instant is compared against
 * @param tableName      The table name passed along when resolving commit metadata
 * @return the list of archived metadata, or an empty list when the start instant is not before
 *         the commit timeline's start or the archived timeline has no completed commits
 */
private List<HoodieCommitMetadata> getArchivedMetadata(HoodieTableMetaClient metaClient, InstantRange instantRange, HoodieTimeline commitTimeline, String tableName) {
    // Nothing to fetch unless the start instant has already been archived.
    if (!commitTimeline.isBeforeTimelineStarts(instantRange.getStartInstant())) {
        return Collections.emptyList();
    }

    final HoodieArchivedTimeline archived = metaClient.getArchivedTimeline(instantRange.getStartInstant());
    final HoodieTimeline completedCommits = archived.getCommitsTimeline().filterCompletedInstants();
    if (completedCommits.empty()) {
        return Collections.emptyList();
    }

    return maySkipCompaction(completedCommits.getInstants())
            .map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, archived))
            .collect(Collectors.toList());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieInputFormatUtils(org.apache.hudi.hadoop.utils.HoodieInputFormatUtils) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) Serializable(scala.Serializable) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) LoggerFactory(org.slf4j.LoggerFactory) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BaseFile(org.apache.hudi.common.model.BaseFile) Path(org.apache.flink.core.fs.Path) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) LESSER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Objects(java.util.Objects) WriteProfiles(org.apache.hudi.sink.partitioner.profile.WriteProfiles) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) InstantRange(org.apache.hudi.common.table.log.InstantRange) MergeOnReadInputSplit(org.apache.hudi.table.format.mor.MergeOnReadInputSplit) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) Collections(java.util.Collections) FlinkOptions(org.apache.hudi.configuration.FlinkOptions) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) 
HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline)

Aggregations

HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline)12 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)8 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)8 Path (org.apache.hadoop.fs.Path)7 ArrayList (java.util.ArrayList)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)6 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)6 Option (org.apache.hudi.common.util.Option)6 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)6 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)6 ValueSource (org.junit.jupiter.params.provider.ValueSource)6 IOException (java.io.IOException)5 Collections (java.util.Collections)5 HashSet (java.util.HashSet)5 Map (java.util.Map)5 Stream (java.util.stream.Stream)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 HoodieTimelineArchiver (org.apache.hudi.client.HoodieTimelineArchiver)5