
Example 1 with HoodieDefaultTimeline

Use of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in project hudi by apache.

From the class HoodieInputFormatUtils, the method filterInstantsTimeline:

/**
 * Filter any specific instants that we do not want to process.
 * example timeline:
 *
 * t0 -> create bucket1.parquet
 * t1 -> create and append updates bucket1.log
 * t2 -> request compaction
 * t3 -> create bucket2.parquet
 *
 * If the compaction at t2 takes a long time, incremental readers on RO tables can move on to t3
 * and would skip the updates in t1.
 *
 * To work around this problem, we stop returning data belonging to commits > t2.
 * After the compaction is complete, an incremental reader would see the updates in t2, t3, and so on.
 * @param timeline timeline to filter
 * @return timeline truncated at the earliest pending compaction, if one exists
 */
public static HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) {
    HoodieDefaultTimeline commitsAndCompactionTimeline = timeline.getWriteTimeline();
    // Find the earliest compaction that has been requested but not yet completed.
    Option<HoodieInstant> pendingCompactionInstant = commitsAndCompactionTimeline.filterPendingCompactionTimeline().firstInstant();
    if (pendingCompactionInstant.isPresent()) {
        // Truncate the timeline so only instants before the pending compaction are returned.
        HoodieDefaultTimeline instantsTimeline = commitsAndCompactionTimeline.findInstantsBefore(pendingCompactionInstant.get().getTimestamp());
        int numCommitsFilteredByCompaction = commitsAndCompactionTimeline.getCommitsTimeline().countInstants() - instantsTimeline.getCommitsTimeline().countInstants();
        LOG.info("Earliest pending compaction instant is: " + pendingCompactionInstant.get().getTimestamp() + " skipping " + numCommitsFilteredByCompaction + " commits");
        return instantsTimeline;
    } else {
        return timeline;
    }
}
Also used : HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant)
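For context, here is a minimal usage sketch, assuming the table lives at a placeholder base path and that HoodieInputFormatUtils sits in org.apache.hudi.hadoop.utils:

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;

// Build a meta client for the table (placeholder base path).
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(new Configuration())
    .setBasePath("/tmp/hoodie/sample-table")
    .build();

// The active timeline extends HoodieDefaultTimeline, so it can be filtered directly.
HoodieDefaultTimeline filtered =
    HoodieInputFormatUtils.filterInstantsTimeline(metaClient.getActiveTimeline());

// Only instants strictly before the earliest pending compaction remain.
filtered.getInstants().forEach(i -> System.out.println(i.getTimestamp() + " " + i.getAction()));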

Example 2 with HoodieDefaultTimeline

Use of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in project hudi by apache.

From the class HoodieInputFormatUtils, the method getFilteredCommitsTimeline:

/**
 * Extract the HoodieTimeline of completed commits based on the HoodieTableMetaClient.
 * @param job job context, used to check whether reads should stop at a pending compaction
 * @param tableMetaClient meta client for the table being queried
 * @return completed commits (and delta commits) timeline, wrapped in an Option
 */
public static Option<HoodieTimeline> getFilteredCommitsTimeline(JobContext job, HoodieTableMetaClient tableMetaClient) {
    String tableName = tableMetaClient.getTableConfig().getTableName();
    HoodieDefaultTimeline baseTimeline;
    // Optionally truncate the timeline at the earliest pending compaction (see filterInstantsTimeline above).
    if (HoodieHiveUtils.stopAtCompaction(job, tableName)) {
        baseTimeline = filterInstantsTimeline(tableMetaClient.getActiveTimeline());
    } else {
        baseTimeline = tableMetaClient.getActiveTimeline();
    }
    return Option.of(baseTimeline.getCommitsTimeline().filterCompletedInstants());
}
Also used : HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline)
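A hedged usage sketch: JobContext is satisfied by a Hadoop Job instance, and the base path is again a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf); // Job implements JobContext; getInstance may throw IOException

HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(conf)
    .setBasePath("/tmp/hoodie/sample-table") // placeholder
    .build();

// Completed commits only, truncated at a pending compaction when
// HoodieHiveUtils.stopAtCompaction(job, tableName) returns true.
Option<HoodieTimeline> timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(job, metaClient);
if (timeline.isPresent()) {
    System.out.println("Completed commits: " + timeline.get().countInstants());
}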

Example 3 with HoodieDefaultTimeline

Use of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in project hudi by apache.

From the class FileSystemViewCommand, the method buildFileSystemView:

/**
 * Build a file system view.
 *
 * @param globRegex path regex used to select partition paths under the base path
 * @param maxInstant maximum instant time; file instants after it are not displayed
 * @param basefileOnly include only the base-file (commit) view
 * @param includeMaxInstant whether to include the max instant itself
 * @param includeInflight include inflight instants
 * @param excludeCompaction exclude compaction instants
 * @return file system view built over the filtered timeline
 * @throws IOException if listing files under the glob path fails
 */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean basefileOnly, boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException {
    HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(client.getHadoopConf()).setBasePath(client.getBasePath()).setLoadActiveTimelineOnLoad(true).build();
    FileSystem fs = HoodieCLI.fs;
    String globPath = String.format("%s/%s/*", client.getBasePath(), globRegex);
    List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
    Stream<HoodieInstant> instantsStream;
    HoodieTimeline timeline;
    // Pick which instant types to show: the base-file view uses only commits, excluding
    // compaction keeps commits and delta commits, otherwise use the full write timeline.
    if (basefileOnly) {
        timeline = metaClient.getActiveTimeline().getCommitTimeline();
    } else if (excludeCompaction) {
        timeline = metaClient.getActiveTimeline().getCommitsTimeline();
    } else {
        timeline = metaClient.getActiveTimeline().getWriteTimeline();
    }
    if (!includeInflight) {
        timeline = timeline.filterCompletedInstants();
    }
    instantsStream = timeline.getInstants();
    if (!maxInstant.isEmpty()) {
        // The predicate tests maxInstant against each instant's timestamp, so
        // GREATER_THAN_OR_EQUALS keeps instants up to and including maxInstant.
        final BiPredicate<String, String> predicate;
        if (includeMaxInstant) {
            predicate = HoodieTimeline.GREATER_THAN_OR_EQUALS;
        } else {
            predicate = HoodieTimeline.GREATER_THAN;
        }
        instantsStream = instantsStream.filter(is -> predicate.test(maxInstant, is.getTimestamp()));
    }
    }
    HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream, (Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
    return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses.toArray(new FileStatus[0]));
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSystem(org.apache.hadoop.fs.FileSystem) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CommandMarker(org.springframework.shell.core.CommandMarker) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Serializable(java.io.Serializable) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) Stream(java.util.stream.Stream) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) FSUtils(org.apache.hudi.common.fs.FSUtils) NumericUtils(org.apache.hudi.common.util.NumericUtils)
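The notable detail above is building a fresh HoodieDefaultTimeline from a filtered instant stream, with the instant-details supplier cast to an intersection type so it is Serializable. A condensed sketch of the same trick, reusing a metaClient built as in the earlier examples (the cut-off timestamp is a placeholder):

import java.io.Serializable;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;

String maxInstant = "20220101000000"; // placeholder cut-off

// Keep completed instants with timestamp <= maxInstant; the predicate's first
// argument is maxInstant, mirroring the filter in buildFileSystemView above.
Stream<HoodieInstant> filtered = metaClient.getActiveTimeline()
    .filterCompletedInstants().getInstants()
    .filter(i -> HoodieTimeline.GREATER_THAN_OR_EQUALS.test(maxInstant, i.getTimestamp()));

// The details function must be Serializable, hence the intersection-type cast.
HoodieTimeline custom = new HoodieDefaultTimeline(filtered,
    (Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);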

Example 4 with HoodieDefaultTimeline

Use of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in project hudi by apache.

From the class CompactionCommand, the method printAllCompactions:

/**
 * Prints all compaction details.
 */
private String printAllCompactions(HoodieDefaultTimeline timeline, Function<HoodieInstant, HoodieCompactionPlan> compactionPlanReader, boolean includeExtraMetadata, String sortByField, boolean descending, int limit, boolean headerOnly) {
    Stream<HoodieInstant> instantsStream = timeline.getWriteTimeline().getReverseOrderedInstants();
    List<Pair<HoodieInstant, HoodieCompactionPlan>> compactionPlans = instantsStream
        .map(instant -> Pair.of(instant, compactionPlanReader.apply(instant)))
        .filter(pair -> pair.getRight() != null)
        .collect(Collectors.toList());
    // A compaction appears as a completed COMMIT once it finishes, so resolve each
    // plan's state against the completed commit timeline.
    Set<String> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
        .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
    List<Comparable[]> rows = new ArrayList<>();
    for (Pair<HoodieInstant, HoodieCompactionPlan> compactionPlan : compactionPlans) {
        HoodieCompactionPlan plan = compactionPlan.getRight();
        HoodieInstant instant = compactionPlan.getLeft();
        final HoodieInstant.State state;
        if (committedInstants.contains(instant.getTimestamp())) {
            state = HoodieInstant.State.COMPLETED;
        } else {
            state = instant.getState();
        }
        if (includeExtraMetadata) {
            rows.add(new Comparable[] { instant.getTimestamp(), state.toString(), plan.getOperations() == null ? 0 : plan.getOperations().size(), plan.getExtraMetadata().toString() });
        } else {
            rows.add(new Comparable[] { instant.getTimestamp(), state.toString(), plan.getOperations() == null ? 0 : plan.getOperations().size() });
        }
    }
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
    if (includeExtraMetadata) {
        header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
    }
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) FileSystem(org.apache.hadoop.fs.FileSystem) BiFunction(java.util.function.BiFunction) HoodieException(org.apache.hudi.exception.HoodieException) ObjectInputStream(java.io.ObjectInputStream) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) OperationResult(org.apache.hudi.table.action.compact.OperationResult) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Utils(org.apache.spark.util.Utils) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CommandMarker(org.springframework.shell.core.CommandMarker) SparkCommand(org.apache.hudi.cli.commands.SparkMain.SparkCommand) UtilHelpers(org.apache.hudi.utilities.UtilHelpers) TableHeader(org.apache.hudi.cli.TableHeader) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) InputStreamConsumer(org.apache.hudi.cli.utils.InputStreamConsumer) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) List(java.util.List) Stream(java.util.stream.Stream) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) CliOption(org.springframework.shell.core.annotation.CliOption) Function(java.util.function.Function) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) CommitUtil(org.apache.hudi.cli.utils.CommitUtil) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand) SparkLauncher(org.apache.spark.launcher.SparkLauncher) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) SparkUtil(org.apache.hudi.cli.utils.SparkUtil) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair)
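The key step above is resolving a compaction's state: once a compaction completes it is written as a commit, so its timestamp shows up in the completed commit timeline. A small sketch of just that check, with a metaClient as before and a placeholder instant time:

import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;

// The active timeline extends HoodieDefaultTimeline.
HoodieDefaultTimeline timeline = metaClient.getActiveTimeline();

// Timestamps of compactions that have already completed (they appear as commits).
Set<String> committed = timeline.getCommitTimeline()
    .filterCompletedInstants().getInstants()
    .map(HoodieInstant::getTimestamp)
    .collect(Collectors.toSet());

boolean compactionDone = committed.contains("20220101000000"); // placeholder instant time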

Example 5 with HoodieDefaultTimeline

Use of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in project hudi by apache.

From the class HoodieTableMetadataUtil, the method getFileSystemView:

/**
 * Get metadata table file system view.
 *
 * @param metaClient - Metadata table meta client
 * @return Filesystem view for the metadata table
 */
public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) {
    // If there are no commits on the metadata table then the table's
    // default FileSystemView will not return any file slices even
    // though we may have initialized them.
    HoodieTimeline timeline = metaClient.getActiveTimeline();
    if (timeline.empty()) {
        final HoodieInstant instant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.createNewInstantTime());
        timeline = new HoodieDefaultTimeline(Stream.of(instant), metaClient.getActiveTimeline()::getInstantDetails);
    }
    return new HoodieTableFileSystemView(metaClient, timeline);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)
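A hedged usage sketch; the package org.apache.hudi.metadata for HoodieTableMetadataUtil is assumed, as is the metadata table living under <basePath>/.hoodie/metadata with a "files" partition:

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;

// Meta client pointing at the metadata table (path layout is an assumption).
HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder()
    .setConf(new Configuration())
    .setBasePath("/tmp/hoodie/sample-table/.hoodie/metadata")
    .build();

HoodieTableFileSystemView view = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);

// Thanks to the synthetic delta-commit instant, initialized file groups are
// visible even when the metadata table has no completed commits yet.
view.getLatestFileSlices("files").forEach(slice -> System.out.println(slice.getFileId()));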

Aggregations

HoodieDefaultTimeline (org.apache.hudi.common.table.timeline.HoodieDefaultTimeline): 6 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 4 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 3 usages
CliCommand (org.springframework.shell.core.annotation.CliCommand): 3 usages
IOException (java.io.IOException): 2 usages
ArrayList (java.util.ArrayList): 2 usages
HashMap (java.util.HashMap): 2 usages
List (java.util.List): 2 usages
Map (java.util.Map): 2 usages
Function (java.util.function.Function): 2 usages
Collectors (java.util.stream.Collectors): 2 usages
Stream (java.util.stream.Stream): 2 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
HoodieCLI (org.apache.hudi.cli.HoodieCLI): 2 usages
HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper): 2 usages
HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields): 2 usages
TableHeader (org.apache.hudi.cli.TableHeader): 2 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 2 usages
HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView): 2 usages