Search in sources :

Example 1 with HoodieSavepointMetadata

use of org.apache.hudi.avro.model.HoodieSavepointMetadata in project hudi by apache.

the class SavepointActionExecutor method execute.

/**
 * Creates a savepoint on the commit identified by {@code instantTime}.
 *
 * <p>The commit must be present in the completed commits timeline and must not be older than
 * the earliest commit still retained (taken from the last completed clean if there is one,
 * otherwise from the first completed commit). The latest base files at or before the commit
 * are collected per partition and written out as the savepoint metadata.
 *
 * @return the metadata that was persisted for the new savepoint
 * @throws HoodieSavepointException if the commit is not in the completed commits timeline, or
 *         if an {@link IOException} occurs while reading or writing timeline metadata
 */
@Override
public HoodieSavepointMetadata execute() {
    Option<HoodieInstant> lastClean = table.getCompletedCleanTimeline().lastInstant();
    boolean commitExists = table.getCompletedCommitsTimeline().containsInstant(instantTime);
    if (!commitExists) {
        throw new HoodieSavepointException("Could not savepoint non-existing commit " + instantTime);
    }
    try {
        // Work out the oldest commit whose files are still guaranteed to be around.
        String earliestRetained;
        if (!lastClean.isPresent()) {
            // No clean has completed yet, so fall back to the first completed commit.
            earliestRetained = table.getCompletedCommitsTimeline().firstInstant().get().getTimestamp();
        } else {
            earliestRetained = TimelineMetadataUtils
                .deserializeHoodieCleanMetadata(table.getActiveTimeline().getInstantDetails(lastClean.get()).get())
                .getEarliestCommitToRetain();
        }

        // Refuse to savepoint a commit that may already have been cleaned.
        // NOTE(review): checkArgument presumably throws IllegalArgumentException on failure - confirm.
        boolean withinWindow =
            HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestRetained);
        String outsideWindowMessage =
            "Could not savepoint commit " + instantTime + " as this is beyond the lookup window " + earliestRetained;
        ValidationUtils.checkArgument(withinWindow, outsideWindowMessage);

        context.setJobStatus(this.getClass().getSimpleName(), "Collecting latest files for savepoint " + instantTime);
        List<String> allPartitions =
            FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), table.getMetaClient().getBasePath());

        // For every partition, gather the names of the latest base files at or before the commit.
        Map<String, List<String>> filesByPartition = context.mapToPair(allPartitions, partition -> {
            LOG.info("Collecting latest files in partition path " + partition);
            List<String> fileNames = table.getBaseFileOnlyView()
                .getLatestBaseFilesBeforeOrOn(partition, instantTime)
                .map(HoodieBaseFile::getFileName)
                .collect(Collectors.toList());
            return new ImmutablePair<>(partition, fileNames);
        }, null);

        HoodieSavepointMetadata savepointMetadata =
            TimelineMetadataUtils.convertSavepointMetadata(user, comment, filesByPartition);

        // Register the savepoint instant first, then complete it with the serialized metadata.
        table.getActiveTimeline()
            .createNewInstant(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime));
        table.getActiveTimeline().saveAsComplete(
            new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime),
            TimelineMetadataUtils.serializeSavepointMetadata(savepointMetadata));
        LOG.info("Savepoint " + instantTime + " created");
        return savepointMetadata;
    } catch (IOException ioe) {
        throw new HoodieSavepointException("Failed to savepoint " + instantTime, ioe);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) IOException(java.io.IOException) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata) HoodieSavepointException(org.apache.hudi.exception.HoodieSavepointException) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) List(java.util.List) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView)

Example 2 with HoodieSavepointMetadata

use of org.apache.hudi.avro.model.HoodieSavepointMetadata in project hudi by apache.

the class HoodieTestTable method doSavepoint.

/**
 * Builds savepoint metadata for the given commit in this test table.
 *
 * <p>The commit's metadata is looked up, its partition-to-files mapping is turned into a
 * {@link HoodieSavepointMetadata}, and the listed files are then removed from each partition.
 *
 * @param commitTime instant time of the commit to savepoint
 * @return the savepoint metadata built from the commit's partition files
 * @throws IllegalArgumentException if no metadata is found for {@code commitTime}
 * @throws IOException declared for the helper calls made here
 */
public HoodieSavepointMetadata doSavepoint(String commitTime) throws IOException {
    Option<HoodieCommitMetadata> commitMetadata = getMetadataForInstant(commitTime);
    if (!commitMetadata.isPresent()) {
        // Message previously said "rollback", which was misleading for a savepoint operation.
        throw new IllegalArgumentException("Instant to savepoint not present in timeline: " + commitTime);
    }
    Map<String, List<String>> partitionFiles = getPartitionFiles(commitMetadata.get());
    HoodieSavepointMetadata savepointMetadata = getSavepointMetadata(commitTime, partitionFiles);
    // NOTE(review): deleting the savepointed files mirrors rollback-style handling and looks
    // unintended for a savepoint; kept as-is because existing tests may depend on it - confirm.
    for (Map.Entry<String, List<String>> entry : partitionFiles.entrySet()) {
        deleteFilesInPartition(entry.getKey(), entry.getValue());
    }
    return savepointMetadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata)

Example 3 with HoodieSavepointMetadata

use of org.apache.hudi.avro.model.HoodieSavepointMetadata in project hudi by apache.

the class HoodieTestTable method getSavepointMetadata.

/**
 * Assembles a {@link HoodieSavepointMetadata} from a partition-to-files mapping.
 *
 * <p>The savepoint time is the numeric value of {@code instant}, and the creator is fixed to
 * {@code "test"}.
 *
 * @param instant instant time; must be parseable as a {@code long}
 * @param partitionToFilesMeta file names to record, keyed by partition path
 * @return the populated savepoint metadata
 * @throws NumberFormatException if {@code instant} is not a valid {@code long}
 */
public HoodieSavepointMetadata getSavepointMetadata(String instant, Map<String, List<String>> partitionToFilesMeta) {
    HoodieSavepointMetadata result = new HoodieSavepointMetadata();
    result.setSavepointedAt(Long.valueOf(instant));

    // One partition-metadata record per partition, keyed by the partition path.
    Map<String, HoodieSavepointPartitionMetadata> byPartition = new HashMap<>();
    partitionToFilesMeta.forEach((partition, files) -> {
        HoodieSavepointPartitionMetadata partitionMeta = new HoodieSavepointPartitionMetadata();
        partitionMeta.setPartitionPath(partition);
        partitionMeta.setSavepointDataFile(files);
        byPartition.put(partition, partitionMeta);
    });

    result.setPartitionMetadata(byPartition);
    result.setSavepointedBy("test");
    return result;
}
Also used : HoodieSavepointPartitionMetadata(org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata) HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata)

Example 4 with HoodieSavepointMetadata

use of org.apache.hudi.avro.model.HoodieSavepointMetadata in project hudi by apache.

the class CleanPlanner method getSavepointedDataFiles.

/**
 * Returns the names of all data files recorded under the given savepoint.
 *
 * @param savepointTime instant time of the savepoint to inspect
 * @return a stream over the data file names of every partition in the savepoint
 * @throws HoodieSavepointException if the table has no such savepoint, or if its metadata
 *         cannot be read
 */
public Stream<String> getSavepointedDataFiles(String savepointTime) {
    boolean known = hoodieTable.getSavepoints().contains(savepointTime);
    if (!known) {
        throw new HoodieSavepointException("Could not get data files for savepoint " + savepointTime + ". No such savepoint.");
    }

    HoodieInstant savepointInstant = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
    HoodieSavepointMetadata savepointMetadata;
    try {
        savepointMetadata = TimelineMetadataUtils.deserializeHoodieSavepointMetadata(
            hoodieTable.getActiveTimeline().getInstantDetails(savepointInstant).get());
    } catch (IOException ioe) {
        throw new HoodieSavepointException("Could not get savepointed data files for savepoint " + savepointTime, ioe);
    }

    // Flatten the per-partition file lists into a single stream of file names.
    return savepointMetadata.getPartitionMetadata().values().stream()
        .map(partitionMeta -> partitionMeta.getSavepointDataFile())
        .flatMap(files -> files.stream());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieSavepointException(org.apache.hudi.exception.HoodieSavepointException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata)

Example 5 with HoodieSavepointMetadata

use of org.apache.hudi.avro.model.HoodieSavepointMetadata in project hudi by apache.

the class TimelineMetadataUtils method convertSavepointMetadata.

/**
 * Converts a partition-to-files mapping into a {@link HoodieSavepointMetadata} record.
 *
 * <p>The savepoint time is taken from {@link System#currentTimeMillis()} when this method
 * runs, and the partition metadata is wrapped in an unmodifiable map.
 *
 * @param user who is creating the savepoint
 * @param comment free-form comment stored with the savepoint
 * @param latestFiles file names to record, keyed by partition path
 * @return the savepoint metadata, stamped with {@code DEFAULT_VERSION}
 */
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment, Map<String, List<String>> latestFiles) {
    Map<String, HoodieSavepointPartitionMetadata> perPartition = new HashMap<>();
    for (Map.Entry<String, List<String>> partitionEntry : latestFiles.entrySet()) {
        String partitionPath = partitionEntry.getKey();
        List<String> fileNames = partitionEntry.getValue();
        perPartition.put(partitionPath, new HoodieSavepointPartitionMetadata(partitionPath, fileNames));
    }
    long savepointedAt = System.currentTimeMillis();
    Map<String, HoodieSavepointPartitionMetadata> readOnlyView = Collections.unmodifiableMap(perPartition);
    return new HoodieSavepointMetadata(user, savepointedAt, comment, readOnlyView, DEFAULT_VERSION);
}
Also used : HoodieSavepointPartitionMetadata(org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata) HashMap(java.util.HashMap) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata)

Aggregations

HoodieSavepointMetadata (org.apache.hudi.avro.model.HoodieSavepointMetadata)6 List (java.util.List)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)2 HoodieSavepointPartitionMetadata (org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata)2 CollectionUtils.createImmutableMap (org.apache.hudi.common.util.CollectionUtils.createImmutableMap)2 HoodieSavepointException (org.apache.hudi.exception.HoodieSavepointException)2 HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata)1 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)1 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)1 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)1 TableFileSystemView (org.apache.hudi.common.table.view.TableFileSystemView)1 ImmutablePair (org.apache.hudi.common.util.collection.ImmutablePair)1 HoodieException (org.apache.hudi.exception.HoodieException)1 HoodieIOException (org.apache.hudi.exception.HoodieIOException)1 Logger (org.apache.log4j.Logger)1