Search in sources :

Example 1 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestMetadataConversionUtils method createReplace.

private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create replace instant to mark fileId1 as deleted
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // some cases requestedReplaceMetadata will be null
    // e.g. insert_overwrite_table or insert_overwrite without clustering
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
    HoodieCommitMetadata inflightReplaceMetadata = null;
    if (isClustering) {
        requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
        requestedReplaceMetadata.setOperationType(writeOperationType.name());
        HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
        HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
        HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
        clusteringGroup.setSlices(Arrays.asList(sliceInfo));
        clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
        requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
        requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    } else {
        // inflightReplaceMetadata will be null in clustering but not null
        // in insert_overwrite or insert_overwrite_table
        inflightReplaceMetadata = new HoodieCommitMetadata();
        inflightReplaceMetadata.setOperationType(writeOperationType);
        inflightReplaceMetadata.setCompacted(false);
    }
    HoodieTestTable.of(metaClient).addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata), Option.ofNullable(inflightReplaceMetadata), replaceMetadata).withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)

Example 2 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestSimpleConcurrentFileWritesConflictResolutionStrategy method createReplace.

private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create replace instant to mark fileId1 as deleted
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // create replace instant to mark fileId1 as deleted
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient).addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata).withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) HashMap(java.util.HashMap) List(java.util.List) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)

Example 3 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestCleaner method generateReplaceCommitMetadata.

private Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> generateReplaceCommitMetadata(String instantTime, String partition, String replacedFileId, String newFileId) {
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.toString());
    requestedReplaceMetadata.setVersion(1);
    HoodieSliceInfo sliceInfo = HoodieSliceInfo.newBuilder().setFileId(replacedFileId).build();
    List<HoodieClusteringGroup> clusteringGroups = new ArrayList<>();
    clusteringGroups.add(HoodieClusteringGroup.newBuilder().setVersion(1).setNumOutputFileGroups(1).setMetrics(Collections.emptyMap()).setSlices(Collections.singletonList(sliceInfo)).build());
    requestedReplaceMetadata.setExtraMetadata(Collections.emptyMap());
    requestedReplaceMetadata.setClusteringPlan(HoodieClusteringPlan.newBuilder().setVersion(1).setExtraMetadata(Collections.emptyMap()).setStrategy(HoodieClusteringStrategy.newBuilder().setStrategyClassName("").setVersion(1).build()).setInputGroups(clusteringGroups).build());
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.addReplaceFileId(partition, replacedFileId);
    replaceMetadata.setOperationType(WriteOperationType.CLUSTER);
    if (!StringUtils.isNullOrEmpty(newFileId)) {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partition);
        writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId));
        writeStat.setFileId(newFileId);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        replaceMetadata.addWriteStat(partition, writeStat);
    }
    return Pair.of(requestedReplaceMetadata, replaceMetadata);
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 4 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class HoodieTestReplaceCommitMetadataGenerator method createReplaceCommitFileWithMetadata.

public static void createReplaceCommitFileWithMetadata(String basePath, String commitTime, Option<Integer> writes, Option<Integer> updates, HoodieTableMetaClient metaclient) throws Exception {
    HoodieReplaceCommitMetadata replaceMetadata = generateReplaceCommitMetadata(basePath, commitTime, UUID.randomUUID().toString(), UUID.randomUUID().toString(), writes, updates);
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = getHoodieRequestedReplaceMetadata();
    HoodieTestTable.of(metaclient).addReplaceCommit(commitTime, Option.ofNullable(requestedReplaceMetadata), Option.empty(), replaceMetadata);
}
Also used : HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 5 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class IncrementalTimelineSyncFileSystemView method addReplaceInstant.

/**
 * Add newly found REPLACE instant.
 *
 * @param timeline Hoodie Timeline
 * @param instant REPLACE Instant
 */
private void addReplaceInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
    LOG.info("Syncing replace instant (" + instant + ")");
    HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
    updatePartitionWriteFileGroups(replaceMetadata.getPartitionToWriteStats(), timeline, instant);
    replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream().forEach(entry -> {
        String partition = entry.getKey();
        Map<HoodieFileGroupId, HoodieInstant> replacedFileIds = entry.getValue().stream().collect(Collectors.toMap(replaceStat -> new HoodieFileGroupId(partition, replaceStat), replaceStat -> instant));
        LOG.info("For partition (" + partition + ") of instant (" + instant + "), excluding " + replacedFileIds.size() + " file groups");
        addReplacedFileGroups(replacedFileIds);
    });
    LOG.info("Done Syncing REPLACE instant (" + instant + ")");
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) TimelineDiffHelper(org.apache.hudi.common.table.timeline.TimelineDiffHelper) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Set(java.util.Set) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) TimelineDiffResult(org.apache.hudi.common.table.timeline.TimelineDiffHelper.TimelineDiffResult) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Aggregations

HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)19 List (java.util.List)14 HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)13 HashMap (java.util.HashMap)12 ArrayList (java.util.ArrayList)11 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)10 Map (java.util.Map)8 HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata)8 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)6 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)6 Option (org.apache.hudi.common.util.Option)6 LogManager (org.apache.log4j.LogManager)6 Logger (org.apache.log4j.Logger)6 Collectors (java.util.stream.Collectors)5 WriteOperationType (org.apache.hudi.common.model.WriteOperationType)5 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)5 IOException (java.io.IOException)4 Path (org.apache.hadoop.fs.Path)4 FSUtils (org.apache.hudi.common.fs.FSUtils)4 Set (java.util.Set)3