
Example 1 with HoodieSliceInfo

Use of org.apache.hudi.avro.model.HoodieSliceInfo in the apache/hudi project.

From the class TestMetadataConversionUtils, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create replace metadata that marks fileId2 as replaced
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // in some cases requestedReplaceMetadata will be null,
    // e.g. insert_overwrite_table or insert_overwrite without clustering
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
    HoodieCommitMetadata inflightReplaceMetadata = null;
    if (isClustering) {
        requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
        requestedReplaceMetadata.setOperationType(writeOperationType.name());
        HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
        HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
        HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
        clusteringGroup.setSlices(Arrays.asList(sliceInfo));
        clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
        requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
        requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    } else {
        // inflightReplaceMetadata will be null in clustering but not null
        // in insert_overwrite or insert_overwrite_table
        inflightReplaceMetadata = new HoodieCommitMetadata();
        inflightReplaceMetadata.setOperationType(writeOperationType);
        inflightReplaceMetadata.setCompacted(false);
    }
    HoodieTestTable.of(metaClient)
            .addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata), Option.ofNullable(inflightReplaceMetadata), replaceMetadata)
            .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
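
A minimal invocation sketch under stated assumptions: the instant times and operation types below are hypothetical, chosen only to exercise both branches of the helper.

    // clustering path: requested metadata carries a HoodieClusteringPlan
    createReplace("001", WriteOperationType.CLUSTER, true);
    // non-clustering path: only the inflight commit metadata is populated
    createReplace("002", WriteOperationType.INSERT_OVERWRITE_TABLE, false);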

Example 2 with HoodieSliceInfo

Use of org.apache.hudi.avro.model.HoodieSliceInfo in the apache/hudi project.

From the class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create replace metadata that marks fileId2 as replaced
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // build requested replace metadata with a clustering plan targeting fileId1
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient)
            .addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata)
            .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
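
A hedged invocation sketch (the instant time is hypothetical). Unlike Example 1, this variant always attaches a clustering plan to the requested metadata and passes Option.empty() for the inflight metadata.

    createReplace("003", WriteOperationType.CLUSTER);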

Example 3 with HoodieSliceInfo

Use of org.apache.hudi.avro.model.HoodieSliceInfo in the apache/hudi project.

From the class TestCleaner, method generateReplaceCommitMetadata:

private Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> generateReplaceCommitMetadata(String instantTime, String partition, String replacedFileId, String newFileId) {
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.toString());
    requestedReplaceMetadata.setVersion(1);
    HoodieSliceInfo sliceInfo = HoodieSliceInfo.newBuilder().setFileId(replacedFileId).build();
    List<HoodieClusteringGroup> clusteringGroups = new ArrayList<>();
    clusteringGroups.add(HoodieClusteringGroup.newBuilder()
            .setVersion(1).setNumOutputFileGroups(1).setMetrics(Collections.emptyMap())
            .setSlices(Collections.singletonList(sliceInfo)).build());
    requestedReplaceMetadata.setExtraMetadata(Collections.emptyMap());
    requestedReplaceMetadata.setClusteringPlan(HoodieClusteringPlan.newBuilder()
            .setVersion(1).setExtraMetadata(Collections.emptyMap())
            .setStrategy(HoodieClusteringStrategy.newBuilder().setStrategyClassName("").setVersion(1).build())
            .setInputGroups(clusteringGroups).build());
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.addReplaceFileId(partition, replacedFileId);
    replaceMetadata.setOperationType(WriteOperationType.CLUSTER);
    if (!StringUtils.isNullOrEmpty(newFileId)) {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partition);
        writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId));
        writeStat.setFileId(newFileId);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        replaceMetadata.addWriteStat(partition, writeStat);
    }
    return Pair.of(requestedReplaceMetadata, replaceMetadata);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), ArrayList (java.util.ArrayList), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
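
A hedged sketch of consuming the returned pair; the instant time, partition, and file ids are hypothetical, and it assumes Hudi's Pair exposes getLeft()/getRight() accessors.

    Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> metadata =
            generateReplaceCommitMetadata("004", "2015/03/16", "replaced-file", "new-file");
    HoodieRequestedReplaceMetadata requested = metadata.getLeft();   // clustering plan for the instant
    HoodieReplaceCommitMetadata completed = metadata.getRight();     // commit metadata with write stats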

Example 4 with HoodieSliceInfo

Use of org.apache.hudi.avro.model.HoodieSliceInfo in the apache/hudi project.

From the class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplaceRequested:

private void createReplaceRequested(String instantTime) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create a requested replace instant whose clustering plan targets fileId1
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient)
            .addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata))
            .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
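
A hedged invocation sketch (the instant time is hypothetical). This helper plants only the requested replace instant on the timeline, letting the conflict-resolution tests simulate a clustering that is pending rather than inflight or completed.

    createReplaceRequested("005");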

Example 5 with HoodieSliceInfo

Use of org.apache.hudi.avro.model.HoodieSliceInfo in the apache/hudi project.

From the class ClusteringUtils, method createClusteringPlan:

/**
 * Create a clustering plan from the input fileSliceGroups.
 */
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName, Map<String, String> strategyParams, List<FileSlice>[] fileSliceGroups, Map<String, String> extraMetadata) {
    List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
        Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
        List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
        return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
    }).collect(Collectors.toList());
    HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder().setStrategyClassName(strategyClassName).setStrategyParams(strategyParams).build();
    return HoodieClusteringPlan.newBuilder().setInputGroups(clusteringGroups).setExtraMetadata(extraMetadata).setStrategy(strategy).build();
}
Also used: Arrays (java.util.Arrays), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieException (org.apache.hudi.exception.HoodieException), HashMap (java.util.HashMap), Logger (org.apache.log4j.Logger), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), BaseFile (org.apache.hudi.common.model.BaseFile), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Map (java.util.Map), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), IOException (java.io.IOException), Collectors (java.util.stream.Collectors), HoodieClusteringStrategy (org.apache.hudi.avro.model.HoodieClusteringStrategy), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), AbstractMap (java.util.AbstractMap), List (java.util.List), Stream (java.util.stream.Stream), WriteOperationType (org.apache.hudi.common.model.WriteOperationType), HoodieIOException (org.apache.hudi.exception.HoodieIOException), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), LogManager (org.apache.log4j.LogManager), FSUtils (org.apache.hudi.common.fs.FSUtils), Pair (org.apache.hudi.common.util.collection.Pair)
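
A minimal, hedged sketch of calling the utility; the file slice construction, strategy class name, and strategy parameters are illustrative assumptions rather than values taken from the Hudi code base.

    // one clustering group containing a single file slice (all arguments hypothetical)
    FileSlice slice = new FileSlice("2015/03/16", "001", "file-1");
    @SuppressWarnings("unchecked")
    List<FileSlice>[] fileSliceGroups = new List[] { Collections.singletonList(slice) };
    HoodieClusteringPlan plan = ClusteringUtils.createClusteringPlan(
            "com.example.MyClusteringStrategy",              // hypothetical strategy class
            Collections.singletonMap("sort.columns", "key"), // hypothetical strategy params
            fileSliceGroups,
            Collections.emptyMap());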

Aggregations

HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup): 5 uses
HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata): 5 uses
HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo): 5 uses
HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan): 4 uses
HashMap (java.util.HashMap): 3 uses
List (java.util.List): 3 uses
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata): 3 uses
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 3 uses
ArrayList (java.util.ArrayList): 2 uses
IOException (java.io.IOException): 1 use
AbstractMap (java.util.AbstractMap): 1 use
Arrays (java.util.Arrays): 1 use
Map (java.util.Map): 1 use
Collectors (java.util.stream.Collectors): 1 use
Stream (java.util.stream.Stream): 1 use
HoodieClusteringStrategy (org.apache.hudi.avro.model.HoodieClusteringStrategy): 1 use
FSUtils (org.apache.hudi.common.fs.FSUtils): 1 use
BaseFile (org.apache.hudi.common.model.BaseFile): 1 use
FileSlice (org.apache.hudi.common.model.FileSlice): 1 use
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 1 use