Example 6 with HoodieClusteringGroup

Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.

From the class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplaceRequested.

private void createReplaceRequested(String instantTime) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create replace instant to mark fileId1 as deleted
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient).addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata)).withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used : HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)
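
For comparison, the same requested-replace metadata can be assembled through the Avro-generated builders instead of the setters used above. This is a minimal sketch, assuming the standard newBuilder() API on these generated model classes and that fields left unset fall back to their schema defaults; the class and helper method names are made up for illustration.

import java.util.Collections;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieSliceInfo;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.TimelineLayoutVersion;

public class RequestedReplaceMetadataSketch {

    // Builds requested-replace metadata for a single file slice, mirroring the test above
    // but using the Avro builders (hypothetical helper, not part of the Hudi codebase).
    static HoodieRequestedReplaceMetadata clusterMetadataFor(String partitionPath, String fileId) {
        HoodieSliceInfo sliceInfo = HoodieSliceInfo.newBuilder()
            .setPartitionPath(partitionPath)
            .setFileId(fileId)
            .build();
        HoodieClusteringGroup clusteringGroup = HoodieClusteringGroup.newBuilder()
            .setSlices(Collections.singletonList(sliceInfo))
            .build();
        HoodieClusteringPlan clusteringPlan = HoodieClusteringPlan.newBuilder()
            .setInputGroups(Collections.singletonList(clusteringGroup))
            .build();
        return HoodieRequestedReplaceMetadata.newBuilder()
            .setOperationType(WriteOperationType.CLUSTER.name())
            .setClusteringPlan(clusteringPlan)
            .setVersion(TimelineLayoutVersion.CURR_VERSION)
            .build();
    }
}

Either style produces the same Avro record; the builder variant simply keeps construction in a single expression.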

Example 7 with HoodieClusteringGroup

Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.

From the class ClusteringUtils, method createClusteringPlan.

/**
 * Create clustering plan from input fileSliceGroups.
 */
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName, Map<String, String> strategyParams, List<FileSlice>[] fileSliceGroups, Map<String, String> extraMetadata) {
    List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
        Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
        List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
        return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
    }).collect(Collectors.toList());
    HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder().setStrategyClassName(strategyClassName).setStrategyParams(strategyParams).build();
    return HoodieClusteringPlan.newBuilder().setInputGroups(clusteringGroups).setExtraMetadata(extraMetadata).setStrategy(strategy).build();
}
Also used : Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) HashMap(java.util.HashMap) Logger(org.apache.log4j.Logger) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) AbstractMap(java.util.AbstractMap) List(java.util.List) Stream(java.util.stream.Stream) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
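
A possible call site for this helper is sketched below, assuming ClusteringUtils is on the classpath. The strategy class name, the sort-column parameter, and the single-group layout are illustrative placeholders rather than values taken from the Hudi codebase; a real planner would typically split the candidate slices into several groups.

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.ClusteringUtils;

public class CreateClusteringPlanSketch {

    // Wraps all candidate slices into one clustering group and builds a plan for them
    // (hypothetical helper, for illustration only).
    @SuppressWarnings("unchecked")
    static HoodieClusteringPlan planFor(List<FileSlice> candidateSlices) {
        List<FileSlice>[] fileSliceGroups = new List[] {candidateSlices};
        Map<String, String> strategyParams = new HashMap<>();
        strategyParams.put("hoodie.clustering.plan.strategy.sort.columns", "ts"); // placeholder parameter
        return ClusteringUtils.createClusteringPlan(
            "com.example.MyClusteringExecutionStrategy", // placeholder strategy class name
            strategyParams,
            fileSliceGroups,
            Collections.emptyMap());
    }
}

The resulting plan can then be wrapped in a HoodieRequestedReplaceMetadata, as in Example 6 above.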

Example 8 with HoodieClusteringGroup

Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.

From the class JavaExecutionStrategy, method runClusteringForGroup.

/**
 * Executes clustering for the group.
 */
private List<WriteStatus> runClusteringForGroup(HoodieClusteringGroup clusteringGroup, Map<String, String> strategyParams, boolean preserveHoodieMetadata, String instantTime) {
    List<HoodieRecord<T>> inputRecords = readRecordsForGroup(clusteringGroup, instantTime);
    Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(getWriteConfig().getSchema()));
    List<HoodieFileGroupId> inputFileIds = clusteringGroup.getSlices().stream().map(info -> new HoodieFileGroupId(info.getPartitionPath(), info.getFileId())).collect(Collectors.toList());
    return performClusteringWithRecordList(inputRecords, clusteringGroup.getNumOutputFileGroups(), instantTime, strategyParams, readerSchema, inputFileIds, preserveHoodieMetadata);
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) JavaCustomColumnsSortPartitioner(org.apache.hudi.execution.bulkinsert.JavaCustomColumnsSortPartitioner) KeyGenUtils(org.apache.hudi.keygen.KeyGenUtils) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) JavaTaskContextSupplier(org.apache.hudi.client.common.JavaTaskContextSupplier) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieFileSliceReader.getFileSliceReader(org.apache.hudi.common.table.log.HoodieFileSliceReader.getFileSliceReader) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) ArrayList(java.util.ArrayList) HoodieList(org.apache.hudi.common.data.HoodieList) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) PLAN_STRATEGY_SORT_COLUMNS(org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) ClusteringOperation(org.apache.hudi.common.model.ClusteringOperation) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair)
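
runClusteringForGroup is a private helper, so the sketch below only illustrates the surrounding loop: each input group of the clustering plan is clustered independently and the resulting write statuses are concatenated. The GroupClusterer interface and the clusterAllGroups helper are hypothetical stand-ins for the strategy's internals, not Hudi APIs.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.client.WriteStatus;

public class RunClusteringPerGroupSketch {

    // Hypothetical stand-in for the strategy's private runClusteringForGroup(...).
    interface GroupClusterer {
        List<WriteStatus> cluster(HoodieClusteringGroup clusteringGroup, Map<String, String> strategyParams,
            boolean preserveHoodieMetadata, String instantTime);
    }

    // Clusters every input group of the plan and collects all write statuses.
    static List<WriteStatus> clusterAllGroups(HoodieClusteringPlan clusteringPlan, Map<String, String> strategyParams,
        boolean preserveHoodieMetadata, String instantTime, GroupClusterer clusterer) {
        List<WriteStatus> writeStatuses = new ArrayList<>();
        for (HoodieClusteringGroup clusteringGroup : clusteringPlan.getInputGroups()) {
            writeStatuses.addAll(clusterer.cluster(clusteringGroup, strategyParams, preserveHoodieMetadata, instantTime));
        }
        return writeStatuses;
    }
}

In the real strategy, the clusterer argument would correspond to the private runClusteringForGroup shown above.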

Aggregations

HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup): 8 usages
HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan): 7 usages
List (java.util.List): 6 usages
HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata): 5 usages
HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo): 5 usages
ArrayList (java.util.ArrayList): 4 usages
Collectors (java.util.stream.Collectors): 4 usages
LogManager (org.apache.log4j.LogManager): 4 usages
Logger (org.apache.log4j.Logger): 4 usages
IOException (java.io.IOException): 3 usages
HashMap (java.util.HashMap): 3 usages
Map (java.util.Map): 3 usages
Stream (java.util.stream.Stream): 3 usages
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext): 3 usages
HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload): 3 usages
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata): 3 usages
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 3 usages
Arrays (java.util.Arrays): 2 usages
Iterator (java.util.Iterator): 2 usages
Schema (org.apache.avro.Schema): 2 usages