Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.
From the class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplaceRequested:
private void createReplaceRequested(String instantTime) throws Exception {
  String fileId1 = "file-1";
  String fileId2 = "file-2";
  // create replace instant to mark fileId1 as deleted
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
  requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
  HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
  HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
  HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
  sliceInfo.setFileId(fileId1);
  sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
  clusteringGroup.setSlices(Arrays.asList(sliceInfo));
  clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
  requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
  requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
  HoodieTestTable.of(metaClient)
      .addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata))
      .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
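This helper is private to the test class; a minimal sketch of how a test might drive it (the test method below is hypothetical and assumes the metaClient field is already initialized against a test table) is to create the requested replace and then assert that the clustering instant shows up as a pending replace on the timeline:

@Test
public void testPendingClusteringInstantIsVisible() throws Exception {
  // generate a new instant time and register the requested replace (clustering) instant
  String instantTime = HoodieActiveTimeline.createNewInstantTime();
  createReplaceRequested(instantTime);
  // reload the timeline and confirm exactly one pending replace instant exists
  HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline();
  assertEquals(1, timeline.filterPendingReplaceTimeline().countInstants());
}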
Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.
From the class ClusteringUtils, method createClusteringPlan:
/**
 * Create clustering plan from input fileSliceGroups.
 */
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName, Map<String, String> strategyParams,
    List<FileSlice>[] fileSliceGroups, Map<String, String> extraMetadata) {
  List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
    Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
    List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
    return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
  }).collect(Collectors.toList());
  HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
      .setStrategyClassName(strategyClassName).setStrategyParams(strategyParams).build();
  return HoodieClusteringPlan.newBuilder()
      .setInputGroups(clusteringGroups).setExtraMetadata(extraMetadata).setStrategy(strategy).build();
}
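A minimal, illustrative sketch of calling this utility (the strategy class name, base instant time, and single one-slice group below are placeholders, not taken from the source):

// build one group containing a single file slice (partition path reused from the test data generator)
List<FileSlice> group = Collections.singletonList(
    new FileSlice(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", "file-1"));
@SuppressWarnings("unchecked")
List<FileSlice>[] fileSliceGroups = new List[] {group};
HoodieClusteringPlan plan = ClusteringUtils.createClusteringPlan(
    "org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy", // illustrative strategy class
    Collections.emptyMap(), fileSliceGroups, Collections.emptyMap());
// each resulting HoodieClusteringGroup carries the slice info plus the metrics from buildMetrics(...)
int numGroups = plan.getInputGroups().size();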
Use of org.apache.hudi.avro.model.HoodieClusteringGroup in project hudi by apache.
From the class JavaExecutionStrategy, method runClusteringForGroup:
/**
 * Executes clustering for the group.
 */
private List<WriteStatus> runClusteringForGroup(HoodieClusteringGroup clusteringGroup, Map<String, String> strategyParams,
    boolean preserveHoodieMetadata, String instantTime) {
  List<HoodieRecord<T>> inputRecords = readRecordsForGroup(clusteringGroup, instantTime);
  Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(getWriteConfig().getSchema()));
  List<HoodieFileGroupId> inputFileIds = clusteringGroup.getSlices().stream()
      .map(info -> new HoodieFileGroupId(info.getPartitionPath(), info.getFileId()))
      .collect(Collectors.toList());
  return performClusteringWithRecordList(inputRecords, clusteringGroup.getNumOutputFileGroups(), instantTime,
      strategyParams, readerSchema, inputFileIds, preserveHoodieMetadata);
}
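Because the method is private, it is driven from inside the strategy itself; a rough sketch (illustrative only, assuming clusteringPlan and instantTime are in scope and hard-coding preserveHoodieMetadata to false) of iterating the plan's groups and collecting the write statuses:

List<WriteStatus> allStatuses = new ArrayList<>();
for (HoodieClusteringGroup group : clusteringPlan.getInputGroups()) {
  // strategy params come from the plan's HoodieClusteringStrategy; the boolean flag is a placeholder here
  allStatuses.addAll(runClusteringForGroup(
      group, clusteringPlan.getStrategy().getStrategyParams(), false, instantTime));
}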