Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
From the class TestClusteringUtils, method createRequestedReplaceInstant.
private HoodieInstant createRequestedReplaceInstant(String partitionPath1, String clusterTime, List<String>... fileIds) throws IOException {
  List<FileSlice>[] fileSliceGroups = new List[fileIds.length];
  for (int i = 0; i < fileIds.length; i++) {
    fileSliceGroups[i] = fileIds[i].stream()
        .map(fileId -> generateFileSlice(partitionPath1, fileId, "0"))
        .collect(Collectors.toList());
  }
  HoodieClusteringPlan clusteringPlan =
      ClusteringUtils.createClusteringPlan(CLUSTERING_STRATEGY_CLASS, STRATEGY_PARAMS, fileSliceGroups, Collections.emptyMap());
  HoodieInstant clusteringInstant =
      new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusterTime);
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
      .setClusteringPlan(clusteringPlan)
      .setOperationType(WriteOperationType.CLUSTER.name())
      .build();
  metaClient.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant,
      TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
  return clusteringInstant;
}
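For orientation, here is a minimal, hypothetical call site for this helper in the style of the surrounding tests; the partition name, instant time, and assertion are illustrative assumptions, not code from the project:

// Hypothetical usage sketch: schedule a clustering instant over two fresh file IDs
// and check that it shows up as a pending replacecommit on the timeline.
List<String> fileIds = new ArrayList<>();
fileIds.add(UUID.randomUUID().toString());
fileIds.add(UUID.randomUUID().toString());
HoodieInstant requested = createRequestedReplaceInstant("partition1", "001", fileIds);
assertTrue(metaClient.getActiveTimeline().filterPendingReplaceTimeline().containsInstant(requested));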
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
From the class PartitionAwareClusteringPlanStrategy, method generateClusteringPlan.
@Override
public Option<HoodieClusteringPlan> generateClusteringPlan() {
  HoodieTableMetaClient metaClient = getHoodieTable().getMetaClient();
  LOG.info("Scheduling clustering for " + metaClient.getBasePath());
  HoodieWriteConfig config = getWriteConfig();
  List<String> partitionPaths = FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath());
  // Get matched partitions if set.
  partitionPaths = getMatchedPartitions(config, partitionPaths);
  // Filter the partition paths if needed, to reduce list status.
  partitionPaths = filterPartitionPaths(partitionPaths);
  if (partitionPaths.isEmpty()) {
    // In case no partitions could be picked, return no clustering plan.
    return Option.empty();
  }
  List<HoodieClusteringGroup> clusteringGroups = getEngineContext().flatMap(partitionPaths, partitionPath -> {
    List<FileSlice> fileSlicesEligible = getFileSlicesEligibleForClustering(partitionPath).collect(Collectors.toList());
    return buildClusteringGroupsForPartition(partitionPath, fileSlicesEligible)
        .limit(getWriteConfig().getClusteringMaxNumGroups());
  }, partitionPaths.size()).stream()
      .limit(getWriteConfig().getClusteringMaxNumGroups())
      .collect(Collectors.toList());
  if (clusteringGroups.isEmpty()) {
    LOG.info("No data available to cluster");
    return Option.empty();
  }
  HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
      .setStrategyClassName(getWriteConfig().getClusteringExecutionStrategyClass())
      .setStrategyParams(getStrategyParams())
      .build();
  return Option.of(HoodieClusteringPlan.newBuilder()
      .setStrategy(strategy)
      .setInputGroups(clusteringGroups)
      .setExtraMetadata(getExtraMetadata())
      .setVersion(getPlanVersion())
      .setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadataForClustering())
      .build());
}
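The two partition hooks above, getMatchedPartitions and filterPartitionPaths, are the natural places to narrow the plan's scope. As a minimal sketch only, assuming filterPartitionPaths is overridable with the signature implied by the call above, a subclass might keep just the partitions under one date prefix:

// Hypothetical override, not the library's shipped implementation:
// cluster only partitions under a fixed date prefix to cut down file listings.
@Override
protected List<String> filterPartitionPaths(List<String> partitionPaths) {
  return partitionPaths.stream()
      .filter(partitionPath -> partitionPath.startsWith("2021/"))
      .collect(Collectors.toList());
}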
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
From the class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplaceRequested.
private void createReplaceRequested(String instantTime) throws Exception {
  String fileId1 = "file-1";
  String fileId2 = "file-2";
  // Create a replace instant to mark fileId1 as deleted.
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
  requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
  HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
  HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
  HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
  sliceInfo.setFileId(fileId1);
  sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
  clusteringGroup.setSlices(Arrays.asList(sliceInfo));
  clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
  requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
  requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
  HoodieTestTable.of(metaClient)
      .addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata))
      .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
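A brief, assumed usage of this helper follows; the instant time and the read-back assertions are illustrative, not taken from the test:

// Hypothetical sketch: stage the pending clustering, then confirm the timeline
// reports it as a pending replacecommit before exercising conflict resolution.
createReplaceRequested("001");
HoodieInstant pending = metaClient.reloadActiveTimeline()
    .filterPendingReplaceTimeline()
    .firstInstant()
    .get();
assertEquals(HoodieTimeline.REPLACE_COMMIT_ACTION, pending.getAction());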
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
From the class TestClusteringUtils, method testClusteringPlanInflight.
// The replacecommit.inflight file doesn't have a clustering plan.
// Verify that getClusteringPlan fetches the content from the corresponding requested file.
@Test
public void testClusteringPlanInflight() throws Exception {
  String partitionPath1 = "partition1";
  List<String> fileIds1 = new ArrayList<>();
  fileIds1.add(UUID.randomUUID().toString());
  fileIds1.add(UUID.randomUUID().toString());
  String clusterTime1 = "1";
  HoodieInstant requestedInstant = createRequestedReplaceInstant(partitionPath1, clusterTime1, fileIds1);
  HoodieInstant inflightInstant = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(requestedInstant, Option.empty());
  HoodieClusteringPlan requestedClusteringPlan = ClusteringUtils.getClusteringPlan(metaClient, requestedInstant).get().getRight();
  HoodieClusteringPlan inflightClusteringPlan = ClusteringUtils.getClusteringPlan(metaClient, inflightInstant).get().getRight();
  assertEquals(requestedClusteringPlan, inflightClusteringPlan);
}
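Because the transition above writes the inflight file with empty content (Option.empty()), the equality assertion can only pass if getClusteringPlan falls back to the requested file. A couple of extra, hypothetical assertions that would make that precondition explicit:

// Hypothetical follow-up checks: both instants share the timestamp, and only
// their states differ, so any plan content must come from the requested file.
assertEquals(HoodieInstant.State.REQUESTED, requestedInstant.getState());
assertEquals(HoodieInstant.State.INFLIGHT, inflightInstant.getState());
assertEquals(requestedInstant.getTimestamp(), inflightInstant.getTimestamp());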
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
From the class ClusteringTestUtils, method createClusteringPlan.
public static HoodieClusteringPlan createClusteringPlan(HoodieTableMetaClient metaClient, String instantTime, String fileId) {
  try {
    String basePath = metaClient.getBasePath();
    String partition = DEFAULT_PARTITION_PATHS[0];
    createBaseFile(basePath, partition, instantTime, fileId, 1);
    FileSlice slice = new FileSlice(partition, instantTime, fileId);
    slice.setBaseFile(new CompactionTestUtils.DummyHoodieBaseFile(
        Paths.get(basePath, partition, baseFileName(instantTime, fileId)).toString()));
    List<FileSlice>[] fileSliceGroups = new List[] {Collections.singletonList(slice)};
    return ClusteringUtils.createClusteringPlan("strategy", new HashMap<>(), fileSliceGroups, Collections.emptyMap());
  } catch (Exception e) {
    throw new HoodieException(e.getMessage(), e);
  }
}
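A small, assumed usage of this utility; the instant time and file ID are placeholders, and the assertions simply restate what the single-group plan built above must contain:

// Hypothetical sketch: build a single-group plan and inspect its one input group.
HoodieClusteringPlan plan = ClusteringTestUtils.createClusteringPlan(metaClient, "001", UUID.randomUUID().toString());
assertEquals(1, plan.getInputGroups().size());
assertEquals(1, plan.getInputGroups().get(0).getSlices().size());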