Search in sources:

Example 6 with HoodieClusteringPlan

use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

the class TestClusteringUtils method createRequestedReplaceInstant.

/**
 * Builds a clustering plan from the supplied groups of file ids and saves it on the
 * active timeline as a pending replace commit in the REQUESTED state.
 *
 * @param partitionPath1 partition path handed to {@code generateFileSlice} for every file id
 * @param clusterTime timestamp used for the created instant
 * @param fileIds one list of file ids per clustering input group
 * @return the REQUESTED replace-commit instant that was saved
 * @throws IOException declared by this helper; presumably raised while serializing or
 *     saving the metadata (not visible from this snippet)
 */
private HoodieInstant createRequestedReplaceInstant(String partitionPath1, String clusterTime, List<String>... fileIds) throws IOException {
    // One group of file slices per incoming list of file ids, in the same order.
    List<FileSlice>[] fileSliceGroups = new List[fileIds.length];
    int groupIndex = 0;
    for (List<String> groupFileIds : fileIds) {
        List<FileSlice> slices = groupFileIds.stream()
            .map(fileId -> generateFileSlice(partitionPath1, fileId, "0"))
            .collect(Collectors.toList());
        fileSliceGroups[groupIndex] = slices;
        groupIndex++;
    }

    HoodieClusteringPlan plan =
        ClusteringUtils.createClusteringPlan(CLUSTERING_STRATEGY_CLASS, STRATEGY_PARAMS, fileSliceGroups, Collections.emptyMap());
    HoodieRequestedReplaceMetadata metadata = HoodieRequestedReplaceMetadata.newBuilder()
        .setOperationType(WriteOperationType.CLUSTER.name())
        .setClusteringPlan(plan)
        .build();

    HoodieInstant requested =
        new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusterTime);
    metaClient.getActiveTimeline().saveToPendingReplaceCommit(
        requested, TimelineMetadataUtils.serializeRequestedReplaceMetadata(metadata));
    return requested;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) ArrayList(java.util.ArrayList) List(java.util.List) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)

Example 7 with HoodieClusteringPlan

use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

the class PartitionAwareClusteringPlanStrategy method generateClusteringPlan.

/**
 * Produces a clustering plan for the table behind {@code getHoodieTable()}.
 *
 * <p>All partition paths are listed, narrowed by {@code getMatchedPartitions} and
 * {@code filterPartitionPaths}, and clustering groups are then built per partition.
 * The number of groups is capped at {@code getClusteringMaxNumGroups()} both within
 * each partition and across the combined result.
 *
 * @return the plan, or {@code Option.empty()} when no partition remains after
 *     filtering or no clustering group was produced
 */
@Override
public Option<HoodieClusteringPlan> generateClusteringPlan() {
    HoodieTableMetaClient metaClient = getHoodieTable().getMetaClient();
    LOG.info("Scheduling clustering for " + metaClient.getBasePath());
    HoodieWriteConfig config = getWriteConfig();

    // Start from every partition, keep the matched ones (if configured), then apply
    // the partition filter to reduce list status calls.
    List<String> allPartitions =
        FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath());
    List<String> matchedPartitions = getMatchedPartitions(config, allPartitions);
    List<String> partitionPaths = filterPartitionPaths(matchedPartitions);
    if (partitionPaths.isEmpty()) {
        // Nothing could be picked, so there is no clustering plan to return.
        return Option.empty();
    }

    // Build the groups partition by partition; each partition is capped on its own.
    List<HoodieClusteringGroup> groupsAcrossPartitions = getEngineContext().flatMap(
        partitionPaths,
        partition -> {
            List<FileSlice> eligibleSlices =
                getFileSlicesEligibleForClustering(partition).collect(Collectors.toList());
            return buildClusteringGroupsForPartition(partition, eligibleSlices)
                .limit(getWriteConfig().getClusteringMaxNumGroups());
        },
        partitionPaths.size());

    // Apply the same cap once more to the combined result.
    List<HoodieClusteringGroup> clusteringGroups = groupsAcrossPartitions.stream()
        .limit(getWriteConfig().getClusteringMaxNumGroups())
        .collect(Collectors.toList());
    if (clusteringGroups.isEmpty()) {
        LOG.info("No data available to cluster");
        return Option.empty();
    }

    HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
        .setStrategyClassName(getWriteConfig().getClusteringExecutionStrategyClass())
        .setStrategyParams(getStrategyParams())
        .build();
    HoodieClusteringPlan plan = HoodieClusteringPlan.newBuilder()
        .setStrategy(strategy)
        .setInputGroups(clusteringGroups)
        .setExtraMetadata(getExtraMetadata())
        .setVersion(getPlanVersion())
        .setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadataForClustering())
        .build();
    return Option.of(plan);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTable(org.apache.hudi.table.HoodieTable) Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Collectors(java.util.stream.Collectors) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) List(java.util.List) Stream(java.util.stream.Stream) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) ClusteringPlanPartitionFilter(org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilter) LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup)

Example 8 with HoodieClusteringPlan

use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

the class TestSimpleConcurrentFileWritesConflictResolutionStrategy method createReplaceRequested.

/**
 * Adds a requested replace instant whose clustering plan contains a single slice
 * for "file-1", and creates base files for "file-1" and "file-2" in the default
 * first partition.
 *
 * @param instantTime instant time of the requested replace
 * @throws Exception propagated from the test-table helpers
 */
private void createReplaceRequested(String instantTime) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    String partition = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;

    // Only fileId1 is listed in the plan, i.e. the replace instant marks fileId1 as deleted.
    HoodieSliceInfo slice = new HoodieSliceInfo();
    slice.setFileId(fileId1);
    slice.setPartitionPath(partition);

    HoodieClusteringGroup group = new HoodieClusteringGroup();
    group.setSlices(Arrays.asList(slice));

    HoodieClusteringPlan plan = new HoodieClusteringPlan();
    plan.setInputGroups(Arrays.asList(group));

    HoodieRequestedReplaceMetadata metadata = new HoodieRequestedReplaceMetadata();
    metadata.setOperationType(WriteOperationType.CLUSTER.name());
    metadata.setClusteringPlan(plan);
    metadata.setVersion(TimelineLayoutVersion.CURR_VERSION);

    HoodieTestTable.of(metaClient)
        .addRequestedReplace(instantTime, Option.of(metadata))
        .withBaseFilesInPartition(partition, fileId1, fileId2);
}
Also used : HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)

Example 9 with HoodieClusteringPlan

use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

the class TestClusteringUtils method testClusteringPlanInflight.

// The replacecommit.inflight file does not carry a clustering plan of its own.
// Check that getClusteringPlan, when given the inflight instant, returns the same
// plan as it does for the corresponding requested instant.
@Test
public void testClusteringPlanInflight() throws Exception {
    // Two random file ids make up the single clustering group.
    List<String> fileIds1 = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        fileIds1.add(UUID.randomUUID().toString());
    }

    HoodieInstant requestedInstant = createRequestedReplaceInstant("partition1", "1", fileIds1);
    // Move the instant to inflight before reading either plan back.
    HoodieInstant inflightInstant =
        metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(requestedInstant, Option.empty());

    HoodieClusteringPlan planFromRequested =
        ClusteringUtils.getClusteringPlan(metaClient, requestedInstant).get().getRight();
    HoodieClusteringPlan planFromInflight =
        ClusteringUtils.getClusteringPlan(metaClient, inflightInstant).get().getRight();
    assertEquals(planFromRequested, planFromInflight);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) ArrayList(java.util.ArrayList) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Test(org.junit.jupiter.api.Test)

Example 10 with HoodieClusteringPlan

use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

the class ClusteringTestUtils method createClusteringPlan.

/**
 * Creates a base file for {@code fileId} in the first default partition and returns a
 * clustering plan whose only input group holds the file slice for that base file.
 *
 * @param metaClient meta client supplying the table base path
 * @param instantTime instant time used for the base file and the file slice
 * @param fileId id of the file to create and include in the plan
 * @return the clustering plan built with strategy name "strategy" and empty parameters
 * @throws HoodieException wrapping any exception raised while preparing the plan
 */
public static HoodieClusteringPlan createClusteringPlan(HoodieTableMetaClient metaClient, String instantTime, String fileId) {
    try {
        String basePath = metaClient.getBasePath();
        String partition = DEFAULT_PARTITION_PATHS[0];
        createBaseFile(basePath, partition, instantTime, fileId, 1);

        String baseFilePath = Paths.get(basePath, partition, baseFileName(instantTime, fileId)).toString();
        FileSlice slice = new FileSlice(partition, instantTime, fileId);
        slice.setBaseFile(new CompactionTestUtils.DummyHoodieBaseFile(baseFilePath));

        // A single group holding the single slice.
        List<FileSlice>[] fileSliceGroups = new List[1];
        fileSliceGroups[0] = Collections.singletonList(slice);
        return ClusteringUtils.createClusteringPlan("strategy", new HashMap<>(), fileSliceGroups, Collections.emptyMap());
    } catch (Exception e) {
        throw new HoodieException(e.getMessage(), e);
    }
}
Also used : FileSlice(org.apache.hudi.common.model.FileSlice) List(java.util.List) HoodieException(org.apache.hudi.exception.HoodieException) HoodieException(org.apache.hudi.exception.HoodieException) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)

Aggregations

HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan): 14, HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata): 10, List (java.util.List): 8, HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 7, IOException (java.io.IOException): 5, HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup): 5, ArrayList (java.util.ArrayList): 4, HashMap (java.util.HashMap): 4, Collectors (java.util.stream.Collectors): 4, Stream (java.util.stream.Stream): 4, HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo): 4, FileSlice (org.apache.hudi.common.model.FileSlice): 4, HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 4, LogManager (org.apache.log4j.LogManager): 4, Test (org.junit.jupiter.api.Test): 4, Arrays (java.util.Arrays): 3, Map (java.util.Map): 3, FSUtils (org.apache.hudi.common.fs.FSUtils): 3, HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 3, HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 3