Example 11 with HoodieClusteringPlan

Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

From the class ClusteringPlanActionExecutor, the method execute:

@Override
public Option<HoodieClusteringPlan> execute() {
    Option<HoodieClusteringPlan> planOption = createClusteringPlan();
    if (planOption.isPresent()) {
        // Record the plan on the timeline as a requested (pending) replace commit.
        HoodieInstant clusteringInstant =
            new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime);
        try {
            HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
                .setOperationType(WriteOperationType.CLUSTER.name())
                .setExtraMetadata(extraMetadata.orElse(Collections.emptyMap()))
                .setClusteringPlan(planOption.get())
                .build();
            table.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant,
                TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
        } catch (IOException ioe) {
            throw new HoodieIOException("Exception scheduling clustering", ioe);
        }
    }
    return planOption;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)
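
Below is a minimal companion sketch (not from the Hudi codebase) showing how a plan persisted by execute() can be read back from the timeline. It assumes only a HoodieTableMetaClient for the same table and uses ClusteringUtils.getAllPendingClusteringPlans, the same helper that appears in Example 14 below.

import java.util.List;
import java.util.stream.Collectors;

import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.collection.Pair;

public class PendingClusteringInspector {

    // Lists every pending clustering plan on the table's timeline, pairing the
    // REQUESTED replace-commit instant with its deserialized HoodieClusteringPlan.
    public static void printPendingPlans(HoodieTableMetaClient metaClient) {
        List<Pair<HoodieInstant, HoodieClusteringPlan>> pending =
            ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
        for (Pair<HoodieInstant, HoodieClusteringPlan> entry : pending) {
            System.out.println(entry.getLeft().getTimestamp() + " -> "
                + entry.getRight().getInputGroups().size() + " input group(s)");
        }
    }
}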

Example 12 with HoodieClusteringPlan

Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

From the class ClusteringUtils, the method createClusteringPlan:

/**
 * Create a clustering plan from the input fileSliceGroups.
 */
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName, Map<String, String> strategyParams,
                                                        List<FileSlice>[] fileSliceGroups, Map<String, String> extraMetadata) {
    // Build one HoodieClusteringGroup per group of file slices, with per-group metrics.
    List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
        Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
        List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
        return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
    }).collect(Collectors.toList());
    HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
        .setStrategyClassName(strategyClassName)
        .setStrategyParams(strategyParams)
        .build();
    return HoodieClusteringPlan.newBuilder()
        .setInputGroups(clusteringGroups)
        .setExtraMetadata(extraMetadata)
        .setStrategy(strategy)
        .build();
}
Also used : Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) HashMap(java.util.HashMap) Logger(org.apache.log4j.Logger) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) AbstractMap(java.util.AbstractMap) List(java.util.List) Stream(java.util.stream.Stream) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
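
A minimal usage sketch for createClusteringPlan, assuming one group containing a single file slice. The partition path, instant time, and file id are placeholders, and the strategy class name is stored in the plan only as a string, so any plan-strategy class name would do here.

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.ClusteringUtils;

public class CreateClusteringPlanExample {

    @SuppressWarnings("unchecked")
    public static HoodieClusteringPlan buildSingleGroupPlan() {
        // One group containing one file slice; partition path, instant time, and
        // file id below are illustrative placeholders.
        FileSlice slice = new FileSlice("2021/01/01", "001", "file-1");
        List<FileSlice>[] fileSliceGroups = new List[] {Collections.singletonList(slice)};
        Map<String, String> strategyParams = new HashMap<>();
        return ClusteringUtils.createClusteringPlan(
            "org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy",
            strategyParams, fileSliceGroups, Collections.emptyMap());
    }
}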

Example 13 with HoodieClusteringPlan

Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

From the class TestHoodieClientOnCopyOnWriteStorage, the method createRequestedReplaceInstant:

protected HoodieInstant createRequestedReplaceInstant(HoodieTableMetaClient metaClient, String clusterTime,
                                                      List<FileSlice>[] fileSlices) throws IOException {
    HoodieClusteringPlan clusteringPlan = ClusteringUtils.createClusteringPlan(
        EXECUTION_STRATEGY_CLASS_NAME.defaultValue(), STRATEGY_PARAMS, fileSlices, Collections.emptyMap());
    HoodieInstant clusteringInstant = new HoodieInstant(REQUESTED, REPLACE_COMMIT_ACTION, clusterTime);
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
        .setClusteringPlan(clusteringPlan)
        .setOperationType(WriteOperationType.CLUSTER.name())
        .build();
    metaClient.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant,
        TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
    return clusteringInstant;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)
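
A short sketch of how this helper might be exercised inside a test body; metaClient and fileSlices are assumed to come from the surrounding test fixture, and the assertion is JUnit 5.

HoodieInstant instant = createRequestedReplaceInstant(metaClient, "001", fileSlices);
metaClient.reloadActiveTimeline();
// The new instant should now appear as a pending replace commit on the timeline.
assertTrue(metaClient.getActiveTimeline().filterPendingReplaceTimeline().containsInstant(instant));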

Example 14 with HoodieClusteringPlan

Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.

From the class TestHoodieClientOnCopyOnWriteStorage, the method testPendingClusteringRollback:

@Test
public void testPendingClusteringRollback() throws Exception {
    boolean populateMetaFields = true;
    // Set up inline clustering so the first write schedules a clustering plan.
    HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
        .withClusteringMaxNumGroups(10)
        .withClusteringTargetPartitions(0)
        .withInlineClusteringNumCommits(1)
        .withInlineClustering(true)
        .build();
    // Start clustering, but don't commit.
    List<HoodieRecord> allRecords = testInsertAndClustering(clusteringConfig, populateMetaFields, false);
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    List<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans =
        ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
    assertEquals(1, pendingClusteringPlans.size());
    HoodieInstant pendingClusteringInstant = pendingClusteringPlans.get(0).getLeft();
    // Complete another commit after the pending clustering.
    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(EAGER);
    addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
    HoodieWriteConfig config = cfgBuilder.build();
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    dataGen = new HoodieTestDataGenerator();
    String commitTime = HoodieActiveTimeline.createNewInstantTime();
    allRecords.addAll(dataGen.generateInserts(commitTime, 200));
    assertThrows(HoodieUpsertException.class, () -> writeAndVerifyBatch(client, allRecords, commitTime, populateMetaFields));
    // Verify the pending clustering can be rolled back, even though there is a completed
    // commit newer than the pending clustering instant.
    client.rollback(pendingClusteringInstant.getTimestamp());
    metaClient.reloadActiveTimeline();
    // Verify there are no pending clustering instants left.
    assertEquals(0, ClusteringUtils.getAllPendingClusteringPlans(metaClient).count());
    // Delete the completed rollback instant to mimic a failed rollback of clustering, then
    // trigger the rollback again; the same rollback instant should be reused.
    HoodieInstant rollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
    FileCreateUtils.deleteRollbackCommit(metaClient.getBasePath(), rollbackInstant.getTimestamp());
    metaClient.reloadActiveTimeline();
    // Recreate the requested replace commit meta file so the rollback does not throw
    // FileNotFoundException: create a file slice at instant time 001 and build a
    // clustering plan that includes it.
    HoodieClusteringPlan clusteringPlan = ClusteringTestUtils.createClusteringPlan(metaClient, pendingClusteringInstant.getTimestamp(), "1");
    // Create the requested replace commit.
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
        .setClusteringPlan(clusteringPlan)
        .setOperationType(WriteOperationType.CLUSTER.name())
        .build();
    FileCreateUtils.createRequestedReplaceCommit(metaClient.getBasePath(), pendingClusteringInstant.getTimestamp(), Option.of(requestedReplaceMetadata));
    // Trigger clustering again; no new rollback instants should be generated.
    try {
        client.cluster(pendingClusteringInstant.getTimestamp(), false);
    } catch (Exception e) {
        // The replace commit metadata generated above is fake, so clustering is expected to
        // fail; this test only checks that no duplicate rollback instants are created.
    }
    metaClient.reloadActiveTimeline();
    // Verify that no new rollback instant was generated.
    HoodieInstant newRollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
    assertEquals(rollbackInstant.getTimestamp(), newRollbackInstant.getTimestamp());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieValidationException(org.apache.hudi.exception.HoodieValidationException) IOException(java.io.IOException) HoodieCorruptedDataException(org.apache.hudi.exception.HoodieCorruptedDataException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) Pair(org.apache.hudi.common.util.collection.Pair) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
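
The duplicate-rollback check at the end of this test can be distilled into a small reusable helper. The class below is hypothetical (not part of the Hudi test suite) and is built only from calls already used in the test above.

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import static org.junit.jupiter.api.Assertions.assertEquals;

final class RollbackAssertions {

    // Runs an operation that may fail, then asserts the latest rollback instant on the
    // timeline is still the one captured beforehand, i.e. no duplicate rollback was created.
    static void assertNoNewRollback(HoodieTableMetaClient metaClient, HoodieInstant before, Runnable op) {
        try {
            op.run();
        } catch (Exception e) {
            // The operation is allowed to fail; only the rollback bookkeeping is verified.
        }
        metaClient.reloadActiveTimeline();
        HoodieInstant after = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
        assertEquals(before.getTimestamp(), after.getTimestamp());
    }
}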

Aggregations

HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan): 14 uses
HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata): 10 uses
List (java.util.List): 8 uses
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 7 uses
IOException (java.io.IOException): 5 uses
HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup): 5 uses
ArrayList (java.util.ArrayList): 4 uses
HashMap (java.util.HashMap): 4 uses
Collectors (java.util.stream.Collectors): 4 uses
Stream (java.util.stream.Stream): 4 uses
HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo): 4 uses
FileSlice (org.apache.hudi.common.model.FileSlice): 4 uses
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 4 uses
LogManager (org.apache.log4j.LogManager): 4 uses
Test (org.junit.jupiter.api.Test): 4 uses
Arrays (java.util.Arrays): 3 uses
Map (java.util.Map): 3 uses
FSUtils (org.apache.hudi.common.fs.FSUtils): 3 uses
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 3 uses
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 3 uses