Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
The class ClusteringPlanActionExecutor, method execute.
@Override
public Option<HoodieClusteringPlan> execute() {
  Option<HoodieClusteringPlan> planOption = createClusteringPlan();
  if (planOption.isPresent()) {
    // Persist the plan as a requested replacecommit instant on the timeline.
    HoodieInstant clusteringInstant =
        new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime);
    try {
      HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
          .setOperationType(WriteOperationType.CLUSTER.name())
          .setExtraMetadata(extraMetadata.orElse(Collections.emptyMap()))
          .setClusteringPlan(planOption.get())
          .build();
      table.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant,
          TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
    } catch (IOException ioe) {
      throw new HoodieIOException("Exception scheduling clustering", ioe);
    }
  }
  return planOption;
}
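Once the executor has written the requested replacecommit, the plan can be read back from the timeline. Below is a minimal sketch, not part of the snippet above, that lists pending clustering plans via ClusteringUtils.getAllPendingClusteringPlans (the same utility used in the test later in this page); the PendingClusteringInspector class name is invented for illustration.

// A minimal sketch, assuming a HoodieTableMetaClient for the same table.
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.collection.Pair;

public class PendingClusteringInspector {
  // Prints the instant time and input-group count of every pending clustering plan.
  public static void printPendingPlans(HoodieTableMetaClient metaClient) {
    ClusteringUtils.getAllPendingClusteringPlans(metaClient)
        .forEach((Pair<HoodieInstant, HoodieClusteringPlan> pair) -> {
          HoodieClusteringPlan plan = pair.getRight();
          System.out.println("pending clustering at " + pair.getLeft().getTimestamp()
              + " with " + plan.getInputGroups().size() + " input group(s)");
        });
  }
}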
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
The class ClusteringUtils, method createClusteringPlan.
/**
* Create clustering plan from input fileSliceGroups.
*/
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName, Map<String, String> strategyParams,
    List<FileSlice>[] fileSliceGroups, Map<String, String> extraMetadata) {
  // Turn each group of file slices into a clustering group carrying its metrics.
  List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
    Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
    List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
    return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
  }).collect(Collectors.toList());
  HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
      .setStrategyClassName(strategyClassName).setStrategyParams(strategyParams).build();
  return HoodieClusteringPlan.newBuilder()
      .setInputGroups(clusteringGroups).setExtraMetadata(extraMetadata).setStrategy(strategy).build();
}
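A hedged usage sketch for this helper follows: it builds a one-group plan from two file slices. The partition path, file ids, and strategy class name are illustrative placeholders, and the three-argument FileSlice constructor is an assumption made for this sketch rather than something shown in the snippet above.

// A minimal sketch; names below are placeholders, not from the original snippet.
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.ClusteringUtils;

public class ClusteringPlanExample {
  @SuppressWarnings("unchecked")
  public static HoodieClusteringPlan buildSingleGroupPlan() {
    // One group containing two file slices of the same partition (assumed constructor).
    List<FileSlice>[] fileSliceGroups = new List[] {
        Arrays.asList(
            new FileSlice("2021/01/01", "001", "file-id-1"),
            new FileSlice("2021/01/01", "001", "file-id-2"))
    };
    Map<String, String> strategyParams = new HashMap<>();
    return ClusteringUtils.createClusteringPlan(
        "org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy",
        strategyParams, fileSliceGroups, Collections.emptyMap());
  }
}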
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
The class TestHoodieClientOnCopyOnWriteStorage, method createRequestedReplaceInstant.
protected HoodieInstant createRequestedReplaceInstant(HoodieTableMetaClient metaClient, String clusterTime, List<FileSlice>[] fileSlices) throws IOException {
  HoodieClusteringPlan clusteringPlan =
      ClusteringUtils.createClusteringPlan(EXECUTION_STRATEGY_CLASS_NAME.defaultValue(), STRATEGY_PARAMS, fileSlices, Collections.emptyMap());
  HoodieInstant clusteringInstant = new HoodieInstant(REQUESTED, REPLACE_COMMIT_ACTION, clusterTime);
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
      .setClusteringPlan(clusteringPlan).setOperationType(WriteOperationType.CLUSTER.name()).build();
  metaClient.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant,
      TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
  return clusteringInstant;
}
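A sketch of how a test body might exercise this helper, assuming the fixture's metaClient and a prepared fileSlices array; the instant time "003" and the test method name are illustrative, not from the project.

// A minimal sketch, assuming the surrounding test class provides metaClient and fileSlices.
@Test
public void testRequestedReplaceInstantIsPending() throws IOException {
  HoodieInstant requested = createRequestedReplaceInstant(metaClient, "003", fileSlices);
  metaClient.reloadActiveTimeline();
  // The requested replacecommit should now surface as a pending clustering plan.
  assertEquals(1, ClusteringUtils.getAllPendingClusteringPlans(metaClient).count());
  assertEquals("003", requested.getTimestamp());
}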
Use of org.apache.hudi.avro.model.HoodieClusteringPlan in project hudi by apache.
The class TestHoodieClientOnCopyOnWriteStorage, method testPendingClusteringRollback.
@Test
public void testPendingClusteringRollback() throws Exception {
  boolean populateMetaFields = true;
  // Set up the clustering config.
  HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(true)
      .build();
  // Start clustering, but don't commit.
  List<HoodieRecord> allRecords = testInsertAndClustering(clusteringConfig, populateMetaFields, false);
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
  List<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans =
      ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
  assertEquals(1, pendingClusteringPlans.size());
  HoodieInstant pendingClusteringInstant = pendingClusteringPlans.get(0).getLeft();

  // Complete another commit after the pending clustering.
  HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(EAGER);
  addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
  HoodieWriteConfig config = cfgBuilder.build();
  SparkRDDWriteClient client = getHoodieWriteClient(config);
  dataGen = new HoodieTestDataGenerator();
  String commitTime = HoodieActiveTimeline.createNewInstantTime();
  allRecords.addAll(dataGen.generateInserts(commitTime, 200));
  assertThrows(HoodieUpsertException.class, () -> writeAndVerifyBatch(client, allRecords, commitTime, populateMetaFields));

  // Verify the pending clustering can be rolled back, even though a completed commit
  // greater than the pending clustering instant exists.
  client.rollback(pendingClusteringInstant.getTimestamp());
  metaClient.reloadActiveTimeline();
  // Verify there are no pending clustering instants left.
  assertEquals(0, ClusteringUtils.getAllPendingClusteringPlans(metaClient).count());

  // Delete the rollback.completed instant to mimic a failed rollback of clustering,
  // then trigger the rollback of clustering again; the same rollback instant should be reused.
  HoodieInstant rollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
  FileCreateUtils.deleteRollbackCommit(metaClient.getBasePath(), rollbackInstant.getTimestamp());
  metaClient.reloadActiveTimeline();

  // Recreate the requested replacecommit meta file so that rollback will not throw FileNotFoundException:
  // create a file slice with instant time 001 and build a clustering plan that includes it.
  HoodieClusteringPlan clusteringPlan = ClusteringTestUtils.createClusteringPlan(metaClient, pendingClusteringInstant.getTimestamp(), "1");
  // Create the requested replace commit.
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
      .setClusteringPlan(clusteringPlan)
      .setOperationType(WriteOperationType.CLUSTER.name())
      .build();
  FileCreateUtils.createRequestedReplaceCommit(metaClient.getBasePath(), pendingClusteringInstant.getTimestamp(), Option.of(requestedReplaceMetadata));

  // Trigger clustering again; no new rollback instants should be generated.
  try {
    client.cluster(pendingClusteringInstant.getTimestamp(), false);
  } catch (Exception e) {
    // The replace commit metadata generated above is a fake one, so clustering will fail;
    // the intent of this test is only to check for duplicate rollback instants, so ignore.
  }
  metaClient.reloadActiveTimeline();

  // Verify that no new rollback instant was generated.
  HoodieInstant newRollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
  assertEquals(rollbackInstant.getTimestamp(), newRollbackInstant.getTimestamp());
}
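If the duplicate-rollback check needs to be reused across tests, a small helper along these lines could count rollback instants directly. RollbackAssertions is an invented name for this sketch; it only wraps timeline calls already used in the test above.

// A minimal helper sketch for asserting on rollback instants.
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class RollbackAssertions {
  // Returns the number of rollback instants currently on the active timeline.
  public static int countRollbackInstants(HoodieTableMetaClient metaClient) {
    return metaClient.getActiveTimeline().getRollbackTimeline().countInstants();
  }
}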