Example use of org.apache.hudi.config.HoodieClusteringConfig in the Apache Hudi project.
Class TestHoodieClientOnCopyOnWriteStorage, method testInlineScheduleClustering.
/**
 * Checks that, after one committed upsert, a pending clustering plan exists only when
 * inline scheduling of clustering is requested; inline execution of clustering is
 * disabled in both parameterizations.
 *
 * @param scheduleInlineClustering value passed to {@code withScheduleInlineClustering}
 * @throws IOException declared by the test signature
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testInlineScheduleClustering(boolean scheduleInlineClustering) throws IOException {
  // Shared set-up helper; presumably seeds the table with two insert batches -- confirm against the helper.
  testInsertTwoBatches(true);

  // Clustering config: inline execution off, inline scheduling driven by the test parameter.
  HoodieClusteringConfig clusteringCfg = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(false)
      .withScheduleInlineClustering(scheduleInlineClustering)
      .withPreserveHoodieCommitMetadata(true)
      .build();
  HoodieWriteConfig writeCfg = getConfigBuilder(HoodieFailedWritesCleaningPolicy.LAZY)
      .withAutoCommit(false)
      .withClusteringConfig(clusteringCfg)
      .withProps(getPropertiesForKeyGen())
      .build();
  SparkRDDWriteClient writeClient = getHoodieWriteClient(writeCfg);

  // Write 200 fresh inserts into a single partition and commit them explicitly (auto-commit is off).
  dataGen = new HoodieTestDataGenerator(new String[] { "2015/03/16" });
  String instantTime = HoodieActiveTimeline.createNewInstantTime();
  List<HoodieRecord> inserts = dataGen.generateInserts(instantTime, 200);
  writeClient.startCommitWithTime(instantTime);
  JavaRDD<HoodieRecord> insertsRdd = jsc.parallelize(inserts, 2);
  JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(insertsRdd, instantTime);
  assertNoWriteErrors(writeStatuses.collect());
  writeClient.commit(instantTime, writeStatuses);

  // Re-read the table's timeline and count the clustering plans still pending.
  HoodieTableMetaClient tableMetaClient = HoodieTableMetaClient.builder()
      .setConf(hadoopConf)
      .setBasePath(basePath)
      .build();
  List<Pair<HoodieInstant, HoodieClusteringPlan>> pendingPlans =
      ClusteringUtils.getAllPendingClusteringPlans(tableMetaClient).collect(Collectors.toList());

  int expectedPlanCount = scheduleInlineClustering ? 1 : 0;
  assertEquals(expectedPlanCount, pendingPlans.size());
}
Example use of org.apache.hudi.config.HoodieClusteringConfig in the Apache Hudi project.
Class TestHoodieClientOnCopyOnWriteStorage, method testClusteringInvalidConfigForSqlQuerySingleResultValidator.
/**
 * Runs the shared insert-and-clustering scenario with inline clustering enabled and
 * {@link SqlQuerySingleResultPreCommitValidator} as the pre-commit validator, supplying
 * only the single-result query (the count query suffixed with {@code "#400"}).
 *
 * <p>NOTE(review): despite "Invalid" in the method name, this test does not expect an
 * exception -- confirm that is the intended contract.
 */
@Test
public void testClusteringInvalidConfigForSqlQuerySingleResultValidator() throws Exception {
  // Inline clustering after every commit.
  HoodieClusteringConfig inlineClustering = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(true)
      .build();

  String validatorClassName = SqlQuerySingleResultPreCommitValidator.class.getName();
  // Presumably "<query>#<expected result>" -- verify against the validator's expected format.
  String singleResultQuery = COUNT_SQL_QUERY_FOR_VALIDATION + "#400";

  testInsertAndClustering(inlineClustering, false, true, false, validatorClassName, "", singleResultQuery);
}
Example use of org.apache.hudi.config.HoodieClusteringConfig in the Apache Hudi project.
Class DeltaSync, method getHoodieClientConfig.
/**
 * Builds the write-client configuration from the delta-sync settings ({@code cfg}) and the
 * combined properties ({@code props}), fills in callback defaults when a Kafka or Pulsar
 * commit callback is configured, and then validates the settings this class relies on.
 *
 * @param schema schema to set on the write config; skipped when {@code null}
 * @return the built and validated write config
 */
private HoodieWriteConfig getHoodieClientConfig(Schema schema) {
  final boolean combineBeforeUpsert = true;
  final boolean autoCommit = false;
  // Shared template for every validation failure message below.
  final String expectationTemplate = "%s should be set to %s";

  HoodieCompactionConfig compactionConfig = HoodieCompactionConfig.newBuilder()
      .withPayloadClass(cfg.payloadClassName)
      .withInlineCompaction(cfg.isInlineCompactionEnabled())
      .build();
  HoodiePayloadConfig payloadConfig = HoodiePayloadConfig.newBuilder()
      .withPayloadOrderingField(cfg.sourceOrderingField)
      .build();

  // NOTE: Provided that we're injecting combined properties
  //       (from {@code props}, including CLI overrides), there's no
  //       need to explicitly set up some configuration aspects that
  //       are based on these (for ex Clustering configuration)
  HoodieWriteConfig.Builder configBuilder = HoodieWriteConfig.newBuilder()
      .withPath(cfg.targetBasePath)
      .combineInput(cfg.filterDupes, combineBeforeUpsert)
      .withCompactionConfig(compactionConfig)
      .withPayloadConfig(payloadConfig)
      .forTable(cfg.targetTableName)
      .withAutoCommit(autoCommit)
      .withProps(props);
  if (schema != null) {
    configBuilder.withSchema(schema.toString());
  }
  HoodieWriteConfig writeConfig = configBuilder.build();

  if (writeConfig.writeCommitCallbackOn()) {
    String callbackClass = writeConfig.getCallbackClass();
    // set default value for {@link HoodieWriteCommitKafkaCallbackConfig} if needed.
    if (HoodieWriteCommitKafkaCallback.class.getName().equals(callbackClass)) {
      HoodieWriteCommitKafkaCallbackConfig.setCallbackKafkaConfigIfNeeded(writeConfig);
    }
    // set default value for {@link HoodieWriteCommitPulsarCallbackConfig} if needed.
    if (HoodieWriteCommitPulsarCallback.class.getName().equals(callbackClass)) {
      HoodieWriteCommitPulsarCallbackConfig.setCallbackPulsarConfigIfNeeded(writeConfig);
    }
  }

  // Clustering flags as derived from the raw properties, used as the expected values below.
  HoodieClusteringConfig clusteringFromProps = HoodieClusteringConfig.from(props);

  // Validate what deltastreamer assumes of write-config to be really safe
  ValidationUtils.checkArgument(
      writeConfig.inlineCompactionEnabled() == cfg.isInlineCompactionEnabled(),
      String.format(expectationTemplate, INLINE_COMPACT.key(), cfg.isInlineCompactionEnabled()));
  ValidationUtils.checkArgument(
      writeConfig.inlineClusteringEnabled() == clusteringFromProps.isInlineClusteringEnabled(),
      String.format(expectationTemplate, INLINE_CLUSTERING.key(), clusteringFromProps.isInlineClusteringEnabled()));
  ValidationUtils.checkArgument(
      writeConfig.isAsyncClusteringEnabled() == clusteringFromProps.isAsyncClusteringEnabled(),
      String.format(expectationTemplate, ASYNC_CLUSTERING_ENABLE.key(), clusteringFromProps.isAsyncClusteringEnabled()));
  ValidationUtils.checkArgument(
      !writeConfig.shouldAutoCommit(),
      String.format(expectationTemplate, AUTO_COMMIT_ENABLE.key(), autoCommit));
  ValidationUtils.checkArgument(
      writeConfig.shouldCombineBeforeInsert() == cfg.filterDupes,
      String.format(expectationTemplate, COMBINE_BEFORE_INSERT.key(), cfg.filterDupes));
  ValidationUtils.checkArgument(
      writeConfig.shouldCombineBeforeUpsert(),
      String.format(expectationTemplate, COMBINE_BEFORE_UPSERT.key(), combineBeforeUpsert));

  return writeConfig;
}
Example use of org.apache.hudi.config.HoodieClusteringConfig in the Apache Hudi project.
Class TestHoodieClientOnCopyOnWriteStorage, method testClusteringWithSortColumns.
/**
 * Runs the shared insert-and-clustering scenario with inline clustering that sorts on a
 * single column, using {@link SqlQueryEqualityPreCommitValidator} with the count query.
 *
 * @param populateMetaFields     selects the sort column ({@code _hoodie_record_key} when true,
 *                               {@code _row_key} otherwise) and is forwarded to the shared scenario
 * @param preserveCommitMetadata value passed to {@code withPreserveHoodieCommitMetadata}
 */
@ParameterizedTest
@MethodSource("populateMetaFieldsAndPreserveMetadataParams")
public void testClusteringWithSortColumns(boolean populateMetaFields, boolean preserveCommitMetadata) throws Exception {
  String sortColumn;
  if (populateMetaFields) {
    sortColumn = "_hoodie_record_key";
  } else {
    sortColumn = "_row_key";
  }

  // Inline clustering after every commit, sorted on the column chosen above.
  HoodieClusteringConfig sortedClustering = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringSortColumns(sortColumn)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(true)
      .withPreserveHoodieCommitMetadata(preserveCommitMetadata)
      .build();

  String validatorClassName = SqlQueryEqualityPreCommitValidator.class.getName();
  testInsertAndClustering(sortedClustering, populateMetaFields, true, false, validatorClassName,
      COUNT_SQL_QUERY_FOR_VALIDATION, "");
}
Example use of org.apache.hudi.config.HoodieClusteringConfig in the Apache Hudi project.
Class TestHoodieClientOnCopyOnWriteStorage, method testClusteringInvalidConfigForSqlQueryValidator.
/**
 * Expects the shared insert-and-clustering scenario to throw
 * {@link HoodieValidationException} when {@link SqlQueryEqualityPreCommitValidator} is
 * configured but both query arguments are left empty.
 */
@Test
public void testClusteringInvalidConfigForSqlQueryValidator() throws Exception {
  // Inline clustering after every commit.
  HoodieClusteringConfig inlineClustering = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(true)
      .build();
  String validatorClassName = SqlQueryEqualityPreCommitValidator.class.getName();

  boolean validationRejected = false;
  try {
    testInsertAndClustering(inlineClustering, false, true, false, validatorClassName, "", "");
  } catch (HoodieValidationException expected) {
    // This is the outcome the test is looking for.
    validationRejected = true;
  }

  if (!validationRejected) {
    fail("expected pre-commit clustering validation to fail because sql query is not configured");
  }
}
Aggregations