Use of org.apache.hudi.exception.HoodieValidationException in project hudi by apache.
From the class HoodieTableFactory, method setupHoodieKeyOptions.
/**
* Sets up the hoodie key options (e.g. record key and partition key) from the table definition.
*/
private static void setupHoodieKeyOptions(Configuration conf, CatalogTable table) {
  List<String> pkColumns = table.getSchema().getPrimaryKey()
      .map(UniqueConstraint::getColumns).orElse(Collections.emptyList());
  if (pkColumns.size() > 0) {
    // the PRIMARY KEY syntax always has higher priority than option FlinkOptions#RECORD_KEY_FIELD
    String recordKey = String.join(",", pkColumns);
    conf.setString(FlinkOptions.RECORD_KEY_FIELD, recordKey);
  }
  List<String> partitionKeys = table.getPartitionKeys();
  if (partitionKeys.size() > 0) {
    // the PARTITIONED BY syntax always has higher priority than option FlinkOptions#PARTITION_PATH_FIELD
    conf.setString(FlinkOptions.PARTITION_PATH_FIELD, String.join(",", partitionKeys));
  }
  // set index key for bucket index if not defined
  if (conf.getString(FlinkOptions.INDEX_TYPE).equals(HoodieIndex.IndexType.BUCKET.name())
      && conf.getString(FlinkOptions.INDEX_KEY_FIELD).isEmpty()) {
    conf.setString(FlinkOptions.INDEX_KEY_FIELD, conf.getString(FlinkOptions.RECORD_KEY_FIELD));
  }
  // tweak the key gen class if possible
  final String[] partitions = conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",");
  final String[] pks = conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(",");
  if (partitions.length == 1) {
    final String partitionField = partitions[0];
    if (partitionField.isEmpty()) {
      conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, NonpartitionedAvroKeyGenerator.class.getName());
      LOG.info("Table option [{}] is reset to {} because this is a non-partitioned table",
          FlinkOptions.KEYGEN_CLASS_NAME.key(), NonpartitionedAvroKeyGenerator.class.getName());
      return;
    }
    DataType partitionFieldType = table.getSchema().getFieldDataType(partitionField)
        .orElseThrow(() -> new HoodieValidationException("Field " + partitionField + " does not exist"));
    if (pks.length <= 1 && DataTypeUtils.isDatetimeType(partitionFieldType)) {
      // timestamp based key gen only supports simple primary key
      setupTimestampKeygenOptions(conf, partitionFieldType);
      return;
    }
  }
  boolean complexHoodieKey = pks.length > 1 || partitions.length > 1;
  if (complexHoodieKey && FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.KEYGEN_CLASS_NAME)) {
    conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, ComplexAvroKeyGenerator.class.getName());
    LOG.info("Table option [{}] is reset to {} because record key or partition path has two or more fields",
        FlinkOptions.KEYGEN_CLASS_NAME.key(), ComplexAvroKeyGenerator.class.getName());
  }
}
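To see the selection rules above at a glance, here is a minimal standalone sketch (illustrative only, not Hudi code) that mirrors the same decision table with plain strings. The class names on the timestamp and default branches are assumptions about what setupTimestampKeygenOptions and the option default resolve to; the real method also applies the complex-key override only while KEYGEN_CLASS_NAME is still at its default value.

// Standalone sketch (illustrative, not Hudi code): the key generator decision
// table from setupHoodieKeyOptions, reduced to plain strings.
public class KeyGenSelectionSketch {

  static String chooseKeyGen(String recordKeyField, String partitionPathField, boolean partitionIsDatetime) {
    String[] pks = recordKeyField.split(",");
    String[] partitions = partitionPathField.split(",");
    if (partitions.length == 1 && partitions[0].isEmpty()) {
      return "NonpartitionedAvroKeyGenerator"; // non-partitioned table
    }
    if (partitions.length == 1 && pks.length <= 1 && partitionIsDatetime) {
      return "TimestampBasedAvroKeyGenerator"; // assumed target of the timestamp branch; simple key only
    }
    if (pks.length > 1 || partitions.length > 1) {
      return "ComplexAvroKeyGenerator"; // compound record key or partition path
    }
    return "SimpleAvroKeyGenerator"; // assumed option default for the simple case
  }

  public static void main(String[] args) {
    System.out.println(chooseKeyGen("id", "", false));        // NonpartitionedAvroKeyGenerator
    System.out.println(chooseKeyGen("id", "ts", true));       // TimestampBasedAvroKeyGenerator
    System.out.println(chooseKeyGen("id,name", "dt", false)); // ComplexAvroKeyGenerator
  }
}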
Use of org.apache.hudi.exception.HoodieValidationException in project hudi by apache.
From the class SparkValidatorUtils, method runValidators.
/**
 * Checks the configured pre-commit validators and runs them. Note that this
 * only works for COW tables.
 *
 * Throws an error if there are validation failures.
 */
public static void runValidators(HoodieWriteConfig config,
                                 HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata,
                                 HoodieEngineContext context, HoodieTable table, String instantTime) {
  if (StringUtils.isNullOrEmpty(config.getPreCommitValidators())) {
    LOG.info("no validators configured.");
  } else {
    if (!writeMetadata.getWriteStats().isPresent()) {
      writeMetadata.setWriteStats(writeMetadata.getWriteStatuses().map(WriteStatus::getStat).collectAsList());
    }
    Set<String> partitionsModified = writeMetadata.getWriteStats().get().stream()
        .map(writeStats -> writeStats.getPartitionPath()).collect(Collectors.toSet());
    SQLContext sqlContext = new SQLContext(HoodieSparkEngineContext.getSparkContext(context));
    // Refresh the timeline to ensure the validators see any other operations done on the
    // timeline (async operations such as other clustering/compaction/rollback).
    table.getMetaClient().reloadActiveTimeline();
    Dataset<Row> beforeState = getRecordsFromCommittedFiles(sqlContext, partitionsModified, table).cache();
    Dataset<Row> afterState = getRecordsFromPendingCommits(sqlContext, partitionsModified, writeMetadata, table, instantTime).cache();
    Stream<SparkPreCommitValidator> validators = Arrays.stream(config.getPreCommitValidators().split(","))
        .map(validatorClass -> (SparkPreCommitValidator) ReflectionUtils.loadClass(validatorClass,
            new Class<?>[] {HoodieSparkTable.class, HoodieEngineContext.class, HoodieWriteConfig.class},
            table, context, config));
    boolean allSuccess = validators.map(v -> runValidatorAsync(v, writeMetadata, beforeState, afterState, instantTime))
        .map(CompletableFuture::join).reduce(true, Boolean::logicalAnd);
    if (allSuccess) {
      LOG.info("All validations succeeded");
    } else {
      LOG.error("At least one pre-commit validation failed");
      throw new HoodieValidationException("At least one pre-commit validation failed");
    }
  }
}
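Each configured validator class is instantiated reflectively through the (HoodieSparkTable, HoodieEngineContext, HoodieWriteConfig) constructor named in the Class<?>[] array above. The following is a minimal sketch of what such a custom validator could look like; the validateRecordsBeforeAndAfter hook name, the import paths, and the elided generics are assumptions about the SparkPreCommitValidator base class rather than a verified API, and may differ across Hudi versions.

// Minimal sketch of a custom pre-commit validator (assumed base-class API;
// generics elided, org.apache.hudi.* imports assumed).
public class NonEmptyCommitValidator extends SparkPreCommitValidator {

  // The constructor signature must match the Class<?>[] array used by runValidators.
  public NonEmptyCommitValidator(HoodieSparkTable table, HoodieEngineContext context, HoodieWriteConfig config) {
    super(table, context, config);
  }

  @Override
  protected void validateRecordsBeforeAndAfter(Dataset<Row> before, Dataset<Row> after,
      Set<String> partitionsAffected) {
    // Fail the commit when the pending write would leave the touched partitions empty.
    if (after.isEmpty()) {
      throw new HoodieValidationException("Pre-commit check failed: pending commit contains no records");
    }
  }
}

Registering such a validator is then a matter of adding its fully qualified class name to the comma-separated pre-commit validators option that runValidators splits on.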
Use of org.apache.hudi.exception.HoodieValidationException in project hudi by apache.
From the class TestHoodieClientOnCopyOnWriteStorage, method testClusteringInvalidConfigForSqlQuerySingleResultValidatorFailure.
@Test
public void testClusteringInvalidConfigForSqlQuerySingleResultValidatorFailure() throws Exception {
  // setup clustering config.
  HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
      .withClusteringMaxNumGroups(10)
      .withClusteringTargetPartitions(0)
      .withInlineClusteringNumCommits(1)
      .withInlineClustering(true).build();
  try {
    testInsertAndClustering(clusteringConfig, false, true, false,
        SqlQuerySingleResultPreCommitValidator.class.getName(), "", COUNT_SQL_QUERY_FOR_VALIDATION + "#802");
    fail("expected pre-commit clustering validation to fail because of count mismatch. expect 400 rows, not 802");
  } catch (HoodieValidationException e) {
    // expected
  }
}
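The string passed into the test encodes the validator's contract: the SQL query and the expected single result are joined by a "#" separator. Wiring the same convention explicitly looks like the following sketch, with the builder calls copied from the tests in this class (COUNT_SQL_QUERY_FOR_VALIDATION is whatever query constant the test base class defines):

// Sketch: a single-result SQL validator that expects the query to return 400.
// The "<query>#<expected>" convention is taken directly from the tests above.
HoodiePreCommitValidatorConfig validatorConfig = HoodiePreCommitValidatorConfig.newBuilder()
    .withPreCommitValidator(SqlQuerySingleResultPreCommitValidator.class.getName())
    .withPrecommitValidatorSingleResultSqlQueries(COUNT_SQL_QUERY_FOR_VALIDATION + "#400")
    .build();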
Use of org.apache.hudi.exception.HoodieValidationException in project hudi by apache.
From the class TestHoodieClientOnCopyOnWriteStorage, method testPreCommitValidationWithMultipleInflights.
@Test
public void testPreCommitValidationWithMultipleInflights() throws Exception {
  int numRecords = 200;
  HoodiePreCommitValidatorConfig validatorConfig = HoodiePreCommitValidatorConfig.newBuilder()
      .withPreCommitValidator(SqlQuerySingleResultPreCommitValidator.class.getName())
      .withPrecommitValidatorSingleResultSqlQueries(COUNT_SQL_QUERY_FOR_VALIDATION + "#" + 500)
      .build();
  HoodieWriteConfig config = getConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.NEVER).build())
      .withPreCommitValidatorConfig(validatorConfig)
      .build();
  String instant1 = HoodieActiveTimeline.createNewInstantTime();
  try {
    insertWithConfig(config, numRecords, instant1);
    fail("Expected validation to fail because we only insert 200 rows. Validation is configured to expect 500 rows");
  } catch (HoodieInsertException e) {
    if (e.getCause() instanceof HoodieValidationException) {
      // expected because wrong value passed
    } else {
      throw e;
    }
  }
  assertFalse(testTable.commitExists(instant1));
  assertTrue(testTable.inflightCommitExists(instant1));
  numRecords = 300;
  validatorConfig = HoodiePreCommitValidatorConfig.newBuilder()
      .withPreCommitValidator(SqlQuerySingleResultPreCommitValidator.class.getName())
      .withPrecommitValidatorSingleResultSqlQueries(COUNT_SQL_QUERY_FOR_VALIDATION + "#" + numRecords)
      .build();
  config = getConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.NEVER).build())
      .withPreCommitValidatorConfig(validatorConfig)
      .build();
  String instant2 = HoodieActiveTimeline.createNewInstantTime();
  // expect pre-commit validators to succeed. Note that the validator is expected to exclude the inflight instant1.
  insertWithConfig(config, numRecords, instant2);
  assertTrue(testTable.inflightCommitExists(instant1));
  assertTrue(testTable.commitExists(instant2));
}
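The test hinges on HoodieFailedWritesCleaningPolicy.NEVER: the first write fails validation but is never rolled back, so instant1 stays inflight, while the second write still validates cleanly because validators read only committed data. A toy in-memory restatement of the timeline the assertions rely on (illustrative plain Java, not a Hudi API):

// Illustrative sketch: the timeline state the assertions above depend on.
import java.util.LinkedHashMap;
import java.util.Map;

public class InflightTimelineSketch {
  public static void main(String[] args) {
    Map<String, String> timeline = new LinkedHashMap<>();
    timeline.put("instant1", "INFLIGHT");  // 200-row insert, failed validation, never cleaned
    timeline.put("instant2", "COMPLETED"); // 300-row insert, passed validation

    // Validators see only COMPLETED instants, so instant1's rows are excluded
    // and a COUNT(*) expectation of 300 for instant2 holds.
    long committed = timeline.values().stream().filter("COMPLETED"::equals).count();
    System.out.println(committed == 1 && "INFLIGHT".equals(timeline.get("instant1"))); // true
  }
}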
Use of org.apache.hudi.exception.HoodieValidationException in project hudi by apache.
From the class HoodieMetadataTableValidator, method doMetadataTableValidation.
public void doMetadataTableValidation() {
  boolean finalResult = true;
  metaClient.reloadActiveTimeline();
  String basePath = metaClient.getBasePath();
  Set<String> baseFilesForCleaning = Collections.emptySet();
  if (cfg.skipDataFilesForCleaning) {
    HoodieTimeline inflightCleaningTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterInflights();
    baseFilesForCleaning = inflightCleaningTimeline.getInstants().flatMap(instant -> {
      try {
        // convert inflight instant to requested and get clean plan
        instant = new HoodieInstant(HoodieInstant.State.REQUESTED, instant.getAction(), instant.getTimestamp());
        HoodieCleanerPlan cleanerPlan = CleanerUtils.getCleanerPlan(metaClient, instant);
        return cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream()
            .flatMap(cleanerFileInfoList ->
                cleanerFileInfoList.stream().map(fileInfo -> new Path(fileInfo.getFilePath()).getName()));
      } catch (IOException e) {
        throw new HoodieIOException("Error reading cleaner metadata for " + instant);
      }
      // only take care of base files here.
    }).filter(path -> {
      String fileExtension = FSUtils.getFileExtension(path);
      return HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(fileExtension);
    }).collect(Collectors.toSet());
  }
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  List<String> allPartitions = validatePartitions(engineContext, basePath);
  HoodieMetadataValidationContext metadataTableBasedContext =
      new HoodieMetadataValidationContext(engineContext, cfg, metaClient, true);
  HoodieMetadataValidationContext fsBasedContext =
      new HoodieMetadataValidationContext(engineContext, cfg, metaClient, false);
  Set<String> finalBaseFilesForCleaning = baseFilesForCleaning;
  List<Boolean> result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> {
    try {
      validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning);
      LOG.info("Metadata table validation succeeded for " + partitionPath);
      return true;
    } catch (HoodieValidationException e) {
      LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e);
      if (!cfg.ignoreFailed) {
        throw e;
      }
      return false;
    }
  }).collectAsList();
  for (Boolean res : result) {
    finalResult &= res;
  }
  if (finalResult) {
    LOG.info("Metadata table validation succeeded.");
  } else {
    LOG.warn("Metadata table validation failed.");
  }
}
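The error handling above comes down to one switch: with cfg.ignoreFailed unset, the first HoodieValidationException propagates out of the Spark job; with it set, every partition is checked and the outcomes are AND-ed together. The same reduction in isolation (standalone sketch with a hypothetical validator callback, not Hudi code; RuntimeException stands in for HoodieValidationException to keep the sketch JDK-only):

// Standalone sketch of the ignoreFailed semantics used above.
import java.util.List;
import java.util.function.Predicate;

public class IgnoreFailedSketch {

  static boolean validateAll(List<String> partitions, Predicate<String> validator, boolean ignoreFailed) {
    boolean finalResult = true;
    for (String partition : partitions) {
      boolean ok = validator.test(partition);
      if (!ok && !ignoreFailed) {
        // fail fast: mirrors rethrowing the per-partition validation exception
        throw new RuntimeException("Validation failed for " + partition);
      }
      finalResult &= ok; // collect-all mode: overall result is the AND of all partitions
    }
    return finalResult;
  }

  public static void main(String[] args) {
    List<String> partitions = List.of("2024/01/01", "2024/01/02");
    System.out.println(validateAll(partitions, p -> true, true));              // true
    System.out.println(validateAll(partitions, p -> p.endsWith("01"), true));  // false, but both partitions checked
  }
}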