Use of org.apache.hudi.integ.testsuite.dag.ExecutionContext in project hudi by apache.
The execute method of the ValidateAsyncOperations class.
@Override
public void execute(ExecutionContext executionContext, int curItrCount) throws Exception {
  // Only run the validation on the configured iteration.
  if (config.getIterationCountToExecute() == curItrCount) {
    try {
      log.warn("Executing ValidateHoodieAsyncOperations node {} with target base path {} ",
          this.getName(), executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath);
      String basePath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath;
      int maxCommitsRetained = executionContext.getHoodieTestSuiteWriter().getWriteConfig().getCleanerCommitsRetained() + 1;
      FileSystem fs = FSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration());
      HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
          .setBasePath(basePath)
          .setConf(executionContext.getJsc().hadoopConfiguration())
          .build();
      // Validate that cleaning has retained at least the configured number of commits on the active timeline.
      Option<HoodieInstant> latestCleanInstant = metaClient.getActiveTimeline()
          .filter(instant -> instant.getAction().equals(HoodieTimeline.CLEAN_ACTION))
          .lastInstant();
      if (latestCleanInstant.isPresent()) {
        log.warn("Latest clean commit " + latestCleanInstant.get());
        HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, latestCleanInstant.get());
        String earliestCommitToRetain = cleanMetadata.getEarliestCommitToRetain();
        log.warn("Earliest commit to retain : " + earliestCommitToRetain);
        long unCleanedInstants = metaClient.getActiveTimeline().filterCompletedInstants()
            .filter(instant -> HoodieTimeline.compareTimestamps(
                instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestCommitToRetain))
            .getInstants().count();
        ValidationUtils.checkArgument(unCleanedInstants >= (maxCommitsRetained + 1),
            "Total uncleaned instants " + unCleanedInstants + " mismatched with max commits retained " + (maxCommitsRetained + 1));
      }
      // Check that clean and archive metadata files actually exist under the table's .hoodie folder.
      if (config.validateArchival() || config.validateClean()) {
        final Pattern ARCHIVE_FILE_PATTERN = Pattern.compile("\\.commits_\\.archive\\..*");
        final Pattern CLEAN_FILE_PATTERN = Pattern.compile(".*\\.clean\\..*");
        String metadataPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie";
        FileStatus[] metaFileStatuses = fs.listStatus(new Path(metadataPath));
        boolean cleanFound = false;
        for (FileStatus fileStatus : metaFileStatuses) {
          Matcher cleanFileMatcher = CLEAN_FILE_PATTERN.matcher(fileStatus.getPath().getName());
          if (cleanFileMatcher.matches()) {
            cleanFound = true;
            break;
          }
        }
        String archivalPath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/.hoodie/archived";
        metaFileStatuses = fs.listStatus(new Path(archivalPath));
        boolean archFound = false;
        for (FileStatus fileStatus : metaFileStatuses) {
          Matcher archFileMatcher = ARCHIVE_FILE_PATTERN.matcher(fileStatus.getPath().getName());
          if (archFileMatcher.matches()) {
            archFound = true;
            break;
          }
        }
        if (config.validateArchival() && !archFound) {
          throw new AssertionError("Archival NotFound in " + metadataPath);
        }
        if (config.validateClean() && !cleanFound) {
          throw new AssertionError("Clean commits NotFound in " + metadataPath);
        }
      }
    } catch (Exception e) {
      log.warn("Exception thrown in ValidateHoodieAsyncOperations Node :: " + e.getCause() + ", msg :: " + e.getMessage());
      throw e;
    }
  }
}
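
The archival and clean checks above come down to listing the table's metadata folders and matching file names against the two regular expressions. Below is a minimal standalone sketch of that check; the table path /tmp/hudi_table and the class name are hypothetical, but the patterns and the Hadoop FileSystem listing mirror the node above.

import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MetadataFileCheck {
  // Same patterns as the node above: archived commit files and clean action files.
  private static final Pattern ARCHIVE_FILE_PATTERN = Pattern.compile("\\.commits_\\.archive\\..*");
  private static final Pattern CLEAN_FILE_PATTERN = Pattern.compile(".*\\.clean\\..*");

  public static void main(String[] args) throws Exception {
    String basePath = "/tmp/hudi_table"; // hypothetical table location
    FileSystem fs = new Path(basePath).getFileSystem(new Configuration());

    // Clean metadata files live directly under .hoodie.
    boolean cleanFound = false;
    for (FileStatus status : fs.listStatus(new Path(basePath + "/.hoodie"))) {
      if (CLEAN_FILE_PATTERN.matcher(status.getPath().getName()).matches()) {
        cleanFound = true;
        break;
      }
    }

    // Archived commits live under .hoodie/archived.
    boolean archiveFound = false;
    for (FileStatus status : fs.listStatus(new Path(basePath + "/.hoodie/archived"))) {
      if (ARCHIVE_FILE_PATTERN.matcher(status.getPath().getName()).matches()) {
        archiveFound = true;
        break;
      }
    }

    System.out.println("clean files present: " + cleanFound + ", archive files present: " + archiveFound);
  }
}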
Use of org.apache.hudi.integ.testsuite.dag.ExecutionContext in project hudi by apache.
The getInputDf method of the BaseValidateDatasetNode class.
private Dataset<Row> getInputDf(ExecutionContext context, SparkSession session, String inputPath) {
  String recordKeyField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key());
  String partitionPathField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.PARTITIONPATH_FIELD().key());
  // TODO: fix hard-coded fields from configs.
  // Read the input and resolve inserts, updates, etc.: group by (partition path + record key),
  // keep the row with the latest source ordering value, and drop deleted records.
  Dataset<Row> inputDf = session.read().format("avro").load(inputPath);
  ExpressionEncoder encoder = getEncoder(inputDf.schema());
  return inputDf
      .groupByKey((MapFunction<Row, String>) value ->
          value.getAs(partitionPathField) + "+" + value.getAs(recordKeyField), Encoders.STRING())
      .reduceGroups((ReduceFunction<Row>) (v1, v2) -> {
        int ts1 = v1.getAs(SchemaUtils.SOURCE_ORDERING_FIELD);
        int ts2 = v2.getAs(SchemaUtils.SOURCE_ORDERING_FIELD);
        return ts1 > ts2 ? v1 : v2;
      })
      .map((MapFunction<Tuple2<String, Row>, Row>) value -> value._2, encoder)
      .filter("_hoodie_is_deleted != true");
}
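
The getInputDf method above deduplicates the avro input: rows are grouped by a composite key of partition path and record key, reduceGroups keeps the row with the larger source ordering field so later updates win, and deleted rows are filtered out via _hoodie_is_deleted. The following minimal sketch shows the same keep-latest-per-key pattern on a toy dataset; the column names id and ts are hypothetical, and it simply collects the reduced tuples rather than mapping back to Dataset<Row> through an ExpressionEncoder as the original does.

import java.util.Arrays;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class LatestPerKeyExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local[*]").appName("latest-per-key").getOrCreate();

    // Hypothetical schema: a record key "id" and an integer ordering field "ts".
    StructType schema = new StructType()
        .add("id", DataTypes.StringType)
        .add("ts", DataTypes.IntegerType);
    Dataset<Row> df = spark.createDataFrame(Arrays.asList(
        RowFactory.create("a", 1),
        RowFactory.create("a", 3),
        RowFactory.create("b", 2)), schema);

    // Group by key and keep the row with the largest ordering value,
    // mirroring the groupByKey/reduceGroups pattern used in getInputDf.
    df.groupByKey((MapFunction<Row, String>) row -> row.getAs("id"), Encoders.STRING())
        .reduceGroups((ReduceFunction<Row>) (r1, r2) -> {
          int t1 = r1.getAs("ts");
          int t2 = r2.getAs("ts");
          return t1 >= t2 ? r1 : r2;
        })
        .collectAsList()
        .forEach(t -> System.out.println(t._1 + " -> " + t._2)); // expect a -> [a,3], b -> [b,2]

    spark.stop();
  }
}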