use of org.apache.hudi.common.data.HoodieData in project hudi by apache.
the class SparkValidatorUtils method getRecordsFromPendingCommits.
/**
* Get reads from partitions modified including any inflight commits.
* Note that this only works for COW tables
*/
public static Dataset<Row> getRecordsFromPendingCommits(SQLContext sqlContext, Set<String> partitionsAffected, HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata, HoodieTable table, String instantTime) {
// build file system view with pending commits
HoodieTablePreCommitFileSystemView fsView = new HoodieTablePreCommitFileSystemView(table.getMetaClient(), table.getHoodieView(), writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), instantTime);
List<String> newFiles = partitionsAffected.stream().flatMap(partition -> fsView.getLatestBaseFiles(partition).map(BaseFile::getPath)).collect(Collectors.toList());
if (newFiles.isEmpty()) {
return sqlContext.emptyDataFrame();
}
return readRecordsForBaseFiles(sqlContext, newFiles);
}
Aggregations