Search in sources :

Example 1 with HoodieTablePreCommitFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTablePreCommitFileSystemView in the Apache Hudi project.

From the class SparkValidatorUtils, the method getRecordsFromPendingCommits.

/**
 * Reads the records of the latest base files in the affected partitions, as seen through a
 * pre-commit file system view that also accounts for the commit currently in flight.
 * Note that this only works for COW tables.
 *
 * @param sqlContext         SQL context used to produce the resulting dataset
 * @param partitionsAffected partitions whose latest base files should be read
 * @param writeMetadata      metadata of the pending write; supplies the write stats and the
 *                           partition-to-replaced-file-ids mapping for the pre-commit view
 * @param table              table providing the meta client and the committed file system view
 * @param instantTime        instant time passed to the pre-commit view
 * @return the records read from the collected base files, or an empty data frame when no base
 *         file is found in any of the affected partitions
 */
public static Dataset<Row> getRecordsFromPendingCommits(SQLContext sqlContext, Set<String> partitionsAffected, HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata, HoodieTable table, String instantTime) {
    // File system view layered over the table's view so that not-yet-committed files are visible.
    // NOTE(review): getWriteStats().get() presumes the write stats are present - confirm with callers.
    HoodieTablePreCommitFileSystemView preCommitView = new HoodieTablePreCommitFileSystemView(
        table.getMetaClient(),
        table.getHoodieView(),
        writeMetadata.getWriteStats().get(),
        writeMetadata.getPartitionToReplaceFileIds(),
        instantTime);

    // Gather the path of every latest base file across the affected partitions.
    List<String> latestBaseFilePaths = partitionsAffected.stream()
        .flatMap(partitionPath -> preCommitView.getLatestBaseFiles(partitionPath))
        .map(BaseFile::getPath)
        .collect(Collectors.toList());

    if (!latestBaseFilePaths.isEmpty()) {
        return readRecordsForBaseFiles(sqlContext, latestBaseFilePaths);
    }
    // Nothing to read: hand back an empty data frame.
    return sqlContext.emptyDataFrame();
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) Arrays(java.util.Arrays) Dataset(org.apache.spark.sql.Dataset) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieValidationException(org.apache.hudi.exception.HoodieValidationException) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) SQLContext(org.apache.spark.sql.SQLContext) Set(java.util.Set) Row(org.apache.spark.sql.Row) Collectors(java.util.stream.Collectors) WriteStatus(org.apache.hudi.client.WriteStatus) SparkPreCommitValidator(org.apache.hudi.client.validator.SparkPreCommitValidator) List(java.util.List) Stream(java.util.stream.Stream) HoodieTablePreCommitFileSystemView(org.apache.hudi.common.table.view.HoodieTablePreCommitFileSystemView) JavaConverters(scala.collection.JavaConverters) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) LogManager(org.apache.log4j.LogManager) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTablePreCommitFileSystemView(org.apache.hudi.common.table.view.HoodieTablePreCommitFileSystemView)

Aggregations

Arrays (java.util.Arrays)1 List (java.util.List)1 Set (java.util.Set)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 WriteStatus (org.apache.hudi.client.WriteStatus)1 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)1 SparkPreCommitValidator (org.apache.hudi.client.validator.SparkPreCommitValidator)1 HoodieData (org.apache.hudi.common.data.HoodieData)1 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)1 BaseFile (org.apache.hudi.common.model.BaseFile)1 HoodieTablePreCommitFileSystemView (org.apache.hudi.common.table.view.HoodieTablePreCommitFileSystemView)1 ReflectionUtils (org.apache.hudi.common.util.ReflectionUtils)1 StringUtils (org.apache.hudi.common.util.StringUtils)1 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)1 HoodieValidationException (org.apache.hudi.exception.HoodieValidationException)1 HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable)1 HoodieTable (org.apache.hudi.table.HoodieTable)1 HoodieWriteMetadata (org.apache.hudi.table.action.HoodieWriteMetadata)1