Use of org.apache.hudi.table.action.HoodieWriteMetadata in project hudi by apache.
The class SparkBulkInsertHelper, method bulkInsert:
@Override
public HoodieWriteMetadata<HoodieData<WriteStatus>> bulkInsert(
    final HoodieData<HoodieRecord<T>> inputRecords,
    final String instantTime,
    final HoodieTable<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> table,
    final HoodieWriteConfig config,
    final BaseCommitActionExecutor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, R> executor,
    final boolean performDedupe,
    final Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
  HoodieWriteMetadata<HoodieData<WriteStatus>> result = new HoodieWriteMetadata<>();
  // transition bulk_insert state to inflight
  table.getActiveTimeline().transitionRequestedToInflight(
      new HoodieInstant(HoodieInstant.State.REQUESTED, executor.getCommitActionType(), instantTime),
      Option.empty(), config.shouldAllowMultiWriteOnSameInstant());
  // write new files
  HoodieData<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config,
      performDedupe, userDefinedBulkInsertPartitioner, false,
      config.getBulkInsertShuffleParallelism(), new CreateHandleFactory(false));
  // update index
  ((BaseSparkCommitActionExecutor) executor).updateIndexAndCommitIfNeeded(writeStatuses, result);
  return result;
}
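For context, the HoodieWriteMetadata returned here carries the write statuses produced by the bulk insert and, after the commit step, the aggregated write stats. Below is a minimal caller-side sketch of invoking the helper and checking the result for failed records; the error check and the surrounding variables (inputRecords, table, config, executor) are illustrative assumptions, while newInstance(), getWriteStatuses(), and WriteStatus#getTotalErrorRecords are existing Hudi APIs:

// A hedged sketch: assumes inputRecords, instantTime, table, config and
// executor are already set up as in the method above.
HoodieWriteMetadata<HoodieData<WriteStatus>> result =
    SparkBulkInsertHelper.newInstance().bulkInsert(
        inputRecords, instantTime, table, config, executor, true, Option.empty());
// Sum up records that failed to write across all write statuses.
long totalErrors = result.getWriteStatuses()
    .map(WriteStatus::getTotalErrorRecords)
    .collectAsList().stream()
    .mapToLong(Long::longValue)
    .sum();
if (totalErrors > 0) {
  // Failing fast here is an illustrative policy choice, not Hudi behavior.
  throw new HoodieException("Bulk insert produced " + totalErrors + " error records");
}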
Use of org.apache.hudi.table.action.HoodieWriteMetadata in project hudi by apache.
The class SparkValidatorUtils, method getRecordsFromPendingCommits:
/**
 * Get records from the modified partitions, including any inflight commits.
 * Note that this only works for COW tables.
 */
public static Dataset<Row> getRecordsFromPendingCommits(SQLContext sqlContext,
                                                        Set<String> partitionsAffected,
                                                        HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata,
                                                        HoodieTable table,
                                                        String instantTime) {
  // build file system view with pending commits
  HoodieTablePreCommitFileSystemView fsView = new HoodieTablePreCommitFileSystemView(
      table.getMetaClient(), table.getHoodieView(), writeMetadata.getWriteStats().get(),
      writeMetadata.getPartitionToReplaceFileIds(), instantTime);
  List<String> newFiles = partitionsAffected.stream()
      .flatMap(partition -> fsView.getLatestBaseFiles(partition).map(BaseFile::getPath))
      .collect(Collectors.toList());
  if (newFiles.isEmpty()) {
    return sqlContext.emptyDataFrame();
  }
  return readRecordsForBaseFiles(sqlContext, newFiles);
}
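A pre-commit validator would typically compare this pending view against the committed table state. A hedged sketch of one way to drive it, where only getRecordsFromPendingCommits comes from the snippet above, and the partition extraction plus the final assertion are illustrative assumptions (HoodieWriteStat#getPartitionPath and HoodieValidationException are existing Hudi APIs):

// Illustrative: derive the affected partitions from the write stats, then
// read the pending records. Assumes sqlContext, writeMetadata, table and
// instantTime are in scope.
Set<String> partitionsAffected = writeMetadata.getWriteStats().get().stream()
    .map(HoodieWriteStat::getPartitionPath)
    .collect(Collectors.toSet());
Dataset<Row> pendingRows = SparkValidatorUtils.getRecordsFromPendingCommits(
    sqlContext, partitionsAffected, writeMetadata, table, instantTime);
// Hypothetical check: a non-empty write should be visible in the pending view.
if (!partitionsAffected.isEmpty() && pendingRows.count() == 0) {
  throw new HoodieValidationException("No records visible for pending commit " + instantTime);
}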