
Example 16 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

From the class BaseSparkCommitActionExecutor, method mapPartitionsAsRDD.

private HoodieData<WriteStatus> mapPartitionsAsRDD(HoodieData<HoodieRecord<T>> dedupedRecords, Partitioner partitioner) {
    JavaPairRDD<Tuple2<HoodieKey, Option<HoodieRecordLocation>>, HoodieRecord<T>> mappedRDD =
        HoodieJavaPairRDD.getJavaPairRDD(dedupedRecords.mapToPair(record ->
            Pair.of(new Tuple2<>(record.getKey(), Option.ofNullable(record.getCurrentLocation())), record)));
    JavaPairRDD<Tuple2<HoodieKey, Option<HoodieRecordLocation>>, HoodieRecord<T>> partitionedRDD;
    if (table.requireSortedRecords()) {
        // Partition and sort within each partition as a single step. This is faster than partitioning first and then
        // applying a sort.
        Comparator<Tuple2<HoodieKey, Option<HoodieRecordLocation>>> comparator = (Comparator<Tuple2<HoodieKey, Option<HoodieRecordLocation>>> & Serializable) (t1, t2) -> {
            HoodieKey key1 = t1._1;
            HoodieKey key2 = t2._1;
            return key1.getRecordKey().compareTo(key2.getRecordKey());
        };
        partitionedRDD = mappedRDD.repartitionAndSortWithinPartitions(partitioner, comparator);
    } else {
        // Partition only
        partitionedRDD = mappedRDD.partitionBy(partitioner);
    }
    return HoodieJavaRDD.of(partitionedRDD.map(Tuple2::_2).mapPartitionsWithIndex((partition, recordItr) -> {
        if (WriteOperationType.isChangingRecords(operationType)) {
            return handleUpsertPartition(instantTime, partition, recordItr, partitioner);
        } else {
            return handleInsertPartition(instantTime, partition, recordItr, partitioner);
        }
    }, true).flatMap(List::iterator));
}
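The comment in mapPartitionsAsRDD claims that partitioning and sorting in one step beats partitioning followed by a separate sort. Below is a minimal, self-contained Spark sketch of that contrast, using plain String/Integer pairs instead of Hudi types; PartitionSortSketch and its local[2] context are illustrative assumptions, not Hudi code.

import java.util.Arrays;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class PartitionSortSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "partition-sort-sketch");
        JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>("b", 2), new Tuple2<>("a", 1), new Tuple2<>("c", 3)));
        // One shuffle: each record lands in its target partition already sorted by key
        // (natural ordering of the key, mirroring the comparator in the Hudi example).
        JavaPairRDD<String, Integer> sorted =
            pairs.repartitionAndSortWithinPartitions(new HashPartitioner(2));
        // The slower alternative the comment warns about would be
        // pairs.partitionBy(new HashPartitioner(2)) followed by a separate
        // per-partition sort, i.e. a second pass over the shuffled data.
        System.out.println(sorted.collect());
        sc.stop();
    }
}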

Example 17 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

From the class SparkBulkInsertHelper, method bulkInsert.

@Override
public HoodieData<WriteStatus> bulkInsert(HoodieData<HoodieRecord<T>> inputRecords, String instantTime,
        HoodieTable<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> table,
        HoodieWriteConfig config, boolean performDedupe, Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner,
        boolean useWriterSchema, int parallelism, WriteHandleFactory writeHandleFactory) {
    // De-dupe/merge if needed
    HoodieData<HoodieRecord<T>> dedupedRecords = inputRecords;
    if (performDedupe) {
        dedupedRecords = (HoodieData<HoodieRecord<T>>) HoodieWriteHelper.newInstance()
                .combineOnCondition(config.shouldCombineBeforeInsert(), inputRecords, parallelism, table);
    }
    final HoodieData<HoodieRecord<T>> repartitionedRecords;
    BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.isPresent()
            ? userDefinedBulkInsertPartitioner.get()
            : BulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode());
    // Only JavaRDD is supported by the Spark partitioners, but this is not enforced by the
    // BulkInsertPartitioner API; to be improved in HUDI-3463.
    repartitionedRecords = HoodieJavaRDD.of((JavaRDD<HoodieRecord<T>>) partitioner
            .repartitionRecords(HoodieJavaRDD.getJavaRDD(dedupedRecords), parallelism));
    // generate new file ID prefixes for each output partition
    final List<String> fileIDPrefixes = IntStream.range(0, parallelism)
            .mapToObj(i -> FSUtils.createNewFileIdPfx())
            .collect(Collectors.toList());
    JavaRDD<WriteStatus> writeStatusRDD = HoodieJavaRDD.getJavaRDD(repartitionedRecords)
            .mapPartitionsWithIndex(new BulkInsertMapFunction<>(instantTime, partitioner.arePartitionRecordsSorted(),
                    config, table, fileIDPrefixes, useWriterSchema, writeHandleFactory), true)
            .flatMap(List::iterator);
    return HoodieJavaRDD.of(writeStatusRDD);
}
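In Example 17, userDefinedBulkInsertPartitioner lets callers control how records are laid out before writing. Below is a minimal sketch of what such a partitioner might look like, assuming only the two methods the example actually invokes (repartitionRecords and arePartitionRecordsSorted); the real BulkInsertPartitioner interface may differ across Hudi versions.

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.spark.api.java.JavaRDD;

public class SortByRecordKeyPartitioner<T> implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> {

    @Override
    public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputPartitions) {
        // Globally sort by record key, producing the requested number of partitions.
        return records.sortBy(HoodieRecord::getRecordKey, true, outputPartitions);
    }

    @Override
    public boolean arePartitionRecordsSorted() {
        // Tells the writer that records arrive sorted, so it can use sorted write handles.
        return true;
    }
}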

Example 18 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

From the class SparkDeletePartitionCommitActionExecutor, method execute.

@Override
public HoodieWriteMetadata<HoodieData<WriteStatus>> execute() {
    HoodieTimer timer = new HoodieTimer().startTimer();
    context.setJobStatus(this.getClass().getSimpleName(), "Gathering all file ids from partitions being deleted.");
    Map<String, List<String>> partitionToReplaceFileIds = HoodieJavaPairRDD.getJavaPairRDD(
            context.parallelize(partitions).distinct()
                .mapToPair(partitionPath -> Pair.of(partitionPath, getAllExistingFileIds(partitionPath))))
        .collectAsMap();
    HoodieWriteMetadata<HoodieData<WriteStatus>> result = new HoodieWriteMetadata<>();
    result.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    result.setIndexUpdateDuration(Duration.ofMillis(timer.endTimer()));
    result.setWriteStatuses(context.emptyHoodieData());
    this.saveWorkloadProfileMetadataToInflight(new WorkloadProfile(Pair.of(new HashMap<>(), new WorkloadStat())), instantTime);
    this.commitOnAutoCommit(result);
    return result;
}
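The executor above computes its replaced-file map on the driver via collectAsMap. A tiny standalone sketch of that pattern follows; listFileIds is a hypothetical stand-in for getAllExistingFileIds.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class ReplaceMapSketch {
    // Hypothetical stand-in for getAllExistingFileIds(partitionPath).
    static List<String> listFileIds(String partitionPath) {
        return Arrays.asList(partitionPath + "-file-1", partitionPath + "-file-2");
    }

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "replace-map-sketch");
        // Distribute the partition paths, pair each with its file ids, and pull
        // the (small) result back to the driver as a map.
        Map<String, List<String>> partitionToFileIds = sc.parallelize(Arrays.asList("p1", "p2"))
            .distinct()
            .mapToPair(p -> new Tuple2<>(p, listFileIds(p)))
            .collectAsMap();
        System.out.println(partitionToFileIds);
        sc.stop();
    }
}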

Example 19 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

From the class HoodieBackedTableMetadataWriter, method initialCommit.

/**
 * This is invoked to initialize the metadata table for a dataset. The bootstrap commit gets special
 * handling because of its scale relative to regular commits.
 */
private void initialCommit(String createInstantTime) {
    // List all partitions in the basePath of the containing dataset
    LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath());
    engineContext.setJobStatus(this.getClass().getSimpleName(), "Initializing metadata table by listing files and partitions");
    List<DirectoryInfo> partitionInfoList = listAllPartitions(dataMetaClient);
    List<String> partitions = new ArrayList<>();
    AtomicLong totalFiles = new AtomicLong(0);
    Map<String, Map<String, Long>> partitionToFilesMap = partitionInfoList.stream().map(p -> {
        final String partitionName = HoodieTableMetadataUtil.getPartition(p.getRelativePath());
        partitions.add(partitionName);
        totalFiles.addAndGet(p.getTotalFiles());
        return Pair.of(partitionName, p.getFileNameToSizeMap());
    }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    final Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionToRecordsMap = new HashMap<>();
    // Record which saves the list of all partitions
    HoodieRecord allPartitionRecord = HoodieMetadataPayload.createPartitionListRecord(partitions);
    if (partitions.isEmpty()) {
        // when initializing a fresh table there won't be any partitions, but we still need to make a bootstrap commit
        final HoodieData<HoodieRecord> allPartitionRecordsRDD = engineContext.parallelize(Collections.singletonList(allPartitionRecord), 1);
        partitionToRecordsMap.put(MetadataPartitionType.FILES, allPartitionRecordsRDD);
        commit(createInstantTime, partitionToRecordsMap, false);
        return;
    }
    HoodieData<HoodieRecord> filesPartitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1);
    if (!partitionInfoList.isEmpty()) {
        HoodieData<HoodieRecord> fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> {
            Map<String, Long> fileNameToSizeMap = partitionInfo.getFileNameToSizeMap();
            // filter for files that are part of the completed commits
            Map<String, Long> validFileNameToSizeMap = fileNameToSizeMap.entrySet().stream().filter(fileSizePair -> {
                String commitTime = FSUtils.getCommitTime(fileSizePair.getKey());
                return HoodieTimeline.compareTimestamps(commitTime, HoodieTimeline.LESSER_THAN_OR_EQUALS, createInstantTime);
            }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
            // Record which saves files within a partition
            return HoodieMetadataPayload.createPartitionFilesRecord(
                HoodieTableMetadataUtil.getPartition(partitionInfo.getRelativePath()),
                Option.of(validFileNameToSizeMap), Option.empty());
        });
        filesPartitionRecords = filesPartitionRecords.union(fileListRecords);
    }
    ValidationUtils.checkState(filesPartitionRecords.count() == (partitions.size() + 1));
    partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords);
    if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
        final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords(
            engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime);
        partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD);
    }
    if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
        final HoodieData<HoodieRecord> recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords(
            engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams());
        partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD);
    }
    LOG.info("Committing " + partitions.size() + " partitions and " + totalFiles + " files to metadata");
    commit(createInstantTime, partitionToRecordsMap, false);
}
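The bootstrap path above keeps only files whose embedded commit time is at or before createInstantTime. A minimal sketch of that filter in isolation follows; extractCommitTime and the simplified file-name layout are illustrative assumptions standing in for FSUtils.getCommitTime and Hudi's real base-file naming.

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;

public class CommitTimeFilterSketch {
    // Hypothetical stand-in for FSUtils.getCommitTime; assumes "<id>_<commitTime>.parquet".
    static String extractCommitTime(String fileName) {
        String base = fileName.substring(0, fileName.lastIndexOf('.'));
        return base.substring(base.lastIndexOf('_') + 1);
    }

    public static void main(String[] args) {
        String createInstantTime = "20220301000000";
        Map<String, Long> files = new HashMap<>();
        files.put("f1_20220201000000.parquet", 10L);
        files.put("f2_20220401000000.parquet", 20L); // newer than the bootstrap instant, dropped
        Map<String, Long> valid = files.entrySet().stream()
            // Instant times are fixed-width numeric strings, so lexicographic
            // comparison is chronological comparison.
            .filter(e -> extractCommitTime(e.getKey()).compareTo(createInstantTime) <= 0)
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        System.out.println(valid); // only f1 survives
    }
}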

Example 20 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

From the class HoodieBackedTableMetadataWriter, method prepRecords.

/**
 * Tag each record with its location in the given partition.
 * Each record is tagged with the location of its file slice, determined by its record key.
 */
protected HoodieData<HoodieRecord> prepRecords(Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap) {
    // The result set
    HoodieData<HoodieRecord> allPartitionRecords = engineContext.emptyHoodieData();
    HoodieTableFileSystemView fsView = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);
    for (Map.Entry<MetadataPartitionType, HoodieData<HoodieRecord>> entry : partitionRecordsMap.entrySet()) {
        final String partitionName = entry.getKey().getPartitionPath();
        final int fileGroupCount = entry.getKey().getFileGroupCount();
        HoodieData<HoodieRecord> records = entry.getValue();
        List<FileSlice> fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName);
        ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount, String.format("Invalid number of file groups for partition:%s, found=%d, required=%d", partitionName, fileSlices.size(), fileGroupCount));
        HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> {
            FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), fileGroupCount));
            r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
            return r;
        });
        allPartitionRecords = allPartitionRecords.union(rddSinglePartitionRecords);
    }
    return allPartitionRecords;
}
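prepRecords relies on HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex to route each record key to a stable file group index. Below is a minimal sketch of one plausible hash-then-modulo implementation; keyToFileGroupIndex is an illustrative stand-in, not necessarily Hudi's exact hashing.

public class FileGroupIndexSketch {
    // Plausible stand-in for mapRecordKeyToFileGroupIndex: stable non-negative
    // hash of the key, then modulo the number of file groups.
    static int keyToFileGroupIndex(String recordKey, int fileGroupCount) {
        int h = recordKey.hashCode();
        return (h & Integer.MAX_VALUE) % fileGroupCount;
    }

    public static void main(String[] args) {
        int fileGroupCount = 4;
        for (String key : new String[] {"key-a", "key-b", "key-c"}) {
            // Every record with the same key always lands in the same file group,
            // which is what lets prepRecords tag a deterministic location.
            System.out.println(key + " -> " + keyToFileGroupIndex(key, fileGroupCount));
        }
    }
}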
