
Example 6 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

The class SingleSparkJobExecutionStrategy, method performClustering.

@Override
public HoodieWriteMetadata<HoodieData<WriteStatus>> performClustering(final HoodieClusteringPlan clusteringPlan, final Schema schema, final String instantTime) {
    JavaSparkContext engineContext = HoodieSparkEngineContext.getSparkContext(getEngineContext());
    final TaskContextSupplier taskContextSupplier = getEngineContext().getTaskContextSupplier();
    final SerializableSchema serializableSchema = new SerializableSchema(schema);
    final List<ClusteringGroupInfo> clusteringGroupInfos = clusteringPlan.getInputGroups().stream().map(clusteringGroup -> ClusteringGroupInfo.create(clusteringGroup)).collect(Collectors.toList());
    // Preserve the umask configured on the driver so executor tasks create files with the same permissions.
    String umask = engineContext.hadoopConfiguration().get("fs.permissions.umask-mode");
    Broadcast<String> umaskBroadcastValue = engineContext.broadcast(umask);
    // One Spark partition per clustering group, so each group is executed independently.
    JavaRDD<ClusteringGroupInfo> groupInfoJavaRDD = engineContext.parallelize(clusteringGroupInfos, clusteringGroupInfos.size());
    LOG.info("number of partitions for clustering " + groupInfoJavaRDD.getNumPartitions());
    JavaRDD<WriteStatus> writeStatusRDD = groupInfoJavaRDD.mapPartitions(clusteringOps -> {
        // Executor side: a fresh Configuration does not see driver settings, so re-apply the broadcast umask.
        Configuration configuration = new Configuration();
        configuration.set("fs.permissions.umask-mode", umaskBroadcastValue.getValue());
        Iterable<ClusteringGroupInfo> clusteringOpsIterable = () -> clusteringOps;
        List<ClusteringGroupInfo> groupsInPartition = StreamSupport.stream(clusteringOpsIterable.spliterator(), false).collect(Collectors.toList());
        return groupsInPartition.stream().flatMap(clusteringOp -> runClusteringForGroup(clusteringOp, clusteringPlan.getStrategy().getStrategyParams(), Option.ofNullable(clusteringPlan.getPreserveHoodieMetadata()).orElse(false), serializableSchema, taskContextSupplier, instantTime)).iterator();
    });
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = new HoodieWriteMetadata<>();
    writeMetadata.setWriteStatuses(HoodieJavaRDD.of(writeStatusRDD));
    return writeMetadata;
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) KeyGenUtils(org.apache.hudi.keygen.KeyGenUtils) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) ConcatenatingIterator(org.apache.hudi.client.utils.ConcatenatingIterator) SerializableSchema(org.apache.hudi.common.config.SerializableSchema) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) Logger(org.apache.log4j.Logger) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) StreamSupport(java.util.stream.StreamSupport) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) IndexedRecord(org.apache.avro.generic.IndexedRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) Broadcast(org.apache.spark.broadcast.Broadcast) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) Schema(org.apache.avro.Schema) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) TaskContextSupplier(org.apache.hudi.common.engine.TaskContextSupplier) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) ClusteringOperation(org.apache.hudi.common.model.ClusteringOperation) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) WriteStatus(org.apache.hudi.client.WriteStatus) ClusteringGroupInfo(org.apache.hudi.common.model.ClusteringGroupInfo) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) Stream(java.util.stream.Stream) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager)
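
The umask handling above is the detail worth studying: each task builds a fresh Configuration on the executor, so a driver-side Hadoop setting only reaches the tasks if it is shipped explicitly via a Spark broadcast. The following is a minimal, self-contained sketch of that pattern, not part of the Hudi code; the class name and sample values are illustrative.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

public class BroadcastUmaskSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(
                new SparkConf().setAppName("broadcast-umask-sketch").setMaster("local[2]"));
        // Read the driver-side setting once; "022" is just an illustrative default.
        String umask = jsc.hadoopConfiguration().get("fs.permissions.umask-mode", "022");
        Broadcast<String> umaskBroadcast = jsc.broadcast(umask);
        List<Integer> groups = Arrays.asList(1, 2, 3);
        // One partition per group, mirroring the parallelize call in performClustering.
        List<String> results = jsc.parallelize(groups, groups.size()).mapPartitions(iter -> {
            // Executor side: a new Configuration would not see the driver's umask,
            // so re-apply the broadcast value before any file system work.
            Configuration conf = new Configuration();
            conf.set("fs.permissions.umask-mode", umaskBroadcast.getValue());
            List<String> out = new ArrayList<>();
            while (iter.hasNext()) {
                out.add("group " + iter.next() + " ran with umask " + conf.get("fs.permissions.umask-mode"));
            }
            return out.iterator();
        }).collect();
        results.forEach(System.out::println);
        jsc.stop();
    }
}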

Example 7 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

The class SparkRDDWriteClient, method cluster.

@Override
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
    preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
    HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
    if (pendingClusteringTimeline.containsInstant(inflightInstant)) {
        rollbackInflightClustering(inflightInstant, table);
        table.getMetaClient().reloadActiveTimeline();
    }
    clusteringTimer = metrics.getClusteringCtx();
    LOG.info("Starting clustering at " + clusteringInstant);
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = table.cluster(context, clusteringInstant);
    HoodieWriteMetadata<JavaRDD<WriteStatus>> clusteringMetadata = writeMetadata.clone(HoodieJavaRDD.getJavaRDD(writeMetadata.getWriteStatuses()));
    if (shouldComplete && clusteringMetadata.getCommitMetadata().isPresent()) {
        completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), table, clusteringInstant);
    }
    return clusteringMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) JavaRDD(org.apache.spark.api.java.JavaRDD)
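
For context, a caller typically obtains the clustering instant from the scheduling API before invoking cluster. Below is a hedged sketch of such a driver, assuming a fully configured SparkRDDWriteClient with clustering enabled; scheduleClustering comes from the shared write-client base class, but verify the exact signatures against your Hudi version.

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.util.Option;

public class ClusteringDriverSketch {
    // Schedules a clustering plan and, if one was created, executes it to completion.
    // Assumes `client` was built elsewhere with clustering enabled in its write config.
    static void clusterIfScheduled(SparkRDDWriteClient<?> client) {
        Option<String> instantTime = client.scheduleClustering(Option.empty());
        if (instantTime.isPresent()) {
            // shouldComplete = true lets the client commit the replacecommit itself.
            client.cluster(instantTime.get(), true);
        }
    }
}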

Example 8 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

The class SparkRDDWriteClient, method compact.

@Override
protected HoodieWriteMetadata<JavaRDD<WriteStatus>> compact(String compactionInstantTime, boolean shouldComplete) {
    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, true);
    preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
    HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
    if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
        table.rollbackInflightCompaction(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
        table.getMetaClient().reloadActiveTimeline();
    }
    compactionTimer = metrics.getCompactionCtx();
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = table.compact(context, compactionInstantTime);
    HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeMetadata.clone(HoodieJavaRDD.getJavaRDD(writeMetadata.getWriteStatuses()));
    if (shouldComplete && compactionMetadata.getCommitMetadata().isPresent()) {
        completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), table, compactionInstantTime);
    }
    return compactionMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) JavaRDD(org.apache.spark.api.java.JavaRDD)
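
The compaction path mirrors the clustering path one example up: schedule an instant, then execute it. A hedged sketch of a minimal driver follows, again assuming a configured client and a merge-on-read table, since compaction only applies there; treat the scheduling and execution signatures as assumptions to check against your Hudi version.

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.util.Option;

public class CompactionDriverSketch {
    // Schedules a compaction and, if an instant was created, runs it.
    static void compactIfScheduled(SparkRDDWriteClient<?> client) {
        Option<String> instantTime = client.scheduleCompaction(Option.empty());
        if (instantTime.isPresent()) {
            // The public overload delegates to the protected compact(...) shown above,
            // with shouldComplete taken from the write config's auto-commit setting.
            client.compact(instantTime.get());
        }
    }
}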

Example 9 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

The class SparkValidatorUtils, method runValidators.

/**
 * Check configured pre-commit validators and run them. Note that this only works for COW tables.
 *
 * Throws an error if there are validation failures.
 */
public static void runValidators(HoodieWriteConfig config, HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata, HoodieEngineContext context, HoodieTable table, String instantTime) {
    if (StringUtils.isNullOrEmpty(config.getPreCommitValidators())) {
        LOG.info("no validators configured.");
    } else {
        if (!writeMetadata.getWriteStats().isPresent()) {
            writeMetadata.setWriteStats(writeMetadata.getWriteStatuses().map(WriteStatus::getStat).collectAsList());
        }
        Set<String> partitionsModified = writeMetadata.getWriteStats().get().stream().map(writeStats -> writeStats.getPartitionPath()).collect(Collectors.toSet());
        SQLContext sqlContext = new SQLContext(HoodieSparkEngineContext.getSparkContext(context));
        // Refresh the timeline to ensure the validator sees any other operations done on it (async operations such as concurrent clustering/compaction/rollback)
        table.getMetaClient().reloadActiveTimeline();
        Dataset<Row> beforeState = getRecordsFromCommittedFiles(sqlContext, partitionsModified, table).cache();
        Dataset<Row> afterState = getRecordsFromPendingCommits(sqlContext, partitionsModified, writeMetadata, table, instantTime).cache();
        Stream<SparkPreCommitValidator> validators = Arrays.stream(config.getPreCommitValidators().split(",")).map(validatorClass -> {
            return ((SparkPreCommitValidator) ReflectionUtils.loadClass(validatorClass, new Class<?>[] { HoodieSparkTable.class, HoodieEngineContext.class, HoodieWriteConfig.class }, table, context, config));
        });
        boolean allSuccess = validators.map(v -> runValidatorAsync(v, writeMetadata, beforeState, afterState, instantTime)).map(CompletableFuture::join).reduce(true, Boolean::logicalAnd);
        if (allSuccess) {
            LOG.info("All validations succeeded");
        } else {
            LOG.error("At least one pre-commit validation failed");
            throw new HoodieValidationException("At least one pre-commit validation failed");
        }
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) Arrays(java.util.Arrays) Dataset(org.apache.spark.sql.Dataset) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieValidationException(org.apache.hudi.exception.HoodieValidationException) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) SQLContext(org.apache.spark.sql.SQLContext) Set(java.util.Set) Row(org.apache.spark.sql.Row) Collectors(java.util.stream.Collectors) WriteStatus(org.apache.hudi.client.WriteStatus) SparkPreCommitValidator(org.apache.hudi.client.validator.SparkPreCommitValidator) List(java.util.List) Stream(java.util.stream.Stream) HoodieTablePreCommitFileSystemView(org.apache.hudi.common.table.view.HoodieTablePreCommitFileSystemView) JavaConverters(scala.collection.JavaConverters) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) LogManager(org.apache.log4j.LogManager)
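
Nothing runs unless config.getPreCommitValidators() is non-empty. A hedged sketch of wiring a validator into the write config follows; the property key hoodie.precommit.validators and the SqlQueryEqualityPreCommitValidator class are assumptions based on Hudi's pre-commit validator config, so confirm both against your Hudi version.

import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

public class ValidatorConfigSketch {
    // Builds a write config with one pre-commit validator registered.
    static HoodieWriteConfig withValidator(String basePath) {
        Properties props = new Properties();
        // Comma-separated list of validator class names, split exactly as runValidators does.
        props.setProperty("hoodie.precommit.validators",
                "org.apache.hudi.client.validator.SqlQueryEqualityPreCommitValidator");
        return HoodieWriteConfig.newBuilder()
                .withPath(basePath)
                .withProps(props)
                .build();
    }
}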

Example 10 with HoodieData

Use of org.apache.hudi.common.data.HoodieData in project hudi by apache.

The class HoodieTableMetadataUtil, method convertFilesToBloomFilterRecords.

/**
 * Convert added and deleted files metadata to bloom filter index records.
 */
public static HoodieData<HoodieRecord> convertFilesToBloomFilterRecords(HoodieEngineContext engineContext, Map<String, List<String>> partitionToDeletedFiles, Map<String, Map<String, Long>> partitionToAppendedFiles, MetadataRecordsGenerationParams recordsGenerationParams, String instantTime) {
    HoodieData<HoodieRecord> allRecordsRDD = engineContext.emptyHoodieData();
    List<Pair<String, List<String>>> partitionToDeletedFilesList = partitionToDeletedFiles.entrySet().stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList());
    int parallelism = Math.max(Math.min(partitionToDeletedFilesList.size(), recordsGenerationParams.getBloomIndexParallelism()), 1);
    HoodieData<Pair<String, List<String>>> partitionToDeletedFilesRDD = engineContext.parallelize(partitionToDeletedFilesList, parallelism);
    HoodieData<HoodieRecord> deletedFilesRecordsRDD = partitionToDeletedFilesRDD.flatMap(partitionToDeletedFilesPair -> {
        final String partitionName = partitionToDeletedFilesPair.getLeft();
        final List<String> deletedFileList = partitionToDeletedFilesPair.getRight();
        return deletedFileList.stream().flatMap(deletedFile -> {
            if (!FSUtils.isBaseFile(new Path(deletedFile))) {
                return Stream.empty();
            }
            final String partition = getPartition(partitionName);
            return Stream.<HoodieRecord>of(HoodieMetadataPayload.createBloomFilterMetadataRecord(partition, deletedFile, instantTime, StringUtils.EMPTY_STRING, ByteBuffer.allocate(0), true));
        }).iterator();
    });
    allRecordsRDD = allRecordsRDD.union(deletedFilesRecordsRDD);
    List<Pair<String, Map<String, Long>>> partitionToAppendedFilesList = partitionToAppendedFiles.entrySet().stream().map(entry -> Pair.of(entry.getKey(), entry.getValue())).collect(Collectors.toList());
    parallelism = Math.max(Math.min(partitionToAppendedFilesList.size(), recordsGenerationParams.getBloomIndexParallelism()), 1);
    HoodieData<Pair<String, Map<String, Long>>> partitionToAppendedFilesRDD = engineContext.parallelize(partitionToAppendedFilesList, parallelism);
    HoodieData<HoodieRecord> appendedFilesRecordsRDD = partitionToAppendedFilesRDD.flatMap(partitionToAppendedFilesPair -> {
        final String partitionName = partitionToAppendedFilesPair.getLeft();
        final Map<String, Long> appendedFileMap = partitionToAppendedFilesPair.getRight();
        final String partition = getPartition(partitionName);
        return appendedFileMap.entrySet().stream().flatMap(appendedFileLengthPairEntry -> {
            final String appendedFile = appendedFileLengthPairEntry.getKey();
            if (!FSUtils.isBaseFile(new Path(appendedFile))) {
                return Stream.empty();
            }
            final String pathWithPartition = partitionName + "/" + appendedFile;
            final Path appendedFilePath = new Path(recordsGenerationParams.getDataMetaClient().getBasePath(), pathWithPartition);
            try (HoodieFileReader<IndexedRecord> fileReader = HoodieFileReaderFactory.getFileReader(recordsGenerationParams.getDataMetaClient().getHadoopConf(), appendedFilePath)) {
                final BloomFilter fileBloomFilter = fileReader.readBloomFilter();
                if (fileBloomFilter == null) {
                    LOG.error("Failed to read bloom filter for " + appendedFilePath);
                    return Stream.empty();
                }
                ByteBuffer bloomByteBuffer = ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes());
                HoodieRecord record = HoodieMetadataPayload.createBloomFilterMetadataRecord(partition, appendedFile, instantTime, recordsGenerationParams.getBloomFilterType(), bloomByteBuffer, false);
                return Stream.of(record);
            } catch (IOException e) {
                LOG.error("Failed to get bloom filter for file: " + appendedFilePath);
            }
            return Stream.empty();
        }).iterator();
    });
    allRecordsRDD = allRecordsRDD.union(appendedFilesRecordsRDD);
    return allRecordsRDD;
}
Also used : HoodieColumnRangeMetadata(org.apache.hudi.common.model.HoodieColumnRangeMetadata) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BiFunction(java.util.function.BiFunction) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) ByteBuffer(java.nio.ByteBuffer) MAX(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX) Logger(org.apache.log4j.Logger) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) Collectors(java.util.stream.Collectors) TOTAL_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE) Objects(java.util.Objects) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) VALUE_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) TOTAL_UNCOMPRESSED_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE) EMPTY_PARTITION_NAME(org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) NULL_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) LinkedList(java.util.LinkedList) Nonnull(javax.annotation.Nonnull) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieAvroUtils.getNestedFieldValAsString(org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString) GenericRecord(org.apache.avro.generic.GenericRecord) MIN(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN) HoodieData(org.apache.hudi.common.data.HoodieData) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) ParquetUtils(org.apache.hudi.common.util.ParquetUtils) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) NON_PARTITIONED_NAME(org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) COLUMN_RANGE_MERGE_FUNCTION(org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION) HoodieDeltaWriteStat(org.apache.hudi.common.model.HoodieDeltaWriteStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
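
One small idiom recurs twice above: clamping the parallelism with Math.max(Math.min(listSize, configuredParallelism), 1), so a huge file list cannot exceed the configured bloom index parallelism and an empty list still yields the positive slice count Spark requires. A tiny standalone illustration follows; the class and method names are illustrative.

public class ParallelismClampSketch {
    // Clamp parallelism to the range [1, configuredParallelism].
    static int clampParallelism(int workItems, int configuredParallelism) {
        return Math.max(Math.min(workItems, configuredParallelism), 1);
    }

    public static void main(String[] args) {
        System.out.println(clampParallelism(0, 200));   // 1: Spark rejects a non-positive slice count
        System.out.println(clampParallelism(50, 200));  // 50: one slice per work item
        System.out.println(clampParallelism(500, 200)); // 200: capped at the configured parallelism
    }
}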

Aggregations

HoodieData (org.apache.hudi.common.data.HoodieData): 36 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 24 usages
WriteStatus (org.apache.hudi.client.WriteStatus): 22 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 22 usages
List (java.util.List): 21 usages
HoodieTable (org.apache.hudi.table.HoodieTable): 20 usages
HoodieKey (org.apache.hudi.common.model.HoodieKey): 18 usages
LogManager (org.apache.log4j.LogManager): 18 usages
Logger (org.apache.log4j.Logger): 18 usages
IOException (java.io.IOException): 17 usages
Collectors (java.util.stream.Collectors): 17 usages
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext): 17 usages
Option (org.apache.hudi.common.util.Option): 17 usages
Map (java.util.Map): 16 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 16 usages
HoodieWriteMetadata (org.apache.hudi.table.action.HoodieWriteMetadata): 16 usages
JavaRDD (org.apache.spark.api.java.JavaRDD): 16 usages
Pair (org.apache.hudi.common.util.collection.Pair): 15 usages
HoodieJavaRDD (org.apache.hudi.data.HoodieJavaRDD): 15 usages
HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload): 14 usages