
Example 1 with HoodieClusteringException

Use of org.apache.hudi.exception.HoodieClusteringException in project hudi by apache.

Class MultipleSparkJobExecutionStrategy, method readRecordsForGroupWithLogs:

/**
 * Read records from baseFiles, apply updates and convert to RDD.
 */
private HoodieData<HoodieRecord<T>> readRecordsForGroupWithLogs(JavaSparkContext jsc, List<ClusteringOperation> clusteringOps, String instantTime) {
    HoodieWriteConfig config = getWriteConfig();
    HoodieTable table = getHoodieTable();
    return HoodieJavaRDD.of(jsc.parallelize(clusteringOps, clusteringOps.size()).mapPartitions(clusteringOpsPartition -> {
        List<Iterator<HoodieRecord<T>>> recordIterators = new ArrayList<>();
        clusteringOpsPartition.forEachRemaining(clusteringOp -> {
            long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(new SparkTaskContextSupplier(), config);
            LOG.info("MaxMemoryPerCompaction run as part of clustering => " + maxMemoryPerCompaction);
            try {
                Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
                HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
                    .withFileSystem(table.getMetaClient().getFs())
                    .withBasePath(table.getMetaClient().getBasePath())
                    .withLogFilePaths(clusteringOp.getDeltaFilePaths())
                    .withReaderSchema(readerSchema)
                    .withLatestInstantTime(instantTime)
                    .withMaxMemorySizeInBytes(maxMemoryPerCompaction)
                    .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
                    .withReverseReader(config.getCompactionReverseLogReadEnabled())
                    .withBufferSize(config.getMaxDFSStreamBufferSize())
                    .withSpillableMapBasePath(config.getSpillableMapBasePath())
                    .withPartition(clusteringOp.getPartitionPath())
                    .build();
                Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
                    ? Option.empty()
                    : Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
                HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
                recordIterators.add(getFileSliceReader(baseFileReader, scanner, readerSchema,
                    tableConfig.getPayloadClass(),
                    tableConfig.getPreCombineField(),
                    tableConfig.populateMetaFields()
                        ? Option.empty()
                        : Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()))));
            } catch (IOException e) {
                throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath() + " and " + clusteringOp.getDeltaFilePaths(), e);
            }
        });
        return new ConcatenatingIterator<>(recordIterators);
    }));
}
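
The checked IOException cannot escape the lambda passed to forEachRemaining, so the strategy rethrows it as the unchecked HoodieClusteringException, attaching both the base file path and the log file paths for diagnostics. A minimal standalone sketch of that wrapping pattern (the readFileSlice helper and the paths are hypothetical, used only for illustration):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hudi.exception.HoodieClusteringException;

public class ClusteringExceptionWrappingSketch {

    // Hypothetical stand-in for opening the base-file reader and log scanner for one clustering operation.
    private static void readFileSlice(String baseFilePath, List<String> logFilePaths) throws IOException {
        throw new IOException("simulated read failure");
    }

    public static void main(String[] args) {
        String baseFilePath = "/tmp/partition/base-file.parquet";           // hypothetical path
        List<String> logFilePaths = Arrays.asList("/tmp/partition/.log.1"); // hypothetical path
        Arrays.asList(baseFilePath).forEach(path -> {
            try {
                readFileSlice(path, logFilePaths);
            } catch (IOException e) {
                // Checked exceptions cannot cross the Consumer boundary, so wrap them in the
                // unchecked HoodieClusteringException exactly as the strategies above do.
                throw new HoodieClusteringException(
                    "Error reading input data for " + path + " and " + logFilePaths, e);
            }
        });
    }
}
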
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) ConcatenatingIterator(org.apache.hudi.client.utils.ConcatenatingIterator) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) SparkTaskContextSupplier(org.apache.hudi.client.SparkTaskContextSupplier) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) Logger(org.apache.log4j.Logger) RDDSpatialCurveSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDSpatialCurveSortPartitioner) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) PLAN_STRATEGY_SORT_COLUMNS(org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) ClusteringOperation(org.apache.hudi.common.model.ClusteringOperation) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) KeyGenUtils(org.apache.hudi.keygen.KeyGenUtils) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieFileSliceReader.getFileSliceReader(org.apache.hudi.common.table.log.HoodieFileSliceReader.getFileSliceReader) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) IndexedRecord(org.apache.avro.generic.IndexedRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) RDDCustomColumnsSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) FutureUtils(org.apache.hudi.common.util.FutureUtils) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) IOException(java.io.IOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair) Path(org.apache.hadoop.fs.Path) 

Example 2 with HoodieClusteringException

Use of org.apache.hudi.exception.HoodieClusteringException in project hudi by apache.

Class SparkRDDWriteClient, method completeClustering:

private void completeClustering(HoodieReplaceCommitMetadata metadata, HoodieTable table, String clusteringCommitTime) {
    List<HoodieWriteStat> writeStats = metadata.getPartitionToWriteStats().entrySet().stream()
        .flatMap(e -> e.getValue().stream())
        .collect(Collectors.toList());
    if (writeStats.stream().mapToLong(s -> s.getTotalWriteErrors()).sum() > 0) {
        throw new HoodieClusteringException("Clustering failed to write to files:"
            + writeStats.stream().filter(s -> s.getTotalWriteErrors() > 0L).map(s -> s.getFileId()).collect(Collectors.joining(",")));
    }
    }
    final HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());
        finalizeWrite(table, clusteringCommitTime, writeStats);
        // Update table's metadata (table)
        updateTableMetadata(table, metadata, clusteringInstant);
        // Update the table's metadata indexes
        // NOTE: This overlaps w/ metadata table (above) and will be reconciled in the future
        table.updateMetadataIndexes(context, writeStats, clusteringCommitTime);
        LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata);
        table.getActiveTimeline().transitionReplaceInflightToComplete(HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    } catch (Exception e) {
        throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
    } finally {
        this.txnManager.endTransaction(Option.of(clusteringInstant));
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (clusteringTimer != null) {
        long durationInMs = metrics.getDurationInMs(clusteringTimer.stop());
        try {
            metrics.updateCommitMetrics(HoodieActiveTimeline.parseDateFromInstantTime(clusteringCommitTime).getTime(), durationInMs, metadata, HoodieActiveTimeline.REPLACE_COMMIT_ACTION);
        } catch (ParseException e) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit compaction " + config.getBasePath() + " at time " + clusteringCommitTime, e);
        }
    }
    LOG.info("Clustering successfully on commit " + clusteringCommitTime);
}
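
Since HoodieClusteringException is unchecked, a failure in completeClustering (write errors in the stats, or an inflight replacecommit that cannot be transitioned to complete) simply propagates to whoever triggered the clustering. A hedged caller-side sketch; the client.cluster(clusteringInstant, true) call shape is an assumption about the Spark write client API and may differ across Hudi versions:

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.exception.HoodieClusteringException;

public class ClusteringCallerSketch {

    // 'client' and 'clusteringInstant' are assumed to be created elsewhere (write config, engine context, scheduled plan).
    static boolean tryCluster(SparkRDDWriteClient<?> client, String clusteringInstant) {
        try {
            client.cluster(clusteringInstant, true); // assumed call shape: execute the plan and commit it
            return true;
        } catch (HoodieClusteringException e) {
            // The replacecommit was not completed; the instant stays inflight and can be
            // retried or rolled back by a later run, depending on the table's failure policy.
            return false;
        }
    }
}
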
Also used : DistributedRegistry(org.apache.hudi.metrics.DistributedRegistry) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) Logger(org.apache.log4j.Logger) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Registry(org.apache.hudi.common.metrics.Registry) ParseException(java.text.ParseException) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieData(org.apache.hudi.common.data.HoodieData) TableServiceType(org.apache.hudi.common.model.TableServiceType) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) SparkUpgradeDowngradeHelper(org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper) CompactHelpers(org.apache.hudi.table.action.compact.CompactHelpers) SparkConf(org.apache.spark.SparkConf) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) TransactionUtils(org.apache.hudi.client.utils.TransactionUtils) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) EmbeddedTimelineService(org.apache.hudi.client.embedded.EmbeddedTimelineService) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Timer(com.codahale.metrics.Timer) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager)

Example 3 with HoodieClusteringException

Use of org.apache.hudi.exception.HoodieClusteringException in project hudi by apache.

Class JavaExecutionStrategy, method readRecordsForGroupWithLogs:

/**
 * Read records from baseFiles and apply updates.
 */
private List<HoodieRecord<T>> readRecordsForGroupWithLogs(List<ClusteringOperation> clusteringOps, String instantTime) {
    HoodieWriteConfig config = getWriteConfig();
    HoodieTable table = getHoodieTable();
    List<HoodieRecord<T>> records = new ArrayList<>();
    clusteringOps.forEach(clusteringOp -> {
        long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(new JavaTaskContextSupplier(), config);
        LOG.info("MaxMemoryPerCompaction run as part of clustering => " + maxMemoryPerCompaction);
        try {
            Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
            HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
                .withFileSystem(table.getMetaClient().getFs())
                .withBasePath(table.getMetaClient().getBasePath())
                .withLogFilePaths(clusteringOp.getDeltaFilePaths())
                .withReaderSchema(readerSchema)
                .withLatestInstantTime(instantTime)
                .withMaxMemorySizeInBytes(maxMemoryPerCompaction)
                .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
                .withReverseReader(config.getCompactionReverseLogReadEnabled())
                .withBufferSize(config.getMaxDFSStreamBufferSize())
                .withSpillableMapBasePath(config.getSpillableMapBasePath())
                .withPartition(clusteringOp.getPartitionPath())
                .build();
            Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
                ? Option.empty()
                : Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
            HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
            Iterator<HoodieRecord<T>> fileSliceReader = getFileSliceReader(baseFileReader, scanner, readerSchema,
                tableConfig.getPayloadClass(),
                tableConfig.getPreCombineField(),
                tableConfig.populateMetaFields()
                    ? Option.empty()
                    : Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp())));
            fileSliceReader.forEachRemaining(records::add);
        } catch (IOException e) {
            throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath() + " and " + clusteringOp.getDeltaFilePaths(), e);
        }
    });
    return records;
}
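
For merge-on-read file slices, the scanner merges log (delta) records into the base-file records by record key before clustering rewrites them. The following toy sketch only illustrates that merge idea; HoodieMergedLogRecordScanner is the real, payload-aware implementation (with precombine fields, deletes, and spilling to disk):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MergeByKeySketch {

    // Toy merge: records are just strings keyed by record key, and a log record replaces
    // the base record with the same key. Hudi additionally applies payload/precombine logic.
    static List<String> merge(Map<String, String> baseRecords, Map<String, String> logRecords) {
        Map<String, String> merged = new LinkedHashMap<>(baseRecords);
        merged.putAll(logRecords);
        return new ArrayList<>(merged.values());
    }

    public static void main(String[] args) {
        Map<String, String> base = new LinkedHashMap<>();
        base.put("key1", "value1@commit1");
        base.put("key2", "value2@commit1");
        Map<String, String> logs = new LinkedHashMap<>();
        logs.put("key2", "value2@commit2"); // update from a log file
        logs.put("key3", "value3@commit2"); // insert from a log file
        System.out.println(merge(base, logs)); // [value1@commit1, value2@commit2, value3@commit2]
    }
}
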
Also used : Path(org.apache.hadoop.fs.Path) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) IOException(java.io.IOException) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) HoodieTable(org.apache.hudi.table.HoodieTable) JavaTaskContextSupplier(org.apache.hudi.client.common.JavaTaskContextSupplier)

Example 4 with HoodieClusteringException

Use of org.apache.hudi.exception.HoodieClusteringException in project hudi by apache.

Class JavaExecutionStrategy, method readRecordsForGroupBaseFiles:

/**
 * Read records from baseFiles.
 */
private List<HoodieRecord<T>> readRecordsForGroupBaseFiles(List<ClusteringOperation> clusteringOps) {
    List<HoodieRecord<T>> records = new ArrayList<>();
    clusteringOps.forEach(clusteringOp -> {
        try {
            Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(getWriteConfig().getSchema()));
            HoodieFileReader<IndexedRecord> baseFileReader = HoodieFileReaderFactory.getFileReader(getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath()));
            Iterator<IndexedRecord> recordIterator = baseFileReader.getRecordIterator(readerSchema);
            recordIterator.forEachRemaining(record -> records.add(transform(record)));
        } catch (IOException e) {
            throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath() + " and " + clusteringOp.getDeltaFilePaths(), e);
        }
    });
    return records;
}
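
The transform(record) helper (not shown here) turns each Avro IndexedRecord read from the base file into a HoodieRecord so it can be rewritten into the new file groups. Below is a hedged approximation for the common case where the Hudi meta columns are populated; the exact constructors and key-generator handling in Hudi's own transform differ, so treat this as a sketch only:

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.RewriteAvroPayload;

public class TransformSketch {

    // Sketch only: read the record key and partition path from the populated meta columns and
    // wrap the Avro record in a RewriteAvroPayload (constructor shapes assumed from the imports listed above).
    static HoodieRecord<RewriteAvroPayload> toHoodieRecord(IndexedRecord indexedRecord) {
        GenericRecord record = (GenericRecord) indexedRecord;
        String recordKey = record.get("_hoodie_record_key").toString();
        String partitionPath = record.get("_hoodie_partition_path").toString();
        return new HoodieAvroRecord<>(new HoodieKey(recordKey, partitionPath), new RewriteAvroPayload(record));
    }
}
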
Also used : Path(org.apache.hadoop.fs.Path) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 5 with HoodieClusteringException

Use of org.apache.hudi.exception.HoodieClusteringException in project hudi by apache.

Class MultipleSparkJobExecutionStrategy, method readRecordsForGroupBaseFiles:

/**
 * Read records from baseFiles and convert to RDD.
 */
private HoodieData<HoodieRecord<T>> readRecordsForGroupBaseFiles(JavaSparkContext jsc, List<ClusteringOperation> clusteringOps) {
    SerializableConfiguration hadoopConf = new SerializableConfiguration(getHoodieTable().getHadoopConf());
    HoodieWriteConfig writeConfig = getWriteConfig();
    // NOTE: it's crucial to make sure that we don't capture the whole enclosing object into the
    // closure, as this might lead to issues attempting to serialize its nested fields
    return HoodieJavaRDD.of(jsc.parallelize(clusteringOps, clusteringOps.size()).mapPartitions(clusteringOpsPartition -> {
        List<Iterator<IndexedRecord>> iteratorsForPartition = new ArrayList<>();
        clusteringOpsPartition.forEachRemaining(clusteringOp -> {
            try {
                Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(writeConfig.getSchema()));
                HoodieFileReader<IndexedRecord> baseFileReader = HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(clusteringOp.getDataFilePath()));
                iteratorsForPartition.add(baseFileReader.getRecordIterator(readerSchema));
            } catch (IOException e) {
                throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath() + " and " + clusteringOp.getDeltaFilePaths(), e);
            }
        });
        return new ConcatenatingIterator<>(iteratorsForPartition);
    }).map(record -> transform(record, writeConfig)));
}
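
The Hadoop Configuration itself is not java.io.Serializable, which is why it is wrapped in SerializableConfiguration before being captured by the mapPartitions closure (see the note at the top of the method). A minimal sketch of that wrapper pattern, assuming Hudi's class behaves like the usual Spark SerializableWritable-style wrapper; this is not Hudi's exact implementation:

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import org.apache.hadoop.conf.Configuration;

public class SerializableConfigurationSketch implements Serializable {

    // The wrapped Configuration is transient because it cannot be serialized by Java directly;
    // it is written and restored through Hadoop's own Writable protocol instead.
    private transient Configuration conf;

    public SerializableConfigurationSketch(Configuration conf) {
        this.conf = conf;
    }

    public Configuration get() {
        return conf;
    }

    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
        conf.write(out); // Configuration implements Writable, so it serializes itself
    }

    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        conf = new Configuration(false);
        conf.readFields(in); // rebuild the configuration on the executor side
    }
}
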
Also used : Path(org.apache.hadoop.fs.Path) HoodieTable(org.apache.hudi.table.HoodieTable) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) ConcatenatingIterator(org.apache.hudi.client.utils.ConcatenatingIterator) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) SparkTaskContextSupplier(org.apache.hudi.client.SparkTaskContextSupplier) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) Logger(org.apache.log4j.Logger) RDDSpatialCurveSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDSpatialCurveSortPartitioner) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) PLAN_STRATEGY_SORT_COLUMNS(org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) ClusteringOperation(org.apache.hudi.common.model.ClusteringOperation) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) KeyGenUtils(org.apache.hudi.keygen.KeyGenUtils) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieFileSliceReader.getFileSliceReader(org.apache.hudi.common.table.log.HoodieFileSliceReader.getFileSliceReader) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) IndexedRecord(org.apache.avro.generic.IndexedRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) RDDCustomColumnsSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) FutureUtils(org.apache.hudi.common.util.FutureUtils) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) IOException(java.io.IOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair)

Aggregations

HoodieClusteringException (org.apache.hudi.exception.HoodieClusteringException) 6
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 5
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 5
IOException (java.io.IOException) 4
ArrayList (java.util.ArrayList) 4
Schema (org.apache.avro.Schema) 4
Path (org.apache.hadoop.fs.Path) 4
HoodieData (org.apache.hudi.common.data.HoodieData) 4
HoodieTable (org.apache.hudi.table.HoodieTable) 4
List (java.util.List) 3
Map (java.util.Map) 3
Collectors (java.util.stream.Collectors) 3
IndexedRecord (org.apache.avro.generic.IndexedRecord) 3
HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext) 3
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext) 3
HoodieKey (org.apache.hudi.common.model.HoodieKey) 3
HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload) 3
Option (org.apache.hudi.common.util.Option) 3
Iterator (java.util.Iterator) 2
CompletableFuture (java.util.concurrent.CompletableFuture) 2