
Example 46 with Pair

use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

the class HoodieParquetInputFormat method createBootstrappingRecordReader.

private RecordReader<NullWritable, ArrayWritable> createBootstrappingRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    BootstrapBaseFileSplit eSplit = (BootstrapBaseFileSplit) split;
    String[] rawColNames = HoodieColumnProjectionUtils.getReadColumnNames(job);
    List<Integer> rawColIds = HoodieColumnProjectionUtils.getReadColumnIDs(job);
    List<Pair<Integer, String>> projectedColsWithIndex = IntStream.range(0, rawColIds.size()).mapToObj(idx -> Pair.of(rawColIds.get(idx), rawColNames[idx])).collect(Collectors.toList());
    List<Pair<Integer, String>> hoodieColsProjected = projectedColsWithIndex.stream().filter(idxWithName -> HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue())).collect(Collectors.toList());
    List<Pair<Integer, String>> externalColsProjected = projectedColsWithIndex.stream().filter(idxWithName -> !HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue()) && !HoodieHiveUtils.VIRTUAL_COLUMN_NAMES.contains(idxWithName.getValue())).collect(Collectors.toList());
    // This always matches hive table description
    List<Pair<String, String>> colNameWithTypes = HoodieColumnProjectionUtils.getIOColumnNameAndTypes(job);
    List<Pair<String, String>> colNamesWithTypesForExternal = colNameWithTypes.stream().filter(p -> !HoodieRecord.HOODIE_META_COLUMNS.contains(p.getKey())).collect(Collectors.toList());
    LOG.info("colNameWithTypes =" + colNameWithTypes + ", Num Entries =" + colNameWithTypes.size());
    if (hoodieColsProjected.isEmpty()) {
        return getRecordReaderInternal(eSplit.getBootstrapFileSplit(), job, reporter);
    } else if (externalColsProjected.isEmpty()) {
        return getRecordReaderInternal(split, job, reporter);
    } else {
        FileSplit rightSplit = eSplit.getBootstrapFileSplit();
        // Hive PPD works at the row-group level and is only enabled when hive.optimize.index.filter=true.
        // That config is disabled by default, but when enabled it would cause misalignment between the
        // skeleton and bootstrap files. We disable it specifically when a query needs the bootstrap and
        // skeleton files to be stitched.
        // The following unset calls disable row-group filtering for this split.
        JobConf jobConfCopy = new JobConf(job);
        jobConfCopy.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
        jobConfCopy.unset(ConvertAstToSearchArg.SARG_PUSHDOWN);
        LOG.info("Generating column stitching reader for " + eSplit.getPath() + " and " + rightSplit.getPath());
        return new BootstrapColumnStichingRecordReader(getRecordReaderInternal(eSplit, jobConfCopy, reporter), HoodieRecord.HOODIE_META_COLUMNS.size(), getRecordReaderInternal(rightSplit, jobConfCopy, reporter), colNamesWithTypesForExternal.size(), true);
    }
}
Also used : IntStream(java.util.stream.IntStream) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ConvertAstToSearchArg(org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg) NullWritable(org.apache.hadoop.io.NullWritable) Reporter(org.apache.hadoop.mapred.Reporter) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) JobConf(org.apache.hadoop.mapred.JobConf) Logger(org.apache.log4j.Logger) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) RecordReader(org.apache.hadoop.mapred.RecordReader) LogManager(org.apache.log4j.LogManager) HoodieHiveUtils(org.apache.hudi.hadoop.utils.HoodieHiveUtils) ArrayWritable(org.apache.hadoop.io.ArrayWritable) Pair(org.apache.hudi.common.util.collection.Pair)
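The Pair usage in this example boils down to zipping each projected column id with its name and then partitioning the resulting pairs by whether the name is a Hudi metadata column. The following is a minimal standalone sketch of that pattern; the class name, column ids, and data-column names are invented for illustration, it assumes hudi-common is on the classpath, and the virtual-column filter from the snippet is omitted for brevity.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.collection.Pair;

public class ProjectionSplitSketch {
    public static void main(String[] args) {
        // Hypothetical projection as Hive would hand it to the InputFormat: column ids and names.
        List<Integer> colIds = Arrays.asList(0, 5, 6);
        String[] colNames = {"_hoodie_commit_time", "rider", "fare"};

        // Zip ids with names into Pairs, as the snippet above does.
        List<Pair<Integer, String>> projected = IntStream.range(0, colIds.size())
            .mapToObj(i -> Pair.of(colIds.get(i), colNames[i]))
            .collect(Collectors.toList());

        // Partition the pairs into Hudi metadata columns and everything else.
        List<Pair<Integer, String>> metaCols = projected.stream()
            .filter(p -> HoodieRecord.HOODIE_META_COLUMNS.contains(p.getValue()))
            .collect(Collectors.toList());
        List<Pair<Integer, String>> dataCols = projected.stream()
            .filter(p -> !HoodieRecord.HOODIE_META_COLUMNS.contains(p.getValue()))
            .collect(Collectors.toList());

        System.out.println("meta columns: " + metaCols + ", data columns: " + dataCols);
    }
}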

Example 47 with Pair

use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

the class CleanPlanActionExecutor method requestClean.

/**
 * Generates List of files to be cleaned.
 *
 * @param context HoodieEngineContext
 * @return Cleaner Plan
 */
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
    try {
        CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
        Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();
        context.setJobStatus(this.getClass().getSimpleName(), "Obtaining list of partitions to be cleaned");
        List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
        if (partitionsToClean.isEmpty()) {
            LOG.info("Nothing to clean here. It is already clean");
            return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
        }
        LOG.info("Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
        int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
        LOG.info("Using cleanerParallelism: " + cleanerParallelism);
        context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned");
        Map<String, List<HoodieCleanFileInfo>> cleanOps = context.map(partitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)), cleanerParallelism).stream().collect(Collectors.toMap(Pair::getKey, y -> CleanerUtils.convertToHoodieCleanFileInfoList(y.getValue())));
        return new HoodieCleanerPlan(earliestInstant.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null), config.getCleanerPolicy().name(), CollectionUtils.createImmutableMap(), CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps);
    } catch (IOException e) {
        throw new HoodieIOException("Failed to schedule clean operation", e);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCleanFileInfo(org.apache.hudi.avro.model.HoodieCleanFileInfo) Collectors(java.util.stream.Collectors) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Logger(org.apache.log4j.Logger) List(java.util.List) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair)
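The central Pair idiom in this example is producing one Pair.of(partitionPath, deletePaths) per partition and then collapsing the pairs into a Map keyed by Pair::getKey. A minimal sketch of that collapse with plain Java streams follows; the class name and sample partitions are invented, and a placeholder value stands in for planner.getDeletePaths.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hudi.common.util.collection.Pair;

public class PairToMapSketch {
    public static void main(String[] args) {
        List<String> partitionsToClean = Arrays.asList("2021/01/01", "2021/01/02");

        // Pair each partition with a placeholder value standing in for
        // planner.getDeletePaths(partitionPathToClean) in the snippet above,
        // then collapse the pairs into a map keyed by partition path.
        Map<String, Integer> cleanOps = partitionsToClean.stream()
            .map(partition -> Pair.of(partition, partition.length()))
            .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

        System.out.println(cleanOps);
    }
}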

Example 48 with Pair

use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

the class BaseRollbackHelper method maybeDeleteAndCollectStats.

/**
 * Maybe deletes the files of interest and collects stats, or only collects stats.
 *
 * @param context           instance of {@link HoodieEngineContext} to use.
 * @param instantToRollback {@link HoodieInstant} of interest for which deletion or stats collection is requested.
 * @param rollbackRequests  List of {@link ListingBasedRollbackRequest} to be operated on.
 * @param doDelete          {@code true} if deletion has to be done. {@code false} if only stats are to be collected w/o performing any deletes.
 * @return stats collected with or w/o actual deletions.
 */
List<Pair<String, HoodieRollbackStat>> maybeDeleteAndCollectStats(HoodieEngineContext context, HoodieInstant instantToRollback, List<SerializableHoodieRollbackRequest> rollbackRequests, boolean doDelete, int numPartitions) {
    return context.flatMap(rollbackRequests, (SerializableFunction<SerializableHoodieRollbackRequest, Stream<Pair<String, HoodieRollbackStat>>>) rollbackRequest -> {
        List<String> filesToBeDeleted = rollbackRequest.getFilesToBeDeleted();
        if (!filesToBeDeleted.isEmpty()) {
            List<HoodieRollbackStat> rollbackStats = deleteFiles(metaClient, filesToBeDeleted, doDelete);
            List<Pair<String, HoodieRollbackStat>> partitionToRollbackStats = new ArrayList<>();
            rollbackStats.forEach(entry -> partitionToRollbackStats.add(Pair.of(entry.getPartitionPath(), entry)));
            return partitionToRollbackStats.stream();
        } else if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) {
            HoodieLogFormat.Writer writer = null;
            try {
                String fileId = rollbackRequest.getFileId();
                String latestBaseInstant = rollbackRequest.getLatestBaseInstant();
                writer = HoodieLogFormat.newWriterBuilder().onParentPath(FSUtils.getPartitionPath(metaClient.getBasePath(), rollbackRequest.getPartitionPath())).withFileId(fileId).overBaseCommit(latestBaseInstant).withFs(metaClient.getFs()).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
                if (doDelete) {
                    Map<HoodieLogBlock.HeaderMetadataType, String> header = generateHeader(instantToRollback.getTimestamp());
                    writer.appendBlock(new HoodieCommandBlock(header));
                }
            } catch (IOException | InterruptedException io) {
                throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, io);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException io) {
                    throw new HoodieIOException("Error appending rollback block", io);
                }
            }
            Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L);
            return Collections.singletonList(Pair.of(rollbackRequest.getPartitionPath(), HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()).withRollbackBlockAppendResults(filesToNumBlocksRollback).build())).stream();
        } else {
            return Collections.singletonList(Pair.of(rollbackRequest.getPartitionPath(), HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()).build())).stream();
        }
    }, numPartitions);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) PathFilter(org.apache.hadoop.fs.PathFilter) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) SerializableFunction(org.apache.hudi.common.function.SerializableFunction) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieRollbackRequest(org.apache.hudi.avro.model.HoodieRollbackRequest) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
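maybeDeleteAndCollectStats returns a flat stream of (partitionPath, HoodieRollbackStat) pairs, which a caller would typically regroup by partition. A minimal sketch of that regrouping follows; the class name and sample data are invented, and plain strings stand in for the HoodieRollbackStat objects.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hudi.common.util.collection.Pair;

public class RollbackStatGroupingSketch {
    public static void main(String[] args) {
        // Pairs shaped like the ones maybeDeleteAndCollectStats emits: partition path -> stat.
        // Plain strings stand in for HoodieRollbackStat instances.
        List<Pair<String, String>> partitionToStat = Arrays.asList(
            Pair.of("2021/01/01", "statA"),
            Pair.of("2021/01/01", "statB"),
            Pair.of("2021/01/02", "statC"));

        // Regroup by partition path, keeping only the stat values.
        Map<String, List<String>> statsByPartition = partitionToStat.stream()
            .collect(Collectors.groupingBy(Pair::getKey,
                Collectors.mapping(Pair::getValue, Collectors.toList())));

        System.out.println(statsByPartition);
    }
}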

Example 49 with Pair

use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

the class BaseCommitActionExecutor method saveWorkloadProfileMetadataToInflight.

/**
 * Save the workload profile in an intermediate file (here re-using commit files). This is useful when performing
 * rollback for MOR tables. Only updates are recorded in the workload profile metadata, since updates to log blocks
 * are unknown across batches. Inserts (which are new parquet files) are rolled back based on commit time.
 * TODO: Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata.
 */
void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, String instantTime) throws HoodieCommitException {
    try {
        HoodieCommitMetadata metadata = new HoodieCommitMetadata();
        profile.getOutputPartitionPaths().forEach(path -> {
            WorkloadStat partitionStat = profile.getOutputWorkloadStat(path);
            HoodieWriteStat insertStat = new HoodieWriteStat();
            insertStat.setNumInserts(partitionStat.getNumInserts());
            insertStat.setFileId("");
            insertStat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
            metadata.addWriteStat(path, insertStat);
            Map<String, Pair<String, Long>> updateLocationMap = partitionStat.getUpdateLocationToCount();
            Map<String, Pair<String, Long>> insertLocationMap = partitionStat.getInsertLocationToCount();
            Stream.concat(updateLocationMap.keySet().stream(), insertLocationMap.keySet().stream()).distinct().forEach(fileId -> {
                HoodieWriteStat writeStat = new HoodieWriteStat();
                writeStat.setFileId(fileId);
                Pair<String, Long> updateLocation = updateLocationMap.get(fileId);
                Pair<String, Long> insertLocation = insertLocationMap.get(fileId);
                // TODO : is writing baseCommitTime possible here?
                writeStat.setPrevCommit(updateLocation != null ? updateLocation.getKey() : insertLocation.getKey());
                if (updateLocation != null) {
                    writeStat.setNumUpdateWrites(updateLocation.getValue());
                }
                if (insertLocation != null) {
                    writeStat.setNumInserts(insertLocation.getValue());
                }
                metadata.addWriteStat(path, writeStat);
            });
        });
        metadata.setOperationType(operationType);
        HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
        String commitActionType = getCommitActionType();
        HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime);
        activeTimeline.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)), config.shouldAllowMultiWriteOnSameInstant());
    } catch (IOException io) {
        throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io);
    }
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) WorkloadStat(org.apache.hudi.table.WorkloadStat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Pair(org.apache.hudi.common.util.collection.Pair)
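The Pairs in this example carry (previous commit time, record count) per fileId, and the snippet merges the update and insert location maps by walking the union of their key sets. A minimal sketch of that merge shape follows; the class name, file ids, commit times, and counts are invented stand-ins for the WorkloadStat lookups.

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

import org.apache.hudi.common.util.collection.Pair;

public class LocationMergeSketch {
    public static void main(String[] args) {
        // fileId -> (previous commit time, record count), shaped like the WorkloadStat maps.
        Map<String, Pair<String, Long>> updateLocationMap = new HashMap<>();
        Map<String, Pair<String, Long>> insertLocationMap = new HashMap<>();
        updateLocationMap.put("file-1", Pair.of("20220101120000", 10L));
        insertLocationMap.put("file-2", Pair.of("20220101120000", 25L));

        // Walk the union of file ids and read the commit time from whichever map has an entry,
        // mirroring the merge in saveWorkloadProfileMetadataToInflight.
        Stream.concat(updateLocationMap.keySet().stream(), insertLocationMap.keySet().stream())
            .distinct()
            .forEach(fileId -> {
                Pair<String, Long> updateLocation = updateLocationMap.get(fileId);
                Pair<String, Long> insertLocation = insertLocationMap.get(fileId);
                String prevCommit = updateLocation != null ? updateLocation.getKey() : insertLocation.getKey();
                long count = updateLocation != null ? updateLocation.getValue() : insertLocation.getValue();
                System.out.println(fileId + " -> prevCommit=" + prevCommit + ", count=" + count);
            });
    }
}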

Example 50 with Pair

use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

the class HoodieTable method reconcileAgainstMarkers.

/**
 * Reconciles WriteStats and marker files to detect and safely delete duplicate data files created because of Spark
 * retries.
 *
 * @param context HoodieEngineContext
 * @param instantTs Instant Timestamp
 * @param stats Hoodie Write Stat
 * @param consistencyCheckEnabled Consistency Check Enabled
 * @throws HoodieIOException
 */
protected void reconcileAgainstMarkers(HoodieEngineContext context, String instantTs, List<HoodieWriteStat> stats, boolean consistencyCheckEnabled) throws HoodieIOException {
    try {
        // Reconcile marker and data files with WriteStats so that partially written data-files due to failed
        // (but succeeded on retry) tasks are removed.
        String basePath = getMetaClient().getBasePath();
        WriteMarkers markers = WriteMarkersFactory.get(config.getMarkersType(), this, instantTs);
        if (!markers.doesMarkerDirExist()) {
            // can happen if it was an empty write, for example.
            return;
        }
        // we are not including log appends here, since they are already fail-safe.
        Set<String> invalidDataPaths = getInvalidDataPaths(markers);
        Set<String> validDataPaths = stats.stream().map(HoodieWriteStat::getPath).filter(p -> p.endsWith(this.getBaseFileExtension())).collect(Collectors.toSet());
        // Contains the list of partially created files. These need to be cleaned up.
        invalidDataPaths.removeAll(validDataPaths);
        if (!invalidDataPaths.isEmpty()) {
            LOG.info("Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
            Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream().map(dp -> Pair.of(new Path(basePath, dp).getParent().toString(), new Path(basePath, dp).toString())).collect(Collectors.groupingBy(Pair::getKey));
            // Ensure every file slated for deletion is actually present; otherwise we may miss deleting some of them. If files are not found even after retries, fail the commit.
            if (consistencyCheckEnabled) {
                // This ensures all files to be deleted are present.
                waitForAllFiles(context, invalidPathsByPartition, FileVisibility.APPEAR);
            }
            // Now delete partially written files
            context.setJobStatus(this.getClass().getSimpleName(), "Delete all partially written files");
            deleteInvalidFilesByPartitions(context, invalidPathsByPartition);
            // Now ensure the deleted files disappear
            if (consistencyCheckEnabled) {
                // This ensures all files to be deleted are absent.
                waitForAllFiles(context, invalidPathsByPartition, FileVisibility.DISAPPEAR);
            }
        }
    } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used : HoodieRestorePlan(org.apache.hudi.avro.model.HoodieRestorePlan) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodiePendingRollbackInfo(org.apache.hudi.common.HoodiePendingRollbackInfo) ConsistencyGuard(org.apache.hudi.common.fs.ConsistencyGuard) TimeoutException(java.util.concurrent.TimeoutException) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata) Logger(org.apache.log4j.Logger) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLayoutFactory(org.apache.hudi.table.storage.HoodieLayoutFactory) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) WriteMarkers(org.apache.hudi.table.marker.WriteMarkers) Schema(org.apache.avro.Schema) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) FileSystemViewManager(org.apache.hudi.common.table.view.FileSystemViewManager) Serializable(java.io.Serializable) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) List(java.util.List) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) OptimisticConsistencyGuard(org.apache.hudi.common.fs.OptimisticConsistencyGuard) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieStorageLayout(org.apache.hudi.table.storage.HoodieStorageLayout) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) HoodieBootstrapWriteMetadata(org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileVisibility(org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Function(java.util.function.Function) FailSafeConsistencyGuard(org.apache.hudi.common.fs.FailSafeConsistencyGuard) ArrayList(java.util.ArrayList) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Nonnull(javax.annotation.Nonnull) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) ConsistencyGuardConfig(org.apache.hudi.common.fs.ConsistencyGuardConfig) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) TaskContextSupplier(org.apache.hudi.common.engine.TaskContextSupplier) SpecificRecordBase(org.apache.avro.specific.SpecificRecordBase) IOException(java.io.IOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieKey(org.apache.hudi.common.model.HoodieKey) Functions(org.apache.hudi.common.util.Functions) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair)
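The Pair usage in reconcileAgainstMarkers maps each invalid data path to a (partition directory, full path) pair and groups the pairs by partition with Collectors.groupingBy(Pair::getKey). A minimal sketch of that grouping follows; the class name, base path, and file names are invented for illustration, and it assumes hadoop-common and hudi-common are on the classpath.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.collection.Pair;

public class InvalidPathGroupingSketch {
    public static void main(String[] args) {
        // Hypothetical base path and invalid (partially written) data files.
        String basePath = "/tmp/hudi_table";
        List<String> invalidDataPaths = Arrays.asList(
            "2021/01/01/dup-file-1.parquet",
            "2021/01/01/dup-file-2.parquet",
            "2021/01/02/dup-file-3.parquet");

        // Same grouping as reconcileAgainstMarkers: build (partition directory, full path) pairs
        // and collect them into a map keyed by the partition directory.
        Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream()
            .map(dp -> Pair.of(new Path(basePath, dp).getParent().toString(),
                               new Path(basePath, dp).toString()))
            .collect(Collectors.groupingBy(Pair::getKey));

        invalidPathsByPartition.forEach((partition, pairs) -> System.out.println(partition + " -> " + pairs));
    }
}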

Aggregations

Pair (org.apache.hudi.common.util.collection.Pair) 147
List (java.util.List) 98
Map (java.util.Map) 91
IOException (java.io.IOException) 89
Collectors (java.util.stream.Collectors) 87
Option (org.apache.hudi.common.util.Option) 87
ArrayList (java.util.ArrayList) 85
Path (org.apache.hadoop.fs.Path) 81
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 76
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 66
HashMap (java.util.HashMap) 65
LogManager (org.apache.log4j.LogManager) 64
Logger (org.apache.log4j.Logger) 64
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 63
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 58
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 54
HoodieIOException (org.apache.hudi.exception.HoodieIOException) 54
Arrays (java.util.Arrays) 48
HoodieTable (org.apache.hudi.table.HoodieTable) 46
Test (org.junit.jupiter.api.Test) 46