Example 96 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BaseHoodieWriteClient method finalizeWrite.

/**
 * Finalize Write operation.
 *
 * @param table HoodieTable
 * @param instantTime Instant Time
 * @param stats List of Hoodie Write Stats
 */
protected void finalizeWrite(HoodieTable table, String instantTime, List<HoodieWriteStat> stats) {
    try {
        final Timer.Context finalizeCtx = metrics.getFinalizeCtx();
        table.finalizeWrite(context, instantTime, stats);
        if (finalizeCtx != null) {
            Option<Long> durationInMs = Option.of(metrics.getDurationInMs(finalizeCtx.stop()));
            durationInMs.ifPresent(duration -> {
                LOG.info("Finalize write elapsed time (milliseconds): " + duration);
                metrics.updateFinalizeWriteMetrics(duration, stats.size());
            });
        }
    } catch (HoodieIOException ioe) {
        throw new HoodieCommitException("Failed to complete commit " + instantTime + " due to finalize errors.", ioe);
    }
}
Also used : HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Timer(com.codahale.metrics.Timer)
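
The pattern above, reduced to its essentials: an I/O failure during finalize is surfaced as a commit failure, with the original exception preserved as the cause. A minimal, self-contained sketch; finalizeStep and the instant time are hypothetical stand-ins for table.finalizeWrite and a real instant:

import java.io.IOException;
import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieIOException;

public class FinalizeWritePattern {

    // Hypothetical stand-in for table.finalizeWrite(context, instantTime, stats);
    // it always fails here to exercise the catch block.
    private static void finalizeStep(String instantTime) {
        throw new HoodieIOException("simulated failure", new IOException("disk error"));
    }

    public static void main(String[] args) {
        String instantTime = "20220101000000";
        try {
            finalizeStep(instantTime);
        } catch (HoodieIOException ioe) {
            // Same wrap-and-rethrow as finalizeWrite: the HoodieIOException
            // rides along as the cause of the HoodieCommitException.
            throw new HoodieCommitException(
                "Failed to complete commit " + instantTime + " due to finalize errors.", ioe);
        }
    }
}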

Example 97 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class CompactionAdminClient method validateCompactionPlan.

/**
 * Validate all compaction operations in a compaction plan. Verifies the file-slices are consistent with corresponding
 * compaction operations.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param parallelism Parallelism for validating the compaction operations
 */
public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism) throws IOException {
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    if (plan.getOperations() != null) {
        List<CompactionOperation> ops = plan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
        context.setJobStatus(this.getClass().getSimpleName(), "Validate compaction operations");
        return context.map(ops, op -> {
            try {
                return validateCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView));
            } catch (IOException e) {
                throw new HoodieIOException(e.getMessage(), e);
            }
        }, parallelism);
    }
    return new ArrayList<>();
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)
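
A hedged usage sketch of the method above. The client/meta-client wiring and the isSuccess()/getException() accessors are assumptions about the surrounding Hudi admin API, not taken from this snippet:

import java.io.IOException;
import java.util.List;
import org.apache.hudi.client.CompactionAdminClient;
import org.apache.hudi.client.CompactionAdminClient.ValidationOpResult;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class ValidatePlanExample {
    public static void validate(CompactionAdminClient client,
                                HoodieTableMetaClient metaClient) throws IOException {
        // "20220101000000" is a hypothetical compaction instant; 2 is an arbitrary parallelism.
        List<ValidationOpResult> results =
            client.validateCompactionPlan(metaClient, "20220101000000", 2);
        for (ValidationOpResult result : results) {
            // isSuccess()/getException() are assumed from the OperationResult base class.
            if (!result.isSuccess()) {
                System.err.println("Invalid compaction operation: " + result.getException());
            }
        }
    }
}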

Example 98 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class CompactionAdminClient method getRenamingActionsForUnschedulingCompactionPlan.

/**
 * Generate renaming actions for unscheduling a pending compaction plan. NOTE: Can only be used safely when no writer
 * (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param compactionInstant Compaction Instant to be unscheduled
 * @param parallelism Parallelism for generating the renaming actions
 * @param fsViewOpt Cached File System View
 * @param skipValidation Skip Validation
 * @return list of (old, new) log-file pairs; each pair must be renamed to successfully unschedule the
 *         compaction.
 */
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
    HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    if (plan.getOperations() != null) {
        LOG.info("Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
        List<CompactionOperation> ops = plan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
        context.setJobStatus(this.getClass().getSimpleName(), "Generate compaction unscheduling operations");
        return context.flatMap(ops, op -> {
            try {
                return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView), skipValidation).stream();
            } catch (IOException ioe) {
                throw new HoodieIOException(ioe.getMessage(), ioe);
            } catch (CompactionValidationException ve) {
                throw new HoodieException(ve);
            }
        }, parallelism);
    }
    LOG.warn("No operations for compaction instant : " + compactionInstant);
    return new ArrayList<>();
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)
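
Because the method only computes the rename plan, it can be used as a dry run before actually unscheduling. A hedged sketch (the instant time and parallelism are placeholder values; nothing here mutates the table):

import java.io.IOException;
import java.util.List;
import org.apache.hudi.client.CompactionAdminClient;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class UnscheduleDryRun {
    public static void printRenamePlan(CompactionAdminClient client,
                                       HoodieTableMetaClient metaClient) throws IOException {
        // Option.empty() lets the method build its own file-system view.
        List<Pair<HoodieLogFile, HoodieLogFile>> renames =
            client.getRenamingActionsForUnschedulingCompactionPlan(
                metaClient, "20220101000000", 2, Option.empty(), false);
        for (Pair<HoodieLogFile, HoodieLogFile> rename : renames) {
            // Each pair is (current log file, log file after unscheduling).
            System.out.println(rename.getLeft().getPath() + " -> " + rename.getRight().getPath());
        }
    }
}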

Example 99 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class HoodieMergeHandle method initializeIncomingRecordsMap.

/**
 * Initialize a spillable map for incoming records.
 */
protected void initializeIncomingRecordsMap() {
    try {
        // Load the new records in a map
        long memoryForMerge = IOUtils.getMaxMemoryPerPartitionMerge(taskContextSupplier, config);
        LOG.info("MaxMemoryPerPartitionMerge => " + memoryForMerge);
        this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(), new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(tableSchema), config.getCommonConfig().getSpillableDiskMapType(), config.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
    } catch (IOException io) {
        throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
    }
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator)
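
A self-contained sketch of constructing the same spillable map outside the merge handle, keeping the IOException-to-HoodieIOException wrapping intact. The memory budget and spill path are placeholder values, not Hudi defaults:

import java.io.IOException;
import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
import org.apache.hudi.exception.HoodieIOException;

public class SpillableMapExample {
    public static ExternalSpillableMap<String, String> create() {
        try {
            return new ExternalSpillableMap<>(
                16 * 1024 * 1024L,                        // placeholder in-memory budget (~16 MB)
                "/tmp/hudi-spill",                        // placeholder base path for spill files
                new DefaultSizeEstimator<>(),             // key size estimator
                new DefaultSizeEstimator<>(),             // value size estimator
                ExternalSpillableMap.DiskMapType.BITCASK, // spill to a BitCask-style disk map
                false);                                   // disk-map compression disabled
        } catch (IOException io) {
            // Same wrapping as initializeIncomingRecordsMap.
            throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
        }
    }
}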

Example 100 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class AbstractTableFileSystemView method resetFileGroupsReplaced.

/**
 * Get replaced instant for each file group by looking at all commit instants.
 */
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
    HoodieTimer hoodieTimer = new HoodieTimer();
    hoodieTimer.startTimer();
    // for each REPLACE instant, get map of (partitionPath -> deleteFileGroup)
    HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
    Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = replacedTimeline.getInstants().flatMap(instant -> {
        try {
            HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
            // get replace instant mapping for each partition, fileId
            return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream().flatMap(entry -> entry.getValue().stream().map(e -> new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), e), instant)));
        } catch (HoodieIOException ex) {
            if (ex.getIOException() instanceof FileNotFoundException) {
                // The replace instant may have been deleted by the archiver, in which case
                // getInstantDetails throws a FileNotFoundException; catch it here and continue.
                LOG.warn(ex.getMessage());
                return Stream.empty();
            } else {
                throw ex;
            }
        } catch (IOException e) {
            throw new HoodieIOException("error reading commit metadata for " + instant);
        }
    });
    Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    resetReplacedFileGroups(replacedFileGroups);
    LOG.info("Took " + hoodieTimer.endTimer() + " ms to read  " + replacedTimeline.countInstants() + " instants, " + replacedFileGroups.size() + " replaced file groups");
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
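
The interesting part of this example is how it distinguishes a missing (archived) instant from a genuine I/O failure. A minimal sketch of that unwrap-and-inspect pattern; the message strings are illustrative:

import java.io.FileNotFoundException;
import java.util.stream.Stream;
import org.apache.hudi.exception.HoodieIOException;

public class ArchivedInstantPattern {

    static Stream<String> readInstantDetails() {
        try {
            // Stand-in for metaClient.getActiveTimeline().getInstantDetails(instant):
            // here it always fails as if the instant had already been archived.
            throw new HoodieIOException("instant details unreadable",
                new FileNotFoundException("details file missing"));
        } catch (HoodieIOException ex) {
            // Same check as resetFileGroupsReplaced: a wrapped FileNotFoundException
            // means the instant was archived, so skip it instead of failing the view.
            if (ex.getIOException() instanceof FileNotFoundException) {
                return Stream.empty();
            }
            throw ex;
        }
    }

    public static void main(String[] args) {
        System.out.println(readInstantDetails().count()); // prints 0: the miss was skipped
    }
}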

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17