Search in sources:

Example 16 with HoodieTimer

Usage of org.apache.hudi.common.util.HoodieTimer in the Apache Hudi project.

From the class BaseRollbackActionExecutor, method runRollback:

/**
 * Executes the rollback described by the given plan for the given instant on the table.
 *
 * <p>Validates the instant state, transitions REQUESTED -> INFLIGHT on the timeline,
 * performs the rollback, optionally publishes the resulting metadata to the timeline,
 * and finally cleans up marker files.
 *
 * @param rollbackInstant the rollback instant; must be in REQUESTED or INFLIGHT state
 * @param rollbackPlan    the plan describing what to roll back
 * @return the rollback metadata produced for this rollback
 */
private HoodieRollbackMetadata runRollback(HoodieTable<T, I, K, O> table, HoodieInstant rollbackInstant, HoodieRollbackPlan rollbackPlan) {
    // Rollback may only proceed from a REQUESTED or INFLIGHT instant.
    ValidationUtils.checkArgument(rollbackInstant.getState().equals(HoodieInstant.State.REQUESTED) || rollbackInstant.getState().equals(HoodieInstant.State.INFLIGHT));
    // Transition REQUESTED -> INFLIGHT on the timeline before doing any work.
    final HoodieInstant inflightInstant = rollbackInstant.isRequested() ? table.getActiveTimeline().transitionRollbackRequestedToInflight(rollbackInstant) : rollbackInstant;
    // Single timer whose elapsed time is recorded in the rollback metadata.
    // (A second HoodieTimer that was started but never read has been removed as dead code.)
    HoodieTimer rollbackTimer = new HoodieTimer().startTimer();
    List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackPlan);
    HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.convertRollbackMetadata(instantTime, Option.of(rollbackTimer.endTimer()), Collections.singletonList(instantToRollback), stats);
    if (!skipTimelinePublish) {
        finishRollback(inflightInstant, rollbackMetadata);
    }
    // Finally, remove the markers post rollback.
    WriteMarkersFactory.get(config.getMarkersType(), table, instantToRollback.getTimestamp()).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    return rollbackMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)

Example 17 with HoodieTimer

Usage of org.apache.hudi.common.util.HoodieTimer in the Apache Hudi project.

From the class CopyOnWriteRollbackActionExecutor, method executeRollback:

/**
 * Executes a rollback for a copy-on-write table instant.
 *
 * <p>If the target instant is completed it is first reverted to INFLIGHT on the
 * active timeline, then the base files written by the commit are removed, the
 * bootstrap index is dropped when applicable, and the inflight/requested timeline
 * files are deleted if configured.
 *
 * @param hoodieRollbackPlan the plan describing the files to roll back
 * @return the per-partition rollback stats (empty if the instant was only REQUESTED)
 */
@Override
protected List<HoodieRollbackStat> executeRollback(HoodieRollbackPlan hoodieRollbackPlan) {
    final HoodieTimer timer = new HoodieTimer().startTimer();
    final HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
    List<HoodieRollbackStat> rollbackStats = new ArrayList<>();
    HoodieInstant resolvedInstant = instantToRollback;
    if (instantToRollback.isCompleted()) {
        // Un-publish the completed instant by reverting it to INFLIGHT.
        LOG.info("Unpublishing instant " + instantToRollback);
        resolvedInstant = activeTimeline.revertToInflight(instantToRollback);
        // Reload the meta-client so it reflects the latest timeline state.
        table.getMetaClient().reloadActiveTimeline();
    }
    // A purely REQUESTED instant never wrote data files, so nothing to delete.
    if (!resolvedInstant.isRequested()) {
        LOG.info("Clean out all base files generated for commit: " + resolvedInstant);
        rollbackStats = executeRollback(resolvedInstant, hoodieRollbackPlan);
    }
    dropBootstrapIndexIfNeeded(instantToRollback);
    // Delete the inflight/requested timeline files if enabled.
    deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, resolvedInstant);
    LOG.info("Time(in ms) taken to finish rollback " + timer.endTimer());
    return rollbackStats;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)

Example 18 with HoodieTimer

Usage of org.apache.hudi.common.util.HoodieTimer in the Apache Hudi project.

From the class HoodieKeyLookupHandle, method getBloomFilter:

/**
 * Loads the bloom filter for this handle's (partition path, file id) pair.
 *
 * <p>Reads from the metadata-table bloom filter index when enabled, otherwise
 * reads it directly from the base file's footer via a file reader.
 *
 * @return the bloom filter for the file
 * @throws HoodieIndexException if the filter is missing from the metadata index
 *                              or cannot be read from the file
 */
private BloomFilter getBloomFilter() {
    final HoodieTimer readTimer = new HoodieTimer().startTimer();
    BloomFilter filter;
    try {
        if (config.isMetadataBloomFilterIndexEnabled()) {
            // Serve the filter from the metadata table's bloom filter index.
            filter = hoodieTable.getMetadataTable()
                .getBloomFilter(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight())
                .orElseThrow(() -> new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight()));
        } else {
            // Fall back to reading the filter out of the base file itself.
            try (HoodieFileReader reader = createNewFileReader()) {
                filter = reader.readBloomFilter();
            }
        }
    } catch (IOException e) {
        throw new HoodieIndexException(String.format("Error reading bloom filter from %s", getPartitionPathFileIDPair()), e);
    }
    LOG.info(String.format("Read bloom filter from %s in %d ms", partitionPathFileIDPair, readTimer.endTimer()));
    return filter;
}
Also used : HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) HoodieIndexException(org.apache.hudi.exception.HoodieIndexException) IOException(java.io.IOException) BloomFilter(org.apache.hudi.common.bloom.BloomFilter)

Example 19 with HoodieTimer

Usage of org.apache.hudi.common.util.HoodieTimer in the Apache Hudi project.

From the class HoodieIndexUtils, method filterKeysFromFile:

/**
 * Given a list of row keys and one file, return only row keys existing in that file.
 *
 * @param filePath            - File to filter keys from
 * @param candidateRecordKeys - Candidate keys to filter
 * @return List of candidate keys that are available in the file
 * @throws HoodieIndexException if the file cannot be read
 */
public static List<String> filterKeysFromFile(Path filePath, List<String> candidateRecordKeys, Configuration configuration) throws HoodieIndexException {
    ValidationUtils.checkArgument(FSUtils.isBaseFile(filePath));
    List<String> foundRecordKeys = new ArrayList<>();
    try {
        // Load all rowKeys from the file, to double-confirm
        if (!candidateRecordKeys.isEmpty()) {
            HoodieTimer timer = new HoodieTimer().startTimer();
            // try-with-resources: the original leaked the file reader by never closing it.
            try (HoodieFileReader fileReader = HoodieFileReaderFactory.getFileReader(configuration, filePath)) {
                Set<String> fileRowKeys = fileReader.filterRowKeys(new TreeSet<>(candidateRecordKeys));
                foundRecordKeys.addAll(fileRowKeys);
            }
            LOG.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath, timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size()));
            if (LOG.isDebugEnabled()) {
                LOG.debug("Keys matching for file " + filePath + " => " + foundRecordKeys);
            }
        }
    } catch (Exception e) {
        throw new HoodieIndexException("Error checking candidate keys against file.", e);
    }
    return foundRecordKeys;
}
Also used : ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) HoodieIndexException(org.apache.hudi.exception.HoodieIndexException) HoodieIndexException(org.apache.hudi.exception.HoodieIndexException)

Example 20 with HoodieTimer

Usage of org.apache.hudi.common.util.HoodieTimer in the Apache Hudi project.

From the class AbstractTableFileSystemView, method resetFileGroupsReplaced:

/**
 * Rebuilds the mapping of replaced file groups by scanning every completed
 * REPLACE instant on the timeline and recording, per file group, the instant
 * that replaced it.
 */
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
    final HoodieTimer timer = new HoodieTimer().startTimer();
    // Only completed REPLACE instants carry replacement metadata.
    final HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
    Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = replacedTimeline.getInstants()
        .flatMap(instant -> {
            try {
                HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
                // Expand to one (fileGroupId -> instant) entry per replaced file id in each partition.
                return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream()
                        .map(fileId -> new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), fileId), instant)));
            } catch (HoodieIOException ex) {
                if (ex.getIOException() instanceof FileNotFoundException) {
                    // The REPLACE instant may have been archived, in which case
                    // getInstantDetails throws a FileNotFoundException wrapped in a
                    // HoodieIOException — skip this instant and continue.
                    LOG.warn(ex.getMessage());
                    return Stream.empty();
                }
                throw ex;
            } catch (IOException e) {
                throw new HoodieIOException("error reading commit metadata for " + instant);
            }
        })
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    resetReplacedFileGroups(replacedFileGroups);
    LOG.info("Took " + timer.endTimer() + " ms to read  " + replacedTimeline.countInstants() + " instants, " + replacedFileGroups.size() + " replaced file groups");
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) 
ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) FileNotFoundException(java.io.FileNotFoundException) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AbstractMap(java.util.AbstractMap) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Aggregations

HoodieTimer (org.apache.hudi.common.util.HoodieTimer)35 ArrayList (java.util.ArrayList)16 Path (org.apache.hadoop.fs.Path)15 IOException (java.io.IOException)14 HashMap (java.util.HashMap)12 Option (org.apache.hudi.common.util.Option)12 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)11 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)10 Map (java.util.Map)9 Pair (org.apache.hudi.common.util.collection.Pair)9 List (java.util.List)8 FileStatus (org.apache.hadoop.fs.FileStatus)8 HoodieIOException (org.apache.hudi.exception.HoodieIOException)7 LogManager (org.apache.log4j.LogManager)7 Logger (org.apache.log4j.Logger)7 Collectors (java.util.stream.Collectors)6 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)6 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)6 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)6 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)6