
Example 86 with Option

Use of org.apache.hudi.common.util.Option in project hudi by apache.

From class HoodieTableMetadataUtil, method processRollbackMetadata:

/**
 * Extracts information about the deleted and appended files from the {@code HoodieRollbackMetadata}.
 * <p>
 * During a rollback, files may be deleted (COW, MOR) or rollback blocks may be appended to files (MOR only). This
 * function extracts these file changes for each partition.
 *
 * @param metadataTableTimeline    Current timeline of the Metadata Table
 * @param rollbackMetadata         {@code HoodieRollbackMetadata}
 * @param partitionToDeletedFiles  The {@code Map} to fill with files deleted per partition.
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 * @param lastSyncTs               The last instant time synced to the Metadata Table, if any.
 */
private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline,
                                            HoodieRollbackMetadata rollbackMetadata,
                                            Map<String, List<String>> partitionToDeletedFiles,
                                            Map<String, Map<String, Long>> partitionToAppendedFiles,
                                            Option<String> lastSyncTs) {
    rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
        final String instantToRollback = rollbackMetadata.getCommitsRollback().get(0);
        // Has this rollback produced new files?
        boolean hasRollbackLogFiles = pm.getRollbackLogFiles() != null && !pm.getRollbackLogFiles().isEmpty();
        boolean hasNonZeroRollbackLogFiles = hasRollbackLogFiles && pm.getRollbackLogFiles().values().stream().mapToLong(Long::longValue).sum() > 0;
        // If the instant-to-rollback has not been synced to the metadata table yet, there is no need to update
        // the metadata. This can happen in two cases:
        // Case 1: the Metadata Table timeline is behind the instant-to-rollback.
        boolean shouldSkip = lastSyncTs.isPresent() && HoodieTimeline.compareTimestamps(instantToRollback, HoodieTimeline.GREATER_THAN, lastSyncTs.get());
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced upto to %s", instantToRollback, lastSyncTs.get()));
            return;
        }
        // Case 2: The instant-to-rollback was never committed to Metadata Table. This can happen if the instant-to-rollback
        // was a failed commit (never completed) as only completed instants are synced to Metadata Table.
        // However, the required Metadata Table instants must not have been archived.
        HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback);
        if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) {
            throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived", syncedInstant, instantToRollback));
        }
        shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant);
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table", instantToRollback));
            return;
        }
        final String partition = pm.getPartitionPath();
        if ((!pm.getSuccessDeleteFiles().isEmpty() || !pm.getFailedDeleteFiles().isEmpty()) && !shouldSkip) {
            if (!partitionToDeletedFiles.containsKey(partition)) {
                partitionToDeletedFiles.put(partition, new ArrayList<>());
            }
            // Extract deleted file name from the absolute paths saved in getSuccessDeleteFiles()
            List<String> deletedFiles = pm.getSuccessDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList());
            if (!pm.getFailedDeleteFiles().isEmpty()) {
                deletedFiles.addAll(pm.getFailedDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList()));
            }
            partitionToDeletedFiles.get(partition).addAll(deletedFiles);
        }
        BiFunction<Long, Long, Long> fileMergeFn = (oldSize, newSizeCopy) -> {
            // Keep the larger size, as the rollback log file could have been updated after
            // the written log file sizes were computed.
            return oldSize > newSizeCopy ? oldSize : newSizeCopy;
        };
        if (hasRollbackLogFiles) {
            if (!partitionToAppendedFiles.containsKey(partition)) {
                partitionToAppendedFiles.put(partition, new HashMap<>());
            }
            // Extract appended file name from the absolute paths saved in getAppendFiles()
            pm.getRollbackLogFiles().forEach((path, size) -> {
                partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn);
            });
        }
    });
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieColumnRangeMetadata(org.apache.hudi.common.model.HoodieColumnRangeMetadata) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) BiFunction(java.util.function.BiFunction) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) ByteBuffer(java.nio.ByteBuffer) MAX(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX) Logger(org.apache.log4j.Logger) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) Collectors(java.util.stream.Collectors) TOTAL_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE) Objects(java.util.Objects) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) VALUE_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) TOTAL_UNCOMPRESSED_SIZE(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE) EMPTY_PARTITION_NAME(org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) NULL_COUNT(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) LinkedList(java.util.LinkedList) Nonnull(javax.annotation.Nonnull) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieAvroUtils.getNestedFieldValAsString(org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString) GenericRecord(org.apache.avro.generic.GenericRecord) MIN(org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN) HoodieData(org.apache.hudi.common.data.HoodieData) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) ParquetUtils(org.apache.hudi.common.util.ParquetUtils) NON_PARTITIONED_NAME(org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) COLUMN_RANGE_MERGE_FUNCTION(org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION) HoodieDeltaWriteStat(org.apache.hudi.common.model.HoodieDeltaWriteStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
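
Note that Option here is Hudi's own optional type (org.apache.hudi.common.util.Option), not java.util.Optional. Below is a minimal sketch of the Case 1 guard in isolation, assuming only the hudi-common dependency; it is the inverse of the shouldSkip check and compares instant times lexicographically (valid because Hudi instant times are fixed-width timestamp strings) rather than going through HoodieTimeline.compareTimestamps:

import org.apache.hudi.common.util.Option;

public class RollbackSyncGuard {

    // True when the rollback instant does not come after the last synced instant,
    // i.e. the metadata table already reflects it. Fixed-width timestamps make
    // lexicographic order match chronological order.
    static boolean isAlreadySynced(String instantToRollback, Option<String> lastSyncTs) {
        return lastSyncTs.isPresent() && instantToRollback.compareTo(lastSyncTs.get()) <= 0;
    }

    public static void main(String[] args) {
        System.out.println(isAlreadySynced("20220101120000", Option.of("20220201120000"))); // true
        System.out.println(isAlreadySynced("20220301120000", Option.of("20220201120000"))); // false
        System.out.println(isAlreadySynced("20220301120000", Option.empty()));              // false: nothing synced yet
    }
}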

Example 87 with Option

Use of org.apache.hudi.common.util.Option in project hudi by apache.

From class BaseTableMetadata, method getColumnStats:

@Override
public Map<Pair<String, String>, HoodieMetadataColumnStats> getColumnStats(final List<Pair<String, String>> partitionNameFileNameList, final String columnName) throws HoodieMetadataException {
    if (!isColumnStatsIndexEnabled) {
        LOG.error("Metadata column stats index is disabled!");
        return Collections.emptyMap();
    }
    Map<String, Pair<String, String>> columnStatKeyToFileNameMap = new HashMap<>();
    TreeSet<String> sortedKeys = new TreeSet<>();
    final ColumnIndexID columnIndexID = new ColumnIndexID(columnName);
    for (Pair<String, String> partitionNameFileNamePair : partitionNameFileNameList) {
        final String columnStatsIndexKey = HoodieMetadataPayload.getColumnStatsIndexKey(new PartitionIndexID(partitionNameFileNamePair.getLeft()), new FileIndexID(partitionNameFileNamePair.getRight()), columnIndexID);
        sortedKeys.add(columnStatsIndexKey);
        columnStatKeyToFileNameMap.put(columnStatsIndexKey, partitionNameFileNamePair);
    }
    List<String> columnStatKeys = new ArrayList<>(sortedKeys);
    HoodieTimer timer = new HoodieTimer().startTimer();
    List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> hoodieRecordList = getRecordsByKeys(columnStatKeys, MetadataPartitionType.COLUMN_STATS.getPartitionPath());
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, timer.endTimer()));
    Map<Pair<String, String>, HoodieMetadataColumnStats> fileToColumnStatMap = new HashMap<>();
    for (final Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : hoodieRecordList) {
        if (entry.getRight().isPresent()) {
            final Option<HoodieMetadataColumnStats> columnStatMetadata = entry.getRight().get().getData().getColumnStatMetadata();
            if (columnStatMetadata.isPresent()) {
                if (!columnStatMetadata.get().getIsDeleted()) {
                    ValidationUtils.checkState(columnStatKeyToFileNameMap.containsKey(entry.getLeft()));
                    final Pair<String, String> partitionFileNamePair = columnStatKeyToFileNameMap.get(entry.getLeft());
                    ValidationUtils.checkState(!fileToColumnStatMap.containsKey(partitionFileNamePair));
                    fileToColumnStatMap.put(partitionFileNamePair, columnStatMetadata.get());
                }
            } else {
                LOG.error("Meta index column stats missing for: " + entry.getLeft());
            }
        }
    }
    return fileToColumnStatMap;
}
Also used : HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) PartitionIndexID(org.apache.hudi.common.util.hash.PartitionIndexID) TreeSet(java.util.TreeSet) ColumnIndexID(org.apache.hudi.common.util.hash.ColumnIndexID) Option(org.apache.hudi.common.util.Option) FileIndexID(org.apache.hudi.common.util.hash.FileIndexID) Pair(org.apache.hudi.common.util.collection.Pair)
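
getColumnStats() follows a pattern shared by the metadata lookups on this page: build an index key per (partition, fileName) pair, keep a reverse map from key back to the pair, sort the keys before the batch read, then resolve each Pair<String, Option<...>> result through the reverse map. A stripped-down sketch of that pattern, with a hypothetical lookup() standing in for getRecordsByKeys() and a naive key encoding standing in for the real hashed index IDs:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class SortedKeyLookup {

    // Hypothetical stand-in for getRecordsByKeys(): returns a value for every
    // other key and Option.empty() for the rest.
    static List<Pair<String, Option<String>>> lookup(List<String> keys) {
        List<Pair<String, Option<String>>> out = new ArrayList<>();
        for (int i = 0; i < keys.size(); i++) {
            out.add(Pair.of(keys.get(i), i % 2 == 0 ? Option.of("stats-" + i) : Option.empty()));
        }
        return out;
    }

    public static void main(String[] args) {
        List<Pair<String, String>> files = Arrays.asList(Pair.of("p1", "f1"), Pair.of("p2", "f2"));
        Map<String, Pair<String, String>> keyToFile = new HashMap<>();
        TreeSet<String> sortedKeys = new TreeSet<>(); // sorted keys give the reader better locality
        for (Pair<String, String> pf : files) {
            String key = pf.getLeft() + "/" + pf.getRight(); // naive encoding; the real key hashes partition, file and column IDs
            sortedKeys.add(key);
            keyToFile.put(key, pf);
        }
        Map<Pair<String, String>, String> result = new HashMap<>();
        for (Pair<String, Option<String>> entry : lookup(new ArrayList<>(sortedKeys))) {
            // Only present records make it into the result map, mirroring the loop above.
            if (entry.getRight().isPresent()) {
                result.put(keyToFile.get(entry.getLeft()), entry.getRight().get());
            }
        }
        System.out.println(result); // e.g. {(p1,f1)=stats-0}
    }
}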

Example 88 with Option

Use of org.apache.hudi.common.util.Option in project hudi by apache.

From class BaseTableMetadata, method getBloomFilters:

@Override
public Map<Pair<String, String>, BloomFilter> getBloomFilters(final List<Pair<String, String>> partitionNameFileNameList) throws HoodieMetadataException {
    if (!isBloomFilterIndexEnabled) {
        LOG.error("Metadata bloom filter index is disabled!");
        return Collections.emptyMap();
    }
    if (partitionNameFileNameList.isEmpty()) {
        return Collections.emptyMap();
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    Set<String> partitionIDFileIDSortedStrings = new TreeSet<>();
    Map<String, Pair<String, String>> fileToKeyMap = new HashMap<>();
    partitionNameFileNameList.forEach(partitionNameFileNamePair -> {
        final String bloomFilterIndexKey = HoodieMetadataPayload.getBloomFilterIndexKey(new PartitionIndexID(partitionNameFileNamePair.getLeft()), new FileIndexID(partitionNameFileNamePair.getRight()));
        partitionIDFileIDSortedStrings.add(bloomFilterIndexKey);
        fileToKeyMap.put(bloomFilterIndexKey, partitionNameFileNamePair);
    });
    List<String> partitionIDFileIDStrings = new ArrayList<>(partitionIDFileIDSortedStrings);
    List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> hoodieRecordList = getRecordsByKeys(partitionIDFileIDStrings, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath());
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_BLOOM_FILTERS_METADATA_STR, (timer.endTimer() / partitionIDFileIDStrings.size())));
    Map<Pair<String, String>, BloomFilter> partitionFileToBloomFilterMap = new HashMap<>();
    for (final Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : hoodieRecordList) {
        if (entry.getRight().isPresent()) {
            final Option<HoodieMetadataBloomFilter> bloomFilterMetadata = entry.getRight().get().getData().getBloomFilterMetadata();
            if (bloomFilterMetadata.isPresent()) {
                if (!bloomFilterMetadata.get().getIsDeleted()) {
                    ValidationUtils.checkState(fileToKeyMap.containsKey(entry.getLeft()));
                    final ByteBuffer bloomFilterByteBuffer = bloomFilterMetadata.get().getBloomFilter();
                    final String bloomFilterType = bloomFilterMetadata.get().getType();
                    final BloomFilter bloomFilter = BloomFilterFactory.fromString(StandardCharsets.UTF_8.decode(bloomFilterByteBuffer).toString(), bloomFilterType);
                    partitionFileToBloomFilterMap.put(fileToKeyMap.get(entry.getLeft()), bloomFilter);
                }
            } else {
                LOG.error("Meta index bloom filter missing for: " + fileToKeyMap.get(entry.getLeft()));
            }
        }
    }
    return partitionFileToBloomFilterMap;
}
Also used : HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ByteBuffer(java.nio.ByteBuffer) HoodieMetadataBloomFilter(org.apache.hudi.avro.model.HoodieMetadataBloomFilter) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) PartitionIndexID(org.apache.hudi.common.util.hash.PartitionIndexID) TreeSet(java.util.TreeSet) Option(org.apache.hudi.common.util.Option) FileIndexID(org.apache.hudi.common.util.hash.FileIndexID) Pair(org.apache.hudi.common.util.collection.Pair)
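
The stored payload keeps the bloom filter as the UTF-8 bytes of its serialized string form, which is why getBloomFilters() decodes the ByteBuffer before handing it to BloomFilterFactory.fromString(). A round-trip sketch of that decode path; the filter parameters are illustrative, and the factory and filter method signatures are assumed from hudi-common of the same era as these examples:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;

public class BloomFilterRoundTrip {
    public static void main(String[] args) {
        // Build a small filter; numEntries and the error rate are illustrative.
        BloomFilter original = BloomFilterFactory.createBloomFilter(
                1000, 0.000001, -1, BloomFilterTypeCode.SIMPLE.name());
        original.add("record-key-1");

        // The metadata payload stores the serialized filter as UTF-8 bytes; mimic that.
        ByteBuffer stored = ByteBuffer.wrap(
                original.serializeToString().getBytes(StandardCharsets.UTF_8));

        // Same decode path as getBloomFilters() above.
        BloomFilter restored = BloomFilterFactory.fromString(
                StandardCharsets.UTF_8.decode(stored).toString(),
                BloomFilterTypeCode.SIMPLE.name());
        System.out.println(restored.mightContain("record-key-1")); // true
    }
}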

Example 89 with Option

Use of org.apache.hudi.common.util.Option in project hudi by apache.

From class BaseTableMetadata, method fetchAllFilesInPartitionPaths:

Map<String, FileStatus[]> fetchAllFilesInPartitionPaths(List<Path> partitionPaths) throws IOException {
    Map<String, Path> partitionInfo = new HashMap<>();
    boolean foundNonPartitionedPath = false;
    for (Path partitionPath : partitionPaths) {
        String partitionName = FSUtils.getRelativePartitionPath(new Path(dataBasePath), partitionPath);
        if (partitionName.isEmpty()) {
            // Any previously collected partitioned path means the input mixes both kinds.
            if (!partitionInfo.isEmpty()) {
                throw new HoodieMetadataException("Found mix of partitioned and non partitioned paths while fetching data from metadata table");
            }
            partitionInfo.put(NON_PARTITIONED_NAME, partitionPath);
            foundNonPartitionedPath = true;
        } else {
            if (foundNonPartitionedPath) {
                throw new HoodieMetadataException("Found mix of partitioned and non partitioned paths while fetching data from metadata table");
            }
            partitionInfo.put(partitionName, partitionPath);
        }
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> partitionsFileStatus = getRecordsByKeys(new ArrayList<>(partitionInfo.keySet()), MetadataPartitionType.FILES.getPartitionPath());
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_FILES_STR, timer.endTimer()));
    Map<String, FileStatus[]> result = new HashMap<>();
    for (Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : partitionsFileStatus) {
        if (entry.getValue().isPresent()) {
            mayBeHandleSpuriousDeletes(entry.getValue(), entry.getKey());
            result.put(partitionInfo.get(entry.getKey()).toString(), entry.getValue().get().getData().getFileStatuses(hadoopConf.get(), partitionInfo.get(entry.getKey())));
        }
    }
    LOG.info("Listed files in partitions from metadata: partition list =" + Arrays.toString(partitionPaths.toArray()));
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) Option(org.apache.hudi.common.util.Option) Pair(org.apache.hudi.common.util.collection.Pair)
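
The partition name keyed into the FILES partition is simply the path relative to the table base path, with the empty string (a non-partitioned table) mapped to NON_PARTITIONED_NAME. A small sketch of that derivation, assuming FSUtils.getRelativePartitionPath() behaves as used above; the paths are hypothetical:

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;

public class RelativePartitionPathDemo {
    public static void main(String[] args) {
        Path base = new Path("/warehouse/hudi/trips"); // hypothetical table base path

        // A partitioned path resolves to its relative partition name.
        String name = FSUtils.getRelativePartitionPath(base, new Path("/warehouse/hudi/trips/2022/01/01"));
        System.out.println(name); // 2022/01/01

        // The base path itself resolves to the empty string, which
        // fetchAllFilesInPartitionPaths() maps to NON_PARTITIONED_NAME.
        System.out.println(FSUtils.getRelativePartitionPath(base, base).isEmpty()); // true
    }
}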

Example 90 with Option

Use of org.apache.hudi.common.util.Option in project hudi by apache.

From class HoodieBackedTableMetadata, method getLogRecordScanner:

public Pair<HoodieMetadataMergedLogRecordReader, Long> getLogRecordScanner(List<HoodieLogFile> logFiles, String partitionName) {
    HoodieTimer timer = new HoodieTimer().startTimer();
    List<String> sortedLogFilePaths = logFiles.stream().sorted(HoodieLogFile.getLogFileComparator()).map(o -> o.getPath().toString()).collect(Collectors.toList());
    // Only those log files which have a corresponding completed instant on the dataset should be read
    // This is because the metadata table is updated before the dataset instants are committed.
    Set<String> validInstantTimestamps = getValidInstantTimestamps();
    Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
    String latestMetadataInstantTime = latestMetadataInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
    // Load the schema
    Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
    HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
    HoodieMetadataMergedLogRecordReader logRecordScanner = HoodieMetadataMergedLogRecordReader.newBuilder()
        .withFileSystem(metadataMetaClient.getFs())
        .withBasePath(metadataBasePath)
        .withLogFilePaths(sortedLogFilePaths)
        .withReaderSchema(schema)
        .withLatestInstantTime(latestMetadataInstantTime)
        .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
        .withBufferSize(BUFFER_SIZE)
        .withSpillableMapBasePath(spillableMapDirectory)
        .withDiskMapType(commonConfig.getSpillableDiskMapType())
        .withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
        .withLogBlockTimestamps(validInstantTimestamps)
        .enableFullScan(metadataConfig.enableFullScan())
        .withPartition(partitionName)
        .build();
    Long logScannerOpenMs = timer.endTimer();
    LOG.info(String.format("Opened %d metadata log files (dataset instant=%s, metadata instant=%s) in %d ms", sortedLogFilePaths.size(), getLatestDataInstantTime(), latestMetadataInstantTime, logScannerOpenMs));
    return Pair.of(logRecordScanner, logScannerOpenMs);
}
Also used : HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) LinkedList(java.util.LinkedList) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) TableNotFoundException(org.apache.hudi.exception.TableNotFoundException) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieMetadataRecord(org.apache.hudi.avro.model.HoodieMetadataRecord) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) SpillableMapUtils(org.apache.hudi.common.util.SpillableMapUtils) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair)
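
The latestMetadataInstant line shows the idiomatic way to collapse a Hudi Option: map() transforms the value when present, and orElse() supplies the fallback (SOLO_COMMIT_TIMESTAMP above). A minimal sketch with a stand-in instant class and a hypothetical fallback constant:

import org.apache.hudi.common.util.Option;

public class LatestInstantFallback {

    // Minimal stand-in for HoodieInstant; only the timestamp matters here.
    static class Instant {
        final String timestamp;
        Instant(String timestamp) { this.timestamp = timestamp; }
        String getTimestamp() { return timestamp; }
    }

    // Hypothetical fallback value; the real code uses SOLO_COMMIT_TIMESTAMP.
    static final String FALLBACK_TS = "00000000000000";

    static String latestOrFallback(Option<Instant> latestInstant) {
        // Same shape as latestMetadataInstant.map(HoodieInstant::getTimestamp)
        // .orElse(SOLO_COMMIT_TIMESTAMP) in getLogRecordScanner() above.
        return latestInstant.map(Instant::getTimestamp).orElse(FALLBACK_TS);
    }

    public static void main(String[] args) {
        System.out.println(latestOrFallback(Option.of(new Instant("20220315103000")))); // 20220315103000
        System.out.println(latestOrFallback(Option.empty()));                           // 00000000000000
    }
}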

Aggregations

Option (org.apache.hudi.common.util.Option): 105
List (java.util.List): 84
IOException (java.io.IOException): 70
Collectors (java.util.stream.Collectors): 69
Map (java.util.Map): 67
ArrayList (java.util.ArrayList): 61
Path (org.apache.hadoop.fs.Path): 59
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 59
Pair (org.apache.hudi.common.util.collection.Pair): 59
HashMap (java.util.HashMap): 58
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 58
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 56
LogManager (org.apache.log4j.LogManager): 54
Logger (org.apache.log4j.Logger): 54
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 53
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 46
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 44
Arrays (java.util.Arrays): 43
FSUtils (org.apache.hudi.common.fs.FSUtils): 43
Collections (java.util.Collections): 39