Search in sources :

Example 1 with HoodieMetadataFileInfo

use of org.apache.hudi.avro.model.HoodieMetadataFileInfo in project hudi by apache.

From the class {@code HoodieMetadataPayload}: the method {@code createPartitionFilesRecord}.

/**
 * Create and return a {@code HoodieMetadataPayload} to save the list of files within a partition.
 *
 * <p>Added files are recorded with their sizes; deleted files are recorded as tombstones
 * (size 0, deleted flag set). If neither option is present, an empty file listing is saved.
 *
 * @param partition    The name of the partition
 * @param filesAdded   Mapping of files to their sizes for files which have been added to this partition
 * @param filesDeleted List of files which have been deleted from this partition
 * @return a record keyed by the partition name under the FILES metadata partition
 * @throws IllegalStateException if any added file has a non-positive size
 */
public static HoodieRecord<HoodieMetadataPayload> createPartitionFilesRecord(String partition, Option<Map<String, Long>> filesAdded, Option<List<String>> filesDeleted) {
    Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
    filesAdded.ifPresent(filesMap -> filesMap.forEach((fileName, fileSize) -> {
        // Hudi should never create empty files, so a non-positive size indicates a bug
        // upstream; fail fast with the offending file named in the message.
        checkState(fileSize > 0, "Invalid size " + fileSize + " for file " + fileName + " in partition " + partition);
        fileInfo.put(fileName, new HoodieMetadataFileInfo(fileSize, false));
    }));
    // Deleted files are tombstoned with size 0 and the deleted flag set.
    filesDeleted.ifPresent(filesList -> filesList.forEach(fileName -> fileInfo.put(fileName, new HoodieMetadataFileInfo(0L, true))));
    HoodieKey key = new HoodieKey(partition, MetadataPartitionType.FILES.getPartitionPath());
    HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_FILE_LIST, fileInfo);
    return new HoodieAvroRecord<>(key, payload);
}
Also used : HoodieColumnRangeMetadata(org.apache.hudi.common.model.HoodieColumnRangeMetadata) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) ValidationUtils.checkState(org.apache.hudi.common.util.ValidationUtils.checkState) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) RECORDKEY_PARTITION_LIST(org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieMetadataBloomFilter(org.apache.hudi.avro.model.HoodieMetadataBloomFilter) TypeUtils.unsafeCast(org.apache.hudi.TypeUtils.unsafeCast) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieMetadataFileInfo(org.apache.hudi.avro.model.HoodieMetadataFileInfo) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) Properties(java.util.Properties) Collection(java.util.Collection) IOException(java.io.IOException) PartitionIndexID(org.apache.hudi.common.util.hash.PartitionIndexID) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) ColumnIndexID(org.apache.hudi.common.util.hash.ColumnIndexID) List(java.util.List) Stream(java.util.stream.Stream) HoodieMetadataRecord(org.apache.hudi.avro.model.HoodieMetadataRecord) ValidationUtils.checkArgument(org.apache.hudi.common.util.ValidationUtils.checkArgument) FileIndexID(org.apache.hudi.common.util.hash.FileIndexID) HoodieHFileReader(org.apache.hudi.io.storage.HoodieHFileReader) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) HoodieKey(org.apache.hudi.common.model.HoodieKey) 
FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HashMap(java.util.HashMap) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieMetadataFileInfo(org.apache.hudi.avro.model.HoodieMetadataFileInfo) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with HoodieMetadataFileInfo

use of org.apache.hudi.avro.model.HoodieMetadataFileInfo in project hudi by apache.

From the class {@code HoodieMetadataPayload}: the method {@code createPartitionListRecord}.

/**
 * Create and return a {@code HoodieMetadataPayload} to save the list of partitions.
 *
 * <p>Each partition is stored as an entry with size 0 and the deleted flag unset,
 * keyed under the well-known {@code RECORDKEY_PARTITION_LIST} record key.
 *
 * @param partitions The list of partitions
 */
public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitions) {
    // Map every partition name to a placeholder file-info entry (size 0, not deleted).
    Map<String, HoodieMetadataFileInfo> partitionEntries = new HashMap<>();
    for (String partitionName : partitions) {
        partitionEntries.put(partitionName, new HoodieMetadataFileInfo(0L, false));
    }
    HoodieKey recordKey = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath());
    HoodieMetadataPayload recordPayload = new HoodieMetadataPayload(recordKey.getRecordKey(), METADATA_TYPE_PARTITION_LIST, partitionEntries);
    return new HoodieAvroRecord<>(recordKey, recordPayload);
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HashMap(java.util.HashMap) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieMetadataFileInfo(org.apache.hudi.avro.model.HoodieMetadataFileInfo)

Aggregations

HashMap (java.util.HashMap)2 HoodieMetadataFileInfo (org.apache.hudi.avro.model.HoodieMetadataFileInfo)2 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)2 HoodieKey (org.apache.hudi.common.model.HoodieKey)2 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 List (java.util.List)1 Map (java.util.Map)1 Properties (java.util.Properties)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 Schema (org.apache.avro.Schema)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1