
Example 46 with FileSlice

Use of org.apache.hudi.common.model.FileSlice in project hudi by apache.

From the class TestCompactionUtils, method testUpgradeDowngrade.

@Test
public void testUpgradeDowngrade() {
    Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), inputAndPlan.getValue());
    CompactionPlanMigrator migrator = new CompactionPlanMigrator(metaClient);
    HoodieCompactionPlan plan = inputAndPlan.getRight();
    System.out.println("Plan=" + plan.getOperations());
    assertEquals(LATEST_COMPACTION_METADATA_VERSION, plan.getVersion());
    HoodieCompactionPlan oldPlan = migrator.migrateToVersion(plan, plan.getVersion(), COMPACTION_METADATA_VERSION_1);
    // Check with older version of compaction plan
    assertEquals(COMPACTION_METADATA_VERSION_1, oldPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), oldPlan);
    HoodieCompactionPlan newPlan = migrator.upgradeToLatest(plan, plan.getVersion());
    assertEquals(LATEST_COMPACTION_METADATA_VERSION, newPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), newPlan);
    HoodieCompactionPlan latestPlan = migrator.migrateToVersion(oldPlan, oldPlan.getVersion(), newPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), latestPlan);
}
Also used: HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) List(java.util.List) CompactionPlanMigrator(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
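
The version round-trip exercised by this test can be distilled into a small helper. A minimal sketch, reusing only the CompactionPlanMigrator calls shown above; the class name CompactionPlanRoundTrip and the roundTrip method are illustrative, not part of Hudi.

import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator;

public class CompactionPlanRoundTrip {
    /**
     * Downgrades a plan to targetVersion, then upgrades it back to the latest
     * metadata version. A lossless migrator should return an equivalent plan,
     * which is what testFileSlicesCompactionPlanEquality asserts above.
     */
    public static HoodieCompactionPlan roundTrip(HoodieTableMetaClient metaClient,
                                                 HoodieCompactionPlan plan,
                                                 Integer targetVersion) {
        CompactionPlanMigrator migrator = new CompactionPlanMigrator(metaClient);
        // Downgrade to the requested (older) metadata version.
        HoodieCompactionPlan old = migrator.migrateToVersion(plan, plan.getVersion(), targetVersion);
        // Upgrade back to the latest version supported by the migrator.
        return migrator.upgradeToLatest(old, old.getVersion());
    }
}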

Example 47 with FileSlice

Use of org.apache.hudi.common.model.FileSlice in project hudi by apache.

From the class ClusteringTestUtils, method createClusteringPlan.

public static HoodieClusteringPlan createClusteringPlan(HoodieTableMetaClient metaClient, String instantTime, String fileId) {
    try {
        String basePath = metaClient.getBasePath();
        String partition = DEFAULT_PARTITION_PATHS[0];
        // Materialize a base file on storage, then wrap it in a FileSlice.
        createBaseFile(basePath, partition, instantTime, fileId, 1);
        FileSlice slice = new FileSlice(partition, instantTime, fileId);
        slice.setBaseFile(new CompactionTestUtils.DummyHoodieBaseFile(Paths.get(basePath, partition, baseFileName(instantTime, fileId)).toString()));
        // A clustering plan takes groups of file slices; here, a single one-slice group.
        List<FileSlice>[] fileSliceGroups = new List[] { Collections.singletonList(slice) };
        return ClusteringUtils.createClusteringPlan("strategy", new HashMap<>(), fileSliceGroups, Collections.emptyMap());
    } catch (Exception e) {
        throw new HoodieException(e.getMessage(), e);
    }
}
Also used: FileSlice(org.apache.hudi.common.model.FileSlice) List(java.util.List) HoodieException(org.apache.hudi.exception.HoodieException) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan)
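
The essential steps above are wrapping a FileSlice into a one-element group array and handing it to ClusteringUtils.createClusteringPlan. A minimal sketch of just that step; SingleSliceClusteringPlan is an illustrative name, and the ClusteringUtils import path is assumed from Hudi's common-util package.

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.ClusteringUtils;

public class SingleSliceClusteringPlan {
    /** Wraps one already-built FileSlice into a single-group clustering plan. */
    @SuppressWarnings("unchecked")
    public static HoodieClusteringPlan planFor(FileSlice slice, String strategyClass) {
        // Each array element is one group of slices to be clustered together.
        List<FileSlice>[] groups = new List[] { Collections.singletonList(slice) };
        return ClusteringUtils.createClusteringPlan(strategyClass, new HashMap<>(), groups, Collections.emptyMap());
    }
}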

Example 48 with FileSlice

Use of org.apache.hudi.common.model.FileSlice in project hudi by apache.

From the class HoodieBackedTableMetadataWriter, method prepRecords.

/**
 * Tag each record with the location in the given partition.
 * The record is tagged with respective file slice's location based on its record key.
 */
protected HoodieData<HoodieRecord> prepRecords(Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap) {
    // The result set
    HoodieData<HoodieRecord> allPartitionRecords = engineContext.emptyHoodieData();
    HoodieTableFileSystemView fsView = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);
    for (Map.Entry<MetadataPartitionType, HoodieData<HoodieRecord>> entry : partitionRecordsMap.entrySet()) {
        final String partitionName = entry.getKey().getPartitionPath();
        final int fileGroupCount = entry.getKey().getFileGroupCount();
        HoodieData<HoodieRecord> records = entry.getValue();
        List<FileSlice> fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName);
        ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount, String.format("Invalid number of file groups for partition:%s, found=%d, required=%d", partitionName, fileSlices.size(), fileGroupCount));
        HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> {
            FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), fileGroupCount));
            r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
            return r;
        });
        allPartitionRecords = allPartitionRecords.union(rddSinglePartitionRecords);
    }
    return allPartitionRecords;
}
Also used: HoodieData(org.apache.hudi.common.data.HoodieData) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Map(java.util.Map) HashMap(java.util.HashMap)
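
The routing step above relies on HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex, which must send the same record key to the same file group on every call so writers and readers agree. The stand-in below is illustrative only, showing the general shape of such a mapping; Hudi's real implementation may hash differently.

public class FileGroupMapping {
    /**
     * Illustrative stand-in for HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex:
     * deterministically assigns a record key to one of fileGroupCount groups.
     */
    public static int mapKeyToFileGroup(String recordKey, int fileGroupCount) {
        // floorMod keeps the index non-negative even for negative hash codes.
        return Math.floorMod(recordKey.hashCode(), fileGroupCount);
    }
}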

Example 49 with FileSlice

Use of org.apache.hudi.common.model.FileSlice in project hudi by apache.

From the class ClusteringPlanStrategy, method getFileSlicesEligibleForClustering.

/**
 * Return file slices eligible for clustering. FileIds in pending clustering/compaction are not eligible for clustering.
 */
protected Stream<FileSlice> getFileSlicesEligibleForClustering(String partition) {
    SyncableFileSystemView fileSystemView = (SyncableFileSystemView) getHoodieTable().getSliceView();
    Set<HoodieFileGroupId> fgIdsInPendingCompactionAndClustering = fileSystemView.getPendingCompactionOperations().map(instantTimeOpPair -> instantTimeOpPair.getValue().getFileGroupId()).collect(Collectors.toSet());
    fgIdsInPendingCompactionAndClustering.addAll(fileSystemView.getFileGroupsInPendingClustering().map(Pair::getKey).collect(Collectors.toSet()));
    return hoodieTable.getSliceView().getLatestFileSlices(partition).filter(slice -> !fgIdsInPendingCompactionAndClustering.contains(slice.getFileGroupId()));
}
Also used: HoodieTable(org.apache.hudi.table.HoodieTable) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) FileSliceMetricUtils(org.apache.hudi.client.utils.FileSliceMetricUtils) BaseFile(org.apache.hudi.common.model.BaseFile) Map(java.util.Map) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) Stream(java.util.stream.Stream) ClusteringPlanPartitionFilterMode(org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair)
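
Stripped of the file-system-view plumbing, the eligibility check above is a set-difference over file group ids. A minimal sketch of that filter, using only types already shown in this example; EligibleSlices is an illustrative name.

import java.util.Set;
import java.util.stream.Stream;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileGroupId;

public class EligibleSlices {
    /**
     * Keeps only slices whose file groups are not already scheduled for
     * compaction or clustering; mirrors the filter in the method above.
     */
    public static Stream<FileSlice> filterEligible(Stream<FileSlice> latestSlices,
                                                   Set<HoodieFileGroupId> pendingFgIds) {
        return latestSlices.filter(slice -> !pendingFgIds.contains(slice.getFileGroupId()));
    }
}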

Example 50 with FileSlice

Use of org.apache.hudi.common.model.FileSlice in project hudi by apache.

From the class FileSliceDTO, method toFileSlice.

public static FileSlice toFileSlice(FileSliceDTO dto) {
    FileSlice slice = new FileSlice(dto.partitionPath, dto.baseInstantTime, dto.fileId);
    slice.setBaseFile(BaseFileDTO.toHoodieBaseFile(dto.baseFile));
    dto.logFiles.forEach(lf -> slice.addLogFile(LogFileDTO.toHoodieLogFile(lf)));
    return slice;
}
Also used: FileSlice(org.apache.hudi.common.model.FileSlice)
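
A small usage sketch for the reconstructed slice, assuming FileSlice exposes getBaseFile() and getLogFiles() accessors mirroring the setters used above; FileSliceInspector is an illustrative name.

import org.apache.hudi.common.model.FileSlice;

public class FileSliceInspector {
    /** Prints the base and log files of a slice, e.g. one rebuilt from a DTO. */
    public static void describe(FileSlice slice) {
        // getBaseFile() is assumed to return a Hudi Option, empty for log-only slices.
        if (slice.getBaseFile().isPresent()) {
            System.out.println("base file: " + slice.getBaseFile().get().getPath());
        }
        slice.getLogFiles().forEach(lf -> System.out.println("log file: " + lf.getPath()));
    }
}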

Aggregations

FileSlice (org.apache.hudi.common.model.FileSlice): 87
List (java.util.List): 51
ArrayList (java.util.ArrayList): 45
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 45
Map (java.util.Map): 44
Collectors (java.util.stream.Collectors): 43
IOException (java.io.IOException): 39
Path (org.apache.hadoop.fs.Path): 39
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 39
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 38
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 38
Option (org.apache.hudi.common.util.Option): 37
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 36
Pair (org.apache.hudi.common.util.collection.Pair): 35
HashMap (java.util.HashMap): 32
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 32
FSUtils (org.apache.hudi.common.fs.FSUtils): 29
Stream (java.util.stream.Stream): 28
Test (org.junit.jupiter.api.Test): 27
HoodieFileGroup (org.apache.hudi.common.model.HoodieFileGroup): 26