Search in sources :

Example 6 with HoodieCleanPartitionMetadata

use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

the class TestCleaner method runCleaner.

/**
 * Helper to run cleaner and collect Clean Stats.
 *
 * @param config HoodieWriteConfig
 */
private List<HoodieCleanStat> runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure, int firstCommitSequence) throws IOException {
    SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(config);
    String cleanInstantTs = makeNewCommitTime(firstCommitSequence);
    HoodieCleanMetadata cleanMetadata1 = writeClient.clean(cleanInstantTs);
    if (null == cleanMetadata1) {
        return new ArrayList<>();
    }
    if (simulateRetryFailure) {
        HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs);
        HoodieCleanMetadata metadata = CleanerUtils.getCleanerMetadata(metaClient, completedCleanInstant);
        metadata.getPartitionMetadata().values().forEach(p -> {
            String dirPath = metaClient.getBasePath() + "/" + p.getPartitionPath();
            p.getSuccessDeleteFiles().forEach(p2 -> {
                try {
                    metaClient.getFs().create(new Path(dirPath, p2), true);
                } catch (IOException e) {
                    throw new HoodieIOException(e.getMessage(), e);
                }
            });
        });
        metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant);
        // retry clean operation again
        writeClient.clean();
        final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant);
        cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> {
            HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k);
            HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k);
            assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
            assertEquals(p1.getSuccessDeleteFiles(), p2.getSuccessDeleteFiles());
            assertEquals(p1.getFailedDeleteFiles(), p2.getFailedDeleteFiles());
            assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
            assertEquals(k, p1.getPartitionPath());
        });
    }
    Map<String, HoodieCleanStat> cleanStatMap = cleanMetadata1.getPartitionMetadata().values().stream().map(x -> new HoodieCleanStat.Builder().withPartitionPath(x.getPartitionPath()).withFailedDeletes(x.getFailedDeleteFiles()).withSuccessfulDeletes(x.getSuccessDeleteFiles()).withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy())).withDeletePathPattern(x.getDeletePathPatterns()).withEarliestCommitRetained(Option.ofNullable(cleanMetadata1.getEarliestCommitToRetain() != null ? new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "000") : null)).build()).collect(Collectors.toMap(HoodieCleanStat::getPartitionPath, x -> x));
    cleanMetadata1.getBootstrapPartitionMetadata().values().forEach(x -> {
        HoodieCleanStat s = cleanStatMap.get(x.getPartitionPath());
        cleanStatMap.put(x.getPartitionPath(), new HoodieCleanStat.Builder().withPartitionPath(x.getPartitionPath()).withFailedDeletes(s.getFailedDeleteFiles()).withSuccessfulDeletes(s.getSuccessDeleteFiles()).withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy())).withDeletePathPattern(s.getDeletePathPatterns()).withEarliestCommitRetained(Option.ofNullable(s.getEarliestCommitToRetain()).map(y -> new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, y))).withSuccessfulDeleteBootstrapBaseFiles(x.getSuccessDeleteFiles()).withFailedDeleteBootstrapBaseFiles(x.getFailedDeleteFiles()).withDeleteBootstrapBasePathPatterns(x.getDeletePathPatterns()).build());
    });
    return new ArrayList<>(cleanStatMap.values());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) Arrays(java.util.Arrays) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) CleanPlanner(org.apache.hudi.table.action.clean.CleanPlanner) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) Awaitility.await(org.awaitility.Awaitility.await) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) CleanPlanMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) Option(org.apache.hudi.common.util.Option) CleanPlanV1MigrationHandler(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StringUtils(org.apache.hudi.common.util.StringUtils) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTestCommitGenerator.getBaseFilename(org.apache.hudi.HoodieTestCommitGenerator.getBaseFilename) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ConsistencyGuardConfig(org.apache.hudi.common.fs.ConsistencyGuardConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Files(java.nio.file.Files) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieTestTable.makeIncrementalCommitTimes(org.apache.hudi.common.testutils.HoodieTestTable.makeIncrementalCommitTimes) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Path(org.apache.hadoop.fs.Path) MethodSource(org.junit.jupiter.params.provider.MethodSource) IOType(org.apache.hudi.common.model.IOType) Predicate(java.util.function.Predicate) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Tuple3(scala.Tuple3) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) Test(org.junit.jupiter.api.Test) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) CleanMetadataMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) DEFAULT_PARTITION_PATHS(org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) FileSlice(org.apache.hudi.common.model.FileSlice) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) TestBootstrapIndex(org.apache.hudi.common.bootstrap.TestBootstrapIndex) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) TimeUnit(java.util.concurrent.TimeUnit) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieTestTable.makeNewCommitTime(org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata)

Example 7 with HoodieCleanPartitionMetadata

use of org.apache.hudi.avro.model.HoodieCleanPartitionMetadata in project hudi by apache.

the class TestTimelineUtils method getCleanMetadata.

private Option<byte[]> getCleanMetadata(String partition, String time) throws IOException {
    Map<String, HoodieCleanPartitionMetadata> partitionToFilesCleaned = new HashMap<>();
    List<String> filesDeleted = new ArrayList<>();
    filesDeleted.add("file-" + partition + "-" + time + "1");
    filesDeleted.add("file-" + partition + "-" + time + "2");
    HoodieCleanPartitionMetadata partitionMetadata = HoodieCleanPartitionMetadata.newBuilder().setPartitionPath(partition).setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).setFailedDeleteFiles(Collections.emptyList()).setDeletePathPatterns(Collections.emptyList()).setSuccessDeleteFiles(filesDeleted).build();
    partitionToFilesCleaned.putIfAbsent(partition, partitionMetadata);
    HoodieCleanMetadata cleanMetadata = HoodieCleanMetadata.newBuilder().setVersion(1).setTimeTakenInMillis(100).setTotalFilesDeleted(1).setStartCleanTime(time).setEarliestCommitToRetain(time).setPartitionMetadata(partitionToFilesCleaned).build();
    return TimelineMetadataUtils.serializeCleanMetadata(cleanMetadata);
}
Also used : HashMap(java.util.HashMap) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) ArrayList(java.util.ArrayList) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata)

Aggregations

HoodieCleanPartitionMetadata (org.apache.hudi.avro.model.HoodieCleanPartitionMetadata)7 HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)6 HashMap (java.util.HashMap)5 Map (java.util.Map)5 ArrayList (java.util.ArrayList)3 Collectors (java.util.stream.Collectors)3 Path (org.apache.hadoop.fs.Path)3 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)3 Pair (org.apache.hudi.common.util.collection.Pair)3 List (java.util.List)2 HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat)2 File (java.io.File)1 IOException (java.io.IOException)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Files (java.nio.file.Files)1 Paths (java.nio.file.Paths)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1