
Example 76 with HoodieActiveTimeline

Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

From the class TestHoodieTableFileSystemView, method testReplaceWithTimeTravel.

@Test
public void testReplaceWithTimeTravel() throws IOException {
    String partitionPath1 = "2020/06/27";
    new File(basePath + "/" + partitionPath1).mkdirs();
    // create 2 fileIds in partition1 - fileId1 is replaced later on.
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    assertFalse(roView.getLatestBaseFiles(partitionPath1).anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)), "No commit, should not find any data file");
    // Only one commit
    String commitTime1 = "1";
    String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
    String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
    new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    refreshFsView();
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    // create commit2 - fileId1 is replaced. New file groups fileId3 and fileId4 are created; their data files are named with commitTime1 so they remain visible to the time-travel view below.
    String fileId3 = UUID.randomUUID().toString();
    String fileId4 = UUID.randomUUID().toString();
    String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
    String fileName4 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId4);
    new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
    String commitTime2 = "2";
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add(fileId1);
    partitionToReplaceFileIds.put(partitionPath1, replacedFileIds);
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
    commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
    saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    // make sure the view doesn't include fileId1
    refreshFsView();
    assertEquals(0, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());
    // exclude commit 2 and make sure fileId1 shows up in the view.
    SyncableFileSystemView filteredView = getFileSystemView(metaClient.getActiveTimeline().findInstantsBefore("2"), false);
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());
    // ensure replacedFileGroupsBefore works with all instants
    List<HoodieFileGroup> replacedOnInstant1 = fsView.getReplacedFileGroupsBeforeOrOn("1", partitionPath1).collect(Collectors.toList());
    assertEquals(0, replacedOnInstant1.size());
    List<HoodieFileGroup> allReplaced = fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath1).collect(Collectors.toList());
    assertEquals(1, allReplaced.size());
    assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
    allReplaced = fsView.getReplacedFileGroupsBefore("2", partitionPath1).collect(Collectors.toList());
    assertEquals(0, allReplaced.size());
    allReplaced = fsView.getAllReplacedFileGroups(partitionPath1).collect(Collectors.toList());
    assertEquals(1, allReplaced.size());
    assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
}
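Both commits above are completed via saveAsComplete, a test helper whose body is not part of this excerpt. A minimal sketch of what it plausibly does, assuming only HoodieActiveTimeline methods that appear elsewhere in these examples (createNewInstant, transitionRequestedToInflight, saveAsComplete); the actual helper in the Hudi test suite may differ:

private static void saveAsComplete(HoodieActiveTimeline timeline, HoodieInstant inflight, Option<byte[]> data) {
    // Walk the instant through its lifecycle: requested -> inflight -> completed.
    HoodieInstant requested = new HoodieInstant(State.REQUESTED, inflight.getAction(), inflight.getTimestamp());
    timeline.createNewInstant(requested);
    timeline.transitionRequestedToInflight(requested, Option.empty());
    // Completing the instant persists the optional commit metadata bytes.
    timeline.saveAsComplete(inflight, data);
}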

Example 77 with HoodieActiveTimeline

Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

From the class TestHoodieTableFileSystemView, method testCloseHoodieTableFileSystemView.

@Test
public void testCloseHoodieTableFileSystemView() throws Exception {
    String instantTime1 = "1";
    String instantTime2 = "2";
    String clusteringInstantTime3 = "3";
    String clusteringInstantTime4 = "4";
    // prepare metadata
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add("fake_file_id");
    partitionToReplaceFileIds.put("fake_partition_path", replacedFileIds);
    // prepare Instants
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
    HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime2);
    HoodieInstant clusteringInstant3 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringInstantTime3);
    HoodieInstant clusteringInstant4 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringInstantTime4);
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds, Option.empty(), WriteOperationType.CLUSTER, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    saveAsComplete(commitTimeline, instant2, Option.empty());
    saveAsComplete(commitTimeline, clusteringInstant3, Option.empty());
    saveAsComplete(commitTimeline, clusteringInstant4, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    refreshFsView();
    // Now simulate a scenario where archiving has deleted the replace commit's timeline files (requested, inflight and replacecommit)
    boolean deleteReplaceCommit = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit").delete();
    boolean deleteReplaceCommitRequested = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit.requested").delete();
    boolean deleteReplaceCommitInflight = new File(this.basePath + "/.hoodie/" + clusteringInstantTime3 + ".replacecommit.inflight").delete();
    // confirm deleted
    assertTrue(deleteReplaceCommit && deleteReplaceCommitInflight && deleteReplaceCommitRequested);
    assertDoesNotThrow(() -> fsView.close());
}
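The three delete calls rely on Hudi's timeline layout: each instant is materialized under .hoodie as <instantTime>.<action>, plus .requested and .inflight state files. A hypothetical helper (not part of the original test) that factors out this archival simulation:

private static boolean simulateReplaceCommitArchival(String basePath, String instantTime) {
    // Deleting all three state files mimics a timeline where the archiver has
    // removed a replacecommit together with its transition states.
    boolean deleted = true;
    for (String suffix : new String[] {".replacecommit", ".replacecommit.requested", ".replacecommit.inflight"}) {
        deleted &= new File(basePath + "/.hoodie/" + instantTime + suffix).delete();
    }
    return deleted;
}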

Example 78 with HoodieActiveTimeline

Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

From the class HoodieTableMetadataUtil, method processRollbackMetadata.

/**
 * Extracts information about the deleted and append files from the {@code HoodieRollbackMetadata}.
 * <p>
 * During a rollback, files may be deleted (COW, MOR) or rollback blocks may be appended (MOR only) to files. This
 * function extracts these file changes for each partition.
 *
 * @param metadataTableTimeline    Current timeline of the Metadata Table
 * @param rollbackMetadata         {@code HoodieRollbackMetadata}
 * @param partitionToDeletedFiles  The {@code Map} to fill with files deleted per partition.
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 */
private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata, Map<String, List<String>> partitionToDeletedFiles, Map<String, Map<String, Long>> partitionToAppendedFiles, Option<String> lastSyncTs) {
    rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
        final String instantToRollback = rollbackMetadata.getCommitsRollback().get(0);
        // Has this rollback produced new files?
        boolean hasRollbackLogFiles = pm.getRollbackLogFiles() != null && !pm.getRollbackLogFiles().isEmpty();
        boolean hasNonZeroRollbackLogFiles = hasRollbackLogFiles && pm.getRollbackLogFiles().values().stream().mapToLong(Long::longValue).sum() > 0;
        // If the instant-to-rollback has not been synced to the metadata table yet, there is no need to update the metadata.
        // This can happen in two cases:
        // Case 1: Metadata Table timeline is behind the instant-to-rollback.
        boolean shouldSkip = lastSyncTs.isPresent() && HoodieTimeline.compareTimestamps(instantToRollback, HoodieTimeline.GREATER_THAN, lastSyncTs.get());
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced up to %s", instantToRollback, lastSyncTs.get()));
            return;
        }
        // Case 2: The instant-to-rollback was never committed to the Metadata Table. This can happen if the instant-to-rollback
        // was a failed commit (never completed), as only completed instants are synced to the Metadata Table.
        // The required Metadata Table instants, however, should not have been archived.
        HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback);
        if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) {
            throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived", syncedInstant, instantToRollback));
        }
        shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant);
        if (!hasNonZeroRollbackLogFiles && shouldSkip) {
            LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table", instantToRollback));
            return;
        }
        final String partition = pm.getPartitionPath();
        if ((!pm.getSuccessDeleteFiles().isEmpty() || !pm.getFailedDeleteFiles().isEmpty()) && !shouldSkip) {
            if (!partitionToDeletedFiles.containsKey(partition)) {
                partitionToDeletedFiles.put(partition, new ArrayList<>());
            }
            // Extract deleted file name from the absolute paths saved in getSuccessDeleteFiles()
            List<String> deletedFiles = pm.getSuccessDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList());
            if (!pm.getFailedDeleteFiles().isEmpty()) {
                deletedFiles.addAll(pm.getFailedDeleteFiles().stream().map(p -> new Path(p).getName()).collect(Collectors.toList()));
            }
            partitionToDeletedFiles.get(partition).addAll(deletedFiles);
        }
        BiFunction<Long, Long, Long> fileMergeFn = (oldSize, newSizeCopy) -> {
            // Keep the larger of the two sizes, as the rollback log file could have been updated after the written log file sizes were computed.
            return oldSize > newSizeCopy ? oldSize : newSizeCopy;
        };
        if (hasRollbackLogFiles) {
            if (!partitionToAppendedFiles.containsKey(partition)) {
                partitionToAppendedFiles.put(partition, new HashMap<>());
            }
            // Extract appended file name from the absolute paths saved in getAppendFiles()
            pm.getRollbackLogFiles().forEach((path, size) -> {
                partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn);
            });
        }
    });
}
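The fileMergeFn above resolves the case where the same log file is reported more than once: Map.merge keeps the existing size unless the incoming one is larger. A standalone illustration using plain JDK types (the file names here are made up):

Map<String, Long> appended = new HashMap<>();
BiFunction<Long, Long, Long> keepLarger = (oldSize, newSize) -> oldSize > newSize ? oldSize : newSize;
appended.merge("log.1", 100L, keepLarger);  // no previous mapping: inserts 100
appended.merge("log.1", 150L, keepLarger);  // 150 > 100: value becomes 150
appended.merge("log.1", 120L, keepLarger);  // 120 < 150: existing 150 wins
appended.merge("log.2", 80L, keepLarger);   // independent key: inserts 80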

Example 79 with HoodieActiveTimeline

Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

From the class HoodieBackedTableMetadata, method getValidInstantTimestamps.

private Set<String> getValidInstantTimestamps() {
    // Only those log files which have a corresponding completed instant on the dataset should be read
    // This is because the metadata table is updated before the dataset instants are committed.
    HoodieActiveTimeline datasetTimeline = dataMetaClient.getActiveTimeline();
    Set<String> validInstantTimestamps = datasetTimeline.filterCompletedInstants().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
    // For any rollbacks and restores, we cannot neglect the instants that they are rolling back.
    // The rollback instant should be more recent than the start of the timeline for it to have rolled back any
    // instant which we have a log block for.
    final String earliestInstantTime = validInstantTimestamps.isEmpty() ? SOLO_COMMIT_TIMESTAMP : Collections.min(validInstantTimestamps);
    datasetTimeline.getRollbackAndRestoreTimeline().filterCompletedInstants().getInstants().filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN, earliestInstantTime)).forEach(instant -> {
        validInstantTimestamps.addAll(getRollbackedCommits(instant, datasetTimeline));
    });
    // SOLO_COMMIT_TIMESTAMP is used during bootstrap, so it is a valid timestamp
    validInstantTimestamps.add(SOLO_COMMIT_TIMESTAMP);
    return validInstantTimestamps;
}
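The compareTimestamps calls here treat instant times as lexicographically ordered strings (HoodieTimeline.GREATER_THAN is, in the Hudi versions this code comes from, a string-comparison predicate; since instant times are fixed-width timestamps, lexicographic and chronological order coincide). A small illustration reusing the same call shape as the code above:

boolean newer = HoodieTimeline.compareTimestamps("20220102120000", HoodieTimeline.GREATER_THAN, "20220101120000");  // true: second instant is earlier
boolean older = HoodieTimeline.compareTimestamps("1", HoodieTimeline.GREATER_THAN, "2");                            // false: "1" sorts before "2"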

Example 80 with HoodieActiveTimeline

Use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

From the class TestHoodieTableMetaClient, method checkSerDe.

@Test
public void checkSerDe() {
    // check that if this object is serialized and deserialized, we are still able to read from the file system
    HoodieTableMetaClient deserializedMetaClient = HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
    assertNotNull(deserializedMetaClient);
    HoodieActiveTimeline commitTimeline = deserializedMetaClient.getActiveTimeline();
    HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
    commitTimeline.createNewInstant(instant);
    commitTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes()));
    commitTimeline = commitTimeline.reload();
    HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
    assertEquals(completedInstant, commitTimeline.getInstants().findFirst().get(), "Commit should be 1 and completed");
    assertArrayEquals("test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\"");
}
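HoodieTestUtils.serializeDeserialize is not shown in this excerpt; it is presumably a plain Java serialization round-trip. A minimal sketch under that assumption (the real utility may differ):

static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz) throws IOException, ClassNotFoundException {
    // Write the object to an in-memory buffer...
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
        oos.writeObject(object);
    }
    // ...then read it back, exercising the class's custom readObject/writeObject logic, if any.
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
        return clazz.cast(ois.readObject());
    }
}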
