Example 31 with HoodieFileGroup

use of org.apache.hudi.common.model.HoodieFileGroup in project hudi by apache.

the class TestUpgradeDowngrade method twoUpsertCommitDataWithTwoPartitions.

/**
 * Creates two upsert commits across two partitions; the 2nd commit may or may not be committed.
 *
 * @param firstPartitionCommit2FileSlices list to hold file slices from the first partition.
 * @param secondPartitionCommit2FileSlices list to hold file slices from the second partition.
 * @param cfg instance of {@link HoodieWriteConfig}
 * @param client instance of {@link SparkRDDWriteClient} to use.
 * @param commitSecondUpsert true if the 2nd commit should be committed, false otherwise.
 * @return a pair of lists of records from the 1st and 2nd batches.
 */
private Pair<List<HoodieRecord>, List<HoodieRecord>> twoUpsertCommitDataWithTwoPartitions(List<FileSlice> firstPartitionCommit2FileSlices, List<FileSlice> secondPartitionCommit2FileSlices, HoodieWriteConfig cfg, SparkRDDWriteClient client, boolean commitSecondUpsert) throws IOException {
    // just generate two partitions
    dataGen = new HoodieTestDataGenerator(new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH });
    // 1. prepare data
    HoodieTestDataGenerator.writePartitionMetadataDeprecated(metaClient.getFs(), new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH }, basePath);
    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = "001";
    client.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records = dataGen.generateInsertsContainsAllPartitions(newCommitTime, 2);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    JavaRDD<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime);
    Assertions.assertNoWriteErrors(statuses.collect());
    client.commit(newCommitTime, statuses);
    /**
     * Write 2 (updates)
     */
    newCommitTime = "002";
    client.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records2 = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc.parallelize(records2, 1), newCommitTime);
    Assertions.assertNoWriteErrors(statuses.collect());
    if (commitSecondUpsert) {
        client.commit(newCommitTime, statuses);
    }
    // 2. assert filegroup and get the first partition fileslice
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient());
    List<HoodieFileGroup> firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, firstPartitionCommit2FileGroups.size());
    firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    // 3. assert filegroup and get the second partition fileslice
    List<HoodieFileGroup> secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, secondPartitionCommit2FileGroups.size());
    secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    // 4. assert fileslice
    HoodieTableType tableType = metaClient.getTableType();
    if (tableType.equals(HoodieTableType.COPY_ON_WRITE)) {
        assertEquals(2, firstPartitionCommit2FileSlices.size());
        assertEquals(2, secondPartitionCommit2FileSlices.size());
    } else {
        assertEquals(1, firstPartitionCommit2FileSlices.size());
        assertEquals(1, secondPartitionCommit2FileSlices.size());
    }
    return Pair.of(records, records2);
}
Also used : SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup)
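
A minimal, hedged sketch of how a test might call this helper, inside a test method declared to throw Exception. The getConfigBuilder() and getHoodieWriteClient() harness methods are assumptions here, not confirmed by the snippet; everything else reuses the signature above.

// Hedged usage sketch; getConfigBuilder()/getHoodieWriteClient() are hypothetical
// harness helpers, and the imports match the "Also used" list above.
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
HoodieWriteConfig cfg = getConfigBuilder().build();
try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
    // Commit both upserts and capture the two record batches.
    Pair<List<HoodieRecord>, List<HoodieRecord>> batches =
        twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices,
            secondPartitionCommit2FileSlices, cfg, client, true);
    // generateUpdates() produces one update per input record, so batch sizes should match.
    assertEquals(batches.getLeft().size(), batches.getRight().size());
}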

Example 32 with HoodieFileGroup

use of org.apache.hudi.common.model.HoodieFileGroup in project hudi by apache.

the class TestHoodieSparkMergeOnReadTableRollback method testMultiRollbackWithDeltaAndCompactionCommit.

@Test
void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception {
    boolean populateMetaFields = true;
    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false).withMarkersType(MarkerType.DIRECT.name());
    addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
    HoodieWriteConfig cfg = cfgBuilder.build();
    Properties properties = populateMetaFields ? new Properties() : getPropertiesForKeyGen();
    properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
    HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);
    try (final SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
        /*
         * Write 1 (only inserts)
         */
        String newCommitTime = "001";
        client.startCommitWithTime(newCommitTime);
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 200);
        JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
        JavaRDD<WriteStatus> writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime);
        List<WriteStatus> statuses = writeStatusJavaRDD.collect();
        assertNoWriteErrors(statuses);
        client.commit(newCommitTime, jsc().parallelize(statuses));
        client.close();
        Option<Pair<HoodieInstant, HoodieCommitMetadata>> instantCommitMetadataPairOpt = metaClient.getActiveTimeline().getLastCommitMetadataWithValidData();
        assertTrue(instantCommitMetadataPairOpt.isPresent());
        HoodieInstant commitInstant = instantCommitMetadataPairOpt.get().getKey();
        assertEquals("001", commitInstant.getTimestamp());
        assertEquals(HoodieTimeline.DELTA_COMMIT_ACTION, commitInstant.getAction());
        assertEquals(200, getTotalRecordsWritten(instantCommitMetadataPairOpt.get().getValue()));
        Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
        assertFalse(commit.isPresent());
        HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient);
        FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable);
        HoodieTableFileSystemView tableView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
        Stream<HoodieBaseFile> dataFilesToRead = tableView.getLatestBaseFiles();
        assertFalse(dataFilesToRead.findAny().isPresent());
        tableView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
        dataFilesToRead = tableView.getLatestBaseFiles();
        assertTrue(dataFilesToRead.findAny().isPresent(), "Should list the base files we wrote in the delta commit");
        /*
         * Write 2 (inserts + updates)
         */
        newCommitTime = "002";
        // WriteClient with custom config (disable small file handling)
        HoodieWriteConfig smallFileWriteConfig = getHoodieWriteConfigWithSmallFileHandlingOffBuilder(populateMetaFields).withMarkersType(MarkerType.DIRECT.name()).build();
        try (SparkRDDWriteClient nClient = getHoodieWriteClient(smallFileWriteConfig)) {
            nClient.startCommitWithTime(newCommitTime);
            List<HoodieRecord> copyOfRecords = new ArrayList<>(records);
            copyOfRecords = dataGen.generateUpdates(newCommitTime, copyOfRecords);
            copyOfRecords.addAll(dataGen.generateInserts(newCommitTime, 200));
            List<String> dataFiles = tableView.getLatestBaseFiles().map(baseFile -> new Path(baseFile.getPath()).getParent().toString()).collect(Collectors.toList());
            List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, basePath());
            assertEquals(200, recordsRead.size());
            statuses = nClient.upsert(jsc().parallelize(copyOfRecords, 1), newCommitTime).collect();
            // Verify there are no errors
            assertNoWriteErrors(statuses);
            nClient.commit(newCommitTime, jsc().parallelize(statuses));
            copyOfRecords.clear();
        }
        // Schedule a compaction
        /*
         * Write 3 (inserts + updates)
         */
        newCommitTime = "003";
        client.startCommitWithTime(newCommitTime);
        List<HoodieRecord> newInserts = dataGen.generateInserts(newCommitTime, 100);
        records = dataGen.generateUpdates(newCommitTime, records);
        records.addAll(newInserts);
        writeRecords = jsc().parallelize(records, 1);
        writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime);
        statuses = writeStatusJavaRDD.collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        client.commit(newCommitTime, jsc().parallelize(statuses));
        metaClient = HoodieTableMetaClient.reload(metaClient);
        String compactionInstantTime = "004";
        client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
        // Compaction commit
        /*
         * Write 4 (updates)
         */
        newCommitTime = "005";
        client.startCommitWithTime(newCommitTime);
        records = dataGen.generateUpdates(newCommitTime, records);
        writeRecords = jsc().parallelize(records, 1);
        writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime);
        statuses = writeStatusJavaRDD.collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        client.commit(newCommitTime, jsc().parallelize(statuses));
        metaClient = HoodieTableMetaClient.reload(metaClient);
        compactionInstantTime = "006";
        client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
        HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
        client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
        allFiles = listAllBaseFilesInPath(hoodieTable);
        metaClient = HoodieTableMetaClient.reload(metaClient);
        tableView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
        final String compactedCommitTime = metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get().getTimestamp();
        assertTrue(tableView.getLatestBaseFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
        /*
         * Write 5 (updates)
         */
        newCommitTime = "007";
        client.startCommitWithTime(newCommitTime);
        List<HoodieRecord> copyOfRecords = new ArrayList<>(records);
        copyOfRecords = dataGen.generateUpdates(newCommitTime, copyOfRecords);
        copyOfRecords.addAll(dataGen.generateInserts(newCommitTime, 200));
        statuses = client.upsert(jsc().parallelize(copyOfRecords, 1), newCommitTime).collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        client.commit(newCommitTime, jsc().parallelize(statuses));
        copyOfRecords.clear();
        // Rollback latest commit first
        client.restoreToInstant("000");
        metaClient = HoodieTableMetaClient.reload(metaClient);
        allFiles = listAllBaseFilesInPath(hoodieTable);
        tableView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
        dataFilesToRead = tableView.getLatestBaseFiles();
        assertFalse(dataFilesToRead.findAny().isPresent());
        TableFileSystemView.SliceView rtView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
        List<HoodieFileGroup> fileGroups = ((HoodieTableFileSystemView) rtView).getAllFileGroups().collect(Collectors.toList());
        assertTrue(fileGroups.isEmpty());
        // make sure there are no log files remaining
        assertEquals(0L, ((HoodieTableFileSystemView) rtView).getAllFileGroups().filter(fileGroup -> fileGroup.getAllRawFileSlices().noneMatch(f -> f.getLogFiles().count() == 0)).count());
    }
}
Also used : HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) GenericRecord(org.apache.avro.generic.GenericRecord) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) WriteStatus(org.apache.hudi.client.WriteStatus) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
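
The restore at the end is the key step: rewinding to "000", which predates the first commit, undoes the delta commits and the compaction together. Below is a minimal, hedged sketch of that step in isolation, reusing the test's own variables; listAllBaseFilesInPath(...) is the same harness helper the test uses, and the other calls all appear in the snippet above.

// Hedged sketch; assumes the imports in the "Also used" list above.
client.restoreToInstant("000"); // "000" sorts before commit "001", so everything rolls back
metaClient = HoodieTableMetaClient.reload(metaClient);
FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable); // re-list after the restore
HoodieTableFileSystemView tableView = new HoodieTableFileSystemView(
    metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
// After a full restore, neither base files nor file groups should remain.
assertFalse(tableView.getLatestBaseFiles().findAny().isPresent());
assertEquals(0L, tableView.getAllFileGroups().count());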

Example 33 with HoodieFileGroup

use of org.apache.hudi.common.model.HoodieFileGroup in project hudi by apache.

the class TestHoodieTableFileSystemView method testPendingCompactionWithDuplicateFileIdsAcrossPartitions.

@Test
public void testPendingCompactionWithDuplicateFileIdsAcrossPartitions() throws Exception {
    // Put some files in the partition
    String partitionPath1 = "2016/05/01";
    String partitionPath2 = "2016/05/02";
    String partitionPath3 = "2016/05/03";
    String fullPartitionPath1 = basePath + "/" + partitionPath1 + "/";
    new File(fullPartitionPath1).mkdirs();
    String fullPartitionPath2 = basePath + "/" + partitionPath2 + "/";
    new File(fullPartitionPath2).mkdirs();
    String fullPartitionPath3 = basePath + "/" + partitionPath3 + "/";
    new File(fullPartitionPath3).mkdirs();
    String instantTime1 = "1";
    String deltaInstantTime1 = "2";
    String deltaInstantTime2 = "3";
    String fileId = UUID.randomUUID().toString();
    String dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
    new File(fullPartitionPath1 + dataFileName).createNewFile();
    String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
    new File(fullPartitionPath1 + fileName1).createNewFile();
    new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
    new File(fullPartitionPath2 + fileName1).createNewFile();
    new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
    new File(fullPartitionPath3 + fileName1).createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
    HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
    HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
    // Now we list all partitions
    FileStatus[] statuses = metaClient.getFs().listStatus(new Path[] { new Path(fullPartitionPath1), new Path(fullPartitionPath2), new Path(fullPartitionPath3) });
    assertEquals(6, statuses.length);
    refreshFsView();
    Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(p -> fsView.getAllFileGroups(p).count());
    List<HoodieFileGroup> groups = Stream.of(partitionPath1, partitionPath2, partitionPath3).flatMap(p -> fsView.getAllFileGroups(p)).collect(Collectors.toList());
    assertEquals(3, groups.size(), "Expected number of file-groups");
    assertEquals(3, groups.stream().map(HoodieFileGroup::getPartitionPath).collect(Collectors.toSet()).size(), "Partitions must be different for file-groups");
    Set<String> fileIds = groups.stream().map(HoodieFileGroup::getFileGroupId).map(HoodieFileGroupId::getFileId).collect(Collectors.toSet());
    assertEquals(1, fileIds.size(), "File Id must be same");
    assertTrue(fileIds.contains(fileId), "Expected FileId");
    // Setup Pending compaction for all of these fileIds.
    List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
    List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath1).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath1, fileSlices.get(0)));
    fileSlices = rtView.getLatestFileSlices(partitionPath2).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath2, fileSlices.get(0)));
    fileSlices = rtView.getLatestFileSlices(partitionPath3).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0)));
    String compactionRequestedTime = "2";
    String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
    HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
    // Create a Data-file for some of the partitions but this should be skipped by view
    new File(basePath + "/" + partitionPath1 + "/" + compactDataFileName).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + compactDataFileName).createNewFile();
    HoodieInstant compactionInstant = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
    HoodieInstant requested = HoodieTimeline.getCompactionRequestedInstant(compactionInstant.getTimestamp());
    metaClient.getActiveTimeline().saveToCompactionRequested(requested, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
    metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requested);
    // Fake delta-ingestion after compaction-requested
    String deltaInstantTime4 = "4";
    String deltaInstantTime5 = "6";
    String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
    String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile();
    new File(basePath + "/" + partitionPath3 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath3 + "/" + fileName4).createNewFile();
    HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
    HoodieInstant deltaInstant5 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime5);
    saveAsComplete(commitTimeline, deltaInstant4, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant5, Option.empty());
    refreshFsView();
    // Test Data Files
    List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath1).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
    assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
    dataFiles = roView.getAllBaseFiles(partitionPath2).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
    assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
    // Merge API Tests
    Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(partitionPath -> {
        List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
        assertEquals(1, fileSliceList.size(), "Expect file-slice to be merged");
        FileSlice fileSlice = fileSliceList.get(0);
        assertEquals(fileId, fileSlice.getFileId());
        assertEquals(dataFileName, fileSlice.getBaseFile().get().getFileName(), "Data file must be present");
        assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant of penultimate file-slice must be base instant");
        List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
        assertEquals(3, logFiles.size(), "Log files must include those after compaction request");
        assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
        assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
        assertEquals(fileName1, logFiles.get(2).getFileName(), "Log File Order check");
        fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
        assertEquals(1, fileSliceList.size(), "Expect only one file-id");
        fileSlice = fileSliceList.get(0);
        assertEquals(fileId, fileSlice.getFileId());
        assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
        assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant");
        logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
        assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
        assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
        assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
        // Check getLatestFileSlicesBeforeOrOn excluding fileIds in pending compaction
        fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false).collect(Collectors.toList());
        assertEquals(0, fileSliceList.size(), "Expect empty list as file-id is in pending compaction");
    });
    assertEquals(3, fsView.getPendingCompactionOperations().count());
    Set<String> partitionsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getPartitionPath).collect(Collectors.toSet());
    assertEquals(3, partitionsInCompaction.size());
    assertTrue(partitionsInCompaction.contains(partitionPath1));
    assertTrue(partitionsInCompaction.contains(partitionPath2));
    assertTrue(partitionsInCompaction.contains(partitionPath3));
    Set<String> fileIdsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getFileId).collect(Collectors.toSet());
    assertEquals(1, fileIdsInCompaction.size());
    assertTrue(fileIdsInCompaction.contains(fileId));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BaseFile(org.apache.hudi.common.model.BaseFile) File(java.io.File) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
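
The interesting mechanics are the steps that stage the pending compaction: a HoodieCompactionPlan is built from the latest file slice of each partition, then the compaction instant is written as requested and moved to inflight, at which point the view treats log files written after it as belonging to a new file slice based on the compaction instant. A hedged sketch of just that transition, reusing only calls from the snippet above:

// Hedged sketch; assumes the imports in the "Also used" list above.
void markCompactionPending(HoodieTableMetaClient metaClient,
                           List<Pair<String, FileSlice>> partitionFileSlicesPairs,
                           String compactionRequestedTime) throws IOException {
    // The plan covers one (partition, latest-file-slice) pair per partition.
    HoodieCompactionPlan compactionPlan =
        CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
    HoodieInstant requested =
        HoodieTimeline.getCompactionRequestedInstant(compactionRequestedTime);
    metaClient.getActiveTimeline().saveToCompactionRequested(
        requested, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
    // Once inflight, new log files for these file ids use the compaction
    // instant as their base instant, as the assertions above verify.
    metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requested);
}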

Example 34 with HoodieFileGroup

use of org.apache.hudi.common.model.HoodieFileGroup in project hudi by apache.

the class TestHoodieTableFileSystemView method testReplaceWithTimeTravel.

@Test
public void testReplaceWithTimeTravel() throws IOException {
    String partitionPath1 = "2020/06/27";
    new File(basePath + "/" + partitionPath1).mkdirs();
    // create 2 fileId in partition1 - fileId1 is replaced later on.
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    assertFalse(roView.getLatestBaseFiles(partitionPath1).anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)), "No commit, should not find any data file");
    // Only one commit
    String commitTime1 = "1";
    String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
    String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
    new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    refreshFsView();
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    // create commit2 - fileId1 is replaced. new file groups fileId3,fileId4 are created.
    String fileId3 = UUID.randomUUID().toString();
    String fileId4 = UUID.randomUUID().toString();
    String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
    String fileName4 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId4);
    new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
    String commitTime2 = "2";
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add(fileId1);
    partitionToReplaceFileIds.put(partitionPath1, replacedFileIds);
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
    commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
    saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    // make sure the view doesn't include fileId1
    refreshFsView();
    assertEquals(0, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
    assertEquals(1, roView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());
    // exclude commit 2 and make sure fileId1 shows up in view.
    SyncableFileSystemView filteredView = getFileSystemView(metaClient.getActiveTimeline().findInstantsBefore("2"), false);
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId1)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId2)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId3)).count());
    assertEquals(1, filteredView.getLatestBaseFiles(partitionPath1).filter(dfile -> dfile.getFileId().equals(fileId4)).count());
    // ensure replacedFileGroupsBefore works with all instants
    List<HoodieFileGroup> replacedOnInstant1 = fsView.getReplacedFileGroupsBeforeOrOn("1", partitionPath1).collect(Collectors.toList());
    assertEquals(0, replacedOnInstant1.size());
    List<HoodieFileGroup> allReplaced = fsView.getReplacedFileGroupsBeforeOrOn("2", partitionPath1).collect(Collectors.toList());
    assertEquals(1, allReplaced.size());
    assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
    allReplaced = fsView.getReplacedFileGroupsBefore("2", partitionPath1).collect(Collectors.toList());
    assertEquals(0, allReplaced.size());
    allReplaced = fsView.getAllReplacedFileGroups(partitionPath1).collect(Collectors.toList());
    assertEquals(1, allReplaced.size());
    assertEquals(fileId1, allReplaced.get(0).getFileGroupId().getFileId());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) List(java.util.List) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
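
What removes fileId1 from the view is the replace-commit metadata: the commit carries a partition-to-replaced-file-ids map instead of write stats. A hedged sketch of recording such a commit, mirroring the calls above (saveAsComplete(...) is the test's own helper):

// Hedged sketch; assumes the imports in the "Also used" list above plus the
// test-local saveAsComplete(...) helper.
void recordReplaceCommit(HoodieActiveTimeline timeline, String partitionPath,
                         String replacedFileId, String commitTime) throws IOException {
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    partitionToReplaceFileIds.put(partitionPath, Collections.singletonList(replacedFileId));
    // No write stats: the commit only records which file groups were replaced.
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(),
        partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "",
        HoodieTimeline.REPLACE_COMMIT_ACTION);
    HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime);
    saveAsComplete(timeline, instant,
        Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
}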

Example 35 with HoodieFileGroup

use of org.apache.hudi.common.model.HoodieFileGroup in project hudi by apache.

the class TestHoodieTableFileSystemView method testStreamLatestVersions.

protected void testStreamLatestVersions(boolean isLatestFileSliceOnly) throws IOException {
    // Put some files in the partition
    String partitionPath = "2016/05/01";
    String fullPartitionPath = basePath + "/" + partitionPath;
    new File(fullPartitionPath).mkdirs();
    String commitTime1 = "1";
    String commitTime2 = "2";
    String commitTime3 = "3";
    String commitTime4 = "4";
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    String fileId3 = UUID.randomUUID().toString();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, TEST_WRITE_TOKEN)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, TEST_WRITE_TOKEN)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
    new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
    new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile();
    // Now we list the entire partition
    FileStatus[] statuses = metaClient.getFs().listStatus(new Path(fullPartitionPath));
    assertEquals(10, statuses.length);
    refreshFsView();
    fsView.getAllBaseFiles(partitionPath);
    List<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList());
    assertEquals(3, fileGroups.size());
    for (HoodieFileGroup fileGroup : fileGroups) {
        List<FileSlice> slices = fileGroup.getAllFileSlices().collect(Collectors.toList());
        String fileId = fileGroup.getFileGroupId().getFileId();
        if (fileId.equals(fileId1)) {
            assertEquals(isLatestFileSliceOnly ? 1 : 2, slices.size());
            assertEquals(commitTime4, slices.get(0).getBaseInstantTime());
            if (!isLatestFileSliceOnly) {
                assertEquals(commitTime1, slices.get(1).getBaseInstantTime());
            }
        } else if (fileId.equals(fileId2)) {
            assertEquals(isLatestFileSliceOnly ? 1 : 3, slices.size());
            assertEquals(commitTime3, slices.get(0).getBaseInstantTime());
            if (!isLatestFileSliceOnly) {
                assertEquals(commitTime2, slices.get(1).getBaseInstantTime());
                assertEquals(commitTime1, slices.get(2).getBaseInstantTime());
            }
        } else if (fileId.equals(fileId3)) {
            assertEquals(isLatestFileSliceOnly ? 1 : 2, slices.size());
            assertEquals(commitTime4, slices.get(0).getBaseInstantTime());
            if (!isLatestFileSliceOnly) {
                assertEquals(commitTime3, slices.get(1).getBaseInstantTime());
            }
        }
    }
    List<HoodieBaseFile> statuses1 = roView.getLatestBaseFiles().collect(Collectors.toList());
    assertEquals(3, statuses1.size());
    Set<String> filenames = new HashSet<>();
    for (HoodieBaseFile status : statuses1) {
        filenames.add(status.getFileName());
    }
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)));
}
Also used : Path(org.apache.hadoop.fs.Path) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) HashSet(java.util.HashSet)
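
The per-file-group checks above all follow the same walk: list the file groups in a partition, then inspect each group's slices newest-first. A hedged sketch of that pattern on its own:

// Hedged sketch; assumes the imports in the "Also used" list above plus
// HoodieTableFileSystemView, List, and Collectors.
void printSliceHistory(HoodieTableFileSystemView fsView, String partitionPath) {
    List<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList());
    for (HoodieFileGroup fileGroup : fileGroups) {
        // Slices stream newest-first, which is why the assertions above compare
        // slices.get(0) against the most recent commit time.
        List<FileSlice> slices = fileGroup.getAllFileSlices().collect(Collectors.toList());
        System.out.println(fileGroup.getFileGroupId().getFileId() + ": " + slices.size()
            + " slice(s), latest base instant "
            + (slices.isEmpty() ? "n/a" : slices.get(0).getBaseInstantTime()));
    }
}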

Aggregations

HoodieFileGroup (org.apache.hudi.common.model.HoodieFileGroup): 38
FileSlice (org.apache.hudi.common.model.FileSlice): 29
IOException (java.io.IOException): 27
Map (java.util.Map): 27
List (java.util.List): 26
Collectors (java.util.stream.Collectors): 25
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 22
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 22
Option (org.apache.hudi.common.util.Option): 22
Path (org.apache.hadoop.fs.Path): 21
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 21
ArrayList (java.util.ArrayList): 20
FileStatus (org.apache.hadoop.fs.FileStatus): 19
Pair (org.apache.hudi.common.util.collection.Pair): 19
LogManager (org.apache.log4j.LogManager): 18
Logger (org.apache.log4j.Logger): 18
Set (java.util.Set): 17
Stream (java.util.stream.Stream): 17
FSUtils (org.apache.hudi.common.fs.FSUtils): 17