Search in sources :

Example 1 with IndexWriter

use of org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter in project hudi by apache.

the class TestHoodieTableFileSystemView method testViewForFileSlicesWithAsyncCompaction.

/**
 * Helper method to test Views in the presence of concurrent compaction.
 *
 * @param skipCreatingDataFile if set, first File Slice will not have data-file set. This would simulate inserts going
 *        directly to log files
 * @param isCompactionInFlight if set, compaction was inflight (running) when view was tested first time, otherwise
 *        compaction was in requested state
 * @param expTotalFileSlices Total number of file-slices across file-groups in the partition path
 * @param expTotalDataFiles Total number of data-files across file-groups in the partition path
 * @param includeInvalidAndInflight Whether view includes inflight and invalid file-groups.
 * @param testBootstrap enable Bootstrap and test
 * @throws Exception -
 */
protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingDataFile, boolean isCompactionInFlight, int expTotalFileSlices, int expTotalDataFiles, boolean includeInvalidAndInflight, boolean testBootstrap) throws Exception {
    if (testBootstrap) {
        metaClient = HoodieTestUtils.init(tempDir.toAbsolutePath().toString(), getTableType(), BOOTSTRAP_SOURCE_PATH, testBootstrap);
    }
    String partitionPath = "2016/05/01";
    new File(basePath + "/" + partitionPath).mkdirs();
    String fileId = UUID.randomUUID().toString();
    String srcName = "part_0000" + metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
    HoodieFileStatus srcFileStatus = HoodieFileStatus.newBuilder().setPath(HoodiePath.newBuilder().setUri(BOOTSTRAP_SOURCE_PATH + partitionPath + "/" + srcName).build()).setLength(256 * 1024 * 1024L).setAccessTime(new Date().getTime()).setModificationTime(new Date().getTime() + 99999).setBlockReplication(2).setOwner("hudi").setGroup("hudi").setBlockSize(128 * 1024 * 1024L).setPermission(HoodieFSPermission.newBuilder().setUserAction(FsAction.ALL.name()).setGroupAction(FsAction.READ.name()).setOtherAction(FsAction.NONE.name()).setStickyBit(true).build()).build();
    // if skipCreatingDataFile, then instantTime1 below acts like delta-commit, otherwise it is base-commit
    String instantTime1 = testBootstrap && !skipCreatingDataFile ? HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS : "1";
    String deltaInstantTime1 = "2";
    String deltaInstantTime2 = "3";
    String dataFileName = null;
    if (!skipCreatingDataFile) {
        dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
        new File(basePath + "/" + partitionPath + "/" + dataFileName).createNewFile();
    }
    String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
    String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
    new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
    HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
    HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
    if (testBootstrap && !skipCreatingDataFile) {
        try (IndexWriter writer = new HFileBootstrapIndex(metaClient).createWriter(BOOTSTRAP_SOURCE_PATH)) {
            writer.begin();
            BootstrapFileMapping mapping = new BootstrapFileMapping(BOOTSTRAP_SOURCE_PATH, partitionPath, partitionPath, srcFileStatus, fileId);
            List<BootstrapFileMapping> b = new ArrayList<>();
            b.add(mapping);
            writer.appendNextPartition(partitionPath, b);
            writer.finish();
        }
    }
    saveAsComplete(commitTimeline, instant1, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
    refreshFsView();
    List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
    assertEquals(1, fileSlices.size());
    FileSlice fileSlice = fileSlices.get(0);
    assertEquals(instantTime1, fileSlice.getBaseInstantTime());
    if (!skipCreatingDataFile) {
        assertTrue(fileSlice.getBaseFile().isPresent());
        checkExternalFile(srcFileStatus, fileSlice.getBaseFile().get().getBootstrapBaseFile(), testBootstrap);
    }
    String compactionRequestedTime = "4";
    String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
    List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
    partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0)));
    HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
    HoodieInstant compactionInstant;
    if (isCompactionInFlight) {
        // Create a Data-file but this should be skipped by view
        new File(basePath + "/" + partitionPath + "/" + compactDataFileName).createNewFile();
        compactionInstant = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
        HoodieInstant requested = HoodieTimeline.getCompactionRequestedInstant(compactionInstant.getTimestamp());
        commitTimeline.saveToCompactionRequested(requested, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
        commitTimeline.transitionCompactionRequestedToInflight(requested);
    } else {
        compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
        commitTimeline.saveToCompactionRequested(compactionInstant, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
    }
    // View immediately after scheduling compaction
    refreshFsView();
    List<FileSlice> slices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
    assertEquals(1, slices.size(), "Expected latest file-slices");
    assertEquals(compactionRequestedTime, slices.get(0).getBaseInstantTime(), "Base-Instant must be compaction Instant");
    assertFalse(slices.get(0).getBaseFile().isPresent(), "Latest File Slice must not have data-file");
    assertEquals(0, slices.get(0).getLogFiles().count(), "Latest File Slice must not have any log-files");
    // Fake delta-ingestion after compaction-requested
    String deltaInstantTime4 = "5";
    String deltaInstantTime5 = "6";
    List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2, compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
    String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
    String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
    HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
    HoodieInstant deltaInstant5 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime5);
    saveAsComplete(commitTimeline, deltaInstant4, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant5, Option.empty());
    refreshFsView();
    List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath).collect(Collectors.toList());
    if (skipCreatingDataFile) {
        assertTrue(dataFiles.isEmpty(), "No data file expected");
    } else {
        assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
        assertEquals(dataFileName, dataFiles.get(0).getFileName(), "Expect only valid data-file");
    }
    // Merge API Tests
    List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
    assertEquals(1, fileSliceList.size(), "Expect file-slice to be merged");
    fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId());
    if (!skipCreatingDataFile) {
        assertEquals(dataFileName, fileSlice.getBaseFile().get().getFileName(), "Data file must be present");
        checkExternalFile(srcFileStatus, fileSlice.getBaseFile().get().getBootstrapBaseFile(), testBootstrap);
    } else {
        assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected as it was not created");
    }
    assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant of penultimate file-slice must be base instant");
    List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(4, logFiles.size(), "Log files must include those after compaction request");
    assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
    assertEquals(fileName2, logFiles.get(2).getFileName(), "Log File Order check");
    assertEquals(fileName1, logFiles.get(3).getFileName(), "Log File Order check");
    fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
    assertEquals(1, fileSliceList.size(), "Expect only one file-id");
    fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId());
    assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
    assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant");
    logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
    assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
    // Data Files API tests
    dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
    if (skipCreatingDataFile) {
        assertEquals(0, dataFiles.size(), "Expect no data file to be returned");
    } else {
        assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
        dataFiles.forEach(df -> assertEquals(df.getCommitTime(), instantTime1, "Expect data-file for instant 1 be returned"));
        checkExternalFile(srcFileStatus, dataFiles.get(0).getBootstrapBaseFile(), testBootstrap);
    }
    dataFiles = roView.getLatestBaseFiles(partitionPath).collect(Collectors.toList());
    if (skipCreatingDataFile) {
        assertEquals(0, dataFiles.size(), "Expect no data file to be returned");
    } else {
        assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
        dataFiles.forEach(df -> assertEquals(df.getCommitTime(), instantTime1, "Expect data-file for instant 1 be returned"));
        checkExternalFile(srcFileStatus, dataFiles.get(0).getBootstrapBaseFile(), testBootstrap);
    }
    dataFiles = roView.getLatestBaseFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
    if (skipCreatingDataFile) {
        assertEquals(0, dataFiles.size(), "Expect no data file to be returned");
    } else {
        assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
        dataFiles.forEach(df -> assertEquals(df.getCommitTime(), instantTime1, "Expect data-file for instant 1 be returned"));
        checkExternalFile(srcFileStatus, dataFiles.get(0).getBootstrapBaseFile(), testBootstrap);
    }
    dataFiles = roView.getLatestBaseFilesInRange(allInstantTimes).collect(Collectors.toList());
    if (skipCreatingDataFile) {
        assertEquals(0, dataFiles.size(), "Expect no data file to be returned");
    } else {
        assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
        dataFiles.forEach(df -> assertEquals(df.getCommitTime(), instantTime1, "Expect data-file for instant 1 be returned"));
        checkExternalFile(srcFileStatus, dataFiles.get(0).getBootstrapBaseFile(), testBootstrap);
    }
    // Inflight/Orphan File-groups needs to be in the view
    // There is a data-file with this inflight file-id
    final String inflightFileId1 = UUID.randomUUID().toString();
    // There is a log-file with this inflight file-id
    final String inflightFileId2 = UUID.randomUUID().toString();
    // There is an orphan data file with this file-id
    final String orphanFileId1 = UUID.randomUUID().toString();
    // There is an orphan log data file with this file-id
    final String orphanFileId2 = UUID.randomUUID().toString();
    final String invalidInstantId = "INVALIDTIME";
    String inflightDeltaInstantTime = "7";
    String orphanDataFileName = FSUtils.makeDataFileName(invalidInstantId, TEST_WRITE_TOKEN, orphanFileId1);
    new File(basePath + "/" + partitionPath + "/" + orphanDataFileName).createNewFile();
    String orphanLogFileName = FSUtils.makeLogFileName(orphanFileId2, HoodieLogFile.DELTA_EXTENSION, invalidInstantId, 0, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath + "/" + orphanLogFileName).createNewFile();
    String inflightDataFileName = FSUtils.makeDataFileName(inflightDeltaInstantTime, TEST_WRITE_TOKEN, inflightFileId1);
    new File(basePath + "/" + partitionPath + "/" + inflightDataFileName).createNewFile();
    String inflightLogFileName = FSUtils.makeLogFileName(inflightFileId2, HoodieLogFile.DELTA_EXTENSION, inflightDeltaInstantTime, 0, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath + "/" + inflightLogFileName).createNewFile();
    // Mark instant as inflight
    commitTimeline.createNewInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, inflightDeltaInstantTime));
    commitTimeline.transitionRequestedToInflight(new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, inflightDeltaInstantTime), Option.empty());
    refreshFsView();
    List<FileSlice> allRawFileSlices = getAllRawFileSlices(partitionPath).collect(Collectors.toList());
    dataFiles = allRawFileSlices.stream().flatMap(slice -> {
        if (slice.getBaseFile().isPresent()) {
            return Stream.of(slice.getBaseFile().get());
        }
        return Stream.empty();
    }).collect(Collectors.toList());
    if (includeInvalidAndInflight) {
        assertEquals(2 + (isCompactionInFlight ? 1 : 0) + (skipCreatingDataFile ? 0 : 1), dataFiles.size(), "Inflight/Orphan data-file is also expected");
        Set<String> fileNames = dataFiles.stream().map(HoodieBaseFile::getFileName).collect(Collectors.toSet());
        assertTrue(fileNames.contains(orphanDataFileName), "Expect orphan data-file to be present");
        assertTrue(fileNames.contains(inflightDataFileName), "Expect inflight data-file to be present");
        if (!skipCreatingDataFile) {
            assertTrue(fileNames.contains(dataFileName), "Expect old committed data-file");
        }
        if (isCompactionInFlight) {
            assertTrue(fileNames.contains(compactDataFileName), "Expect inflight compacted data file to be present");
        }
        fileSliceList = getLatestRawFileSlices(partitionPath).collect(Collectors.toList());
        assertEquals(includeInvalidAndInflight ? 5 : 1, fileSliceList.size(), "Expect both inflight and orphan file-slice to be included");
        Map<String, FileSlice> fileSliceMap = fileSliceList.stream().collect(Collectors.toMap(FileSlice::getFileId, r -> r));
        FileSlice orphanFileSliceWithDataFile = fileSliceMap.get(orphanFileId1);
        FileSlice orphanFileSliceWithLogFile = fileSliceMap.get(orphanFileId2);
        FileSlice inflightFileSliceWithDataFile = fileSliceMap.get(inflightFileId1);
        FileSlice inflightFileSliceWithLogFile = fileSliceMap.get(inflightFileId2);
        assertEquals(invalidInstantId, orphanFileSliceWithDataFile.getBaseInstantTime(), "Orphan File Slice with data-file check base-commit");
        assertEquals(orphanDataFileName, orphanFileSliceWithDataFile.getBaseFile().get().getFileName(), "Orphan File Slice with data-file check data-file");
        assertEquals(0, orphanFileSliceWithDataFile.getLogFiles().count(), "Orphan File Slice with data-file check data-file");
        assertEquals(inflightDeltaInstantTime, inflightFileSliceWithDataFile.getBaseInstantTime(), "Inflight File Slice with data-file check base-commit");
        assertEquals(inflightDataFileName, inflightFileSliceWithDataFile.getBaseFile().get().getFileName(), "Inflight File Slice with data-file check data-file");
        assertEquals(0, inflightFileSliceWithDataFile.getLogFiles().count(), "Inflight File Slice with data-file check data-file");
        assertEquals(invalidInstantId, orphanFileSliceWithLogFile.getBaseInstantTime(), "Orphan File Slice with log-file check base-commit");
        assertFalse(orphanFileSliceWithLogFile.getBaseFile().isPresent(), "Orphan File Slice with log-file check data-file");
        logFiles = orphanFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
        assertEquals(1, logFiles.size(), "Orphan File Slice with log-file check data-file");
        assertEquals(orphanLogFileName, logFiles.get(0).getFileName(), "Orphan File Slice with log-file check data-file");
        assertEquals(inflightDeltaInstantTime, inflightFileSliceWithLogFile.getBaseInstantTime(), "Inflight File Slice with log-file check base-commit");
        assertFalse(inflightFileSliceWithLogFile.getBaseFile().isPresent(), "Inflight File Slice with log-file check data-file");
        logFiles = inflightFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
        assertEquals(1, logFiles.size(), "Inflight File Slice with log-file check data-file");
        assertEquals(inflightLogFileName, logFiles.get(0).getFileName(), "Inflight File Slice with log-file check data-file");
    }
    compactionInstant = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
    // Now simulate Compaction completing - Check the view
    if (!isCompactionInFlight) {
        // For inflight compaction, we already create a data-file to test concurrent inflight case.
        // If we skipped creating data file corresponding to compaction commit, create it now
        new File(basePath + "/" + partitionPath + "/" + compactDataFileName).createNewFile();
        commitTimeline.createNewInstant(compactionInstant);
    }
    commitTimeline.saveAsComplete(compactionInstant, Option.empty());
    refreshFsView();
    // populate the cache
    roView.getAllBaseFiles(partitionPath);
    fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
    LOG.info("FILESLICE LIST=" + fileSliceList);
    dataFiles = fileSliceList.stream().map(FileSlice::getBaseFile).filter(Option::isPresent).map(Option::get).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "Expect only one data-files in latest view as there is only one file-group");
    assertEquals(compactDataFileName, dataFiles.get(0).getFileName(), "Data Filename must match");
    assertEquals(1, fileSliceList.size(), "Only one latest file-slice in the partition");
    assertFalse(dataFiles.get(0).getBootstrapBaseFile().isPresent(), "No external data file must be present");
    fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId(), "Check file-Id is set correctly");
    assertEquals(compactDataFileName, fileSlice.getBaseFile().get().getFileName(), "Check data-filename is set correctly");
    assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Ensure base-instant is now compaction request instant");
    logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(2, logFiles.size(), "Only log-files after compaction request shows up");
    assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
    // Data Files API tests
    dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
    assertFalse(dataFiles.get(0).getBootstrapBaseFile().isPresent(), "No external data file must be present");
    dataFiles.forEach(df -> {
        assertEquals(df.getCommitTime(), compactionRequestedTime, "Expect data-file created by compaction be returned");
        assertFalse(df.getBootstrapBaseFile().isPresent(), "No external data file must be present");
    });
    dataFiles = roView.getLatestBaseFiles(partitionPath).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
    dataFiles.forEach(df -> {
        assertEquals(df.getCommitTime(), compactionRequestedTime, "Expect data-file created by compaction be returned");
        assertFalse(df.getBootstrapBaseFile().isPresent(), "No external data file must be present");
    });
    dataFiles = roView.getLatestBaseFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
    dataFiles.forEach(df -> {
        assertEquals(df.getCommitTime(), compactionRequestedTime, "Expect data-file created by compaction be returned");
        assertFalse(df.getBootstrapBaseFile().isPresent(), "No external data file must be present");
    });
    dataFiles = roView.getLatestBaseFilesInRange(allInstantTimes).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "Expect only one data-file to be sent");
    dataFiles.forEach(df -> {
        assertEquals(df.getCommitTime(), compactionRequestedTime, "Expect data-file created by compaction be returned");
        assertFalse(df.getBootstrapBaseFile().isPresent(), "No external data file must be present");
    });
    assertEquals(expTotalFileSlices, rtView.getAllFileSlices(partitionPath).count(), "Total number of file-slices in partitions matches expected");
    assertEquals(expTotalDataFiles, roView.getAllBaseFiles(partitionPath).count(), "Total number of data-files in partitions matches expected");
    // file-groups includes inflight/invalid file-ids
    assertEquals(5, fsView.getAllFileGroups(partitionPath).count(), "Total number of file-groups in partitions matches expected");
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) Date(java.util.Date) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BaseFile(org.apache.hudi.common.model.BaseFile) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieFSPermission(org.apache.hudi.avro.model.HoodieFSPermission) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) FileSlice(org.apache.hudi.common.model.FileSlice) IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) CommitUtils(org.apache.hudi.common.util.CommitUtils) FsAction(org.apache.hadoop.fs.permission.FsAction) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Paths(java.nio.file.Paths) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Date(java.util.Date) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) Option(org.apache.hudi.common.util.Option) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File)

Example 2 with IndexWriter

use of org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter in project hudi by apache.

the class TestBootstrapIndex method generateBootstrapIndex.

public static Map<String, List<BootstrapFileMapping>> generateBootstrapIndex(HoodieTableMetaClient metaClient, String sourceBasePath, String[] partitions, int numEntriesPerPartition) {
    Map<String, List<BootstrapFileMapping>> bootstrapMapping = generateBootstrapMapping(sourceBasePath, partitions, numEntriesPerPartition);
    BootstrapIndex index = new HFileBootstrapIndex(metaClient);
    try (IndexWriter writer = index.createWriter(sourceBasePath)) {
        writer.begin();
        bootstrapMapping.entrySet().stream().forEach(e -> writer.appendNextPartition(e.getKey(), e.getValue()));
        writer.finish();
    }
    return bootstrapMapping;
}
Also used : IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) ArrayList(java.util.ArrayList) List(java.util.List) NoOpBootstrapIndex(org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex)

Aggregations

ArrayList (java.util.ArrayList)2 List (java.util.List)2 IndexWriter (org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter)2 HFileBootstrapIndex (org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex)2 File (java.io.File)1 IOException (java.io.IOException)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Paths (java.nio.file.Paths)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 Set (java.util.Set)1 UUID (java.util.UUID)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 Path (org.apache.hadoop.fs.Path)1