Search in sources:

Example 1 with HoodieWrapperFileSystem

use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.

From the class TestCompactionCommand, method generateCompactionInstances:

private void generateCompactionInstances() throws IOException {
    // create MOR table.
    new TableCommand().createTable(tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "", TimelineLayoutVersion.VERSION_1, HoodieAvroPayload.class.getName());
    CompactionTestUtils.setupAndValidateCompactionOperations(HoodieCLI.getTableMetaClient(), true, 1, 2, 3, 4);
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().reloadActiveTimeline();
    // Transition four compaction instants from inflight to complete
    Arrays.asList("001", "003", "005", "007").forEach(timestamp -> {
        activeTimeline.transitionCompactionInflightToComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, COMPACTION_ACTION, timestamp), Option.empty());
    });
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    HoodieTestUtils.createCompactionCommitInMetadataTable(hadoopConf(), new HoodieWrapperFileSystem(FSUtils.getFs(tablePath, hadoopConf()), new NoOpConsistencyGuard()), tablePath, "007");
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) NoOpConsistencyGuard(org.apache.hudi.common.fs.NoOpConsistencyGuard) HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload)
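For reference, the wrapper used above can be constructed around any Hadoop FileSystem; a minimal sketch, assuming a local table path and a default Hadoop Configuration (both illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.fs.NoOpConsistencyGuard;

// Mirrors the constructor call in the test above: wrap the raw file system
// for the table path; NoOpConsistencyGuard performs no consistency checks.
Configuration conf = new Configuration();
String tablePath = "/tmp/hudi_table"; // illustrative path
HoodieWrapperFileSystem wrapperFs =
    new HoodieWrapperFileSystem(FSUtils.getFs(tablePath, conf), new NoOpConsistencyGuard());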

Example 2 with HoodieWrapperFileSystem

use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.

From the class HoodieTimelineArchiver, method verifyLastMergeArchiveFilesIfNecessary:

/**
 * Check for, and resolve, any failed or unfinished merge of small archive files.
 * @param context HoodieEngineContext used to parallelize deletion of small archive files if necessary.
 * @throws IOException
 */
private void verifyLastMergeArchiveFilesIfNecessary(HoodieEngineContext context) throws IOException {
    if (shouldMergeSmallArchiveFies()) {
        Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
        HoodieWrapperFileSystem fs = metaClient.getFs();
        // We need to either revert or complete the last action.
        if (fs.exists(planPath)) {
            HoodieMergeArchiveFilePlan plan = null;
            try {
                plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fs, planPath).get(), HoodieMergeArchiveFilePlan.class);
            } catch (IOException e) {
                LOG.warn("Parsing merge archive plan failed.", e);
                // A partially written plan file means the last merge action failed while writing the plan.
                fs.delete(planPath);
                return;
            }
            Path mergedArchiveFile = new Path(metaClient.getArchivePath(), plan.getMergedArchiveFileName());
            List<Path> candidates = plan.getCandidate().stream().map(Path::new).collect(Collectors.toList());
            if (candidateAllExists(candidates)) {
                // Revert the last action by deleting mergedArchiveFile if it exists.
                if (fs.exists(mergedArchiveFile)) {
                    fs.delete(mergedArchiveFile, false);
                }
            } else {
                // Try to complete the last action
                if (fs.exists(mergedArchiveFile)) {
                    deleteFilesParallelize(metaClient, plan.getCandidate(), context, true);
                }
            }
            fs.delete(planPath);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan)
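The candidateAllExists helper called above is not part of this excerpt; a plausible minimal sketch, assuming it only checks that every candidate archive file from the plan is still present (the actual Hudi implementation may differ):

// Hypothetical reconstruction: returns true only if every candidate file still exists.
private boolean candidateAllExists(List<Path> candidates) throws IOException {
    for (Path archiveFile : candidates) {
        if (!metaClient.getFs().exists(archiveFile)) {
            // A missing candidate means the last merge already deleted some inputs,
            // so the merged archive file has to be completed rather than reverted.
            return false;
        }
    }
    return true;
}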

Example 3 with HoodieWrapperFileSystem

use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.

From the class HoodieArchivedTimeline, method loadInstants:

/**
 * Reads the selected instants. Do NOT use this directly; use one of the helper methods above.
 * If loadInstantDetails is set to true, this also updates the 'readCommits' map with commit details.
 * If filter is specified, only the filtered instants are loaded.
 * If commitsFilter is specified, only the filtered records are loaded.
 */
private List<HoodieInstant> loadInstants(TimeRangeFilter filter, boolean loadInstantDetails, Function<GenericRecord, Boolean> commitsFilter) {
    try {
        // List all files
        FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
        // Sort files by version suffix in reverse (implies reverse chronological order)
        Arrays.sort(fsStatuses, new ArchiveFileVersionComparator());
        Set<HoodieInstant> instantsInRange = new HashSet<>();
        for (FileStatus fs : fsStatuses) {
            // Read the archived file
            try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
                int instantsInPreviousFile = instantsInRange.size();
                // Read the avro blocks
                while (reader.hasNext()) {
                    HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
                    // Read the records in the block and apply the filters (e.g. on startTime, endTime of records in the block)
                    try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
                        StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
                            .filter(r -> commitsFilter.apply((GenericRecord) r))
                            .map(r -> readCommit((GenericRecord) r, loadInstantDetails))
                            .filter(c -> filter == null || filter.isInRange(c))
                            .forEach(instantsInRange::add);
                    }
                }
                if (filter != null) {
                    int instantsInCurrentFile = instantsInRange.size() - instantsInPreviousFile;
                    if (instantsInPreviousFile > 0 && instantsInCurrentFile == 0) {
                        // This signals that we crossed the lower bound of the desired time window.
                        break;
                    }
                }
            } catch (Exception originalException) {
                // An incomplete merge of small archive files can leave a partial archive file behind;
                // exceptions from reading such a file need to be ignored here.
                try {
                    Path planPath = new Path(metaClient.getArchivePath(), MERGE_ARCHIVE_PLAN_NAME);
                    HoodieWrapperFileSystem fileSystem = metaClient.getFs();
                    if (fileSystem.exists(planPath)) {
                        HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class);
                        String mergedArchiveFileName = plan.getMergedArchiveFileName();
                        if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) {
                            LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here.");
                            continue;
                        }
                    }
                    throw originalException;
                } catch (Exception e) {
                    // For example, the archive file and the plan file may both be corrupted; rethrow the original exception.
                    throw originalException;
                }
            }
        }
        ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange);
        Collections.sort(result);
        return result;
    } catch (IOException e) {
        throw new HoodieIOException("Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
    }
}
Also used : HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) Spliterators(java.util.Spliterators) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) Matcher(java.util.regex.Matcher) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) StreamSupport(java.util.stream.StreamSupport) Nonnull(javax.annotation.Nonnull) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Set(java.util.Set) IOException(java.io.IOException) HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections) Spliterator(java.util.Spliterator)
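The TimeRangeFilter used by loadInstants is not shown in this excerpt; a hedged sketch of what isInRange plausibly does, assuming an inclusive comparison on instant timestamps (the field names and bound semantics are illustrative):

// Hypothetical sketch of the filter applied in loadInstants.
public static class TimeRangeFilter {
    private final String startTs; // lower bound, illustrative semantics
    private final String endTs;   // upper bound, illustrative semantics

    public TimeRangeFilter(String startTs, String endTs) {
        this.startTs = startTs;
        this.endTs = endTs;
    }

    public boolean isInRange(HoodieInstant instant) {
        // Instant timestamps are fixed-width digit strings, so lexicographic
        // comparison matches chronological order.
        String ts = instant.getTimestamp();
        return ts.compareTo(startTs) >= 0 && ts.compareTo(endTs) <= 0;
    }
}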

Example 4 with HoodieWrapperFileSystem

use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.

From the class TestHoodieTableFileSystemView, method testHoodieTableFileSystemViewWithPendingClustering:

/**
 * Creates a hoodie table laid out like:
 * .
 * ├── .hoodie
 * │   ├── .aux
 * │   │   └── .bootstrap
 * │   │       ├── .fileids
 * │   │       └── .partitions
 * │   ├── .temp
 * │   ├── 1.commit
 * │   ├── 1.commit.requested
 * │   ├── 1.inflight
 * │   ├── 2.replacecommit
 * │   ├── 2.replacecommit.inflight
 * │   ├── 2.replacecommit.requested
 * │   ├── 3.commit
 * │   ├── 3.commit.requested
 * │   ├── 3.inflight
 * │   ├── archived
 * │   └── hoodie.properties
 * └── 2020
 *     └── 06
 *         └── 27
 *             ├── 5fe477d2-0150-46d4-833c-1e9cc8da9948_1-0-1_3.parquet
 *             ├── 7e3208c8-fdec-4254-9682-8fff1e51ee8d_1-0-1_2.parquet
 *             ├── e04b0e2d-1467-46b2-8ea6-f4fe950965a5_1-0-1_1.parquet
 *             └── f3936b66-b3db-4fc8-a6d0-b1a7559016e6_1-0-1_1.parquet
 *
 * First, test the fsView API with finished clustering:
 *  1. getLatestBaseFilesBeforeOrOn
 *  2. getBaseFileOn
 *  3. getLatestBaseFilesInRange
 *  4. getAllBaseFiles
 *  5. getLatestBaseFiles
 *
 * Then remove 2.replacecommit, 1.commit, 1.commit.requested, 1.inflight to simulate
 * pending clustering at the earliest position in the active timeline and test these APIs again.
 *
 * @throws IOException
 */
@Test
public void testHoodieTableFileSystemViewWithPendingClustering() throws IOException {
    List<String> latestBaseFilesBeforeOrOn;
    Option<HoodieBaseFile> baseFileOn;
    List<String> latestBaseFilesInRange;
    List<String> allBaseFiles;
    List<String> latestBaseFiles;
    List<String> latestBaseFilesPerPartition;
    String partitionPath = "2020/06/27";
    new File(basePath + "/" + partitionPath).mkdirs();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    // Will create 5 fileIds in the partition.
    // fileId1 and fileId2 will be replaced by fileId3.
    // fileId4 and fileId5 will be committed after clustering finishes.
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    String fileId3 = UUID.randomUUID().toString();
    String fileId4 = UUID.randomUUID().toString();
    String fileId5 = UUID.randomUUID().toString();
    assertFalse(roView.getLatestBaseFiles(partitionPath)
        .anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2)
            || dfile.getFileId().equals(fileId3) || dfile.getFileId().equals(fileId4)
            || dfile.getFileId().equals(fileId5)),
        "No commit, should not find any data file");
    // first insert commit
    String commitTime1 = "1";
    String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
    String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
    new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
    new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
    // build writeStats
    HashMap<String, List<String>> partitionToFile1 = new HashMap<>();
    ArrayList<String> files1 = new ArrayList<>();
    files1.add(fileId1);
    files1.add(fileId2);
    partitionToFile1.put(partitionPath, files1);
    List<HoodieWriteStat> writeStats1 = buildWriteStats(partitionToFile1, commitTime1);
    HoodieCommitMetadata commitMetadata1 = CommitUtils.buildMetadata(writeStats1, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata1.toJsonString().getBytes(StandardCharsets.UTF_8)));
    commitTimeline.reload();
    // replace commit
    String commitTime2 = "2";
    String fileName3 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId3);
    new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
    HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add(fileId1);
    replacedFileIds.add(fileId2);
    partitionToReplaceFileIds.put(partitionPath, replacedFileIds);
    HashMap<String, List<String>> partitionToFile2 = new HashMap<>();
    ArrayList<String> files2 = new ArrayList<>();
    files2.add(fileId3);
    partitionToFile2.put(partitionPath, files2);
    List<HoodieWriteStat> writeStats2 = buildWriteStats(partitionToFile2, commitTime2);
    HoodieCommitMetadata commitMetadata2 = CommitUtils.buildMetadata(writeStats2, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata2.toJsonString().getBytes(StandardCharsets.UTF_8)));
    // another insert commit
    String commitTime3 = "3";
    String fileName4 = FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId4);
    new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
    HoodieInstant instant3 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime3);
    // build writeStats
    HashMap<String, List<String>> partitionToFile3 = new HashMap<>();
    ArrayList<String> files3 = new ArrayList<>();
    files3.add(fileId4);
    partitionToFile3.put(partitionPath, files3);
    List<HoodieWriteStat> writeStats3 = buildWriteStats(partitionToFile3, commitTime3);
    HoodieCommitMetadata commitMetadata3 = CommitUtils.buildMetadata(writeStats3, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant3, Option.of(commitMetadata3.toJsonString().getBytes(StandardCharsets.UTF_8)));
    metaClient.reloadActiveTimeline();
    refreshFsView();
    ArrayList<String> commits = new ArrayList<>();
    commits.add(commitTime1);
    commits.add(commitTime2);
    commits.add(commitTime3);
    // do check
    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesBeforeOrOn.size());
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId3));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
    // could see fileId3 because clustering is committed.
    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
    assertTrue(baseFileOn.isPresent());
    assertEquals(fileId3, baseFileOn.get().getFileId());
    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesInRange.size());
    assertTrue(latestBaseFilesInRange.contains(fileId3));
    assertTrue(latestBaseFilesInRange.contains(fileId4));
    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, allBaseFiles.size());
    assertTrue(allBaseFiles.contains(fileId3));
    assertTrue(allBaseFiles.contains(fileId4));
    // could see fileId3 because clustering is committed.
    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFiles.size());
    assertTrue(latestBaseFiles.contains(fileId3));
    assertTrue(latestBaseFiles.contains(fileId4));
    // could see fileId3 because clustering is committed.
    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesPerPartition.size());
    assertTrue(latestBaseFilesPerPartition.contains(fileId3));
    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
    HoodieWrapperFileSystem fs = metaClient.getFs();
    fs.delete(new Path(basePath + "/.hoodie", "1.commit"), false);
    fs.delete(new Path(basePath + "/.hoodie", "1.inflight"), false);
    fs.delete(new Path(basePath + "/.hoodie", "1.commit.requested"), false);
    fs.delete(new Path(basePath + "/.hoodie", "2.replacecommit"), false);
    metaClient.reloadActiveTimeline();
    refreshFsView();
    // Do the checks again after deleting some commit files
    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesBeforeOrOn.size());
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId1));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId2));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
    assertFalse(baseFileOn.isPresent());
    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesInRange.size());
    assertTrue(latestBaseFilesInRange.contains(fileId1));
    assertTrue(latestBaseFilesInRange.contains(fileId2));
    assertTrue(latestBaseFilesInRange.contains(fileId4));
    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, allBaseFiles.size());
    assertTrue(allBaseFiles.contains(fileId1));
    assertTrue(allBaseFiles.contains(fileId2));
    assertTrue(allBaseFiles.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFiles.size());
    assertTrue(latestBaseFiles.contains(fileId1));
    assertTrue(latestBaseFiles.contains(fileId2));
    assertTrue(latestBaseFiles.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesPerPartition.size());
    assertTrue(latestBaseFilesPerPartition.contains(fileId1));
    assertTrue(latestBaseFilesPerPartition.contains(fileId2));
    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) Date(java.util.Date) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BaseFile(org.apache.hudi.common.model.BaseFile) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieFSPermission(org.apache.hudi.avro.model.HoodieFSPermission) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) FileSlice(org.apache.hudi.common.model.FileSlice) IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) CommitUtils(org.apache.hudi.common.util.CommitUtils) FsAction(org.apache.hadoop.fs.permission.FsAction) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Paths(java.nio.file.Paths) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
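The buildWriteStats helper used throughout this test is not shown; a minimal sketch, assuming it emits one HoodieWriteStat per fileId with the partition path and a data file name derived from the commit time (the naming details are illustrative):

// Hypothetical reconstruction of the helper used above.
private List<HoodieWriteStat> buildWriteStats(Map<String, List<String>> partitionToFileIds, String commitTime) {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    for (Map.Entry<String, List<String>> entry : partitionToFileIds.entrySet()) {
        String partition = entry.getKey();
        for (String fileId : entry.getValue()) {
            HoodieWriteStat stat = new HoodieWriteStat();
            stat.setPartitionPath(partition);
            stat.setFileId(fileId);
            // Path is relative to the table base path (illustrative naming).
            stat.setPath(partition + "/" + FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileId));
            writeStats.add(stat);
        }
    }
    return writeStats;
}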

Example 5 with HoodieWrapperFileSystem

use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.

From the class HoodieTableMetaClient, method getFs:

/**
 * Get the FS implementation for this table.
 */
public HoodieWrapperFileSystem getFs() {
    if (fs == null) {
        FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
        if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) {
            fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, fileSystemRetryConfig.getMaxRetryIntervalMs(), fileSystemRetryConfig.getMaxRetryNumbers(), fileSystemRetryConfig.getInitialRetryIntervalMs(), fileSystemRetryConfig.getRetryExceptions());
        }
        ValidationUtils.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), "File System not expected to be that of HoodieWrapperFileSystem");
        fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled() ? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) : new NoOpConsistencyGuard());
    }
    return fs;
}
Also used : HoodieRetryWrapperFileSystem(org.apache.hudi.common.fs.HoodieRetryWrapperFileSystem) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) FailSafeConsistencyGuard(org.apache.hudi.common.fs.FailSafeConsistencyGuard) NoOpConsistencyGuard(org.apache.hudi.common.fs.NoOpConsistencyGuard)
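As a usage sketch, callers obtain the wrapper once from the meta client and then treat it like any Hadoop FileSystem; the base path below is illustrative:

// Illustrative usage of the lazily constructed wrapper file system.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(new Configuration())
    .setBasePath("/tmp/hudi_table") // illustrative path
    .build();
HoodieWrapperFileSystem fs = metaClient.getFs();
// Every call goes through the wrapper, which adds retries and consistency
// checks when the corresponding configs enable them.
boolean metaDirExists = fs.exists(new Path("/tmp/hudi_table/.hoodie"));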

Aggregations

HoodieWrapperFileSystem (org.apache.hudi.common.fs.HoodieWrapperFileSystem) : 6
IOException (java.io.IOException) : 3
Path (org.apache.hadoop.fs.Path) : 3
StandardCharsets (java.nio.charset.StandardCharsets) : 2
ArrayList (java.util.ArrayList) : 2
Arrays (java.util.Arrays) : 2
Collections (java.util.Collections) : 2
HashMap (java.util.HashMap) : 2
HashSet (java.util.HashSet) : 2
List (java.util.List) : 2
Map (java.util.Map) : 2
Set (java.util.Set) : 2
FileStatus (org.apache.hadoop.fs.FileStatus) : 2
HoodieMergeArchiveFilePlan (org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan) : 2
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile) : 2
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline) : 2
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) : 2
Option (org.apache.hudi.common.util.Option) : 2
HoodieIOException (org.apache.hudi.exception.HoodieIOException) : 2
LogManager (org.apache.log4j.LogManager) : 2