use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.
the class TestCompactionCommand method generateCompactionInstances.
private void generateCompactionInstances() throws IOException {
  // Create a MOR table.
  new TableCommand().createTable(tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "", TimelineLayoutVersion.VERSION_1, HoodieAvroPayload.class.getName());
  CompactionTestUtils.setupAndValidateCompactionOperations(HoodieCLI.getTableMetaClient(), true, 1, 2, 3, 4);
  HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().reloadActiveTimeline();
  // Complete the four pending compaction instants created above.
  Arrays.asList("001", "003", "005", "007").forEach(timestamp -> {
    activeTimeline.transitionCompactionInflightToComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, COMPACTION_ACTION, timestamp), Option.empty());
  });
  // Simulate a compaction commit in the metadata table timeline
  // so that archival in the data table can happen.
  HoodieTestUtils.createCompactionCommitInMetadataTable(hadoopConf(), new HoodieWrapperFileSystem(FSUtils.getFs(tablePath, hadoopConf()), new NoOpConsistencyGuard()), tablePath, "007");
}
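For reference, a minimal sketch of constructing a HoodieWrapperFileSystem directly, mirroring the constructor call in the test above. The class name, table path, and Configuration here are placeholders, not values from the Hudi test suite.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.fs.NoOpConsistencyGuard;

public class WrapperFsSketch {
  public static void main(String[] args) {
    // Placeholder path and configuration.
    String tablePath = "/tmp/hudi/test_table";
    Configuration conf = new Configuration();
    // Resolve the raw FileSystem for the path and wrap it without consistency checks,
    // mirroring the constructor call in generateCompactionInstances above.
    FileSystem rawFs = FSUtils.getFs(tablePath, conf);
    HoodieWrapperFileSystem wrapperFs = new HoodieWrapperFileSystem(rawFs, new NoOpConsistencyGuard());
    System.out.println("Wrapped file system URI: " + wrapperFs.getUri());
  }
}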
use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.
the class HoodieTimelineArchiver method verifyLastMergeArchiveFilesIfNecessary.
/**
 * Check for, and resolve, any failed or unfinished merge of small archive files.
 * @param context HoodieEngineContext used to delete small archive files in parallel if necessary.
 * @throws IOException
 */
private void verifyLastMergeArchiveFilesIfNecessary(HoodieEngineContext context) throws IOException {
  if (shouldMergeSmallArchiveFies()) {
    Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
    HoodieWrapperFileSystem fs = metaClient.getFs();
    // We need to revert or complete the last action.
    if (fs.exists(planPath)) {
      HoodieMergeArchiveFilePlan plan = null;
      try {
        plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fs, planPath).get(), HoodieMergeArchiveFilePlan.class);
      } catch (IOException e) {
        LOG.warn("Parsing merge archive plan failed.", e);
        // A partially written plan file means the last merge action failed while writing the plan.
        fs.delete(planPath);
        return;
      }
      Path mergedArchiveFile = new Path(metaClient.getArchivePath(), plan.getMergedArchiveFileName());
      List<Path> candidates = plan.getCandidate().stream().map(Path::new).collect(Collectors.toList());
      if (candidateAllExists(candidates)) {
        // Revert the last action by deleting the merged archive file if it exists.
        if (fs.exists(mergedArchiveFile)) {
          fs.delete(mergedArchiveFile, false);
        }
      } else {
        // Try to complete the last action.
        if (fs.exists(mergedArchiveFile)) {
          deleteFilesParallelize(metaClient, plan.getCandidate(), context, true);
        }
      }
      fs.delete(planPath);
    }
  }
}
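The snippet relies on a candidateAllExists helper that is not shown here. A hypothetical sketch, assuming it simply verifies that every candidate small archive file listed in the plan is still present:
// Hypothetical sketch of a candidateAllExists-style check (the real helper is not shown above).
private boolean candidateAllExists(List<Path> candidates) throws IOException {
  HoodieWrapperFileSystem fs = metaClient.getFs();
  for (Path candidate : candidates) {
    if (!fs.exists(candidate)) {
      // At least one candidate is already gone, so the previous merge progressed past the
      // point where simply deleting the merged archive file would be a clean revert.
      return false;
    }
  }
  return true;
}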
use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.
the class HoodieArchivedTimeline method loadInstants.
/**
 * This method reads the selected instants. Do NOT use this directly; use one of the helper methods above.
 * If loadInstantDetails is set to true, this also updates the 'readCommits' map with commit details.
 * If a filter is specified, only the filtered instants are loaded.
 * If commitsFilter is specified, only the filtered records are loaded.
 */
private List<HoodieInstant> loadInstants(TimeRangeFilter filter, boolean loadInstantDetails, Function<GenericRecord, Boolean> commitsFilter) {
  try {
    // List all files
    FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    // Sort files by version suffix in reverse (implies reverse chronological order)
    Arrays.sort(fsStatuses, new ArchiveFileVersionComparator());
    Set<HoodieInstant> instantsInRange = new HashSet<>();
    for (FileStatus fs : fsStatuses) {
      // Read the archived file
      try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
        int instantsInPreviousFile = instantsInRange.size();
        // Read the avro blocks
        while (reader.hasNext()) {
          HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
          // Parse the records in each block, applying the commits filter and the time-range filter
          try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
            StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
                .filter(r -> commitsFilter.apply((GenericRecord) r))
                .map(r -> readCommit((GenericRecord) r, loadInstantDetails))
                .filter(c -> filter == null || filter.isInRange(c))
                .forEach(instantsInRange::add);
          }
        }
        if (filter != null) {
          int instantsInCurrentFile = instantsInRange.size() - instantsInPreviousFile;
          if (instantsInPreviousFile > 0 && instantsInCurrentFile == 0) {
            // This signals we crossed the lower bound of the desired time window.
            break;
          }
        }
      } catch (Exception originalException) {
        // An in-progress merge of small archive files may leave an incomplete archive file behind;
        // that kind of read failure needs to be ignored here.
        try {
          Path planPath = new Path(metaClient.getArchivePath(), MERGE_ARCHIVE_PLAN_NAME);
          HoodieWrapperFileSystem fileSystem = metaClient.getFs();
          if (fileSystem.exists(planPath)) {
            HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class);
            String mergedArchiveFileName = plan.getMergedArchiveFileName();
            if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) {
              LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here.");
              continue;
            }
          }
          throw originalException;
        } catch (Exception e) {
          // For example, both the archive file and the plan file may be corrupted.
          throw originalException;
        }
      }
    }
    ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange);
    Collections.sort(result);
    return result;
  } catch (IOException e) {
    throw new HoodieIOException("Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
  }
}
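The reverse ordering produced by ArchiveFileVersionComparator is what makes the early break above safe. A self-contained sketch of that idea, using simplified archive file names (the real names may carry additional tokens), could look like this:
import java.util.Arrays;
import java.util.Comparator;

public class ArchiveFileOrderingSketch {

  // Extract the numeric version suffix from a simplified archive file name
  // such as ".commits_.archive.7" (illustrative naming only).
  static int versionOf(String name) {
    return Integer.parseInt(name.substring(name.lastIndexOf('.') + 1));
  }

  public static void main(String[] args) {
    String[] files = {".commits_.archive.1", ".commits_.archive.3", ".commits_.archive.2"};
    // Higher version suffixes hold newer instants, so sorting in reverse lets the reader
    // stop as soon as a file yields no instants inside the requested time window.
    Arrays.sort(files, Comparator.<String>comparingInt(ArchiveFileOrderingSketch::versionOf).reversed());
    System.out.println(Arrays.toString(files));
    // Prints: [.commits_.archive.3, .commits_.archive.2, .commits_.archive.1]
  }
}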
use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.
the class TestHoodieTableFileSystemView method testHoodieTableFileSystemViewWithPendingClustering.
/**
* Create a hoodie table like:
* .
* ├── .hoodie
* │   ├── .aux
* │   │   └── .bootstrap
* │   │       ├── .fileids
* │   │       └── .partitions
* │   ├── .temp
* │   ├── 1.commit
* │   ├── 1.commit.requested
* │   ├── 1.inflight
* │   ├── 2.replacecommit
* │   ├── 2.replacecommit.inflight
* │   ├── 2.replacecommit.requested
* │   ├── 3.commit
* │   ├── 3.commit.requested
* │   ├── 3.inflight
* │   ├── archived
* │   └── hoodie.properties
* └── 2020
*     └── 06
*         └── 27
*             ├── 5fe477d2-0150-46d4-833c-1e9cc8da9948_1-0-1_3.parquet
*             ├── 7e3208c8-fdec-4254-9682-8fff1e51ee8d_1-0-1_2.parquet
*             ├── e04b0e2d-1467-46b2-8ea6-f4fe950965a5_1-0-1_1.parquet
*             └── f3936b66-b3db-4fc8-a6d0-b1a7559016e6_1-0-1_1.parquet
*
* First, test the fsView APIs with finished clustering:
* 1. getLatestBaseFilesBeforeOrOn
* 2. getBaseFileOn
* 3. getLatestBaseFilesInRange
* 4. getAllBaseFiles
* 5. getLatestBaseFiles
*
* Then remove 2.replacecommit, 1.commit, 1.commit.requested, 1.inflight to simulate
* pending clustering at the earliest position in the active timeline and test these APIs again.
*
* @throws IOException
*/
@Test
public void testHoodieTableFileSystemViewWithPendingClustering() throws IOException {
  List<String> latestBaseFilesBeforeOrOn;
  Option<HoodieBaseFile> baseFileOn;
  List<String> latestBaseFilesInRange;
  List<String> allBaseFiles;
  List<String> latestBaseFiles;
  List<String> latestBaseFilesPerPartition;
  String partitionPath = "2020/06/27";
  new File(basePath + "/" + partitionPath).mkdirs();
  HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
  // Will create 5 file IDs in the partition:
  // fileId1 and fileId2 will be replaced by fileId3;
  // fileId4 and fileId5 will be committed after clustering finishes.
  String fileId1 = UUID.randomUUID().toString();
  String fileId2 = UUID.randomUUID().toString();
  String fileId3 = UUID.randomUUID().toString();
  String fileId4 = UUID.randomUUID().toString();
  String fileId5 = UUID.randomUUID().toString();
  assertFalse(roView.getLatestBaseFiles(partitionPath).anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2) || dfile.getFileId().equals(fileId3) || dfile.getFileId().equals(fileId4) || dfile.getFileId().equals(fileId5)), "No commit, should not find any data file");
  // first insert commit
  String commitTime1 = "1";
  String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
  String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
  new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
  new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
  HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
  // build writeStats
  HashMap<String, List<String>> partitionToFile1 = new HashMap<>();
  ArrayList<String> files1 = new ArrayList<>();
  files1.add(fileId1);
  files1.add(fileId2);
  partitionToFile1.put(partitionPath, files1);
  List<HoodieWriteStat> writeStats1 = buildWriteStats(partitionToFile1, commitTime1);
  HoodieCommitMetadata commitMetadata1 = CommitUtils.buildMetadata(writeStats1, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
  saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata1.toJsonString().getBytes(StandardCharsets.UTF_8)));
  commitTimeline.reload();
  // replace commit
  String commitTime2 = "2";
  String fileName3 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId3);
  new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
  HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
  Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
  List<String> replacedFileIds = new ArrayList<>();
  replacedFileIds.add(fileId1);
  replacedFileIds.add(fileId2);
  partitionToReplaceFileIds.put(partitionPath, replacedFileIds);
  HashMap<String, List<String>> partitionToFile2 = new HashMap<>();
  ArrayList<String> files2 = new ArrayList<>();
  files2.add(fileId3);
  partitionToFile2.put(partitionPath, files2);
  List<HoodieWriteStat> writeStats2 = buildWriteStats(partitionToFile2, commitTime2);
  HoodieCommitMetadata commitMetadata2 = CommitUtils.buildMetadata(writeStats2, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
  saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata2.toJsonString().getBytes(StandardCharsets.UTF_8)));
  // another insert commit
  String commitTime3 = "3";
  String fileName4 = FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId4);
  new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
  HoodieInstant instant3 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime3);
  // build writeStats
  HashMap<String, List<String>> partitionToFile3 = new HashMap<>();
  ArrayList<String> files3 = new ArrayList<>();
  files3.add(fileId4);
  partitionToFile3.put(partitionPath, files3);
  List<HoodieWriteStat> writeStats3 = buildWriteStats(partitionToFile3, commitTime3);
  HoodieCommitMetadata commitMetadata3 = CommitUtils.buildMetadata(writeStats3, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
  saveAsComplete(commitTimeline, instant3, Option.of(commitMetadata3.toJsonString().getBytes(StandardCharsets.UTF_8)));
  metaClient.reloadActiveTimeline();
  refreshFsView();
  ArrayList<String> commits = new ArrayList<>();
  commits.add(commitTime1);
  commits.add(commitTime2);
  commits.add(commitTime3);
  // Do checks.
  latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(2, latestBaseFilesBeforeOrOn.size());
  assertTrue(latestBaseFilesBeforeOrOn.contains(fileId3));
  assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
  // Can see fileId3 because clustering is committed.
  baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
  assertTrue(baseFileOn.isPresent());
  assertEquals(baseFileOn.get().getFileId(), fileId3);
  latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(2, latestBaseFilesInRange.size());
  assertTrue(latestBaseFilesInRange.contains(fileId3));
  assertTrue(latestBaseFilesInRange.contains(fileId4));
  allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(2, allBaseFiles.size());
  assertTrue(allBaseFiles.contains(fileId3));
  assertTrue(allBaseFiles.contains(fileId4));
  // Can see fileId3 because clustering is committed.
  latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(2, latestBaseFiles.size());
  assertTrue(latestBaseFiles.contains(fileId3));
  assertTrue(latestBaseFiles.contains(fileId4));
  // Can see fileId3 because clustering is committed.
  latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(2, latestBaseFilesPerPartition.size());
  assertTrue(latestBaseFilesPerPartition.contains(fileId3));
  assertTrue(latestBaseFilesPerPartition.contains(fileId4));
  HoodieWrapperFileSystem fs = metaClient.getFs();
  fs.delete(new Path(basePath + "/.hoodie", "1.commit"), false);
  fs.delete(new Path(basePath + "/.hoodie", "1.inflight"), false);
  fs.delete(new Path(basePath + "/.hoodie", "1.commit.requested"), false);
  fs.delete(new Path(basePath + "/.hoodie", "2.replacecommit"), false);
  metaClient.reloadActiveTimeline();
  refreshFsView();
  // Do checks after deleting some commit files.
  latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(3, latestBaseFilesBeforeOrOn.size());
  assertTrue(latestBaseFilesBeforeOrOn.contains(fileId1));
  assertTrue(latestBaseFilesBeforeOrOn.contains(fileId2));
  assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
  // Cannot see fileId3 because clustering is not committed.
  baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
  assertFalse(baseFileOn.isPresent());
  latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(3, latestBaseFilesInRange.size());
  assertTrue(latestBaseFilesInRange.contains(fileId1));
  assertTrue(latestBaseFilesInRange.contains(fileId2));
  assertTrue(latestBaseFilesInRange.contains(fileId4));
  allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(3, allBaseFiles.size());
  assertTrue(allBaseFiles.contains(fileId1));
  assertTrue(allBaseFiles.contains(fileId2));
  assertTrue(allBaseFiles.contains(fileId4));
  // Cannot see fileId3 because clustering is not committed.
  latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(3, latestBaseFiles.size());
  assertTrue(latestBaseFiles.contains(fileId1));
  assertTrue(latestBaseFiles.contains(fileId2));
  assertTrue(latestBaseFiles.contains(fileId4));
  // Cannot see fileId3 because clustering is not committed.
  latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  assertEquals(3, latestBaseFilesPerPartition.size());
  assertTrue(latestBaseFilesPerPartition.contains(fileId1));
  assertTrue(latestBaseFilesPerPartition.contains(fileId2));
  assertTrue(latestBaseFilesPerPartition.contains(fileId4));
}
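The test repeats the .map(HoodieBaseFile::getFileId).collect(Collectors.toList()) pipeline for every view query. A small hypothetical helper (not part of Hudi) could fold that pattern:
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hudi.common.model.HoodieBaseFile;

// Hypothetical test utility; collects the file IDs of a stream of base files.
final class BaseFileIds {
  static List<String> of(Stream<HoodieBaseFile> baseFiles) {
    return baseFiles.map(HoodieBaseFile::getFileId).collect(Collectors.toList());
  }
}
With such a helper, the assertions above would read, for example, assertTrue(BaseFileIds.of(fsView.getAllBaseFiles(partitionPath)).contains(fileId4)).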
use of org.apache.hudi.common.fs.HoodieWrapperFileSystem in project hudi by apache.
the class HoodieTableMetaClient method getFs.
/**
 * Get the FS implementation for this table.
 */
public HoodieWrapperFileSystem getFs() {
  if (fs == null) {
    FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
    if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) {
      fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, fileSystemRetryConfig.getMaxRetryIntervalMs(), fileSystemRetryConfig.getMaxRetryNumbers(), fileSystemRetryConfig.getInitialRetryIntervalMs(), fileSystemRetryConfig.getRetryExceptions());
    }
    ValidationUtils.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), "File System not expected to be that of HoodieWrapperFileSystem");
    fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled() ? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) : new NoOpConsistencyGuard());
  }
  return fs;
}
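A hedged usage sketch of getFs(): given an already-built HoodieTableMetaClient, the returned wrapper can be used for ordinary FileSystem operations such as checking whether the archive directory exists. The class and method names below are illustrative, not part of Hudi.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class MetaClientFsExample {
  // Illustrative only: assumes an already-built HoodieTableMetaClient for an existing table.
  static void printArchiveDirStatus(HoodieTableMetaClient metaClient) throws IOException {
    // getFs() lazily wraps the raw FileSystem (optionally with retries and a consistency
    // guard, as shown above) and caches the wrapper on the meta client.
    HoodieWrapperFileSystem fs = metaClient.getFs();
    Path archiveDir = new Path(metaClient.getArchivePath());
    System.out.println("Archive dir " + archiveDir + " exists: " + fs.exists(archiveDir));
  }
}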