Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.
The class HoodieTimelineArchiver, method verifyLastMergeArchiveFilesIfNecessary.
/**
 * Check for and resolve any failed or unfinished merge of small archive files.
 * @param context HoodieEngineContext used to parallelize the deletion of small archive files if necessary.
 * @throws IOException if the plan file or the archive files cannot be read or deleted.
 */
private void verifyLastMergeArchiveFilesIfNecessary(HoodieEngineContext context) throws IOException {
  if (shouldMergeSmallArchiveFies()) {
    Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
    HoodieWrapperFileSystem fs = metaClient.getFs();
    // A leftover plan file means the last merge action did not finish; we need to revert or complete it.
    if (fs.exists(planPath)) {
      HoodieMergeArchiveFilePlan plan = null;
      try {
        plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fs, planPath).get(), HoodieMergeArchiveFilePlan.class);
      } catch (IOException e) {
        LOG.warn("Parsing merge archive plan failed.", e);
        // A partially written plan file means the last merge action failed while writing the plan; drop it.
        fs.delete(planPath);
        return;
      }
      Path mergedArchiveFile = new Path(metaClient.getArchivePath(), plan.getMergedArchiveFileName());
      List<Path> candidates = plan.getCandidate().stream().map(Path::new).collect(Collectors.toList());
      if (candidateAllExists(candidates)) {
        // All candidates are still present: revert the last action by deleting mergedArchiveFile if it exists.
        if (fs.exists(mergedArchiveFile)) {
          fs.delete(mergedArchiveFile, false);
        }
      } else {
        // Some candidates are already gone: try to complete the last action instead.
        if (fs.exists(mergedArchiveFile)) {
          deleteFilesParallelize(metaClient, plan.getCandidate(), context, true);
        }
      }
      fs.delete(planPath);
    }
  }
}
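The recovery branch above hinges on candidateAllExists, which is referenced but not shown on this page. Below is a minimal sketch of what such a check might look like, inferred only from how it is used here; it is an assumption, not necessarily the project's implementation.

// Hypothetical sketch: returns true only if every candidate small archive file still exists,
// in which case the unfinished merge can safely be reverted. If any candidate is already gone,
// the merged file is the only complete copy and the merge must be completed instead.
private boolean candidateAllExists(List<Path> candidates) throws IOException {
  for (Path archiveFile : candidates) {
    if (!metaClient.getFs().exists(archiveFile)) {
      return false;
    }
  }
  return true;
}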
Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.
The class HoodieArchivedTimeline, method loadInstants.
/**
 * This method reads the selected instants. Do NOT use it directly; use one of the helper methods above.
 * If loadInstantDetails is set to true, this also updates the 'readCommits' map with commit details.
 * If filter is specified, only the filtered instants are loaded.
 * If commitsFilter is specified, only the filtered records are loaded.
 */
private List<HoodieInstant> loadInstants(TimeRangeFilter filter, boolean loadInstantDetails, Function<GenericRecord, Boolean> commitsFilter) {
  try {
    // List all archived commit files
    FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    // Sort files by version suffix in reverse (implies reverse chronological order)
    Arrays.sort(fsStatuses, new ArchiveFileVersionComparator());
    Set<HoodieInstant> instantsInRange = new HashSet<>();
    for (FileStatus fs : fsStatuses) {
      // Read the archived file
      try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
        int instantsInPreviousFile = instantsInRange.size();
        // Read the avro blocks
        while (reader.hasNext()) {
          HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
          // Every record must be parsed: the block does not carry metadata
          // (such as startTime, endTime of records in the block) that would let us skip it.
          try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
            StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
                .filter(r -> commitsFilter.apply((GenericRecord) r))
                .map(r -> readCommit((GenericRecord) r, loadInstantDetails))
                .filter(c -> filter == null || filter.isInRange(c))
                .forEach(instantsInRange::add);
          }
        }
        if (filter != null) {
          int instantsInCurrentFile = instantsInRange.size() - instantsInPreviousFile;
          if (instantsInPreviousFile > 0 && instantsInCurrentFile == 0) {
            // This signals we crossed the lower bound of the desired time window.
            break;
          }
        }
      } catch (Exception originalException) {
        // A failed merge of small archive files may have left an incomplete merged archive file;
        // exceptions caused by reading that file need to be ignored here.
        try {
          Path planPath = new Path(metaClient.getArchivePath(), MERGE_ARCHIVE_PLAN_NAME);
          HoodieWrapperFileSystem fileSystem = metaClient.getFs();
          if (fileSystem.exists(planPath)) {
            HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class);
            String mergedArchiveFileName = plan.getMergedArchiveFileName();
            if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) {
              LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here.");
              continue;
            }
          }
          throw originalException;
        } catch (Exception e) {
          // Fall back to the original exception, e.g. when the archive file and the plan file are both corrupted.
          throw originalException;
        }
      }
    }
    ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange);
    Collections.sort(result);
    return result;
  } catch (IOException e) {
    throw new HoodieIOException("Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
  }
}
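For orientation, loadInstants is private; callers typically reach it through the archived timeline exposed by the table meta client. A rough usage sketch follows; the table path and the hadoopConf variable are illustrative assumptions.

// Build a meta client for an existing Hudi table (path and configuration are example values).
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(hadoopConf)              // assumed org.apache.hadoop.conf.Configuration in scope
    .setBasePath("/tmp/hudi_table")   // hypothetical table base path
    .build();

// getArchivedTimeline() scans the .commits_.archive* files and ends up in loadInstants(...).
HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
archivedTimeline.getInstants().forEach(instant ->
    System.out.println(instant.getTimestamp() + " " + instant.getAction()));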
Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.
The class HoodieTimelineArchiver, method buildArchiveMergePlan.
public void buildArchiveMergePlan(List<String> compactCandidate, Path planPath, String compactedArchiveFileName) throws IOException {
  LOG.info("Start to build archive merge plan.");
  HoodieMergeArchiveFilePlan plan = HoodieMergeArchiveFilePlan.newBuilder()
      .setCandidate(compactCandidate)
      .setMergedArchiveFileName(compactedArchiveFileName)
      .build();
  Option<byte[]> content = TimelineMetadataUtils.serializeAvroMetadata(plan, HoodieMergeArchiveFilePlan.class);
  // Persist the serialized merge archive files plan to the plan path.
  FileIOUtils.createFileInPath(metaClient.getFs(), planPath, content);
  LOG.info("Success to build archive merge plan");
}
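Taken together with the verification method above, the plan acts as a write-ahead marker for the whole merge. The sketch below pieces the steps together inside HoodieTimelineArchiver; the mergeArchiveFiles helper, the method name, and the compacted file name are assumptions used only to show where the plan fits, not the project's exact flow.

// Hypothetical end-to-end flow of a small-archive-file merge (names marked below are assumed).
private void mergeSmallArchiveFilesSketch(HoodieEngineContext context,
                                          List<String> candidateNames,
                                          List<FileStatus> candidateStatuses) throws IOException {
  Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
  String compactedFileName = ".commits_.archive.1_1-0-1"; // example name only

  // 1. Write the plan first so that a crash at any later step can be detected by
  //    verifyLastMergeArchiveFilesIfNecessary and either reverted or completed.
  buildArchiveMergePlan(candidateNames, planPath, compactedFileName);

  // 2. Merge the candidate files into the compacted archive file (assumed helper).
  mergeArchiveFiles(candidateStatuses);

  // 3. Delete the now-redundant small archive files, then remove the plan marker.
  deleteFilesParallelize(metaClient, candidateNames, context, true);
  metaClient.getFs().delete(planPath, false);
}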