Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class BaseRollbackHelper, method maybeDeleteAndCollectStats.
/**
 * Deletes the files of interest and collects stats, or only collects stats without deleting anything.
 *
 * @param context instance of {@link HoodieEngineContext} to use.
 * @param instantToRollback {@link HoodieInstant} of interest for which deletion or stats collection is requested.
 * @param rollbackRequests list of {@link ListingBasedRollbackRequest} to be operated on.
 * @param doDelete {@code true} if deletion has to be done; {@code false} if only stats are to be collected without performing any deletes.
 * @param numPartitions parallelism to use when distributing the rollback requests over the engine context.
 * @return stats collected with or without actual deletions.
 */
List<Pair<String, HoodieRollbackStat>> maybeDeleteAndCollectStats(HoodieEngineContext context, HoodieInstant instantToRollback,
    List<SerializableHoodieRollbackRequest> rollbackRequests, boolean doDelete, int numPartitions) {
  return context.flatMap(rollbackRequests,
      (SerializableFunction<SerializableHoodieRollbackRequest, Stream<Pair<String, HoodieRollbackStat>>>) rollbackRequest -> {
        List<String> filesToBeDeleted = rollbackRequest.getFilesToBeDeleted();
        if (!filesToBeDeleted.isEmpty()) {
          List<HoodieRollbackStat> rollbackStats = deleteFiles(metaClient, filesToBeDeleted, doDelete);
          List<Pair<String, HoodieRollbackStat>> partitionToRollbackStats = new ArrayList<>();
          rollbackStats.forEach(entry -> partitionToRollbackStats.add(Pair.of(entry.getPartitionPath(), entry)));
          return partitionToRollbackStats.stream();
        } else if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) {
          HoodieLogFormat.Writer writer = null;
          try {
            String fileId = rollbackRequest.getFileId();
            String latestBaseInstant = rollbackRequest.getLatestBaseInstant();
            writer = HoodieLogFormat.newWriterBuilder()
                .onParentPath(FSUtils.getPartitionPath(metaClient.getBasePath(), rollbackRequest.getPartitionPath()))
                .withFileId(fileId)
                .overBaseCommit(latestBaseInstant)
                .withFs(metaClient.getFs())
                .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
                .build();
            if (doDelete) {
              Map<HoodieLogBlock.HeaderMetadataType, String> header = generateHeader(instantToRollback.getTimestamp());
              writer.appendBlock(new HoodieCommandBlock(header));
            }
          } catch (IOException | InterruptedException io) {
            throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, io);
          } finally {
            try {
              if (writer != null) {
                writer.close();
              }
            } catch (IOException io) {
              throw new HoodieIOException("Error appending rollback block", io);
            }
          }
          Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(
              metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L);
          return Collections.singletonList(
              Pair.of(rollbackRequest.getPartitionPath(),
                  HoodieRollbackStat.newBuilder()
                      .withPartitionPath(rollbackRequest.getPartitionPath())
                      .withRollbackBlockAppendResults(filesToNumBlocksRollback)
                      .build()))
              .stream();
        } else {
          return Collections.singletonList(
              Pair.of(rollbackRequest.getPartitionPath(),
                  HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()).build()))
              .stream();
        }
      }, numPartitions);
}
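All three branches above route checked IOExceptions into unchecked Hudi exceptions so they can escape the lambda handed to context.flatMap. Below is a minimal sketch of that wrap-and-rethrow pattern; the helper class and method names are made up for illustration, and only the HoodieIOException(String, IOException) constructor is taken from the snippet above.

import org.apache.hudi.exception.HoodieIOException;

import java.io.Closeable;
import java.io.IOException;

public final class IoWrapSketch {
  // Close a resource and rethrow any checked IOException as the unchecked
  // HoodieIOException, mirroring the finally-block in maybeDeleteAndCollectStats.
  static void closeOrWrap(Closeable resource, String what) {
    try {
      if (resource != null) {
        resource.close();
      }
    } catch (IOException e) {
      throw new HoodieIOException("Failed to close " + what, e);
    }
  }
}

Because the wrapper is unchecked, the surrounding engine-context lambda does not need to declare or tunnel the checked exception itself.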
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class DirectWriteMarkers, method create.
private Option<Path> create(Path markerPath, boolean checkIfExists) {
  HoodieTimer timer = new HoodieTimer().startTimer();
  Path dirPath = markerPath.getParent();
  try {
    if (!fs.exists(dirPath)) {
      // create a new partition as needed.
      fs.mkdirs(dirPath);
    }
  } catch (IOException e) {
    throw new HoodieIOException("Failed to make dir " + dirPath, e);
  }
  try {
    if (checkIfExists && fs.exists(markerPath)) {
      LOG.warn("Marker Path=" + markerPath + " already exists, cancel creation");
      return Option.empty();
    }
    LOG.info("Creating Marker Path=" + markerPath);
    fs.create(markerPath, false).close();
  } catch (IOException e) {
    throw new HoodieException("Failed to create marker file " + markerPath, e);
  }
  LOG.info("[direct] Created marker file " + markerPath.toString() + " in " + timer.endTimer() + " ms");
  return Option.of(markerPath);
}
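The essence of create(...) is an idempotent, fail-fast marker creation. The standalone sketch below keeps only that essence; the class and method names are invented for illustration, while the Hadoop FileSystem calls and the Option semantics (empty when the marker already exists) mirror the snippet above.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;

import java.io.IOException;

public final class MarkerCreateSketch {
  // Idempotent marker creation: return Option.empty() if the marker already
  // exists, otherwise create the parent directory and an empty marker file.
  static Option<Path> createIfAbsent(FileSystem fs, Path markerPath) {
    try {
      if (fs.exists(markerPath)) {
        return Option.empty();
      }
      fs.mkdirs(markerPath.getParent());
      fs.create(markerPath, false).close();
      return Option.of(markerPath);
    } catch (IOException e) {
      throw new HoodieIOException("Failed to create marker " + markerPath, e);
    }
  }
}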
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieTable, method deleteInvalidFilesByPartitions.
private void deleteInvalidFilesByPartitions(HoodieEngineContext context, Map<String, List<Pair<String, String>>> invalidFilesByPartition) {
  // Now delete partially written files
  context.setJobStatus(this.getClass().getSimpleName(), "Delete invalid files generated during the write operation");
  context.map(new ArrayList<>(invalidFilesByPartition.values()), partitionWithFileList -> {
    final FileSystem fileSystem = metaClient.getFs();
    LOG.info("Deleting invalid data files=" + partitionWithFileList);
    if (partitionWithFileList.isEmpty()) {
      return true;
    }
    // Delete
    partitionWithFileList.stream().map(Pair::getValue).forEach(file -> {
      try {
        fileSystem.delete(new Path(file), false);
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
    return true;
  }, config.getFinalizeWriteParallelism());
}
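FileSystem.delete declares a checked IOException, so the lambda passed to context.map has to wrap it before rethrowing. The sketch below shows the same delete-and-wrap pattern with a plain parallel stream in place of a HoodieEngineContext; the class and method names are illustrative only.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

import java.io.IOException;
import java.util.List;

public final class InvalidFileDeleteSketch {
  // Delete each partially written file, wrapping the checked IOException so it
  // can propagate out of the lambda as an unchecked exception.
  static void deleteAll(FileSystem fileSystem, List<String> invalidFiles) {
    invalidFiles.parallelStream().forEach(file -> {
      try {
        fileSystem.delete(new Path(file), false); // non-recursive delete of a single file
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  }
}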
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieTable, method reconcileAgainstMarkers.
/**
* Reconciles WriteStats and marker files to detect and safely delete duplicate data files created because of Spark
* retries.
*
* @param context HoodieEngineContext
* @param instantTs Instant Timestamp
* @param stats Hoodie Write Stat
* @param consistencyCheckEnabled Consistency Check Enabled
* @throws HoodieIOException
*/
protected void reconcileAgainstMarkers(HoodieEngineContext context, String instantTs, List<HoodieWriteStat> stats,
    boolean consistencyCheckEnabled) throws HoodieIOException {
  try {
    // Reconcile marker and data files with WriteStats so that partially written data-files due to failed
    // (but succeeded on retry) tasks are removed.
    String basePath = getMetaClient().getBasePath();
    WriteMarkers markers = WriteMarkersFactory.get(config.getMarkersType(), this, instantTs);
    if (!markers.doesMarkerDirExist()) {
      // can happen if it was an empty write, say.
      return;
    }
    // we are not including log appends here, since they are already fail-safe.
    Set<String> invalidDataPaths = getInvalidDataPaths(markers);
    Set<String> validDataPaths = stats.stream()
        .map(HoodieWriteStat::getPath)
        .filter(p -> p.endsWith(this.getBaseFileExtension()))
        .collect(Collectors.toSet());
    // Contains the list of partially created files. These need to be cleaned up.
    invalidDataPaths.removeAll(validDataPaths);
    if (!invalidDataPaths.isEmpty()) {
      LOG.info("Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
      Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream()
          .map(dp -> Pair.of(new Path(basePath, dp).getParent().toString(), new Path(basePath, dp).toString()))
          .collect(Collectors.groupingBy(Pair::getKey));
      if (consistencyCheckEnabled) {
        // Ensure all files to be deleted are actually present; otherwise we may miss deleting them.
        // If files are not found even after retries, fail the commit.
        waitForAllFiles(context, invalidPathsByPartition, FileVisibility.APPEAR);
      }
      // Now delete partially written files
      context.setJobStatus(this.getClass().getSimpleName(), "Delete all partially written files");
      deleteInvalidFilesByPartitions(context, invalidPathsByPartition);
      if (consistencyCheckEnabled) {
        // Now ensure the deleted files actually disappear.
        waitForAllFiles(context, invalidPathsByPartition, FileVisibility.DISAPPEAR);
      }
    }
  } catch (IOException ioe) {
    throw new HoodieIOException(ioe.getMessage(), ioe);
  }
}
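Stripped of the consistency checks, reconcileAgainstMarkers reduces to set arithmetic: candidate data paths come from the markers, surviving paths come from the write stats, and the difference is what gets deleted. A standalone sketch of that step follows; the names are illustrative, not the Hudi API.

import java.util.HashSet;
import java.util.Set;

public final class ReconcileSketch {
  // Marker paths minus committed write-stat paths = partially written files
  // left behind by failed (but retried) tasks, which are safe to delete.
  static Set<String> findInvalidDataPaths(Set<String> markerDataPaths, Set<String> committedDataPaths) {
    Set<String> invalid = new HashSet<>(markerDataPaths);
    invalid.removeAll(committedDataPaths);
    return invalid;
  }
}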
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class TestCompactionAdminClient, method validateUnSchedulePlan.
/**
* Validate Unschedule operations.
*/
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client, String ingestionInstant,
    String compactionInstant, int numEntriesPerInstant, int expNumRenames, boolean skipUnSchedule) throws Exception {
  ensureValidCompactionPlan(compactionInstant);
  // Check suggested rename operations
  List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
      client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  // Log files belonging to file-slices created because of the compaction request must be renamed
  Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
  final HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .flatMap(FileSlice::getLogFiles)
      .collect(Collectors.toSet());
  assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
      "Log files belonging to file-slices created because of compaction request must be renamed");
  if (skipUnSchedule) {
    // Do the renaming only but do not touch the compaction plan - needed for repair tests
    renameFiles.forEach(lfPair -> {
      try {
        renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  } else {
    validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
  }
  Map<String, Long> fileIdToCountsBeforeRenaming =
      fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  // Call the main unschedule API
  client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  final HoodieTableFileSystemView newFsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  // Expect every file-slice whose base-commit is the compaction commit to contain no base file and no log files
  newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
      });
  // Ensure the same number of log-files before and after renaming, per fileId
  Map<String, Long> fileIdToCountsAfterRenaming =
      newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
          .flatMap(HoodieFileGroup::getAllFileSlices)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
  assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
  assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
  return renameFiles;
}
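The before/after assertion at the end of this test hinges on building a fileId to log-file-count map from the file-system view. The sketch below shows that bookkeeping in isolation, with a hypothetical SliceInfo class standing in for Hudi's FileSlice.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public final class LogCountSketch {
  // Minimal stand-in for Hudi's FileSlice, carrying only what the test compares.
  static final class SliceInfo {
    final String fileId;
    final long logFileCount;

    SliceInfo(String fileId, long logFileCount) {
      this.fileId = fileId;
      this.logFileCount = logFileCount;
    }
  }

  // Build the fileId -> log-file-count map that is compared before and after renaming.
  static Map<String, Long> countLogFilesPerFileId(List<SliceInfo> slices) {
    return slices.stream()
        .collect(Collectors.toMap(s -> s.fileId, s -> s.logFileCount));
  }
}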