Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieDeltaStreamer, method sync().
/**
 * Main method to start syncing.
 *
 * @throws Exception
 */
public void sync() throws Exception {
  if (bootstrapExecutor.isPresent()) {
    LOG.info("Performing bootstrap. Source=" + bootstrapExecutor.get().getBootstrapConfig().getBootstrapSourceBasePath());
    bootstrapExecutor.get().execute();
  } else {
    if (cfg.continuousMode) {
      deltaSyncService.ifPresent(ds -> {
        ds.start(this::onDeltaSyncShutdown);
        try {
          ds.waitForShutdown();
        } catch (Exception e) {
          throw new HoodieException(e.getMessage(), e);
        }
      });
      LOG.info("Delta Sync shutting down");
    } else {
      LOG.info("Delta Streamer running only single round");
      try {
        deltaSyncService.ifPresent(ds -> {
          try {
            ds.getDeltaSync().syncOnce();
          } catch (IOException e) {
            throw new HoodieIOException(e.getMessage(), e);
          }
        });
      } catch (Exception ex) {
        LOG.error("Got error running delta sync once. Shutting down", ex);
        throw ex;
      } finally {
        deltaSyncService.ifPresent(DeltaSyncService::close);
        LOG.info("Shut down delta streamer");
      }
    }
  }
}
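Because ifPresent takes a Consumer, the lambdas above cannot let the checked IOException escape; they rethrow it as the unchecked HoodieIOException. The stand-alone sketch below isolates that wrap-to-unchecked pattern; the syncOnce() stub and the use of java.util.Optional (instead of Hudi's Option) are illustrative assumptions, not Hudi code.

import java.io.IOException;
import java.util.Optional;

import org.apache.hudi.exception.HoodieIOException;

public class SyncOnceSketch {

  // Hypothetical stand-in for DeltaSync#syncOnce(), which declares a checked IOException.
  static void syncOnce() throws IOException {
    throw new IOException("simulated sync failure");
  }

  public static void main(String[] args) {
    Optional<String> service = Optional.of("delta-sync");
    service.ifPresent(name -> {
      try {
        syncOnce();
      } catch (IOException e) {
        // A Consumer cannot declare checked exceptions, so rethrow as Hudi's unchecked HoodieIOException.
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  }
}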
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieRepairTool, method run().
public boolean run() {
  Option<String> startingInstantOption = Option.ofNullable(cfg.startingInstantTime);
  Option<String> endingInstantOption = Option.ofNullable(cfg.endingInstantTime);
  if (startingInstantOption.isPresent() && endingInstantOption.isPresent()) {
    LOG.info(String.format("Start repairing completed instants between %s and %s (inclusive)", startingInstantOption.get(), endingInstantOption.get()));
  } else if (startingInstantOption.isPresent()) {
    LOG.info(String.format("Start repairing completed instants from %s (inclusive)", startingInstantOption.get()));
  } else if (endingInstantOption.isPresent()) {
    LOG.info(String.format("Start repairing completed instants till %s (inclusive)", endingInstantOption.get()));
  } else {
    LOG.info("Start repairing all completed instants");
  }
  try {
    Mode mode = Mode.valueOf(cfg.runningMode.toUpperCase());
    switch (mode) {
      case REPAIR:
        LOG.info(" ****** The repair tool is in REPAIR mode, dangling data and logs files " + "not belonging to any commit are going to be DELETED from the table ******");
        if (checkBackupPathForRepair() < 0) {
          LOG.error("Backup path check failed.");
          return false;
        }
        return doRepair(startingInstantOption, endingInstantOption, false);
      case DRY_RUN:
        LOG.info(" ****** The repair tool is in DRY_RUN mode, " + "only LOOKING FOR dangling data and log files from the table ******");
        return doRepair(startingInstantOption, endingInstantOption, true);
      case UNDO:
        if (checkBackupPathAgainstBasePath() < 0) {
          LOG.error("Backup path check failed.");
          return false;
        }
        return undoRepair();
      default:
        LOG.info("Unsupported running mode [" + cfg.runningMode + "], quit the job directly");
        return false;
    }
  } catch (IOException e) {
    throw new HoodieIOException("Unable to repair table in " + cfg.basePath, e);
  }
}
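The mode dispatch above hinges on Mode.valueOf(cfg.runningMode.toUpperCase()). Below is a minimal, self-contained sketch of that parse-and-dispatch pattern; the Mode enum and main method are local stand-ins, not the tool's actual types.

public class RunningModeSketch {

  // Local stand-in for the repair tool's running modes.
  enum Mode { REPAIR, DRY_RUN, UNDO }

  public static void main(String[] args) {
    String runningMode = args.length > 0 ? args[0] : "dry_run";
    // Enum.valueOf is case-sensitive, hence the toUpperCase() in run().
    Mode mode = Mode.valueOf(runningMode.toUpperCase());
    switch (mode) {
      case REPAIR:
        System.out.println("would delete dangling files (backup path required)");
        break;
      case DRY_RUN:
        System.out.println("would only report dangling files");
        break;
      case UNDO:
        System.out.println("would restore files from the backup path");
        break;
      default:
        System.out.println("unsupported running mode: " + runningMode);
    }
  }
}

Note that valueOf throws IllegalArgumentException when the string matches no enum constant, so a misspelled running mode surfaces as an exception rather than falling through to the default branch.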
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class MarkerDirState, method writeMarkerTypeToFile().
/**
 * Writes marker type, "TIMELINE_SERVER_BASED", to file.
 */
private void writeMarkerTypeToFile() {
  Path dirPath = new Path(markerDirPath);
  try {
    if (!fileSystem.exists(dirPath)) {
      // There is no existing marker directory, create a new directory and write marker type
      fileSystem.mkdirs(dirPath);
      MarkerUtils.writeMarkerTypeToFile(MarkerType.TIMELINE_SERVER_BASED, fileSystem, markerDirPath);
    }
  } catch (IOException e) {
    throw new HoodieIOException("Failed to write marker type file in " + markerDirPath + ": " + e.getMessage(), e);
  }
}
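A minimal sketch of the same create-if-missing pattern using the Hadoop FileSystem API; the marker directory path and the bare Configuration are assumptions for illustration, and the call that writes the marker-type file is omitted.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class MarkerDirSketch {

  public static void main(String[] args) {
    String markerDirPath = "/tmp/hoodie/table/.hoodie/.temp/0001";  // hypothetical marker directory
    Path dirPath = new Path(markerDirPath);
    try {
      FileSystem fs = dirPath.getFileSystem(new Configuration());
      if (!fs.exists(dirPath)) {
        // Create the directory only when it does not exist yet; the marker-type file would be written here.
        fs.mkdirs(dirPath);
      }
    } catch (IOException e) {
      // Surface the checked IOException as Hudi's unchecked HoodieIOException.
      throw new HoodieIOException("Failed to prepare marker directory " + markerDirPath, e);
    }
  }
}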
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HiveIncrPullSource, method fetchNewData().
@Override
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
  try {
    // find the source commit to pull
    Option<String> commitToPull = findCommitToPull(lastCheckpointStr);
    if (!commitToPull.isPresent()) {
      return new InputBatch<>(Option.empty(), lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : "");
    }
    // read the files out.
    List<FileStatus> commitDeltaFiles = Arrays.asList(fs.listStatus(new Path(incrPullRootPath, commitToPull.get())));
    String pathStr = commitDeltaFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
    JavaPairRDD<AvroKey, NullWritable> avroRDD = sparkContext.newAPIHadoopFile(pathStr, AvroKeyInputFormat.class, AvroKey.class, NullWritable.class, sparkContext.hadoopConfiguration());
    sparkContext.setJobGroup(this.getClass().getSimpleName(), "Fetch new data");
    return new InputBatch<>(Option.of(avroRDD.keys().map(r -> ((GenericRecord) r.datum()))), String.valueOf(commitToPull.get()));
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
  }
}
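fetchNewData reads every file under the chosen commit folder by joining their paths into a single comma-separated string before handing them to newAPIHadoopFile. The sketch below shows just that path-assembly step; the root path and commit time are made-up values.

import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class IncrPullPathSketch {

  public static void main(String[] args) {
    String incrPullRootPath = "/tmp/hoodie/incr-pull";  // hypothetical root of extracted commits
    String commitToPull = "20240101000000";             // hypothetical commit time
    try {
      FileSystem fs = FileSystem.get(new Configuration());
      FileStatus[] commitDeltaFiles = fs.listStatus(new Path(incrPullRootPath, commitToPull));
      // Spark's newAPIHadoopFile accepts a comma-separated list of input paths.
      String pathStr = Arrays.stream(commitDeltaFiles)
          .map(f -> f.getPath().toString())
          .collect(Collectors.joining(","));
      System.out.println("Avro input paths: " + pathStr);
    } catch (IOException ioe) {
      throw new HoodieIOException("Unable to list files under " + incrPullRootPath + "/" + commitToPull, ioe);
    }
  }
}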
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class CompactionAdminClient, method validateCompactionOperation().
/**
 * Check if a compaction operation is valid.
 *
 * @param metaClient Hoodie Table Meta client
 * @param compactionInstant Compaction Instant
 * @param operation Compaction Operation
 * @param fsViewOpt File System View
 */
private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt) throws IOException {
  HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Option<HoodieInstant> lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant();
  try {
    if (lastInstant.isPresent()) {
      Option<FileSlice> fileSliceOptional = Option.fromJavaOptional(fileSystemView.getLatestUnCompactedFileSlices(operation.getPartitionPath()).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst());
      if (fileSliceOptional.isPresent()) {
        FileSlice fs = fileSliceOptional.get();
        Option<HoodieBaseFile> df = fs.getBaseFile();
        if (operation.getDataFileName().isPresent()) {
          String expPath = metaClient.getFs().getFileStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(operation.getDataFileName().get()))).getPath().toString();
          ValidationUtils.checkArgument(df.isPresent(), "Data File must be present. File Slice was : " + fs + ", operation :" + operation);
          ValidationUtils.checkArgument(df.get().getPath().equals(expPath), "Base Path in operation is specified as " + expPath + " but got path " + df.get().getPath());
        }
        Set<HoodieLogFile> logFilesInFileSlice = fs.getLogFiles().collect(Collectors.toSet());
        Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFileNames().stream().map(dp -> {
          try {
            FileStatus[] fileStatuses = metaClient.getFs().listStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(dp)));
            ValidationUtils.checkArgument(fileStatuses.length == 1, "Expect only 1 file-status");
            return new HoodieLogFile(fileStatuses[0]);
          } catch (FileNotFoundException fe) {
            throw new CompactionValidationException(fe.getMessage());
          } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
          }
        }).collect(Collectors.toSet());
        Set<HoodieLogFile> missing = logFilesInCompactionOp.stream().filter(lf -> !logFilesInFileSlice.contains(lf)).collect(Collectors.toSet());
        ValidationUtils.checkArgument(missing.isEmpty(), "All log files specified in compaction operation is not present. Missing :" + missing + ", Exp :" + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);
        Set<HoodieLogFile> diff = logFilesInFileSlice.stream().filter(lf -> !logFilesInCompactionOp.contains(lf)).collect(Collectors.toSet());
        ValidationUtils.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)), "There are some log-files which are neither specified in compaction plan " + "nor present after compaction request instant. Some of these :" + diff);
      } else {
        throw new CompactionValidationException("Unable to find file-slice for file-id (" + operation.getFileId() + " Compaction operation is invalid.");
      }
    } else {
      throw new CompactionValidationException("Unable to find any committed instant. Compaction Operation may be pointing to stale file-slices");
    }
  } catch (CompactionValidationException | IllegalArgumentException e) {
    return new ValidationOpResult(operation, false, Option.of(e));
  }
  return new ValidationOpResult(operation, true, Option.empty());
}
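The final checks above boil down to set differences between the log files named in the compaction operation and those actually present in the file slice. The self-contained sketch below reproduces the "missing files" check with plain strings standing in for HoodieLogFile instances.

import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class CompactionPlanCheckSketch {

  public static void main(String[] args) {
    // Stand-ins for the log files seen in the latest file slice and in the compaction operation.
    Set<String> logFilesInFileSlice = Stream.of("f1.log.1", "f1.log.2", "f1.log.3").collect(Collectors.toSet());
    Set<String> logFilesInCompactionOp = Stream.of("f1.log.1", "f1.log.4").collect(Collectors.toSet());

    // Every file referenced by the plan must also exist in the file slice.
    Set<String> missing = logFilesInCompactionOp.stream()
        .filter(lf -> !logFilesInFileSlice.contains(lf))
        .collect(Collectors.toSet());

    if (!missing.isEmpty()) {
      // validateCompactionOperation would record this as a failed ValidationOpResult.
      throw new IllegalArgumentException("Log files in compaction plan missing from file slice: " + missing);
    }
  }
}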