Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
Class HoodieActiveTimeline, method deleteInstantFileIfExists.
private void deleteInstantFileIfExists(HoodieInstant instant) {
  LOG.info("Deleting instant " + instant);
  Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
  try {
    if (metaClient.getFs().exists(inFlightCommitFilePath)) {
      boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
      if (result) {
        LOG.info("Removed instant " + instant);
      } else {
        throw new HoodieIOException("Could not delete instant " + instant);
      }
    } else {
      LOG.warn("The commit " + inFlightCommitFilePath + " to remove does not exist");
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
  }
}
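HoodieIOException is unchecked (it extends HoodieException, a RuntimeException), so callers are not forced to handle the failure. A minimal, hypothetical sketch of a caller surfacing it instead of swallowing it; the deletePending-style entry point and LOG are assumptions for illustration:

try {
  // Hypothetical entry point that ends up in deleteInstantFileIfExists.
  activeTimeline.deletePending(instant);
} catch (HoodieIOException e) {
  // Unchecked exception: it propagates unless handled explicitly, so log and rethrow.
  LOG.error("Failed to delete instant file for " + instant, e);
  throw e;
}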
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
Class DFSPathSelector, method getNextFilePathsAndMaxModificationTime.
/**
 * Get the list of files changed since the last checkpoint.
 *
 * @param lastCheckpointStr the last checkpoint time string, empty if first run
 * @param sourceLimit       max bytes to read each time
 * @return a comma-separated list of the selected file paths and the latest modification time among them
 */
@Deprecated
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr, long sourceLimit) {
  try {
    // Obtain all eligible files under the root folder.
    log.info("Root path => " + props.getString(Config.ROOT_INPUT_PATH_PROP) + " source limit => " + sourceLimit);
    long lastCheckpointTime = lastCheckpointStr.map(Long::parseLong).orElse(Long.MIN_VALUE);
    List<FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), lastCheckpointTime);
    // Sort them by modification time.
    eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime));
    // Filter based on checkpoint & input size, if needed.
    long currentBytes = 0;
    long newCheckpointTime = lastCheckpointTime;
    List<FileStatus> filteredFiles = new ArrayList<>();
    for (FileStatus f : eligibleFiles) {
      if (currentBytes + f.getLen() >= sourceLimit && f.getModificationTime() > newCheckpointTime) {
        // Stop at a modification-time boundary, so that files sharing the same
        // modification time won't be skipped in the next read.
        break;
      }
      newCheckpointTime = f.getModificationTime();
      currentBytes += f.getLen();
      filteredFiles.add(f);
    }
    // No data to read.
    if (filteredFiles.isEmpty()) {
      return new ImmutablePair<>(Option.empty(), String.valueOf(newCheckpointTime));
    }
    // Concatenate the selected paths for the reader.
    String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
    return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(newCheckpointTime));
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
  }
}
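A sketch of driving this (deprecated) selector in a polling loop, carrying the returned checkpoint forward between calls; the selector variable, the running flag, the process helper, and the 128 MB limit are assumptions for illustration:

Option<String> checkpoint = Option.empty();
while (running) {
  Pair<Option<String>, String> batch =
      selector.getNextFilePathsAndMaxModificationTime(checkpoint, 128 * 1024 * 1024L);
  // Left side: comma-separated file paths, empty when nothing new was found.
  if (batch.getLeft().isPresent()) {
    process(batch.getLeft().get()); // hypothetical downstream read
  }
  // Right side: the new modification-time checkpoint for the next call.
  checkpoint = Option.of(batch.getRight());
}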
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
Class SchemaRegistryProvider, method getTargetSchema.
@Override
public Schema getTargetSchema() {
  String registryUrl = config.getString(Config.SRC_SCHEMA_REGISTRY_URL_PROP);
  String targetRegistryUrl = config.getString(Config.TARGET_SCHEMA_REGISTRY_URL_PROP, registryUrl);
  try {
    return getSchema(targetRegistryUrl);
  } catch (IOException ioe) {
    // Report the URL actually queried: the target registry, which falls back to the source URL.
    throw new HoodieIOException("Error reading target schema from registry: " + targetRegistryUrl, ioe);
  }
}
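For context, the target URL falls back to the source URL when unset. A sketch of configuring both, assuming Confluent-style registry endpoints; the exact property key strings are assumptions here, since the code above only references them via the Config constants:

TypedProperties props = new TypedProperties();
// Source registry (Config.SRC_SCHEMA_REGISTRY_URL_PROP); key string assumed.
props.setProperty("hoodie.deltastreamer.schemaprovider.registry.url",
    "http://registry:8081/subjects/src-topic-value/versions/latest");
// Optional target registry (Config.TARGET_SCHEMA_REGISTRY_URL_PROP); key string assumed.
// If omitted, getTargetSchema() reads from the source URL instead.
props.setProperty("hoodie.deltastreamer.schemaprovider.registry.targetUrl",
    "http://registry:8081/subjects/tgt-topic-value/versions/latest");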
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
Class BaseHoodieWriteClient, method archive.
/**
 * Trigger archival for the table. This ensures that the number of commit files
 * does not keep growing unbounded over time.
 *
 * @param table table whose timeline should be archived.
 */
protected void archive(HoodieTable table) {
  if (!tableServicesEnabled(config)) {
    return;
  }
  try {
    // We cannot have unbounded commit files. Archive commits if we have to.
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table);
    archiver.archiveIfRequired(context);
  } catch (IOException ioe) {
    throw new HoodieIOException("Failed to archive", ioe);
  }
}
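A hypothetical sketch of invoking archival as a post-commit step and surfacing the wrapped failure; the surrounding call site and LOG are illustrative, not the actual Hudi call path:

try {
  archive(table); // the method shown above
} catch (HoodieIOException e) {
  // Fail loudly: an un-archived timeline keeps growing and slows down timeline reads.
  LOG.error("Archival failed for table " + table.getMetaClient().getBasePath(), e);
  throw e;
}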
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
Class BaseHoodieWriteClient, method setWriteSchemaForDeletes.
/**
 * Sets the write schema from the last instant, since deletes may not have a schema set in the config.
 */
protected void setWriteSchemaForDeletes(HoodieTableMetaClient metaClient) {
  try {
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    Option<HoodieInstant> lastInstant = activeTimeline.filterCompletedInstants()
        .filter(s -> s.getAction().equals(metaClient.getCommitActionType())
            || s.getAction().equals(HoodieActiveTimeline.REPLACE_COMMIT_ACTION))
        .lastInstant();
    if (lastInstant.isPresent()) {
      HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
          activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
      if (commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)) {
        config.setSchema(commitMetadata.getExtraMetadata().get(HoodieCommitMetadata.SCHEMA_KEY));
      } else {
        throw new HoodieIOException("Latest commit does not have any schema in commit metadata");
      }
    } else {
      throw new HoodieIOException("Deletes issued without any prior commits");
    }
  } catch (IOException e) {
    throw new HoodieIOException("IOException thrown while reading last commit metadata", e);
  }
}
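The recovery path above only works if an earlier commit recorded a schema. A sketch of the writing side, assuming the writer populates commit metadata itself (Hudi's write clients normally do this internally); writerSchema is an assumption for illustration:

// Hypothetical: record the writer schema into commit metadata at commit time.
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, writerSchema.toString());
// A later delete issued without a configured schema can then call
// setWriteSchemaForDeletes(metaClient), which reads SCHEMA_KEY back from
// the last completed commit or replacecommit instant.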