Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class TestIncrementalFSViewSync, method testMultipleReplaceSteps:
private void testMultipleReplaceSteps(Map<String, List<String>> instantsToFiles, SyncableFileSystemView view,
    List<String> instants, int initialExpectedSlicesPerPartition) {
  int expectedSlicesPerPartition = initialExpectedSlicesPerPartition;
  for (int i = 0; i < instants.size(); i++) {
    try {
      generateReplaceInstant(instants.get(i), instantsToFiles);
      view.sync();
      metaClient.reloadActiveTimeline();
      SyncableFileSystemView newView = getFileSystemView(metaClient);
      // 1 fileId is replaced in every partition, so subtract partitions.size() from the total
      expectedSlicesPerPartition = expectedSlicesPerPartition + fileIdsPerPartition.size() - 1;
      areViewsConsistent(view, newView, expectedSlicesPerPartition * partitions.size());
    } catch (IOException e) {
      throw new HoodieIOException("unable to test replace", e);
    }
  }
}
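Most of the usages collected on this page share one idiom: catch the checked IOException at the boundary and rethrow it as the unchecked HoodieIOException with the cause attached, so callers inside loops and stream lambdas need no throws clause. A minimal sketch of that idiom, with a hypothetical readInstantFile helper that is not part of Hudi:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.hudi.exception.HoodieIOException;

// Hypothetical helper illustrating the wrap-and-rethrow idiom used in the snippets on this page.
static byte[] readInstantFile(Path instantFile) {
  try {
    return Files.readAllBytes(instantFile);
  } catch (IOException e) {
    // HoodieIOException is unchecked, so stream pipelines and loops can call this freely.
    throw new HoodieIOException("Unable to read " + instantFile, e);
  }
}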
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieInputFormatUtils, method refreshFileStatus:
/**
 * Checks the file status for a race condition which can set the file size to 0:
 * 1. HiveInputFormat does super.listStatus() and gets back a FileStatus[].
 * 2. Then it creates the HoodieTableMetaClient for the paths listed.
 * 3. Generation of splits looks at FileStatus size to create splits, which skips this file.
 *
 * @param conf the Hadoop configuration used to resolve the file system
 * @param dataFile the base file whose cached status may have been caught by the race
 * @return the base file, with its FileStatus re-read from the file system if the recorded size was 0
 */
private static HoodieBaseFile refreshFileStatus(Configuration conf, HoodieBaseFile dataFile) {
  Path dataPath = dataFile.getFileStatus().getPath();
  try {
    if (dataFile.getFileSize() == 0) {
      FileSystem fs = dataPath.getFileSystem(conf);
      LOG.info("Refreshing file status " + dataFile.getPath());
      return new HoodieBaseFile(fs.getFileStatus(dataPath), dataFile.getBootstrapBaseFile().orElse(null));
    }
    return dataFile;
  } catch (IOException e) {
    // propagate the cause so callers can see the underlying file system error
    throw new HoodieIOException("Could not get FileStatus on path " + dataPath, e);
  }
}
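refreshFileStatus is private to HoodieInputFormatUtils, so the sketch below assumes it sits in the same class; the baseFiles parameter and the filterAndRefresh name are illustrative, not Hudi API. It shows how a caller might refresh candidate files and then drop the ones that are genuinely empty:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.model.HoodieBaseFile;

// Hypothetical caller sketch: re-read zero-length statuses, then skip files that are truly empty.
static List<HoodieBaseFile> filterAndRefresh(Configuration conf, List<HoodieBaseFile> baseFiles) {
  return baseFiles.stream()
      .map(f -> refreshFileStatus(conf, f)) // re-reads the FileStatus when the recorded size is 0
      .filter(f -> f.getFileSize() > 0)     // a size still 0 means the file really is empty
      .collect(Collectors.toList());
}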
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieInputFormatUtils, method getInputFormat:
public static FileInputFormat getInputFormat(HoodieFileFormat baseFileFormat, boolean realtime, Configuration conf) {
  switch (baseFileFormat) {
    case PARQUET:
      if (realtime) {
        HoodieParquetRealtimeInputFormat inputFormat = new HoodieParquetRealtimeInputFormat();
        inputFormat.setConf(conf);
        return inputFormat;
      } else {
        HoodieParquetInputFormat inputFormat = new HoodieParquetInputFormat();
        inputFormat.setConf(conf);
        return inputFormat;
      }
    case HFILE:
      if (realtime) {
        HoodieHFileRealtimeInputFormat inputFormat = new HoodieHFileRealtimeInputFormat();
        inputFormat.setConf(conf);
        return inputFormat;
      } else {
        HoodieHFileInputFormat inputFormat = new HoodieHFileInputFormat();
        inputFormat.setConf(conf);
        return inputFormat;
      }
    default:
      throw new HoodieIOException("Hoodie InputFormat not implemented for base file format " + baseFileFormat);
  }
}
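A short usage sketch: selecting the realtime Parquet variant for a merge-on-read read path. Wrapping the call in a helper method and the jobConf setup are assumptions for illustration:

import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;

// realtime = true picks HoodieParquetRealtimeInputFormat; false would pick HoodieParquetInputFormat.
static FileInputFormat realtimeParquetFormat(JobConf jobConf) {
  return HoodieInputFormatUtils.getInputFormat(HoodieFileFormat.PARQUET, true, jobConf);
}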
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class DFSTestSuitePathSelector, method getNextFilePathsAndMaxModificationTime:
@Override
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr,
    long sourceLimit) {
  Integer lastBatchId;
  Integer nextBatchId;
  try {
    if (lastCheckpointStr.isPresent()) {
      lastBatchId = Integer.parseInt(lastCheckpointStr.get());
      nextBatchId = lastBatchId + 1;
    } else {
      lastBatchId = 0;
      nextBatchId = 1;
    }
    // obtain all eligible files for the batch
    List<FileStatus> eligibleFiles = new ArrayList<>();
    FileStatus[] fileStatuses = fs.globStatus(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP), "*"));
    // Say the input data is as follows: input/1, input/2, input/5, where 3 and 4 were rolled back and 5 is
    // newly generated data. The checkpoint from the latest commit metadata will be 2, since 3 and 4 have been
    // rolled back, so the next batch id must be set to 5 instead of 3.
    Option<String> correctBatchIdDueToRollback = Option.fromJavaOptional(
        Arrays.stream(fileStatuses)
            .map(f -> f.getPath().getName())
            .filter(bid -> Integer.parseInt(bid) > lastBatchId)
            // smallest batch id strictly greater than the last checkpoint
            .min(Comparator.comparingInt(Integer::parseInt)));
    if (correctBatchIdDueToRollback.isPresent() && Integer.parseInt(correctBatchIdDueToRollback.get()) > nextBatchId) {
      nextBatchId = Integer.parseInt(correctBatchIdDueToRollback.get());
    }
    log.info("Using DFSTestSuitePathSelector, checkpoint: " + lastCheckpointStr + " sourceLimit: " + sourceLimit
        + " lastBatchId: " + lastBatchId + " nextBatchId: " + nextBatchId);
    for (FileStatus fileStatus : fileStatuses) {
      if (!fileStatus.isDirectory()
          || IGNORE_FILEPREFIX_LIST.stream().anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
        continue;
      } else if (Integer.parseInt(fileStatus.getPath().getName()) > lastBatchId
          && Integer.parseInt(fileStatus.getPath().getName()) <= nextBatchId) {
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(fileStatus.getPath(), true);
        while (files.hasNext()) {
          eligibleFiles.add(files.next());
        }
      }
    }
    // no data to read
    if (eligibleFiles.isEmpty()) {
      return new ImmutablePair<>(Option.empty(), lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
    }
    // read the files out
    String pathStr = eligibleFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
    return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(nextBatchId));
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
  }
}
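The returned pair is meant to be threaded back in: the right element (the batch id) becomes the checkpoint for the next call. A driver-loop sketch, where selector and sourceLimit are assumed to be configured elsewhere:

// Hypothetical driver sketch: feed the returned batch id back in as the next checkpoint.
Option<String> checkpoint = Option.empty();
Pair<Option<String>, String> batch = selector.getNextFilePathsAndMaxModificationTime(checkpoint, sourceLimit);
if (batch.getLeft().isPresent()) {
  String commaSeparatedPaths = batch.getLeft().get(); // input paths for this batch
  checkpoint = Option.of(batch.getRight());           // nextBatchId becomes the new checkpoint
}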
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieMergeOnReadTableInputFormat, method createRealtimeFileStatusUnchecked:
/**
 * Creates {@link RealtimeFileStatus} for the file-slice where the base file is present.
 */
private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieBaseFile baseFile,
    Stream<HoodieLogFile> logFiles, String basePath, Option<HoodieInstant> latestCompletedInstantOpt,
    Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt) {
  FileStatus baseFileStatus = getFileStatusUnchecked(baseFile);
  List<HoodieLogFile> sortedLogFiles = logFiles.sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
  try {
    RealtimeFileStatus rtFileStatus = new RealtimeFileStatus(baseFileStatus, basePath, sortedLogFiles, false, virtualKeyInfoOpt);
    if (latestCompletedInstantOpt.isPresent()) {
      HoodieInstant latestCompletedInstant = latestCompletedInstantOpt.get();
      checkState(latestCompletedInstant.isCompleted());
      rtFileStatus.setMaxCommitTime(latestCompletedInstant.getTimestamp());
    }
    if (baseFileStatus instanceof LocatedFileStatusWithBootstrapBaseFile || baseFileStatus instanceof FileStatusWithBootstrapBaseFile) {
      rtFileStatus.setBootStrapFileStatus(baseFileStatus);
    }
    return rtFileStatus;
  } catch (IOException e) {
    throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e);
  }
}
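createRealtimeFileStatusUnchecked is also private, so the sketch below assumes the same class; fileSlices, basePath, latestInstantOpt, and virtualKeyOpt stand in for values the real caller would already hold. It builds one RealtimeFileStatus per file slice that carries a base file:

// Hypothetical caller sketch: one RealtimeFileStatus per file slice with a base file present.
List<RealtimeFileStatus> statuses = fileSlices.stream()
    .filter(slice -> slice.getBaseFile().isPresent())
    .map(slice -> createRealtimeFileStatusUnchecked(
        slice.getBaseFile().get(), slice.getLogFiles(), basePath, latestInstantOpt, virtualKeyOpt))
    .collect(Collectors.toList());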