Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class FlinkMergeAndReplaceHandle, method deleteInvalidDataFile.
/**
 * Flink checkpoints start in sequence and run asynchronously. When one write task finishes checkpoint (A)
 * (so the file system view sees the written data files, some of which may be invalid),
 * it immediately moves on to the write for the next checkpoint (B).
 * If that write tries to reuse the last small data bucket (small file) of an invalid data file,
 * then, once the coordinator receives the checkpoint-success event for checkpoint (A),
 * the invalid data file gets cleaned up,
 * and this handle hits a FileNotFoundException when it closes the write file handle.
 *
 * <p>To solve this, the invalid data file is deleted eagerly
 * so that its small bucket is never reused.
 *
 * @param lastAttemptId The last attempt ID
 */
private void deleteInvalidDataFile(long lastAttemptId) {
  final String lastWriteToken = FSUtils.makeWriteToken(getPartitionId(), getStageId(), lastAttemptId);
  final String lastDataFileName = FSUtils.makeDataFileName(
      instantTime, lastWriteToken, this.fileId, hoodieTable.getBaseFileExtension());
  final Path path = makeNewFilePath(partitionPath, lastDataFileName);
  try {
    if (fs.exists(path)) {
      LOG.info("Deleting invalid MERGE and REPLACE base file due to task retry: " + lastDataFileName);
      fs.delete(path, false);
    }
  } catch (IOException e) {
    throw new HoodieException("Error while deleting the MERGE and REPLACE base file due to task retry: " + lastDataFileName, e);
  }
}
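The same eager cleanup pattern can be shown in isolation. The sketch below is a minimal, self-contained illustration using only the Hadoop FileSystem API; the class and method names (EagerRetryCleanupSketch, cleanUpPreviousAttempt) and the example path are hypothetical and not part of Hudi.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieException;

public class EagerRetryCleanupSketch {

  // Deletes the base file left behind by a previous (failed) attempt, if any,
  // so that the file system view never hands out its small bucket again.
  static void cleanUpPreviousAttempt(FileSystem fs, Path staleFilePath) {
    try {
      if (fs.exists(staleFilePath)) {
        // Non-recursive delete: the stale artifact is a single base file, not a directory.
        fs.delete(staleFilePath, false);
      }
    } catch (IOException e) {
      // Mirror the handle's behavior: wrap the checked IOException in a HoodieException.
      throw new HoodieException("Error while deleting stale file from previous attempt: " + staleFilePath, e);
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    // Hypothetical path to a base file written by the previous attempt.
    cleanUpPreviousAttempt(fs, new Path("/tmp/hudi-demo/2016/04/15/1_1-0-0_20190528120000.parquet"));
  }
}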
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class FlinkCreateHandle, method makeNewPath.
@Override
public Path makeNewPath(String partitionPath) {
  Path path = super.makeNewPath(partitionPath);
  // Write to a new file which behaves like a different task write.
  try {
    int rollNumber = 0;
    while (fs.exists(path)) {
      Path existing = path;
      path = newFilePathWithRollover(rollNumber++);
      LOG.warn("Duplicate write for INSERT bucket with path: " + existing + ", rolls over to new path: " + path);
    }
    return path;
  } catch (IOException e) {
    throw new HoodieException("Checking existing path for create handle error: " + path, e);
  }
}
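The helper newFilePathWithRollover is not shown above. The following sketch shows one plausible shape for such a helper, assuming the roll number is folded into the file name; the naming scheme is an assumption for illustration, not Hudi's actual implementation.

import org.apache.hadoop.fs.Path;

public class RolloverPathSketch {

  // Builds a candidate path for the given roll number by appending it to the write token.
  // Hypothetical naming: "<fileId>_<writeToken>-<rollNumber>_<instantTime><extension>".
  static Path filePathWithRollover(Path partitionPath, String fileId, String writeToken,
                                   String instantTime, int rollNumber, String extension) {
    String name = fileId + "_" + writeToken + "-" + rollNumber + "_" + instantTime + extension;
    return new Path(partitionPath, name);
  }

  public static void main(String[] args) {
    Path partition = new Path("/tmp/hudi-demo/2016/04/15");
    // First rollover candidate for a bucket whose original path already exists.
    System.out.println(filePathWithRollover(partition, "1", "1-0-1", "20190528120000", 0, ".parquet"));
    // -> /tmp/hudi-demo/2016/04/15/1_1-0-1-0_20190528120000.parquet
  }
}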
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class FlinkCreateHandle, method deleteInvalidDataFile.
/**
 * Flink checkpoints start in sequence and run asynchronously. When one write task finishes checkpoint (A)
 * (so the file system view sees the written data files, some of which may be invalid),
 * it immediately moves on to the write for the next checkpoint (B).
 * If that write tries to reuse the last small data bucket (small file) of an invalid data file,
 * then, once the coordinator receives the checkpoint-success event for checkpoint (A),
 * the invalid data file gets cleaned up,
 * and this handle hits a FileNotFoundException when it closes the write file handle.
 *
 * <p>To solve this, the invalid data file is deleted eagerly
 * so that its small bucket is never reused.
 *
 * @param lastAttemptId The last attempt ID
 */
private void deleteInvalidDataFile(long lastAttemptId) {
  final String lastWriteToken = FSUtils.makeWriteToken(getPartitionId(), getStageId(), lastAttemptId);
  final String lastDataFileName = FSUtils.makeDataFileName(
      instantTime, lastWriteToken, this.fileId, hoodieTable.getBaseFileExtension());
  final Path path = makeNewFilePath(partitionPath, lastDataFileName);
  try {
    if (fs.exists(path)) {
      LOG.info("Deleting invalid INSERT file due to task retry: " + lastDataFileName);
      fs.delete(path, false);
    }
  } catch (IOException e) {
    throw new HoodieException("Error while deleting the INSERT file due to task retry: " + lastDataFileName, e);
  }
}
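Both deleteInvalidDataFile variants depend on being able to rebuild the previous attempt's file name deterministically from its write token. Below is a minimal sketch of that composition, assuming the fileId_writeToken_instantTime naming visible in the test fixtures further down (for example 1_1-0-1_20190528120000.parquet) and a write token of the form partitionId-stageId-attemptId; both layouts are assumptions drawn from those fixtures, not a transcription of FSUtils.

public class WriteTokenNamingSketch {

  // Assumed token layout: "<partitionId>-<stageId>-<attemptId>".
  static String writeToken(int partitionId, int stageId, long attemptId) {
    return partitionId + "-" + stageId + "-" + attemptId;
  }

  // Assumed base file name layout: "<fileId>_<writeToken>_<instantTime><extension>".
  static String dataFileName(String instantTime, String writeToken, String fileId, String extension) {
    return fileId + "_" + writeToken + "_" + instantTime + extension;
  }

  public static void main(String[] args) {
    // If the current attempt id is 2, the stale file from the previous attempt carries attempt id 1.
    String lastToken = writeToken(1, 0, 1);
    System.out.println(dataFileName("20190528120000", lastToken, "3", ".parquet"));
    // -> 3_1-0-1_20190528120000.parquet
  }
}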
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class TestBootstrapUtils, method testAllLeafFoldersWithFiles.
@Test
public void testAllLeafFoldersWithFiles() throws IOException {
  // All directories including marker dirs.
  List<String> folders = Arrays.asList("2016/04/15", "2016/05/16", "2016/05/17");
  folders.forEach(f -> {
    try {
      metaClient.getFs().mkdirs(new Path(new Path(basePath), f));
    } catch (IOException e) {
      throw new HoodieException(e);
    }
  });
  // Files inside partitions and marker directories
  List<String> files = Stream.of(
          "2016/04/15/1_1-0-1_20190528120000", "2016/04/15/2_1-0-1_20190528120000",
          "2016/05/16/3_1-0-1_20190528120000", "2016/05/16/4_1-0-1_20190528120000",
          "2016/04/17/5_1-0-1_20190528120000", "2016/04/17/6_1-0-1_20190528120000")
      .map(file -> file + metaClient.getTableConfig().getBaseFileFormat().getFileExtension())
      .collect(Collectors.toList());
  files.forEach(f -> {
    try {
      metaClient.getFs().create(new Path(new Path(basePath), f));
    } catch (IOException e) {
      throw new HoodieException(e);
    }
  });
  List<Pair<String, List<HoodieFileStatus>>> collected =
      BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), basePath, context);
  assertEquals(3, collected.size());
  collected.stream().forEach(k -> {
    assertEquals(2, k.getRight().size());
  });
  // Simulate reading from un-partitioned dataset
  collected = BootstrapUtils.getAllLeafFoldersWithFiles(
      metaClient, metaClient.getFs(), basePath + "/" + folders.get(0), context);
  assertEquals(1, collected.size());
  collected.stream().forEach(k -> {
    assertEquals(2, k.getRight().size());
  });
}
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class HoodieClientTestUtils, method getLatestBaseFiles.
public static List<HoodieBaseFile> getLatestBaseFiles(String basePath, FileSystem fs, String... paths) {
  List<HoodieBaseFile> latestFiles = new ArrayList<>();
  try {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(fs.getConf())
        .setBasePath(basePath)
        .setLoadActiveTimelineOnLoad(true)
        .build();
    for (String path : paths) {
      BaseFileOnlyView fileSystemView = new HoodieTableFileSystemView(
          metaClient, metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
      latestFiles.addAll(fileSystemView.getLatestBaseFiles().collect(Collectors.toList()));
    }
  } catch (Exception e) {
    throw new HoodieException("Error reading hoodie table as a dataframe", e);
  }
  return latestFiles;
}
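A typical call site for getLatestBaseFiles might look like the sketch below. The base path and partition glob are illustrative, and the import location of HoodieClientTestUtils may differ between Hudi versions.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.testutils.HoodieClientTestUtils; // package may differ by Hudi version

public class LatestBaseFilesUsageSketch {

  public static void main(String[] args) throws Exception {
    // Hypothetical table location; the real tests get this from their test harness.
    String basePath = "/tmp/hudi-demo";
    FileSystem fs = FileSystem.get(new Configuration());
    // Glob a single partition; the helper returns the latest base file per file group.
    List<HoodieBaseFile> latest =
        HoodieClientTestUtils.getLatestBaseFiles(basePath, fs, basePath + "/2016/04/15/*");
    latest.forEach(baseFile -> System.out.println(baseFile.getPath()));
  }
}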