Usage example of org.apache.flink.core.fs.FileStatus in the Apache Flink project:
method listEligibleFiles of class ContinuousFileMonitoringFunction.
/**
 * Scans the given path and returns the eligible files that have not yet been processed.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The file or directory to scan.
 * @return A map from file path to its {@link FileStatus} for every eligible file found;
 *         an empty map if the path cannot be listed or does not exist.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) {
    final FileStatus[] statuses;
    try {
        statuses = fileSystem.listStatus(path);
    } catch (IOException e) {
        // delay the check for eligible files in this case
        return Collections.emptyMap();
    }

    if (statuses == null) {
        LOG.warn("Path does not exist: {}", path);
        return Collections.emptyMap();
    }

    final Map<Path, FileStatus> eligibleFiles = new HashMap<>();
    for (FileStatus candidate : statuses) {
        if (candidate.isDir()) {
            // descend into subdirectories only when nested enumeration is enabled
            // and the directory passes the format's file filter
            if (format.getNestedFileEnumeration() && format.acceptFile(candidate)) {
                eligibleFiles.putAll(listEligibleFiles(fileSystem, candidate.getPath()));
            }
        } else {
            final Path candidatePath = candidate.getPath();
            if (!shouldIgnore(candidatePath, candidate.getModificationTime())) {
                eligibleFiles.put(candidatePath, candidate);
            }
        }
    }
    return eligibleFiles;
}
Usage example of org.apache.flink.core.fs.FileStatus in the Apache Flink project:
method testDirectoryCleanUp of class FileCacheDirectoriesTest.
// Verifies that a cached file shared by two execution attempts of the same job
// survives each individual release and is only deleted by the scheduled cleanup
// task after the last attempt has released it.
@Test
public void testDirectoryCleanUp() throws Exception {
JobID jobID = new JobID();
// two attempts of the same job register the same cache entry
ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
final String fileName = "test_file";
// copy / create the file
final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true);
Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
fileCache.createTmpFile(fileName, entry, jobID, attemptID2);
// wait for the copy to complete and locate the materialized cache directory
final Path dstPath = copyResult.get();
final FileSystem fs = dstPath.getFileSystem();
final FileStatus fileStatus = fs.getFileStatus(dstPath);
final Path cacheFile = new Path(dstPath, "cacheFile");
// the cache entry materializes as a directory containing the cached file
assertTrue(fileStatus.isDir());
assertTrue(fs.exists(cacheFile));
fileCache.releaseJob(jobID, attemptID1);
// still should be available
// NOTE(review): fileStatus was captured before the release, so re-asserting
// isDir() does not re-query the file system; fs.exists(cacheFile) is the
// live check here — confirm this is intentional.
assertTrue(fileStatus.isDir());
assertTrue(fs.exists(cacheFile));
fileCache.releaseJob(jobID, attemptID2);
// still should be available, file will be deleted after cleanupInterval
assertTrue(fileStatus.isDir());
assertTrue(fs.exists(cacheFile));
// after a while, the file should disappear
// the cleanup task must have been scheduled with the configured delay;
// running the captured delete process simulates the delay elapsing
assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
executorService.lastDeleteProcess.run();
assertFalse(fs.exists(dstPath));
assertFalse(fs.exists(cacheFile));
}
Usage example of org.apache.flink.core.fs.FileStatus in the Apache Flink project:
method testPersistMultipleMetadataOnlyCheckpoints of class AbstractFileCheckpointStorageAccessTestBase.
// ------------------------------------------------------------------------
//  checkpoints
// ------------------------------------------------------------------------
/**
 * Validates that multiple checkpoints from different jobs with the same checkpoint ID do not
 * interfere with each other.
 */
@Test
public void testPersistMultipleMetadataOnlyCheckpoints() throws Exception {
final FileSystem fs = FileSystem.getLocalFileSystem();
final Path checkpointDir = new Path(tmp.newFolder().toURI());
// both storages use the same checkpoint ID on purpose — isolation is the point
final long checkpointId = 177;
final CheckpointStorageAccess storage1 = createCheckpointStorage(checkpointDir);
storage1.initializeBaseLocationsForCheckpoint();
final CheckpointStorageAccess storage2 = createCheckpointStorage(checkpointDir);
storage2.initializeBaseLocationsForCheckpoint();
final CheckpointStorageLocation loc1 = storage1.initializeLocationForCheckpoint(checkpointId);
final CheckpointStorageLocation loc2 = storage2.initializeLocationForCheckpoint(checkpointId);
// distinct payloads so cross-resolution below can be verified byte-wise
final byte[] data1 = { 77, 66, 55, 99, 88 };
final byte[] data2 = { 1, 3, 2, 5, 4 };
final CompletedCheckpointStorageLocation completedLocation1;
try (CheckpointMetadataOutputStream out = loc1.createMetadataOutputStream()) {
out.write(data1);
completedLocation1 = out.closeAndFinalizeCheckpoint();
}
final String result1 = completedLocation1.getExternalPointer();
final CompletedCheckpointStorageLocation completedLocation2;
try (CheckpointMetadataOutputStream out = loc2.createMetadataOutputStream()) {
out.write(data2);
completedLocation2 = out.closeAndFinalizeCheckpoint();
}
final String result2 = completedLocation2.getExternalPointer();
// check that this went to a file, but in a nested directory structure
// one directory per storage
FileStatus[] files = fs.listStatus(checkpointDir);
assertEquals(2, files.length);
// in each per-storage directory, one for the checkpoint
FileStatus[] job1Files = fs.listStatus(files[0].getPath());
FileStatus[] job2Files = fs.listStatus(files[1].getPath());
assertTrue(job1Files.length >= 1);
assertTrue(job2Files.length >= 1);
// each external pointer must resolve to a directory containing the metadata file
assertTrue(fs.exists(new Path(result1, AbstractFsCheckpointStorageAccess.METADATA_FILE_NAME)));
assertTrue(fs.exists(new Path(result2, AbstractFsCheckpointStorageAccess.METADATA_FILE_NAME)));
// check that both storages can resolve each others contents
validateContents(storage1.resolveCheckpoint(result1).getMetadataHandle(), data1);
validateContents(storage1.resolveCheckpoint(result2).getMetadataHandle(), data2);
validateContents(storage2.resolveCheckpoint(result1).getMetadataHandle(), data1);
validateContents(storage2.resolveCheckpoint(result2).getMetadataHandle(), data2);
}
Usage example of org.apache.flink.core.fs.FileStatus in the Apache Flink project:
method enumerateSplits of class NonSplittingRecursiveEnumerator.
// ------------------------------------------------------------------------

/**
 * Enumerates splits for all files reachable under the given paths, recursing into
 * directories via {@code addSplitsForPath}.
 *
 * @param paths The root paths to enumerate.
 * @param minDesiredSplits A hint that is not used by this implementation.
 * @return All collected splits.
 * @throws IOException If querying the file system fails.
 */
@Override
public Collection<FileSourceSplit> enumerateSplits(Path[] paths, int minDesiredSplits) throws IOException {
    final ArrayList<FileSourceSplit> collectedSplits = new ArrayList<>();

    for (final Path rootPath : paths) {
        final FileSystem fileSystem = rootPath.getFileSystem();
        final FileStatus rootStatus = fileSystem.getFileStatus(rootPath);
        addSplitsForPath(rootStatus, fileSystem, collectedSplits);
    }

    return collectedSplits;
}
Aggregations