Search in sources :

Example 41 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class ContinuousFileMonitoringFunction method listEligibleFiles.

/**
 * Collects the files found under {@code path} that {@code shouldIgnore} does not filter out.
 *
 * <p>Directories are descended into recursively, but only when the format has nested file
 * enumeration enabled and accepts the directory.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The path whose entries are listed.
 * @return A map from file path to file status for every eligible file; an empty map if the
 *     listing threw an {@link IOException} or returned {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) {
    final FileStatus[] listing;
    try {
        listing = fileSystem.listStatus(path);
    } catch (IOException e) {
        // delay the check for eligible files in this case
        return Collections.emptyMap();
    }

    if (listing == null) {
        LOG.warn("Path does not exist: {}", path);
        return Collections.emptyMap();
    }

    final Map<Path, FileStatus> eligible = new HashMap<>();
    for (FileStatus entry : listing) {
        if (entry.isDir()) {
            // recurse only if nested enumeration is on and the format accepts this directory
            if (format.getNestedFileEnumeration() && format.acceptFile(entry)) {
                eligible.putAll(listEligibleFiles(fileSystem, entry.getPath()));
            }
            continue;
        }

        // plain file: keep it unless it is filtered out
        final Path candidate = entry.getPath();
        if (!shouldIgnore(candidate, entry.getModificationTime())) {
            eligible.put(candidate, entry);
        }
    }
    return eligible;
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) HashMap(java.util.HashMap) IOException(java.io.IOException)

Example 42 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class FileCacheDirectoriesTest method testDirectoryCleanUp.

/**
 * Checks that the cached directory is still present after each of the two execution attempts
 * releases it, and that it is gone once the captured delete process has been run.
 */
@Test
public void testDirectoryCleanUp() throws Exception {
    final JobID job = new JobID();
    final ExecutionAttemptID firstAttempt = new ExecutionAttemptID();
    final ExecutionAttemptID secondAttempt = new ExecutionAttemptID();
    final String cachedName = "test_file";

    // register the same entry for both attempts; only the first future is inspected
    final DistributedCache.DistributedCacheEntry cacheEntry =
            new DistributedCache.DistributedCacheEntry(
                    cachedName,
                    false,
                    InstantiationUtil.serializeObject(permanentBlobKey),
                    true);
    final Future<Path> firstCopy =
            fileCache.createTmpFile(cachedName, cacheEntry, job, firstAttempt);
    fileCache.createTmpFile(cachedName, cacheEntry, job, secondAttempt);

    final Path cachedDir = firstCopy.get();
    final FileSystem fileSystem = cachedDir.getFileSystem();
    final FileStatus dirStatus = fileSystem.getFileStatus(cachedDir);
    final Path cachedFile = new Path(cachedDir, "cacheFile");

    assertTrue(dirStatus.isDir());
    assertTrue(fileSystem.exists(cachedFile));

    // releasing the first attempt must leave the directory in place
    fileCache.releaseJob(job, firstAttempt);
    assertTrue(dirStatus.isDir());
    assertTrue(fileSystem.exists(cachedFile));

    // releasing the second attempt only schedules the deletion after cleanupInterval
    fileCache.releaseJob(job, secondAttempt);
    assertTrue(dirStatus.isDir());
    assertTrue(fileSystem.exists(cachedFile));

    // run the captured delete process by hand; afterwards the directory must be gone
    assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
    executorService.lastDeleteProcess.run();
    assertFalse(fileSystem.exists(cachedDir));
    assertFalse(fileSystem.exists(cachedFile));
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) FileStatus(org.apache.flink.core.fs.FileStatus) DistributedCache(org.apache.flink.api.common.cache.DistributedCache) FileSystem(org.apache.flink.core.fs.FileSystem) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 43 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class AbstractFileCheckpointStorageAccessTestBase method testPersistMultipleMetadataOnlyCheckpoints.

// ------------------------------------------------------------------------
// checkpoints
// ------------------------------------------------------------------------
/**
 * Writes metadata for the same checkpoint ID through two separately created storage accesses
 * that share one base directory, then verifies that the two results do not interfere with each
 * other and that each storage can resolve both of them.
 */
@Test
public void testPersistMultipleMetadataOnlyCheckpoints() throws Exception {
    final FileSystem localFs = FileSystem.getLocalFileSystem();
    final Path baseDir = new Path(tmp.newFolder().toURI());
    final long sharedCheckpointId = 177;

    final CheckpointStorageAccess firstStorage = createCheckpointStorage(baseDir);
    firstStorage.initializeBaseLocationsForCheckpoint();
    final CheckpointStorageAccess secondStorage = createCheckpointStorage(baseDir);
    secondStorage.initializeBaseLocationsForCheckpoint();

    final CheckpointStorageLocation firstLocation =
            firstStorage.initializeLocationForCheckpoint(sharedCheckpointId);
    final CheckpointStorageLocation secondLocation =
            secondStorage.initializeLocationForCheckpoint(sharedCheckpointId);

    final byte[] firstPayload = { 77, 66, 55, 99, 88 };
    final byte[] secondPayload = { 1, 3, 2, 5, 4 };

    // persist the first payload and remember where it ended up
    final CompletedCheckpointStorageLocation firstCompleted;
    try (CheckpointMetadataOutputStream metadataOut = firstLocation.createMetadataOutputStream()) {
        metadataOut.write(firstPayload);
        firstCompleted = metadataOut.closeAndFinalizeCheckpoint();
    }
    final String firstPointer = firstCompleted.getExternalPointer();

    // same for the second payload, through the second storage
    final CompletedCheckpointStorageLocation secondCompleted;
    try (CheckpointMetadataOutputStream metadataOut = secondLocation.createMetadataOutputStream()) {
        metadataOut.write(secondPayload);
        secondCompleted = metadataOut.closeAndFinalizeCheckpoint();
    }
    final String secondPointer = secondCompleted.getExternalPointer();

    // the base directory must hold exactly one sub-directory per storage
    final FileStatus[] perStorageDirs = localFs.listStatus(baseDir);
    assertEquals(2, perStorageDirs.length);

    // each per-storage directory must contain at least the checkpoint entry
    final FileStatus[] firstDirEntries = localFs.listStatus(perStorageDirs[0].getPath());
    final FileStatus[] secondDirEntries = localFs.listStatus(perStorageDirs[1].getPath());
    assertTrue(firstDirEntries.length >= 1);
    assertTrue(secondDirEntries.length >= 1);

    // the metadata file must exist below each returned external pointer
    final String metadataFileName = AbstractFsCheckpointStorageAccess.METADATA_FILE_NAME;
    assertTrue(localFs.exists(new Path(firstPointer, metadataFileName)));
    assertTrue(localFs.exists(new Path(secondPointer, metadataFileName)));

    // both storages must be able to resolve their own and each other's contents
    final CheckpointStorageAccess[] storages = { firstStorage, secondStorage };
    for (CheckpointStorageAccess storage : storages) {
        validateContents(storage.resolveCheckpoint(firstPointer).getMetadataHandle(), firstPayload);
        validateContents(storage.resolveCheckpoint(secondPointer).getMetadataHandle(), secondPayload);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileSystem(org.apache.flink.core.fs.FileSystem) CheckpointMetadataOutputStream(org.apache.flink.runtime.state.CheckpointMetadataOutputStream) CheckpointStorageLocation(org.apache.flink.runtime.state.CheckpointStorageLocation) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) CheckpointStorageAccess(org.apache.flink.runtime.state.CheckpointStorageAccess) MemoryBackendCheckpointStorageAccess(org.apache.flink.runtime.state.memory.MemoryBackendCheckpointStorageAccess) Test(org.junit.Test)

Example 44 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class NonSplittingRecursiveEnumerator method enumerateSplits.

// ------------------------------------------------------------------------
/**
 * Builds the splits for all given paths.
 *
 * <p>For each path, its file system and file status are looked up and handed to
 * {@code addSplitsForPath}, which appends to the shared result list.
 *
 * @param paths The paths to enumerate.
 * @param minDesiredSplits Not used by this implementation.
 * @return The splits collected across all paths.
 * @throws IOException If resolving a file system or file status fails, or if thrown while
 *     adding splits for a path.
 */
@Override
public Collection<FileSourceSplit> enumerateSplits(Path[] paths, int minDesiredSplits) throws IOException {
    final ArrayList<FileSourceSplit> collected = new ArrayList<>();
    for (int i = 0; i < paths.length; i++) {
        final Path root = paths[i];
        final FileSystem fileSystem = root.getFileSystem();
        addSplitsForPath(fileSystem.getFileStatus(root), fileSystem, collected);
    }
    return collected;
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) FileSystem(org.apache.flink.core.fs.FileSystem) ArrayList(java.util.ArrayList)

Aggregations

FileStatus (org.apache.flink.core.fs.FileStatus)44 Path (org.apache.flink.core.fs.Path)27 FileSystem (org.apache.flink.core.fs.FileSystem)22 ArrayList (java.util.ArrayList)15 IOException (java.io.IOException)12 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)7 File (java.io.File)5 FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream)5 Test (org.junit.Test)5 HashMap (java.util.HashMap)4 FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)4 HashSet (java.util.HashSet)3 List (java.util.List)3 Map (java.util.Map)3 JobID (org.apache.flink.api.common.JobID)3 FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics)3 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)3 FileNotFoundException (java.io.FileNotFoundException)2 OutputStreamWriter (java.io.OutputStreamWriter)2 Arrays (java.util.Arrays)2