Search in sources :

Example 36 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class HistoryServerArchiveFetcher method fetchArchives.

/**
 * Scans every configured refresh directory for job archives, processes archives that have
 * not been fetched yet, and triggers cleanup of archives that disappeared or exceed the
 * configured history size. Any unexpected failure is logged rather than propagated.
 */
void fetchArchives() {
    try {
        LOG.debug("Starting archive fetching.");
        final List<ArchiveEvent> archiveEvents = new ArrayList<>();
        // Seed with every cached job id; ids still present after the scan were not found
        // on disk and are therefore candidates for expiry cleanup.
        final Map<Path, Set<String>> staleJobsPerDir = new HashMap<>();
        for (Map.Entry<Path, Set<String>> cached : cachedArchivesPerRefreshDirectory.entrySet()) {
            staleJobsPerDir.put(cached.getKey(), new HashSet<>(cached.getValue()));
        }
        // Archives past the configured maximum history size, collected per directory.
        final Map<Path, Set<Path>> overflowArchives = new HashMap<>();
        for (HistoryServer.RefreshLocation location : refreshDirs) {
            final Path dir = location.getPath();
            LOG.debug("Checking archive directory {}.", dir);
            // contents of /:refreshDir
            final FileStatus[] archiveStatuses;
            try {
                archiveStatuses = listArchives(location.getFs(), dir);
            } catch (IOException e) {
                LOG.error("Failed to access job archive location for path {}.", dir, e);
                // something went wrong, potentially due to a concurrent deletion
                // do not remove any jobs now; we will retry later
                staleJobsPerDir.remove(dir);
                continue;
            }
            int seenArchives = 0;
            for (FileStatus archiveStatus : archiveStatuses) {
                final Path archivePath = archiveStatus.getPath();
                final String jobID = archivePath.getName();
                if (!isValidJobID(jobID, dir)) {
                    continue;
                }
                // still present on disk -> not stale
                staleJobsPerDir.get(dir).remove(jobID);
                seenArchives++;
                if (seenArchives > maxHistorySize && processBeyondLimitArchiveDeletion) {
                    overflowArchives.computeIfAbsent(dir, ignored -> new HashSet<>()).add(archivePath);
                    continue;
                }
                if (cachedArchivesPerRefreshDirectory.get(dir).contains(jobID)) {
                    LOG.trace("Ignoring archive {} because it was already fetched.", archivePath);
                    continue;
                }
                LOG.info("Processing archive {}.", archivePath);
                try {
                    processArchive(jobID, archivePath);
                    archiveEvents.add(new ArchiveEvent(jobID, ArchiveEventType.CREATED));
                    cachedArchivesPerRefreshDirectory.get(dir).add(jobID);
                    LOG.info("Processing archive {} finished.", archivePath);
                } catch (IOException e) {
                    LOG.error("Failure while fetching/processing job archive for job {}.", jobID, e);
                    deleteJobFiles(jobID);
                }
            }
        }
        final boolean anyStale = staleJobsPerDir.values().stream().anyMatch(ids -> !ids.isEmpty());
        if (anyStale && processExpiredArchiveDeletion) {
            archiveEvents.addAll(cleanupExpiredJobs(staleJobsPerDir));
        }
        if (!overflowArchives.isEmpty() && processBeyondLimitArchiveDeletion) {
            archiveEvents.addAll(cleanupJobsBeyondSizeLimit(overflowArchives));
        }
        if (!archiveEvents.isEmpty()) {
            updateJobOverview(webOverviewDir, webDir);
        }
        archiveEvents.forEach(jobArchiveEventListener::accept);
        LOG.debug("Finished archive fetching.");
    } catch (Exception e) {
        LOG.error("Critical failure while fetching/processing job archives.", e);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Arrays(java.util.Arrays) JsonFactory(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonFactory) FileUtils(org.apache.flink.util.FileUtils) LoggerFactory(org.slf4j.LoggerFactory) JsonGenerator(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator) HashMap(java.util.HashMap) JobStatus(org.apache.flink.api.common.JobStatus) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Path(org.apache.flink.core.fs.Path) ObjectMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper) Map(java.util.Map) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) HistoryServerOptions(org.apache.flink.configuration.HistoryServerOptions) FileStatus(org.apache.flink.core.fs.FileStatus) Logger(org.slf4j.Logger) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) Files(java.nio.file.Files) StringWriter(java.io.StringWriter) Collection(java.util.Collection) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) FileWriter(java.io.FileWriter) Set(java.util.Set) JobsOverviewHeaders(org.apache.flink.runtime.rest.messages.JobsOverviewHeaders) IOException(java.io.IOException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) FsJobArchivist(org.apache.flink.runtime.history.FsJobArchivist) File(java.io.File) Consumer(java.util.function.Consumer) MultipleJobsDetails(org.apache.flink.runtime.messages.webmonitor.MultipleJobsDetails) List(java.util.List) JobID(org.apache.flink.api.common.JobID) FileSystem(org.apache.flink.core.fs.FileSystem) Comparator(java.util.Comparator) Collections(java.util.Collections) HashSet(java.util.HashSet) Set(java.util.Set) FileStatus(org.apache.flink.core.fs.FileStatus) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IOException(java.io.IOException) 
FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) HashSet(java.util.HashSet)

Example 37 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class TestingFileSystem method addParentDirectories.

/**
 * Registers {@code file} under its parent directory and recursively creates directory
 * entries for every ancestor that is not yet known.
 *
 * @throws IllegalArgumentException if a path that is needed as a directory is already
 *     registered as a non-directory file
 */
private static void addParentDirectories(final TestFileStatus file, final Map<Path, TestFileStatus> files, final Map<Path, Collection<FileStatus>> directories) {
    final Path parent = file.getPath().getParent();
    if (parent == null) {
        // reached the filesystem root; nothing left to register
        return;
    }
    final TestFileStatus parentDirStatus = TestFileStatus.forDirectory(parent);
    directories.computeIfAbsent(parent, ignored -> new ArrayList<>()).add(file);
    final TestFileStatus previous = files.putIfAbsent(parent, parentDirStatus);
    if (previous == null) {
        // first time this directory is seen: walk up and register its ancestors too
        addParentDirectories(parentDirStatus, files, directories);
    } else {
        checkArgument(previous.isDir(), "have a file already for a directory path");
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Arrays(java.util.Arrays) FileStatus(org.apache.flink.core.fs.FileStatus) Collection(java.util.Collection) FileSystemKind(org.apache.flink.core.fs.FileSystemKind) IOException(java.io.IOException) HashMap(java.util.HashMap) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) Field(java.lang.reflect.Field) Constructor(java.lang.reflect.Constructor) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) FileSystem(org.apache.flink.core.fs.FileSystem) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) Path(org.apache.flink.core.fs.Path) BlockLocation(org.apache.flink.core.fs.BlockLocation) Map(java.util.Map) URI(java.net.URI) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) Nullable(javax.annotation.Nullable) ArrayList(java.util.ArrayList)

Example 38 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class LocalFileSystemTest method testLocalFilesystem.

/**
 * This test checks the functionality of the {@link LocalFileSystem} class.
 */
@Test
/**
 * This test checks the functionality of the {@link LocalFileSystem} class.
 */
@Test
public void testLocalFilesystem() throws Exception {
    final File tempdir = new File(temporaryFolder.getRoot(), UUID.randomUUID().toString());
    final File testfile1 = new File(tempdir, UUID.randomUUID().toString());
    final File testfile2 = new File(tempdir, UUID.randomUUID().toString());
    final Path pathtotestfile1 = new Path(testfile1.toURI().getPath());
    final Path pathtotestfile2 = new Path(testfile2.toURI().getPath());
    final LocalFileSystem lfs = new LocalFileSystem();
    final Path pathtotmpdir = new Path(tempdir.toURI().getPath());
    /*
     * check that lfs can see/create/delete/read directories
     */
    // check that dir is not existent yet
    assertFalse(lfs.exists(pathtotmpdir));
    assertTrue(tempdir.mkdirs());
    // check that local file system recognizes file..
    assertTrue(lfs.exists(pathtotmpdir));
    final FileStatus localstatus1 = lfs.getFileStatus(pathtotmpdir);
    // check that lfs recognizes directory..
    assertTrue(localstatus1.isDir());
    // get status for files in this (empty) directory..
    final FileStatus[] statusforfiles = lfs.listStatus(pathtotmpdir);
    // no files in there.. hence, must be zero
    assertEquals(0, statusforfiles.length);
    // check that lfs can delete directory..
    lfs.delete(pathtotmpdir, true);
    // double check that directory is not existent anymore..
    assertFalse(lfs.exists(pathtotmpdir));
    assertFalse(tempdir.exists());
    // re-create directory..
    lfs.mkdirs(pathtotmpdir);
    // creation successful?
    assertTrue(tempdir.exists());
    /*
     * check that lfs can create/read/write from/to files properly and read meta information..
     */
    final byte[] testbytes = { 1, 2, 3, 4, 5 };
    // create files.. one "natively", one using lfs; use try-with-resources so the
    // stream is closed even if an assertion in between fails
    try (FSDataOutputStream lfsoutput1 = lfs.create(pathtotestfile1, WriteMode.NO_OVERWRITE)) {
        assertTrue(testfile2.createNewFile());
        // does lfs create files? does lfs recognize created files?
        assertTrue(testfile1.exists());
        assertTrue(lfs.exists(pathtotestfile2));
        // test that lfs can write to files properly
        lfsoutput1.write(testbytes);
    }
    assertEquals(5L, testfile1.length());
    byte[] testbytestest = new byte[5];
    try (FileInputStream fisfile1 = new FileInputStream(testfile1)) {
        assertEquals(testbytestest.length, fisfile1.read(testbytestest));
    }
    assertArrayEquals(testbytes, testbytestest);
    // does lfs see the correct file length?
    assertEquals(testfile1.length(), lfs.getFileStatus(pathtotestfile1).getLen());
    // as well, when we call the listStatus (that is intended for directories?)
    assertEquals(testfile1.length(), lfs.listStatus(pathtotestfile1)[0].getLen());
    // test that lfs can read files properly
    try (FileOutputStream fosfile2 = new FileOutputStream(testfile2)) {
        fosfile2.write(testbytes);
    }
    testbytestest = new byte[5];
    try (FSDataInputStream lfsinput2 = lfs.open(pathtotestfile2)) {
        assertEquals(5, lfsinput2.read(testbytestest));
    }
    assertArrayEquals(testbytes, testbytestest);
    // does lfs see two files?
    assertEquals(2, lfs.listStatus(pathtotmpdir).length);
    // do we get exactly one blocklocation per file? no matter what start and len we provide
    assertEquals(1, lfs.getFileBlockLocations(lfs.getFileStatus(pathtotestfile1), 0, 0).length);
    /*
     * can lfs delete files / directories?
     */
    assertTrue(lfs.delete(pathtotestfile1, false));
    // and can lfs also delete directories recursively?
    assertTrue(lfs.delete(pathtotmpdir, true));
    assertFalse(tempdir.exists());
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileOutputStream(java.io.FileOutputStream) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 39 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class AvroParquetRecordFormatTest method createReader.

// ------------------------------------------------------------------------
// helper methods
// ------------------------------------------------------------------------

/**
 * Opens {@code filePath} and builds a format reader positioned for the requested split.
 *
 * <p>For a non-splittable format the stream is rewound to the start of the file and the
 * split must cover the whole file; for a splittable format the stream is positioned at
 * the split offset.
 */
private <T> StreamFormat.Reader<T> createReader(AvroParquetRecordFormat<T> format, Configuration config, Path filePath, long splitOffset, long splitLength) throws IOException {
    final FileSystem fs = filePath.getFileSystem();
    final FileStatus status = fs.getFileStatus(filePath);
    final FSDataInputStream stream = fs.open(filePath);
    if (!format.isSplittable()) {
        stream.seek(0);
        checkArgument(splitLength == status.getLen());
    } else {
        stream.seek(splitOffset);
    }
    return format.createReader(config, stream, status.getLen(), splitOffset + splitLength);
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)

Example 40 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class ContinuousFileMonitoringFunction method monitorDirAndForwardSplits.

/**
 * Lists the currently eligible files under the monitored path, forwards their input
 * splits to the source context in modification-time order, and advances the global
 * modification time watermark. Must be called while holding {@code checkpointLock}.
 */
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException {
    assert (Thread.holdsLock(checkpointLock));
    final Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path));
    final Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles);
    for (Map.Entry<Long, List<TimestampedFileInputSplit>> entry : splitsSortedByModTime.entrySet()) {
        for (TimestampedFileInputSplit split : entry.getValue()) {
            LOG.info("Forwarding split: " + split);
            context.collect(split);
        }
        // update the global modification time
        globalModificationTime = Math.max(globalModificationTime, entry.getKey());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Aggregations

FileStatus (org.apache.flink.core.fs.FileStatus)44 Path (org.apache.flink.core.fs.Path)27 FileSystem (org.apache.flink.core.fs.FileSystem)22 ArrayList (java.util.ArrayList)15 IOException (java.io.IOException)12 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)7 File (java.io.File)5 FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream)5 Test (org.junit.Test)5 HashMap (java.util.HashMap)4 FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)4 HashSet (java.util.HashSet)3 List (java.util.List)3 Map (java.util.Map)3 JobID (org.apache.flink.api.common.JobID)3 FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics)3 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)3 FileNotFoundException (java.io.FileNotFoundException)2 OutputStreamWriter (java.io.OutputStreamWriter)2 Arrays (java.util.Arrays)2