Use of org.apache.flink.core.fs.FileStatus in project flink by apache.
The class HistoryServerArchiveFetcher, method fetchArchives.
void fetchArchives() {
    try {
        LOG.debug("Starting archive fetching.");
        List<ArchiveEvent> events = new ArrayList<>();
        Map<Path, Set<String>> jobsToRemove = new HashMap<>();
        cachedArchivesPerRefreshDirectory.forEach(
                (path, archives) -> jobsToRemove.put(path, new HashSet<>(archives)));
        Map<Path, Set<Path>> archivesBeyondSizeLimit = new HashMap<>();
        for (HistoryServer.RefreshLocation refreshLocation : refreshDirs) {
            Path refreshDir = refreshLocation.getPath();
            LOG.debug("Checking archive directory {}.", refreshDir);

            // contents of /:refreshDir
            FileStatus[] jobArchives;
            try {
                jobArchives = listArchives(refreshLocation.getFs(), refreshDir);
            } catch (IOException e) {
                LOG.error("Failed to access job archive location for path {}.", refreshDir, e);
                // something went wrong, potentially due to a concurrent deletion
                // do not remove any jobs now; we will retry later
                jobsToRemove.remove(refreshDir);
                continue;
            }

            int historySize = 0;
            for (FileStatus jobArchive : jobArchives) {
                Path jobArchivePath = jobArchive.getPath();
                String jobID = jobArchivePath.getName();
                if (!isValidJobID(jobID, refreshDir)) {
                    continue;
                }
                jobsToRemove.get(refreshDir).remove(jobID);
                historySize++;
                if (historySize > maxHistorySize && processBeyondLimitArchiveDeletion) {
                    archivesBeyondSizeLimit
                            .computeIfAbsent(refreshDir, ignored -> new HashSet<>())
                            .add(jobArchivePath);
                    continue;
                }
                if (cachedArchivesPerRefreshDirectory.get(refreshDir).contains(jobID)) {
                    LOG.trace("Ignoring archive {} because it was already fetched.", jobArchivePath);
                } else {
                    LOG.info("Processing archive {}.", jobArchivePath);
                    try {
                        processArchive(jobID, jobArchivePath);
                        events.add(new ArchiveEvent(jobID, ArchiveEventType.CREATED));
                        cachedArchivesPerRefreshDirectory.get(refreshDir).add(jobID);
                        LOG.info("Processing archive {} finished.", jobArchivePath);
                    } catch (IOException e) {
                        LOG.error("Failure while fetching/processing job archive for job {}.", jobID, e);
                        deleteJobFiles(jobID);
                    }
                }
            }
        }

        if (jobsToRemove.values().stream().flatMap(Set::stream).findAny().isPresent()
                && processExpiredArchiveDeletion) {
            events.addAll(cleanupExpiredJobs(jobsToRemove));
        }
        if (!archivesBeyondSizeLimit.isEmpty() && processBeyondLimitArchiveDeletion) {
            events.addAll(cleanupJobsBeyondSizeLimit(archivesBeyondSizeLimit));
        }
        if (!events.isEmpty()) {
            updateJobOverview(webOverviewDir, webDir);
        }
        events.forEach(jobArchiveEventListener::accept);
        LOG.debug("Finished archive fetching.");
    } catch (Exception e) {
        LOG.error("Critical failure while fetching/processing job archives.", e);
    }
}
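The listArchives helper invoked above is not part of this excerpt. A minimal sketch of what such a helper can look like, assuming it lists the refresh directory via FileSystem#listStatus and orders the archives newest-first so that the maxHistorySize cutoff keeps the most recent jobs (the ordering is an assumption, not confirmed by the snippet):

// Hypothetical sketch of listArchives; the real Flink implementation may differ.
// Assumes java.util.Arrays and java.util.Comparator are imported.
private static FileStatus[] listArchives(FileSystem refreshFS, Path refreshDir) throws IOException {
    // contents of /:refreshDir
    FileStatus[] jobArchives = refreshFS.listStatus(refreshDir);
    if (jobArchives == null) {
        // some FileSystem implementations return null for a missing directory
        return new FileStatus[0];
    }
    // newest first, so that the maxHistorySize cutoff above drops the oldest archives (assumption)
    Arrays.sort(jobArchives, Comparator.comparingLong(FileStatus::getModificationTime).reversed());
    return jobArchives;
}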
Use of org.apache.flink.core.fs.FileStatus in project flink by apache.
The class TestingFileSystem, method addParentDirectories.
private static void addParentDirectories(
        final TestFileStatus file,
        final Map<Path, TestFileStatus> files,
        final Map<Path, Collection<FileStatus>> directories) {

    final Path parentPath = file.getPath().getParent();
    if (parentPath == null) {
        return;
    }

    final TestFileStatus parentStatus = TestFileStatus.forDirectory(parentPath);
    directories.computeIfAbsent(parentPath, (key) -> new ArrayList<>()).add(file);

    final TestFileStatus existingParent = files.putIfAbsent(parentPath, parentStatus);
    if (existingParent == null) {
        addParentDirectories(parentStatus, files, directories);
    } else {
        checkArgument(existingParent.isDir(), "have a file already for a directory path");
    }
}
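The recursion terminates either at the filesystem root (getParent() returns null) or at the first ancestor that is already registered, so each directory entry is created exactly once; putIfAbsent doubles as the visited check. The same pattern in a self-contained form, with plain Paths standing in for TestFileStatus (names below are illustrative):

// Illustrative re-statement of the ancestor-registration pattern above,
// using only java.util collections and org.apache.flink.core.fs.Path.
static void addParents(Path path, Set<Path> knownDirs, Map<Path, Collection<Path>> children) {
    final Path parent = path.getParent();
    if (parent == null) {
        return; // reached the root
    }
    children.computeIfAbsent(parent, p -> new ArrayList<>()).add(path);
    if (knownDirs.add(parent)) {
        // first time we see this directory: register its ancestors as well
        addParents(parent, knownDirs, children);
    }
}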
Use of org.apache.flink.core.fs.FileStatus in project flink by apache.
The class LocalFileSystemTest, method testLocalFilesystem.
/**
 * This test checks the functionality of the {@link LocalFileSystem} class.
 */
@Test
public void testLocalFilesystem() throws Exception {
    final File tempdir = new File(temporaryFolder.getRoot(), UUID.randomUUID().toString());
    final File testfile1 = new File(tempdir, UUID.randomUUID().toString());
    final File testfile2 = new File(tempdir, UUID.randomUUID().toString());
    final Path pathtotestfile1 = new Path(testfile1.toURI().getPath());
    final Path pathtotestfile2 = new Path(testfile2.toURI().getPath());
    final LocalFileSystem lfs = new LocalFileSystem();
    final Path pathtotmpdir = new Path(tempdir.toURI().getPath());

    /*
     * check that lfs can see/create/delete/read directories
     */

    // check that the directory does not exist yet
    assertFalse(lfs.exists(pathtotmpdir));
    assertTrue(tempdir.mkdirs());

    // check that the local file system recognizes the directory
    assertTrue(lfs.exists(pathtotmpdir));
    final FileStatus localstatus1 = lfs.getFileStatus(pathtotmpdir);

    // check that lfs recognizes it as a directory
    assertTrue(localstatus1.isDir());

    // get status for files in this (empty) directory
    final FileStatus[] statusforfiles = lfs.listStatus(pathtotmpdir);

    // no files in there, hence the listing must be empty
    assertEquals(0, statusforfiles.length);

    // check that lfs can delete the directory
    lfs.delete(pathtotmpdir, true);

    // double check that the directory no longer exists
    assertFalse(lfs.exists(pathtotmpdir));
    assertFalse(tempdir.exists());

    // re-create the directory
    lfs.mkdirs(pathtotmpdir);

    // creation successful?
    assertTrue(tempdir.exists());

    /*
     * check that lfs can create/read/write from/to files properly and read meta information
     */

    // create files: one "natively", one using lfs
    final FSDataOutputStream lfsoutput1 = lfs.create(pathtotestfile1, WriteMode.NO_OVERWRITE);
    assertTrue(testfile2.createNewFile());

    // does lfs create files? does lfs recognize created files?
    assertTrue(testfile1.exists());
    assertTrue(lfs.exists(pathtotestfile2));

    // test that lfs can write to files properly
    final byte[] testbytes = {1, 2, 3, 4, 5};
    lfsoutput1.write(testbytes);
    lfsoutput1.close();
    assertEquals(5L, testfile1.length());

    byte[] testbytestest = new byte[5];
    try (FileInputStream fisfile1 = new FileInputStream(testfile1)) {
        assertEquals(testbytestest.length, fisfile1.read(testbytestest));
    }
    assertArrayEquals(testbytes, testbytestest);

    // does lfs report the correct file length?
    assertEquals(testfile1.length(), lfs.getFileStatus(pathtotestfile1).getLen());

    // the same length must be reported by listStatus, even when called on a single file
    assertEquals(testfile1.length(), lfs.listStatus(pathtotestfile1)[0].getLen());

    // test that lfs can read files properly
    final FileOutputStream fosfile2 = new FileOutputStream(testfile2);
    fosfile2.write(testbytes);
    fosfile2.close();

    testbytestest = new byte[5];
    final FSDataInputStream lfsinput2 = lfs.open(pathtotestfile2);
    assertEquals(5, lfsinput2.read(testbytestest));
    lfsinput2.close();
    assertArrayEquals(testbytes, testbytestest);

    // does lfs see both files?
    assertEquals(2, lfs.listStatus(pathtotmpdir).length);

    // we expect exactly one block location per local file, regardless of start and len
    assertEquals(1, lfs.getFileBlockLocations(lfs.getFileStatus(pathtotestfile1), 0, 0).length);

    /*
     * can lfs delete files / directories?
     */
    assertTrue(lfs.delete(pathtotestfile1, false));

    // and can lfs also delete directories recursively?
    assertTrue(lfs.delete(pathtotmpdir, true));
    assertFalse(tempdir.exists());
}
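For readers who want the write/read round trip without the test scaffolding, a condensed sketch. The temp-directory handling is illustrative; in production code the file system usually comes from FileSystem.getLocalFileSystem() rather than a direct constructor call:

// Condensed round trip through Flink's local file system abstraction.
// Assumes java.nio.file.Files is imported; all paths are illustrative.
FileSystem lfs = FileSystem.getLocalFileSystem();
Path file = new Path(Files.createTempDirectory("flink-lfs-demo").toUri().getPath(), "data.bin");
try (FSDataOutputStream out = lfs.create(file, WriteMode.NO_OVERWRITE)) {
    out.write(new byte[] {1, 2, 3, 4, 5});
}
// getLen() reports the on-disk length, matching the assertions in the test above
long len = lfs.getFileStatus(file).getLen(); // 5
try (FSDataInputStream in = lfs.open(file)) {
    byte[] buf = new byte[(int) len];
    in.read(buf); // may read fewer bytes in general; fine for this tiny file
}
lfs.delete(file, false);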
Use of org.apache.flink.core.fs.FileStatus in project flink by apache.
The class AvroParquetRecordFormatTest, method createReader.
// ------------------------------------------------------------------------
// helper methods
// ------------------------------------------------------------------------
private <T> StreamFormat.Reader<T> createReader(
        AvroParquetRecordFormat<T> format,
        Configuration config,
        Path filePath,
        long splitOffset,
        long splitLength)
        throws IOException {

    final FileSystem fileSystem = filePath.getFileSystem();
    final FileStatus fileStatus = fileSystem.getFileStatus(filePath);
    final FSDataInputStream inputStream = fileSystem.open(filePath);

    if (format.isSplittable()) {
        inputStream.seek(splitOffset);
    } else {
        inputStream.seek(0);
        checkArgument(splitLength == fileStatus.getLen());
    }

    return format.createReader(config, inputStream, fileStatus.getLen(), splitOffset + splitLength);
}
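A typical invocation reads the whole file as a single split, with the split length taken from the same FileStatus the helper consults. The format instance below is assumed to come from the test's setup; the path is illustrative:

// Whole-file read: offset 0, length = file size. `format` is assumed to be
// an AvroParquetRecordFormat<GenericRecord> created elsewhere in the test.
final Path parquetFile = new Path("/tmp/records.parquet"); // illustrative
final long fileLen = parquetFile.getFileSystem().getFileStatus(parquetFile).getLen();
try (StreamFormat.Reader<GenericRecord> reader =
        createReader(format, new Configuration(), parquetFile, 0L, fileLen)) {
    GenericRecord record;
    while ((record = reader.read()) != null) {
        // consume the record
    }
}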
Use of org.apache.flink.core.fs.FileStatus in project flink by apache.
The class ContinuousFileMonitoringFunction, method monitorDirAndForwardSplits.
private void monitorDirAndForwardSplits(
        FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException {
    assert (Thread.holdsLock(checkpointLock));

    Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path));
    Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime =
            getInputSplitsSortedByModTime(eligibleFiles);

    for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits : splitsSortedByModTime.entrySet()) {
        long modificationTime = splits.getKey();
        for (TimestampedFileInputSplit split : splits.getValue()) {
            LOG.info("Forwarding split: " + split);
            context.collect(split);
        }
        // update the global modification time
        globalModificationTime = Math.max(globalModificationTime, modificationTime);
    }
}
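The listEligibleFiles helper is not part of this excerpt. A hedged sketch of what it does, assuming a recursive listing that keeps only files modified after the last completed scan; the real Flink implementation additionally applies format-specific filtering, omitted here:

// Sketch of listEligibleFiles: recursively walk the directory and collect
// files newer than globalModificationTime. Assumes java.util.Collections,
// java.util.HashMap, and java.util.Map are imported; error handling simplified.
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path dir) throws IOException {
    final FileStatus[] statuses;
    try {
        statuses = fileSystem.listStatus(dir);
    } catch (IOException e) {
        // the directory may have vanished between scans; retry on the next pass
        return Collections.emptyMap();
    }
    final Map<Path, FileStatus> files = new HashMap<>();
    if (statuses == null) {
        return files;
    }
    for (FileStatus status : statuses) {
        if (status.isDir()) {
            files.putAll(listEligibleFiles(fileSystem, status.getPath()));
        } else if (status.getModificationTime() > globalModificationTime) {
            files.put(status.getPath(), status);
        }
    }
    return files;
}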