use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class HadoopInputFormatBase method getFileStats.
// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
long latestModTime = 0L;
// get the file info and check whether the cached statistics are still valid.
for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
final Path filePath = new Path(hadoopPath.toUri());
final FileSystem fs = FileSystem.get(filePath.toUri());
final FileStatus file = fs.getFileStatus(filePath);
latestModTime = Math.max(latestModTime, file.getModificationTime());
// enumerate all files and check their modification time stamp.
if (file.isDir()) {
FileStatus[] fss = fs.listStatus(filePath);
files.ensureCapacity(files.size() + fss.length);
for (FileStatus s : fss) {
if (!s.isDir()) {
files.add(s);
latestModTime = Math.max(s.getModificationTime(), latestModTime);
}
}
} else {
files.add(file);
}
}
// check whether the cached statistics are still valid, if we have any
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
return cachedStats;
}
// calculate the whole length
long len = 0;
for (FileStatus s : files) {
len += s.getLen();
}
// sanity check
if (len <= 0) {
len = BaseStatistics.SIZE_UNKNOWN;
}
return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class MapRFileSystem method listStatus.
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
final org.apache.hadoop.fs.FileStatus[] hadoopFiles = this.fs.listStatus(new org.apache.hadoop.fs.Path(f.toString()));
final FileStatus[] files = new FileStatus[hadoopFiles.length];
// Convert types
for (int i = 0; i < files.length; i++) {
files[i] = new HadoopFileStatus(hadoopFiles[i]);
}
return files;
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class ContinuousFileMonitoringFunction method monitorDirAndForwardSplits.
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException {
assert (Thread.holdsLock(checkpointLock));
Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path));
Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles);
for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits : splitsSortedByModTime.entrySet()) {
long modificationTime = splits.getKey();
for (TimestampedFileInputSplit split : splits.getValue()) {
LOG.info("Forwarding split: " + split);
context.collect(split);
}
// update the global modification time
globalModificationTime = Math.max(globalModificationTime, modificationTime);
}
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class ContinuousFileMonitoringFunction method listEligibleFiles.
/**
* Returns the paths of the files not yet processed.
* @param fileSystem The filesystem where the monitored directory resides.
*/
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {
final FileStatus[] statuses;
try {
statuses = fileSystem.listStatus(path);
} catch (IOException e) {
// delay the check for eligible files in this case
return Collections.emptyMap();
}
if (statuses == null) {
LOG.warn("Path does not exist: {}", path);
return Collections.emptyMap();
} else {
Map<Path, FileStatus> files = new HashMap<>();
// handle the new files
for (FileStatus status : statuses) {
if (!status.isDir()) {
Path filePath = status.getPath();
long modificationTime = status.getModificationTime();
if (!shouldIgnore(filePath, modificationTime)) {
files.put(filePath, status);
}
} else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
files.putAll(listEligibleFiles(fileSystem, status.getPath()));
}
}
return files;
}
}
Aggregations