use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class FileInputFormat method getStatistics.
/**
 * Computes basic file statistics (file size only) for the configured input path by delegating to
 * {@code getFileStats}. Per the original contract, if the input is a directory the size is the
 * sum of all contained files.
 *
 * <p>Failures are not propagated: an {@link IOException} is logged as a warning, any other
 * {@link Throwable} is logged as an error, and in both cases {@code null} is returned.
 *
 * @param cachedStats previously computed statistics; only used when it is a
 *        {@code FileBaseStatistics}, otherwise treated as absent
 * @return the statistics produced by {@code getFileStats}, or {@code null} if they could not be
 *         determined
 * @see org.apache.flink.api.common.io.InputFormat#getStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics)
 */
@Override
public FileBaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
    // instanceof is false for null, so no separate null check is required
    FileBaseStatistics previous = null;
    if (cachedStats instanceof FileBaseStatistics) {
        previous = (FileBaseStatistics) cachedStats;
    }

    try {
        final Path inputPath = this.filePath;
        final FileSystem fileSystem = FileSystem.get(inputPath.toUri());
        final ArrayList<FileStatus> collectedFiles = new ArrayList<FileStatus>(1);
        return getFileStats(previous, inputPath, fileSystem, collectedFiles);
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Could not determine statistics for file '" + this.filePath + "' due to an io error: " + ioex.getMessage());
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Unexpected problem while getting the file statistics for file '" + this.filePath + "': " + t.getMessage(), t);
        }
    }

    // reached only after a logged failure: no statistics available
    return null;
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class LocalFileSystem method listStatus.
/**
 * Lists the status of the given path on the local file system.
 *
 * <p>For a regular file, a single-element array describing that file is returned. Otherwise one
 * status per entry name reported by {@link File#list()} is returned, resolved relative to
 * {@code f}.
 *
 * @param f the path to list
 * @return the status entries, or {@code null} if the path does not exist or its entries could
 *         not be listed
 * @throws IOException declared by the signature; presumably raised by {@code getFileStatus} for
 *         an individual entry (not visible in this block)
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
    final File target = pathToFile(f);

    if (!target.exists()) {
        return null;
    }
    if (target.isFile()) {
        return new FileStatus[] { new LocalFileStatus(target, this) };
    }

    final String[] childNames = target.list();
    if (childNames == null) {
        // File#list signals "not a directory" or an I/O error with null
        return null;
    }

    final FileStatus[] statuses = new FileStatus[childNames.length];
    int next = 0;
    for (String childName : childNames) {
        statuses[next++] = getFileStatus(new Path(f, childName));
    }
    return statuses;
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class DistCp method getCopyTasks.
/**
 * Recursively walks the entries below {@code p} and appends one {@code FileCopyTask} per
 * non-directory entry to {@code tasks}.
 *
 * @param p     the path whose entries are listed
 * @param rel   prefix prepended to each entry name to form the task's relative path; for
 *              sub-directories the directory name plus {@code "/"} is appended before recursing
 * @param tasks output list that receives the created tasks
 * @throws IOException if obtaining the file system or listing a path fails
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
    final FileStatus[] entries = p.getFileSystem().listStatus(p);
    if (entries == null) {
        // nothing could be listed for this path
        return;
    }

    for (FileStatus entry : entries) {
        if (!entry.isDir()) {
            final Path filePath = entry.getPath();
            tasks.add(new FileCopyTask(filePath, rel + filePath.getName()));
            continue;
        }
        // descend into the sub-directory, extending the relative prefix by its name
        getCopyTasks(entry.getPath(), rel + entry.getPath().getName() + "/", tasks);
    }
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class TestFileSystem method listStatus.
/**
 * Lists the entries of the given path via the superclass and re-wraps every returned status in
 * a new {@code LocalFileStatus} that is bound to this file system instance.
 *
 * <p>Each status returned by the superclass is cast to {@code LocalFileStatus}; the superclass is
 * presumably the local file system implementation (its declaration is not visible here).
 *
 * @param f the path to list
 * @return the re-wrapped status entries, or {@code null} if the superclass reports {@code null}
 *         (for example for a path that does not exist)
 * @throws IOException if the superclass listing fails
 */
@Override
public FileStatus[] listStatus(Path f) throws IOException {
    final FileStatus[] stati = super.listStatus(f);
    if (stati == null) {
        // propagate "nothing to list" instead of failing with a NullPointerException below
        return null;
    }

    final LocalFileStatus[] newStati = new LocalFileStatus[stati.length];
    for (int i = 0; i < stati.length; i++) {
        newStati[i] = new LocalFileStatus(((LocalFileStatus) stati[i]).getFile(), this);
    }
    return newStati;
}
use of org.apache.flink.core.fs.FileStatus in project flink by apache.
the class HadoopInputFormatBase method getFileStats.
// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Collects the file statuses for the given Hadoop paths and derives size statistics from them.
 *
 * <p>For every path that is a directory, its direct non-directory children are collected
 * (sub-directories are not descended into); for any other path the path's own status is
 * collected. The newest modification time seen across the paths and the collected files is
 * tracked; if {@code cachedStats} is at least that recent, it is returned unchanged.
 *
 * @param cachedStats     previously computed statistics, or {@code null} if there are none
 * @param hadoopFilePaths the input paths to inspect
 * @param files           output list; the statuses of all collected files are appended to it
 * @return {@code cachedStats} if still valid, otherwise new statistics holding the latest
 *         modification time and the summed file length ({@code BaseStatistics.SIZE_UNKNOWN} if
 *         the sum is not positive), with an unknown average record width
 * @throws IOException if a file system or file status cannot be obtained, or if the contents of
 *         a directory cannot be listed
 */
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long latestModTime = 0L;

    // get the file info and check whether the cached statistics are still valid.
    for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
        final Path filePath = new Path(hadoopPath.toUri());
        final FileSystem fs = FileSystem.get(filePath.toUri());

        final FileStatus file = fs.getFileStatus(filePath);
        latestModTime = Math.max(latestModTime, file.getModificationTime());

        // enumerate all files and check their modification time stamp.
        if (file.isDir()) {
            final FileStatus[] fss = fs.listStatus(filePath);
            if (fss == null) {
                // listStatus may yield null (e.g. the directory vanished or could not be read);
                // report that as an I/O failure rather than a NullPointerException
                throw new IOException("Could not list the contents of directory '" + filePath + "'.");
            }
            files.ensureCapacity(files.size() + fss.length);

            for (FileStatus s : fss) {
                if (!s.isDir()) {
                    files.add(s);
                    latestModTime = Math.max(s.getModificationTime(), latestModTime);
                }
            }
        } else {
            files.add(file);
        }
    }

    // check whether the cached statistics are still valid, if we have any
    if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
        return cachedStats;
    }

    // calculate the whole length
    long len = 0;
    for (FileStatus s : files) {
        len += s.getLen();
    }

    // sanity check: a non-positive total is reported as "unknown"
    if (len <= 0) {
        len = BaseStatistics.SIZE_UNKNOWN;
    }

    return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
Aggregations