Search in sources :

Example 11 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class FileInputFormat method getStatistics.

/**
 * Obtains basic file statistics containing only the file size. If the input is a
 * directory, the size is the sum of all contained files.
 *
 * <p>Failures are not propagated: an {@link IOException} is logged at WARN level, any
 * other {@link Throwable} at ERROR level, and in both cases {@code null} is returned.
 *
 * @param cachedStats previously gathered statistics, or {@code null}; only forwarded to
 *                    {@code getFileStats} when it is a {@code FileBaseStatistics}
 * @return the value produced by {@code getFileStats}, or {@code null} if no statistics
 *         could be determined
 * @see org.apache.flink.api.common.io.InputFormat#getStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics)
 */
@Override
public FileBaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
    // instanceof is already false for null, so no separate null check is required
    final FileBaseStatistics previous;
    if (cachedStats instanceof FileBaseStatistics) {
        previous = (FileBaseStatistics) cachedStats;
    } else {
        previous = null;
    }

    try {
        final Path inputPath = this.filePath;
        final FileSystem fileSystem = FileSystem.get(inputPath.toUri());
        final ArrayList<FileStatus> collected = new ArrayList<FileStatus>(1);
        return getFileStats(previous, inputPath, fileSystem, collected);
    } catch (IOException e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Could not determine statistics for file '" + this.filePath + "' due to an io error: " + e.getMessage());
        }
        // no statistics available
        return null;
    } catch (Throwable unexpected) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Unexpected problem while getting the file statistics for file '" + this.filePath + "': " + unexpected.getMessage(), unexpected);
        }
        // no statistics available
        return null;
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileSystem(org.apache.flink.core.fs.FileSystem) IOException(java.io.IOException)

Example 12 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class LocalFileSystem method listStatus.

/**
 * Lists the status of the given path on the local file system.
 *
 * <ul>
 *   <li>If the path does not exist, {@code null} is returned.</li>
 *   <li>If the path is a regular file, a single-element array describing it is returned.</li>
 *   <li>Otherwise the children reported by {@link File#list()} are resolved one by one via
 *       {@code getFileStatus}; if {@code list()} yields {@code null}, {@code null} is
 *       returned.</li>
 * </ul>
 *
 * @param f the path to list
 * @return the status entries, or {@code null} as described above
 * @throws IOException declared by the signature; may be raised by the helpers invoked here
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
    final File target = pathToFile(f);

    if (!target.exists()) {
        return null;
    }
    if (target.isFile()) {
        return new FileStatus[] { new LocalFileStatus(target, this) };
    }

    final String[] childNames = target.list();
    if (childNames == null) {
        return null;
    }

    final FileStatus[] statuses = new FileStatus[childNames.length];
    int slot = 0;
    for (String childName : childNames) {
        statuses[slot++] = getFileStatus(new Path(f, childName));
    }
    return statuses;
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) File(java.io.File)

Example 13 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class DistCp method getCopyTasks.

/**
 * Recursively walks the entries listed under {@code p} and appends one
 * {@code FileCopyTask} per non-directory entry to {@code tasks}.
 *
 * <p>For a directory entry the method recurses with the directory name plus {@code "/"}
 * appended to {@code rel}; for any other entry the task is created with {@code rel}
 * followed by the entry name. A {@code null} listing results in no tasks being added.
 *
 * @param p     the path whose entries are listed
 * @param rel   the relative prefix accumulated so far
 * @param tasks the list receiving the created copy tasks
 * @throws IOException declared by the signature; may be raised while listing
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
    final FileStatus[] entries = p.getFileSystem().listStatus(p);
    if (entries == null) {
        return;
    }

    for (int i = 0; i < entries.length; i++) {
        final FileStatus entry = entries[i];
        if (!entry.isDir()) {
            final Path source = entry.getPath();
            tasks.add(new FileCopyTask(source, rel + source.getName()));
            continue;
        }
        // descend into the directory, extending the relative prefix by its name
        final Path dir = entry.getPath();
        getCopyTasks(dir, rel + dir.getName() + "/", tasks);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus)

Example 14 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class TestFileSystem method listStatus.

/**
 * Lists the given path via the superclass and re-wraps every returned entry in a new
 * {@code LocalFileStatus} that is bound to this file system instance.
 *
 * <p>A {@code null} listing from the superclass is passed through unchanged instead of
 * failing with a {@link NullPointerException}.
 * NOTE(review): assumes the superclass may return {@code null} (for example when the path
 * does not exist) and that every entry it returns is a {@code LocalFileStatus} — confirm
 * against the parent class.
 *
 * @param f the path to list
 * @return the re-wrapped status entries, or {@code null} if the superclass returned
 *         {@code null}
 * @throws IOException if the superclass listing fails
 */
@Override
public FileStatus[] listStatus(Path f) throws IOException {
    final FileStatus[] stati = super.listStatus(f);
    if (stati == null) {
        // nothing to re-wrap; keep the "no listing available" result intact
        return null;
    }

    final LocalFileStatus[] newStati = new LocalFileStatus[stati.length];
    for (int i = 0; i < stati.length; i++) {
        newStati[i] = new LocalFileStatus(((LocalFileStatus) stati[i]).getFile(), this);
    }
    return newStati;
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) LocalFileStatus(org.apache.flink.core.fs.local.LocalFileStatus)

Example 15 with FileStatus

use of org.apache.flink.core.fs.FileStatus in project flink by apache.

the class HadoopInputFormatBase method getFileStats.

// --------------------------------------------------------------------------------------------
//  Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Gathers size statistics for the given Hadoop input paths.
 *
 * <p>Each path is resolved to its status. A non-directory path is added to {@code files}
 * directly; for a directory, its direct non-directory children are added (nested
 * directories are skipped, not descended into). While doing so the latest modification
 * time over the input paths and the added children is tracked.
 *
 * <p>If {@code cachedStats} is non-null and its last modification time is not older than
 * the latest modification time observed, {@code cachedStats} is returned as is. Otherwise
 * new statistics are built from the summed length of all entries in {@code files}; a
 * non-positive sum is reported as {@code BaseStatistics.SIZE_UNKNOWN}.
 *
 * @param cachedStats     previously computed statistics, or {@code null}
 * @param hadoopFilePaths the input paths to inspect
 * @param files           list that receives the discovered file statuses; entries already
 *                        present also count towards the total length
 * @return the still-valid cached statistics or newly computed ones
 * @throws IOException declared by the signature; may be raised by the file system calls
 */
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long latestModTime = 0L;

    // get the file info and check whether the cached statistics are still valid.
    for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
        final Path filePath = new Path(hadoopPath.toUri());
        final FileSystem fs = FileSystem.get(filePath.toUri());
        final FileStatus file = fs.getFileStatus(filePath);
        latestModTime = Math.max(latestModTime, file.getModificationTime());

        if (!file.isDir()) {
            files.add(file);
            continue;
        }

        // enumerate all files and check their modification time stamp.
        final FileStatus[] fss = fs.listStatus(filePath);
        if (fss == null) {
            // the listing can be unavailable (listStatus may return null); treat the
            // directory as contributing no files rather than failing with an NPE
            continue;
        }
        files.ensureCapacity(files.size() + fss.length);
        for (FileStatus s : fss) {
            if (!s.isDir()) {
                files.add(s);
                latestModTime = Math.max(s.getModificationTime(), latestModTime);
            }
        }
    }

    // check whether the cached statistics are still valid, if we have any
    if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
        return cachedStats;
    }

    // calculate the whole length
    long len = 0;
    for (FileStatus s : files) {
        len += s.getLen();
    }

    // sanity check
    if (len <= 0) {
        len = BaseStatistics.SIZE_UNKNOWN;
    }

    return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) FileSystem(org.apache.flink.core.fs.FileSystem)

Aggregations

FileStatus (org.apache.flink.core.fs.FileStatus)24 Path (org.apache.flink.core.fs.Path)16 FileSystem (org.apache.flink.core.fs.FileSystem)13 ArrayList (java.util.ArrayList)9 IOException (java.io.IOException)8 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)4 File (java.io.File)3 FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics)3 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)3 HashMap (java.util.HashMap)2 List (java.util.List)2 TreeMap (java.util.TreeMap)2 BlockLocation (org.apache.flink.core.fs.BlockLocation)2 FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream)2 DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper)2 DataInputStream (java.io.DataInputStream)1 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1