Example 71 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.

The class AbstractDFSCarbonFile, method listFiles.

/**
 * Method used to list files recursively and apply file filter on the result.
 */
@Override
public List<CarbonFile> listFiles(boolean recursive, CarbonFileFilter fileFilter) throws IOException {
    List<CarbonFile> carbonFiles = new ArrayList<>();
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (null != fileStatus && fileStatus.isDirectory()) {
        RemoteIterator<LocatedFileStatus> listStatus = fileSystem.listFiles(fileStatus.getPath(), recursive);
        while (listStatus.hasNext()) {
            LocatedFileStatus locatedFileStatus = listStatus.next();
            CarbonFile carbonFile = FileFactory.getCarbonFile(locatedFileStatus.getPath().toString());
            if (fileFilter.accept(carbonFile)) {
                carbonFiles.add(carbonFile);
            }
        }
    }
    return carbonFiles;
}
Also used: FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList)
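
The pattern above is not CarbonData-specific. Below is a minimal, self-contained sketch of the same recursive-list-then-filter idea using only stock Hadoop types; the class name SuffixLister, the /data/store path, and the .carbondata suffix are illustrative assumptions, not part of the CarbonData source.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class SuffixLister {
    private SuffixLister() {}

    /** Recursively collects the paths of all files under dir whose name ends with suffix. */
    public static List<Path> listWithSuffix(FileSystem fs, Path dir, String suffix)
            throws IOException {
        List<Path> matches = new ArrayList<>();
        // listFiles(dir, true) walks the whole subtree lazily and yields only
        // files, never directories, so no isDirectory() check is needed here.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            if (status.getPath().getName().endsWith(suffix)) {
                matches.add(status.getPath());
            }
        }
        return matches;
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // "/data/store" and ".carbondata" are illustrative values only.
        for (Path p : listWithSuffix(fs, new Path("/data/store"), ".carbondata")) {
            System.out.println(p);
        }
    }
}

Iterating the RemoteIterator directly, as both snippets do, keeps memory bounded: the file system returns listing results in batches instead of materializing the whole subtree at once.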

Example 72 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.

The class AbstractDFSCarbonFile, method listFiles.

/**
 * Method used to list files under the path, recursively if requested,
 * returning at most maxCount entries.
 */
@Override
public CarbonFile[] listFiles(boolean recursive, int maxCount) throws IOException {
    List<CarbonFile> carbonFiles = new ArrayList<>();
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (null != fileStatus && fileStatus.isDirectory()) {
        RemoteIterator<LocatedFileStatus> listStatus = fileSystem.listFiles(path, recursive);
        int counter = 0;
        while (counter < maxCount && listStatus.hasNext()) {
            LocatedFileStatus locatedFileStatus = listStatus.next();
            CarbonFile carbonFile = FileFactory.getCarbonFile(locatedFileStatus.getPath().toString());
            carbonFiles.add(carbonFile);
            counter++;
        }
    }
    return carbonFiles.toArray(new CarbonFile[0]);
}
Also used: FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList)
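
As a minimal sketch of the same early-exit idea with plain Hadoop types (BoundedListing and firstN are hypothetical names): testing the cap before calling next() stops the remote iteration as soon as maxCount paths have been collected.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class BoundedListing {
    private BoundedListing() {}

    /** Returns at most maxCount file paths under dir. */
    public static List<Path> firstN(FileSystem fs, Path dir, boolean recursive, int maxCount)
            throws IOException {
        List<Path> paths = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, recursive);
        // Checking the cap before hasNext()/next() means we never ask the
        // file system for listing batches we are not going to use.
        while (paths.size() < maxCount && it.hasNext()) {
            paths.add(it.next().getPath());
        }
        return paths;
    }
}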

Example 73 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project hive by apache.

The class HdfsUtils, method listLocatedStatus.

/**
 * Lists the located statuses of the direct children of path, keeping only those
 * accepted by the filter; a null filter accepts everything.
 */
public static List<FileStatus> listLocatedStatus(final FileSystem fs, final Path path, final PathFilter filter) throws IOException {
    RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
    List<FileStatus> result = new ArrayList<FileStatus>();
    while (itr.hasNext()) {
        FileStatus stat = itr.next();
        if (filter == null || filter.accept(stat.getPath())) {
            result.add(stat);
        }
    }
    return result;
}
Also used: LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList)
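
PathFilter declares a single method, boolean accept(Path), so callers can pass a lambda. A hedged usage sketch of the static method above: ListingDemo and the /warehouse/t1 path are made up for illustration, and the import of HdfsUtils is omitted because its package has moved across Hive versions.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class ListingDemo {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/warehouse/t1"); // illustrative path

        // PathFilter is a single-method interface, so a lambda works.
        // Skip names starting with '_' (e.g. the _SUCCESS marker file).
        List<FileStatus> visible =
                HdfsUtils.listLocatedStatus(fs, dir, p -> !p.getName().startsWith("_"));

        // A null filter accepts every child.
        List<FileStatus> all = HdfsUtils.listLocatedStatus(fs, dir, null);

        System.out.println(visible.size() + " of " + all.size() + " children are visible");
    }
}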

Example 74 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io.

The class HdfsDataSegmentPuller, method getSegmentFiles.

/**
 * Pulls the segment at path into outDir: a directory is copied file by file with
 * retries, a .zip is unzipped, and a .gz is gunzipped.
 */
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {
            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {

                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }
                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);
                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
                        return result;
                    }
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {
            // --------    zip     ---------
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);
            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
            return result;
        } else if (CompressionUtils.isGz(path.getName())) {
            // --------    gzip     ---------
            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);
            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
Also used: Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileUtils(io.druid.java.util.common.FileUtils) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) FileSystem(org.apache.hadoop.fs.FileSystem) ByteSource(com.google.common.io.ByteSource) File(java.io.File)
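
Stripped of the Druid-specific retry wrapper and FileCopyResult bookkeeping, the directory branch reduces to the following standalone sketch; FlatDirectoryCopy and copyChildren are hypothetical names.

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class FlatDirectoryCopy {
    private FlatDirectoryCopy() {}

    /** Copies every regular file directly under srcDir into outDir,
     *  skipping subdirectories, and returns the number of files copied. */
    public static int copyChildren(FileSystem fs, Path srcDir, File outDir) throws IOException {
        int copied = 0;
        // Non-recursive listing: only direct children are visited.
        RemoteIterator<LocatedFileStatus> children = fs.listFiles(srcDir, false);
        while (children.hasNext()) {
            LocatedFileStatus child = children.next();
            // listFiles yields only files, but keep a defensive check,
            // mirroring the isDirectory() skip in the Druid code above.
            if (child.isFile()) {
                File outFile = new File(outDir, child.getPath().getName());
                fs.copyToLocalFile(child.getPath(), new Path(outFile.toURI()));
                copied++;
            }
        }
        return copied;
    }
}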

Example 75 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io.

The class HdfsTaskLogs, method killOlderThan.

/**
 * Deletes every task log under the configured directory whose modification time
 * is older than the given timestamp.
 */
@Override
public void killOlderThan(long timestamp) throws IOException {
    Path taskLogDir = new Path(config.getDirectory());
    FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
    if (fs.exists(taskLogDir)) {
        if (!fs.isDirectory(taskLogDir)) {
            throw new IOException(String.format("taskLogDir [%s] must be a directory.", taskLogDir));
        }
        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
        while (iter.hasNext()) {
            LocatedFileStatus file = iter.next();
            if (file.getModificationTime() < timestamp) {
                Path p = file.getPath();
                log.info("Deleting hdfs task log [%s].", p.toUri().toString());
                fs.delete(p, true);
            }
            if (Thread.currentThread().isInterrupted()) {
                throw new IOException(new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
            }
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException)
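
The core of killOlderThan, minus the Druid config and interruption handling, fits in a short standalone method. ExpireOldFiles and deleteOlderThan are hypothetical names, the /tmp/task-logs path is illustrative, and the seven-day cutoff in main is an arbitrary example.

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class ExpireOldFiles {
    private ExpireOldFiles() {}

    /** Deletes every entry directly under dir modified before cutoffMillis (epoch ms). */
    public static void deleteOlderThan(FileSystem fs, Path dir, long cutoffMillis)
            throws IOException {
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // getModificationTime() is in epoch milliseconds, directly
            // comparable to System.currentTimeMillis().
            if (status.getModificationTime() < cutoffMillis) {
                fs.delete(status.getPath(), true); // recursive delete for directories
            }
        }
    }

    public static void main(String[] args) throws IOException {
        Path logDir = new Path("/tmp/task-logs"); // illustrative path
        FileSystem fs = logDir.getFileSystem(new Configuration());
        // Example: remove anything older than seven days.
        deleteOlderThan(fs, logDir, System.currentTimeMillis() - TimeUnit.DAYS.toMillis(7));
    }
}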

Aggregations

LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 145
Path (org.apache.hadoop.fs.Path): 105
FileSystem (org.apache.hadoop.fs.FileSystem): 54
ArrayList (java.util.ArrayList): 48
FileStatus (org.apache.hadoop.fs.FileStatus): 34
Test (org.junit.Test): 33
IOException (java.io.IOException): 27
Configuration (org.apache.hadoop.conf.Configuration): 20
File (java.io.File): 13
HashSet (java.util.HashSet): 12
FileNotFoundException (java.io.FileNotFoundException): 11
BlockLocation (org.apache.hadoop.fs.BlockLocation): 10
RemoteIterator (org.apache.hadoop.fs.RemoteIterator): 8
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7
StocatorPath (com.ibm.stocator.fs.common.StocatorPath): 6
HashMap (java.util.HashMap): 6
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 6
Map (java.util.Map): 5
Matcher (java.util.regex.Matcher): 5
BufferedReader (java.io.BufferedReader): 4