Example 41 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project carbondata by apache.

The class CarbonInputFormat, method getFileStatus:

private void getFileStatus(JobContext job, String[] segmentsToConsider, String[] filesToConsider, List<FileStatus> result) throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }
    PathFilter inputFilter = getDataFileFilter();
    AbsoluteTableIdentifier absIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTablePath tablePath = getTablePath(absIdentifier);
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());
    // get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];
        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            String dataDirectoryPath = absIdentifier.appendWithLocalPrefix(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            if (filesToConsider.length == 0) {
                Path segmentPath = new Path(dataDirectoryPath);
                FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
                getFileStatusInternal(inputFilter, fs, segmentPath, result);
            } else {
                for (int k = 0; k < filesToConsider.length; ++k) {
                    String dataPath = absIdentifier.appendWithLocalPrefix(tablePath.getCarbonDataDirectoryPath(partition, segmentId) + File.separator + filesToConsider[k]);
                    Path filePath = new Path(dataPath);
                    FileSystem fs = filePath.getFileSystem(job.getConfiguration());
                    getFileStatusInternal(inputFilter, fs, filePath, result);
                }
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath), CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath), PathFilter (org.apache.hadoop.fs.PathFilter), AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier), FileSystem (org.apache.hadoop.fs.FileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), IOException (java.io.IOException)
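
getDataFileFilter() is not shown in this snippet. As a rough, hypothetical sketch, a data-file PathFilter for this use case might look like the following, assuming CarbonData data files use the ".carbondata" extension (the real filter and extension constant live in the carbondata project itself):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical stand-in for getDataFileFilter(): accept only data files,
// assuming they end with the ".carbondata" extension.
public class CarbonDataFileFilter implements PathFilter {

    @Override
    public boolean accept(Path path) {
        return path.getName().endsWith(".carbondata");
    }
}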

Example 42 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project incubator-systemml by apache.

The class ApplyTfCSVMR, method deletePartFiles:

private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    PathFilter filter = new PathFilter() {

        public boolean accept(Path file) {
            return file.getName().startsWith("part-");
        }
    };
    FileStatus[] list = fs.listStatus(path, filter);
    for (FileStatus stat : list) {
        fs.delete(stat.getPath(), false);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus)
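
Since PathFilter declares a single accept(Path) method, the anonymous class above can be written as a lambda on Java 8+. A minimal, self-contained sketch of the same deletePartFiles logic (the class name PartFileCleaner is invented for illustration):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class PartFileCleaner {

    // Same behavior as deletePartFiles above, with the anonymous
    // PathFilter replaced by a lambda.
    static void deletePartFiles(FileSystem fs, Path path) throws IOException {
        FileStatus[] list = fs.listStatus(path, p -> p.getName().startsWith("part-"));
        for (FileStatus stat : list) {
            // non-recursive delete: each part-* entry is expected to be a file
            fs.delete(stat.getPath(), false);
        }
    }
}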

Example 43 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project lucene-solr by apache.

The class HdfsDirectoryFactory, method cleanupOldIndexDirectories:

@Override
public void cleanupOldIndexDirectories(final String dataDir, final String currentIndexDir, boolean afterReload) {
    // Get the FileSystem object
    final Path dataDirPath = new Path(dataDir);
    final Configuration conf = getConf();
    FileSystem fileSystem = null;
    try {
        fileSystem = tmpFsCache.get(dataDir, () -> FileSystem.get(dataDirPath.toUri(), conf));
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }
    boolean pathExists = false;
    try {
        pathExists = fileSystem.exists(dataDirPath);
    } catch (IOException e) {
        LOG.error("Error checking if hdfs path " + dataDir + " exists", e);
    }
    if (!pathExists) {
        LOG.warn("{} does not point to a valid data directory; skipping clean-up of old index directories.", dataDir);
        return;
    }
    // make sure we don't delete the current
    final Path currentIndexDirPath = new Path(currentIndexDir);
    final FileSystem fs = fileSystem;
    FileStatus[] oldIndexDirs = null;
    try {
        oldIndexDirs = fileSystem.listStatus(dataDirPath, new PathFilter() {

            @Override
            public boolean accept(Path path) {
                boolean accept = false;
                String pathName = path.getName();
                try {
                    accept = fs.isDirectory(path) && !path.equals(currentIndexDirPath) && (pathName.equals("index") || pathName.matches(INDEX_W_TIMESTAMP_REGEX));
                } catch (IOException e) {
                    LOG.error("Error checking if path {} is an old index directory, caused by: {}", path, e);
                }
                return accept;
            }
        });
    } catch (IOException ioExc) {
        LOG.error("Error checking for old index directories to clean-up.", ioExc);
    }
    // listStatus may have failed (oldIndexDirs == null) or found nothing;
    // check before dereferencing to avoid a NullPointerException
    if (oldIndexDirs == null || oldIndexDirs.length == 0)
        // nothing to clean-up
        return;
    List<Path> oldIndexPaths = new ArrayList<>(oldIndexDirs.length);
    for (FileStatus ofs : oldIndexDirs) {
        oldIndexPaths.add(ofs.getPath());
    }
    Collections.sort(oldIndexPaths, Collections.reverseOrder());
    Set<String> livePaths = getLivePaths();
    int i = 0;
    if (afterReload) {
        LOG.info("Will not remove most recent old directory on reload {}", oldIndexDirs[0]);
        i = 1;
    }
    LOG.info("Found {} old index directories to clean-up under {} afterReload={}", oldIndexDirs.length - i, dataDirPath, afterReload);
    for (; i < oldIndexPaths.size(); i++) {
        Path oldDirPath = oldIndexPaths.get(i);
        if (livePaths.contains(oldDirPath.toString())) {
            LOG.warn("Cannot delete directory {} because it is still being referenced in the cache.", oldDirPath);
        } else {
            try {
                if (fileSystem.delete(oldDirPath, true)) {
                    LOG.info("Deleted old index directory {}", oldDirPath);
                } else {
                    LOG.warn("Failed to delete old index directory {}", oldDirPath);
                }
            } catch (IOException e) {
                LOG.error("Failed to delete old index directory {} due to: {}", oldDirPath, e);
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), IOException (java.io.IOException), FileSystem (org.apache.hadoop.fs.FileSystem), ExecutionException (java.util.concurrent.ExecutionException)
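
The inline PathFilter above is difficult to test in isolation. A hypothetical extraction into a named class might look like this (the timestamp pattern is an assumption standing in for Solr's INDEX_W_TIMESTAMP_REGEX, which is not shown in this snippet):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical extraction of the anonymous filter used in
// cleanupOldIndexDirectories: accepts old "index" directories,
// skipping the directory currently in use.
class OldIndexDirFilter implements PathFilter {

    // Assumed stand-in for Solr's INDEX_W_TIMESTAMP_REGEX:
    // "index." followed by a 17-digit timestamp.
    private static final String INDEX_W_TIMESTAMP_REGEX = "index\\.[0-9]{17}";

    private final FileSystem fs;
    private final Path currentIndexDirPath;

    OldIndexDirFilter(FileSystem fs, Path currentIndexDirPath) {
        this.fs = fs;
        this.currentIndexDirPath = currentIndexDirPath;
    }

    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        try {
            return fs.isDirectory(path)
                    && !path.equals(currentIndexDirPath)
                    && (name.equals("index") || name.matches(INDEX_W_TIMESTAMP_REGEX));
        } catch (IOException e) {
            // mirror the original behavior: an I/O failure means "do not accept"
            return false;
        }
    }
}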

Aggregations

PathFilter (org.apache.hadoop.fs.PathFilter): 43
Path (org.apache.hadoop.fs.Path): 41
FileStatus (org.apache.hadoop.fs.FileStatus): 37
FileSystem (org.apache.hadoop.fs.FileSystem): 18
IOException (java.io.IOException): 16
ArrayList (java.util.ArrayList): 11
Test (org.junit.Test): 8
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5
InterruptedIOException (java.io.InterruptedIOException): 4
Configuration (org.apache.hadoop.conf.Configuration): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Table (org.apache.hadoop.hbase.client.Table): 3
HRegion (org.apache.hadoop.hbase.regionserver.HRegion): 3
ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher): 3
URI (java.net.URI): 2
HashMap (java.util.HashMap): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
Exchange (org.apache.camel.Exchange): 2
Message (org.apache.camel.Message): 2
DefaultMessage (org.apache.camel.impl.DefaultMessage): 2