
Example 71 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hadoop by apache.

The cleanUp method of the ErasureCodeBenchmarkThroughput class:

private void cleanUp(int dataSizeMB, boolean isEc) throws IOException {
    final String fileName = getFilePath(dataSizeMB, isEc);
    Path path = isEc ? new Path(EC_DIR) : new Path(REP_DIR);
    FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {

        @Override
        public boolean accept(Path path) {
            return path.toString().contains(fileName);
        }
    });
    for (FileStatus fileStatus : fileStatuses) {
        fs.delete(fileStatus.getPath(), false);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus)
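
Since PathFilter declares a single accept(Path) method, the anonymous class above can be written as a lambda on Java 8+. A minimal, self-contained sketch of the same listing-and-deleting step, assuming a FileSystem, a directory Path, and a fileName substring like the ones used in the method above:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LambdaPathFilterSketch {

    // Same filtering logic as the anonymous PathFilter above, expressed as a lambda.
    static void deleteMatching(FileSystem fs, Path dir, String fileName) throws IOException {
        FileStatus[] matches = fs.listStatus(dir, p -> p.toString().contains(fileName));
        for (FileStatus stat : matches) {
            // non-recursive delete, as in cleanUp above
            fs.delete(stat.getPath(), false);
        }
    }
}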

Example 72 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project incubator-systemml by apache.

The deletePartFiles method of the ApplyTfCSVMR class:

private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    PathFilter filter = new PathFilter() {

        public boolean accept(Path file) {
            return file.getName().startsWith("part-");
        }
    };
    FileStatus[] list = fs.listStatus(path, filter);
    for (FileStatus stat : list) {
        fs.delete(stat.getPath(), false);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus)
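
For a simple name-prefix match like "part-", a glob pattern is an alternative to a PathFilter: FileSystem.globStatus matches file names directly. A minimal sketch of the same cleanup using a glob instead of a filter, assuming the same fs and path arguments as the method above:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PartFileCleanupSketch {

    // Deletes "part-*" files under the given directory using a glob instead of a PathFilter.
    static void deletePartFiles(FileSystem fs, Path dir) throws IOException {
        FileStatus[] matches = fs.globStatus(new Path(dir, "part-*"));
        if (matches == null) {
            // defensive: globStatus can return null when the path cannot be resolved
            return;
        }
        for (FileStatus stat : matches) {
            fs.delete(stat.getPath(), false);
        }
    }
}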

Example 73 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project carbondata by apache.

The getFileStatus method of the CarbonInputFormat class:

private void getFileStatus(JobContext job, String[] segmentsToConsider, String[] filesToConsider, List<FileStatus> result) throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }
    PathFilter inputFilter = getDataFileFilter();
    AbsoluteTableIdentifier absIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTablePath tablePath = getTablePath(absIdentifier);
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());
    //get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];
        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            String dataDirectoryPath = absIdentifier.appendWithLocalPrefix(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            if (filesToConsider.length == 0) {
                Path segmentPath = new Path(dataDirectoryPath);
                FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
                getFileStatusInternal(inputFilter, fs, segmentPath, result);
            } else {
                for (int k = 0; k < filesToConsider.length; ++k) {
                    String dataPath = absIdentifier.appendWithLocalPrefix(tablePath.getCarbonDataDirectoryPath(partition, segmentId) + File.separator + filesToConsider[k]);
                    Path filePath = new Path(dataPath);
                    FileSystem fs = filePath.getFileSystem(job.getConfiguration());
                    getFileStatusInternal(inputFilter, fs, filePath, result);
                }
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath), CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath), PathFilter (org.apache.hadoop.fs.PathFilter), AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier), FileSystem (org.apache.hadoop.fs.FileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), IOException (java.io.IOException)
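
getDataFileFilter() is CarbonData's own filter and is not shown here. A common convention for data-file filters in Hadoop input formats is simply to skip "hidden" entries whose names start with an underscore or a dot (for example _SUCCESS markers and .crc checksum files). A minimal sketch of that convention, not CarbonData's actual implementation:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class HiddenFileFilter implements PathFilter {

    // Rejects entries such as _SUCCESS and .crc files, following the same
    // convention FileInputFormat applies in its default hidden-file filter.
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        return !name.startsWith("_") && !name.startsWith(".");
    }
}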

Example 74 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project lucene-solr by apache.

The cleanupOldIndexDirectories method of the HdfsDirectoryFactory class:

@Override
public void cleanupOldIndexDirectories(final String dataDir, final String currentIndexDir, boolean afterReload) {
    // Get the FileSystem object
    final Path dataDirPath = new Path(dataDir);
    final Configuration conf = getConf();
    FileSystem fileSystem = null;
    try {
        fileSystem = tmpFsCache.get(dataDir, () -> FileSystem.get(dataDirPath.toUri(), conf));
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }
    boolean pathExists = false;
    try {
        pathExists = fileSystem.exists(dataDirPath);
    } catch (IOException e) {
        LOG.error("Error checking if hdfs path " + dataDir + " exists", e);
    }
    if (!pathExists) {
        LOG.warn("{} does not point to a valid data directory; skipping clean-up of old index directories.", dataDir);
        return;
    }
    // make sure we don't delete the current
    final Path currentIndexDirPath = new Path(currentIndexDir);
    final FileSystem fs = fileSystem;
    FileStatus[] oldIndexDirs = null;
    try {
        oldIndexDirs = fileSystem.listStatus(dataDirPath, new PathFilter() {

            @Override
            public boolean accept(Path path) {
                boolean accept = false;
                String pathName = path.getName();
                try {
                    accept = fs.isDirectory(path) && !path.equals(currentIndexDirPath) && (pathName.equals("index") || pathName.matches(INDEX_W_TIMESTAMP_REGEX));
                } catch (IOException e) {
                    LOG.error("Error checking if path {} is an old index directory, caused by: {}", path, e);
                }
                return accept;
            }
        });
    } catch (IOException ioExc) {
        LOG.error("Error checking for old index directories to clean-up.", ioExc);
    }
    if (oldIndexDirs == null || oldIndexDirs.length == 0) {
        // nothing to clean-up
        return;
    }
    List<Path> oldIndexPaths = new ArrayList<>(oldIndexDirs.length);
    for (FileStatus ofs : oldIndexDirs) {
        oldIndexPaths.add(ofs.getPath());
    }
    Collections.sort(oldIndexPaths, Collections.reverseOrder());
    Set<String> livePaths = getLivePaths();
    int i = 0;
    if (afterReload) {
        LOG.info("Will not remove most recent old directory on reload {}", oldIndexDirs[0]);
        i = 1;
    }
    LOG.info("Found {} old index directories to clean-up under {} afterReload={}", oldIndexDirs.length - i, dataDirPath, afterReload);
    for (; i < oldIndexPaths.size(); i++) {
        Path oldDirPath = oldIndexPaths.get(i);
        if (livePaths.contains(oldDirPath.toString())) {
            LOG.warn("Cannot delete directory {} because it is still being referenced in the cache.", oldDirPath);
        } else {
            try {
                if (fileSystem.delete(oldDirPath, true)) {
                    LOG.info("Deleted old index directory {}", oldDirPath);
                } else {
                    LOG.warn("Failed to delete old index directory {}", oldDirPath);
                }
            } catch (IOException e) {
                LOG.error("Failed to delete old index directory {} due to: {}", oldDirPath, e);
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), IOException (java.io.IOException), FileSystem (org.apache.hadoop.fs.FileSystem), ExecutionException (java.util.concurrent.ExecutionException)
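
The filter above captures an effectively final FileSystem reference (the local fs variable) because accept() calls fs.isDirectory(); the same constraint applies when writing it as a lambda. A condensed sketch of just the listing step, using a stand-in pattern rather than Solr's actual INDEX_W_TIMESTAMP_REGEX constant:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OldIndexDirListerSketch {

    // Stand-in for Solr's INDEX_W_TIMESTAMP_REGEX constant: "index" followed by a dot and digits.
    private static final String INDEX_W_TIMESTAMP_REGEX = "index\\.[0-9]+";

    static FileStatus[] listOldIndexDirs(FileSystem fs, Path dataDirPath, Path currentIndexDirPath)
            throws IOException {
        // fs and currentIndexDirPath are effectively final, so the lambda can capture them.
        return fs.listStatus(dataDirPath, path -> {
            try {
                String name = path.getName();
                return fs.isDirectory(path)
                        && !path.equals(currentIndexDirPath)
                        && (name.equals("index") || name.matches(INDEX_W_TIMESTAMP_REGEX));
            } catch (IOException e) {
                // treat entries that cannot be inspected as non-matches
                return false;
            }
        });
    }
}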

Example 75 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hadoop by apache.

The runInternal method of the HadoopArchiveLogsRunner class:

private int runInternal() throws Exception {
    String remoteAppLogDir = remoteLogDir + File.separator + user + File.separator + suffix + File.separator + appId;
    // Run 'hadoop archives' command in local mode
    conf.set("mapreduce.framework.name", "local");
    // Set the umask so we get 640 files and 750 dirs
    conf.set("fs.permissions.umask-mode", "027");
    HadoopArchives ha = new HadoopArchives(conf);
    String[] haArgs = { "-archiveName", appId + ".har", "-p", remoteAppLogDir, "*", workingDir };
    StringBuilder sb = new StringBuilder("Executing 'hadoop archives'");
    for (String haArg : haArgs) {
        sb.append("\n\t").append(haArg);
    }
    LOG.info(sb.toString());
    ha.run(haArgs);
    FileSystem fs = null;
    // Move har file to correct location and delete original logs
    try {
        fs = FileSystem.get(conf);
        Path harDest = new Path(remoteAppLogDir, appId + ".har");
        LOG.info("Moving har to original location");
        fs.rename(new Path(workingDir, appId + ".har"), harDest);
        LOG.info("Deleting original logs");
        for (FileStatus original : fs.listStatus(new Path(remoteAppLogDir), new PathFilter() {

            @Override
            public boolean accept(Path path) {
                return !path.getName().endsWith(".har");
            }
        })) {
            fs.delete(original.getPath(), false);
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem)
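
FileSystem implements Closeable, so the explicit finally block above can be replaced by try-with-resources, and the "keep everything except .har files" filter can be a lambda. A minimal sketch under those assumptions, with a purely illustrative log-directory path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteArchivedOriginalsSketch {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical log directory, used only for illustration.
        Path appLogDir = new Path("/tmp/app-logs/application_0001");
        // try-with-resources closes the FileSystem, mirroring the finally block above.
        try (FileSystem fs = FileSystem.get(conf)) {
            for (FileStatus original : fs.listStatus(appLogDir, p -> !p.getName().endsWith(".har"))) {
                fs.delete(original.getPath(), false);
            }
        }
    }
}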

Aggregations

PathFilter (org.apache.hadoop.fs.PathFilter) 123
Path (org.apache.hadoop.fs.Path) 114
FileStatus (org.apache.hadoop.fs.FileStatus) 96
Test (org.junit.Test) 47
IOException (java.io.IOException) 42
FileSystem (org.apache.hadoop.fs.FileSystem) 39
ArrayList (java.util.ArrayList) 22
List (java.util.List) 19
Configuration (org.apache.hadoop.conf.Configuration) 18
Collections (java.util.Collections) 11
BufferedReader (java.io.BufferedReader) 9
InputStreamReader (java.io.InputStreamReader) 9
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 9
Assert.assertEquals (org.junit.Assert.assertEquals) 9
Assert.assertTrue (org.junit.Assert.assertTrue) 9
URI (java.net.URI) 8
Test (org.testng.annotations.Test) 8
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 7
IGNORED (com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED) 6
RECURSE (com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE) 6