Use of org.apache.hadoop.fs.LocatedFileStatus in project hive by apache.
Class Utilities, method getMmDirectoryCandidatesRecursive.
private static Path[] getMmDirectoryCandidatesRecursive(FileSystem fs, Path path, int skipLevels, PathFilter filter) throws IOException {
String lastRelDir = null;
HashSet<Path> results = new HashSet<Path>();
String relRoot = Path.getPathWithoutSchemeAndAuthority(path).toString();
if (!relRoot.endsWith(Path.SEPARATOR)) {
relRoot += Path.SEPARATOR;
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(path, true);
while (allFiles.hasNext()) {
LocatedFileStatus lfs =;
Path lfsPath = lfs.getPath();
Path dirPath = Path.getPathWithoutSchemeAndAuthority(lfsPath);
String dir = dirPath.toString();
if (!dir.startsWith(relRoot)) {
throw new IOException("Path " + lfsPath + " is not under " + relRoot + " (when shortened to " + dir + ")");
String subDir = dir.substring(relRoot.length());
Utilities.FILE_OP_LOGGER.trace("Looking at {} from {}", subDir, lfsPath);
// If sorted, we'll skip a bunch of files.
if (lastRelDir != null && subDir.startsWith(lastRelDir)) {
int startIx = skipLevels > 0 ? -1 : 0;
for (int i = 0; i < skipLevels; ++i) {
startIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
if (startIx == -1) {"Expected level of nesting ({}) is not " + " present in {} (from {})", skipLevels, subDir, lfsPath);
if (startIx == -1) {
int endIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
if (endIx == -1) {"Expected level of nesting ({}) is not present in" + " {} (from {})", (skipLevels + 1), subDir, lfsPath);
lastRelDir = subDir = subDir.substring(0, endIx);
Path candidate = new Path(relRoot, subDir);
if (!filter.accept(candidate)) {
return results.toArray(new Path[results.size()]);
Use of org.apache.hadoop.fs.LocatedFileStatus in project gatk by broadinstitute.
Class DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTest, method testDiscoverVariantsRunnableMiniCluster.
@Test(dataProvider = "discoverVariantsFromContigAlignmentsSGASparkIntegrationTest", groups = "sv")
public void testDiscoverVariantsRunnableMiniCluster(final DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTest.DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTestArgs params) throws Exception {
MiniClusterUtils.runOnIsolatedMiniCluster(cluster -> {
final List<String> argsToBeModified = Arrays.asList(new ArgumentsBuilder().add(params.getCommandLineNoApiKey()).getArgsArray());
final Path workingDirectory = MiniClusterUtils.getWorkingDir(cluster);
int idx = 0;
idx = argsToBeModified.indexOf("--inputAssemblies");
Path path = new Path(workingDirectory, "assemblies_0");
File file = new File(argsToBeModified.get(idx + 1));
cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
argsToBeModified.set(idx + 1, path.toUri().toString());
idx = argsToBeModified.indexOf("--inputAlignments");
path = new Path(workingDirectory, "alignments");
file = new File(argsToBeModified.get(idx + 1));
cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
argsToBeModified.set(idx + 1, path.toUri().toString());
idx = argsToBeModified.indexOf("-R");
path = new Path(workingDirectory, "reference.2bit");
file = new File(argsToBeModified.get(idx + 1));
cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
argsToBeModified.set(idx + 1, path.toUri().toString());
idx = argsToBeModified.indexOf("--fastaReference");
path = new Path(workingDirectory, "reference.fasta");
file = new File(argsToBeModified.get(idx + 1));
cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
argsToBeModified.set(idx + 1, path.toUri().toString());
path = new Path(workingDirectory, "reference.fasta.fai");
cluster.getFileSystem().copyFromLocalFile(new Path(SVIntegrationTestDataProvider.reference_fai.toURI()), path);
path = new Path(workingDirectory, "reference.dict");
cluster.getFileSystem().copyFromLocalFile(new Path(SVIntegrationTestDataProvider.reference_dict.toURI()), path);
idx = argsToBeModified.indexOf("-O");
path = new Path(workingDirectory, "variants.vcf");
final String vcfOnHDFS = path.toUri().toString();
argsToBeModified.set(idx + 1, vcfOnHDFS);
StructuralVariationDiscoveryPipelineSparkIntegrationTest.svDiscoveryVCFEquivalenceTest(vcfOnHDFS, SVIntegrationTestDataProvider.EXPECTED_SIMPLE_INV_VCF, Arrays.asList("ALIGN_LENGTHS", "CTG_NAMES"), true);
final RemoteIterator<LocatedFileStatus> it = FileSystem.get(workingDirectory.toUri(), new Configuration()).listFiles(workingDirectory, true);
while (it.hasNext()) System.err.println(;
final FileSystem fs = FileSystem.get(workingDirectory.toUri(), new Configuration());
Assert.assertTrue(fs.exists(new Path(argsToBeModified.get(argsToBeModified.indexOf("--inputAlignments") + 1) + "_withTwoAlignments")));
Assert.assertTrue(fs.exists(new Path(argsToBeModified.get(argsToBeModified.indexOf("--inputAlignments") + 1) + "_withMoreThanTwoAlignments")));
Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.
Class AbstractDFSCarbonFile, method getLocations.
public String[] getLocations() throws IOException {
BlockLocation[] blkLocations;
if (fileStatus instanceof LocatedFileStatus) {
blkLocations = ((LocatedFileStatus) fileStatus).getBlockLocations();
} else {
FileSystem fs = fileStatus.getPath().getFileSystem(FileFactory.getConfiguration());
blkLocations = fs.getFileBlockLocations(fileStatus.getPath(), 0L, fileStatus.getLen());
return blkLocations[0].getHosts();
Use of org.apache.hadoop.fs.LocatedFileStatus in project dr-elephant by linkedin.
Class MapReduceFSFetcherHadoop2, method getHistoryFiles.
private DataFiles getHistoryFiles(AnalyticJob job) throws IOException {
String jobId = Utils.getJobIdFromApplicationId(job.getAppId());
String jobConfPath = null;
String jobHistPath = null;
// Search files in done dir
String jobHistoryDirPath = getHistoryDir(job);
if (_fs.exists(new Path(jobHistoryDirPath))) {
RemoteIterator<LocatedFileStatus> it = _fs.listFiles(new Path(jobHistoryDirPath), false);
while (it.hasNext() && (jobConfPath == null || jobHistPath == null)) {
String name =;
if (name.contains(jobId)) {
if (name.endsWith("_conf.xml")) {
jobConfPath = jobHistoryDirPath + name;
} else if (name.endsWith(".jhist")) {
jobHistPath = jobHistoryDirPath + name;
// If some files are missing, search in the intermediate-done-dir in case the HistoryServer has
// not yet moved them into the done-dir.
String intermediateDirPath = _intermediateHistoryLocation + File.separator + job.getUser() + File.separator;
if (jobConfPath == null) {
jobConfPath = intermediateDirPath + jobId + "_conf.xml";
if (!_fs.exists(new Path(jobConfPath))) {
throw new FileNotFoundException("Can't find config of " + jobId + " in neither " + jobHistoryDirPath + " nor " + intermediateDirPath);
}"Found job config in intermediate dir: " + jobConfPath);
if (jobHistPath == null) {
try {
RemoteIterator<LocatedFileStatus> it = _fs.listFiles(new Path(intermediateDirPath), false);
while (it.hasNext()) {
String name =;
if (name.contains(jobId) && name.endsWith(".jhist")) {
jobHistPath = intermediateDirPath + name;"Found history file in intermediate dir: " + jobHistPath);
} catch (FileNotFoundException e) {
logger.error("Intermediate history directory " + intermediateDirPath + " not found");
if (jobHistPath == null) {
throw new FileNotFoundException("Can't find history file of " + jobId + " in neither " + jobHistoryDirPath + " nor " + intermediateDirPath);
return new DataFiles(jobConfPath, jobHistPath);
Use of org.apache.hadoop.fs.LocatedFileStatus in project stocator by CODAIT.
Class ObjectStoreFileSystemTest, method listLocatedStatusTestNotFound2.
@Test(expected = FileNotFoundException.class)
public void listLocatedStatusTestNotFound2() throws Exception {
int count = 0;
RemoteIterator<LocatedFileStatus> stats = getFs().listLocatedStatus(new Path(fileName + "0"));
while (stats.hasNext()) {
LocatedFileStatus stat =;
Assert.assertEquals(iterNum * 0, count);