Search in sources:

Example 26 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project hive by apache.

From the class Utilities, method getMmDirectoryCandidatesRecursive:

/**
 * Recursively lists all files under {@code path} and collects the distinct directories
 * that sit exactly {@code skipLevels + 1} levels below it, filtered by {@code filter}.
 *
 * @param fs         file system to list on
 * @param path       root under which to search
 * @param skipLevels number of nesting levels to skip below the root before the
 *                   candidate directory level
 * @param filter     accepts/rejects each candidate directory path
 * @return the qualified candidate directory paths (deduplicated, unordered)
 * @throws IOException if listing fails or a returned file is not under {@code path}
 */
private static Path[] getMmDirectoryCandidatesRecursive(FileSystem fs, Path path, int skipLevels, PathFilter filter) throws IOException {
    String lastRelDir = null;
    HashSet<Path> results = new HashSet<Path>();
    // Normalize the root to a scheme-less, separator-terminated prefix so relative
    // sub-paths can be extracted with a plain substring.
    String relRoot = Path.getPathWithoutSchemeAndAuthority(path).toString();
    if (!relRoot.endsWith(Path.SEPARATOR)) {
        relRoot += Path.SEPARATOR;
    }
    RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(path, true);
    while (allFiles.hasNext()) {
        LocatedFileStatus lfs = allFiles.next();
        Path lfsPath = lfs.getPath();
        Path dirPath = Path.getPathWithoutSchemeAndAuthority(lfsPath);
        String dir = dirPath.toString();
        if (!dir.startsWith(relRoot)) {
            throw new IOException("Path " + lfsPath + " is not under " + relRoot + " (when shortened to " + dir + ")");
        }
        String subDir = dir.substring(relRoot.length());
        Utilities.FILE_OP_LOGGER.trace("Looking at {} from {}", subDir, lfsPath);
        // If the listing is sorted, files under the previously-found candidate can be
        // skipped cheaply. BUG FIX: a bare startsWith(lastRelDir) also matched sibling
        // directories sharing a name prefix (lastRelDir "a/b" wrongly skipped "a/b2/f");
        // require the prefix to end exactly at a separator, or match the dir itself.
        if (lastRelDir != null
                && (subDir.equals(lastRelDir) || subDir.startsWith(lastRelDir + Path.SEPARATOR))) {
            continue;
        }
        // Advance startIx past skipLevels separators; -1 seed so level 0 starts at index 0.
        int startIx = skipLevels > 0 ? -1 : 0;
        for (int i = 0; i < skipLevels; ++i) {
            startIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
            if (startIx == -1) {
                Utilities.FILE_OP_LOGGER.info("Expected level of nesting ({}) is not " + " present in {} (from {})", skipLevels, subDir, lfsPath);
                break;
            }
        }
        if (startIx == -1) {
            // File is not nested deeply enough to contain a candidate directory.
            continue;
        }
        // The candidate directory name ends at the next separator after the skipped levels.
        int endIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
        if (endIx == -1) {
            Utilities.FILE_OP_LOGGER.info("Expected level of nesting ({}) is not present in" + " {} (from {})", (skipLevels + 1), subDir, lfsPath);
            continue;
        }
        lastRelDir = subDir = subDir.substring(0, endIx);
        Path candidate = new Path(relRoot, subDir);
        if (!filter.accept(candidate)) {
            continue;
        }
        results.add(fs.makeQualified(candidate));
    }
    // Zero-length array is the idiomatic (and on modern JVMs fastest) toArray argument.
    return results.toArray(new Path[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) HashSet(java.util.HashSet)

Example 27 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project gatk by broadinstitute.

From the class DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTest, method testDiscoverVariantsRunnableMiniCluster:

@Test(dataProvider = "discoverVariantsFromContigAlignmentsSGASparkIntegrationTest", groups = "sv")
public void testDiscoverVariantsRunnableMiniCluster(final DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTest.DiscoverVariantsFromContigAlignmentsSGASparkIntegrationTestArgs params) throws Exception {
    // Runs the discover-variants tool on an isolated mini HDFS cluster: each local
    // input file referenced by the command line is copied into the cluster and the
    // corresponding argument is rewritten to its HDFS URI before execution.
    MiniClusterUtils.runOnIsolatedMiniCluster(cluster -> {
        // NOTE(review): Arrays.asList returns a fixed-size view; only set() is used
        // below, which is permitted on such a list.
        final List<String> argsToBeModified = Arrays.asList(new ArgumentsBuilder().add(params.getCommandLineNoApiKey()).getArgsArray());
        final Path workingDirectory = MiniClusterUtils.getWorkingDir(cluster);
        int idx = 0;
        // Stage the assemblies input on HDFS and point the argument at it.
        idx = argsToBeModified.indexOf("--inputAssemblies");
        Path path = new Path(workingDirectory, "assemblies_0");
        File file = new File(argsToBeModified.get(idx + 1));
        cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
        argsToBeModified.set(idx + 1, path.toUri().toString());
        // Stage the alignments input.
        idx = argsToBeModified.indexOf("--inputAlignments");
        path = new Path(workingDirectory, "alignments");
        file = new File(argsToBeModified.get(idx + 1));
        cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
        argsToBeModified.set(idx + 1, path.toUri().toString());
        // Stage the 2bit reference.
        idx = argsToBeModified.indexOf("-R");
        path = new Path(workingDirectory, "reference.2bit");
        file = new File(argsToBeModified.get(idx + 1));
        cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
        argsToBeModified.set(idx + 1, path.toUri().toString());
        // Stage the FASTA reference plus its companion index and dictionary files
        // (the .fai and .dict are located by name next to the FASTA, so they are
        // copied without a corresponding command-line argument).
        idx = argsToBeModified.indexOf("--fastaReference");
        path = new Path(workingDirectory, "reference.fasta");
        file = new File(argsToBeModified.get(idx + 1));
        cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path);
        argsToBeModified.set(idx + 1, path.toUri().toString());
        path = new Path(workingDirectory, "reference.fasta.fai");
        cluster.getFileSystem().copyFromLocalFile(new Path(SVIntegrationTestDataProvider.reference_fai.toURI()), path);
        path = new Path(workingDirectory, "reference.dict");
        cluster.getFileSystem().copyFromLocalFile(new Path(SVIntegrationTestDataProvider.reference_dict.toURI()), path);
        // Redirect the output VCF to HDFS.
        idx = argsToBeModified.indexOf("-O");
        path = new Path(workingDirectory, "variants.vcf");
        final String vcfOnHDFS = path.toUri().toString();
        argsToBeModified.set(idx + 1, vcfOnHDFS);
        runCommandLine(argsToBeModified);
        // Compare the produced VCF against the expected simple-inversion calls,
        // ignoring the listed attributes.
        StructuralVariationDiscoveryPipelineSparkIntegrationTest.svDiscoveryVCFEquivalenceTest(vcfOnHDFS, SVIntegrationTestDataProvider.EXPECTED_SIMPLE_INV_VCF, Arrays.asList("ALIGN_LENGTHS", "CTG_NAMES"), true);
        // Debug aid: dump the full working-directory listing to stderr.
        final RemoteIterator<LocatedFileStatus> it = FileSystem.get(workingDirectory.toUri(), new Configuration()).listFiles(workingDirectory, true);
        while (it.hasNext()) System.err.println(it.next());
        final FileSystem fs = FileSystem.get(workingDirectory.toUri(), new Configuration());
        // The tool is expected to have written these two side-output alignment files.
        Assert.assertTrue(fs.exists(new Path(argsToBeModified.get(argsToBeModified.indexOf("--inputAlignments") + 1) + "_withTwoAlignments")));
        Assert.assertTrue(fs.exists(new Path(argsToBeModified.get(argsToBeModified.indexOf("--inputAlignments") + 1) + "_withMoreThanTwoAlignments")));
    });
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArgumentsBuilder(org.broadinstitute.hellbender.utils.test.ArgumentsBuilder) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 28 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.

From the class AbstractDFSCarbonFile, method getLocations:

/**
 * Returns the hosts holding the first block of this file.
 * <p>
 * If the cached status is a {@link LocatedFileStatus} its pre-fetched block locations
 * are reused; otherwise the locations are fetched from the file system.
 *
 * @return host names for the first block, or an empty array if the file has no blocks
 *         (e.g. a zero-length file)
 * @throws IOException if the file system lookup fails
 */
@Override
public String[] getLocations() throws IOException {
    BlockLocation[] blkLocations;
    if (fileStatus instanceof LocatedFileStatus) {
        // Locations already came back with the listing; avoid an extra RPC.
        blkLocations = ((LocatedFileStatus) fileStatus).getBlockLocations();
    } else {
        FileSystem fs = fileStatus.getPath().getFileSystem(FileFactory.getConfiguration());
        blkLocations = fs.getFileBlockLocations(fileStatus.getPath(), 0L, fileStatus.getLen());
    }
    // BUG FIX: a zero-length file can report no blocks, in which case the old
    // unconditional blkLocations[0] threw ArrayIndexOutOfBoundsException.
    if (blkLocations == null || blkLocations.length == 0) {
        return new String[0];
    }
    return blkLocations[0].getHosts();
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 29 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project dr-elephant by linkedin.

From the class MapReduceFSFetcherHadoop2, method getHistoryFiles:

/**
 * Locates the job configuration (_conf.xml) and job history (.jhist) files for the
 * given job, looking first in the finished-jobs directory and falling back to the
 * intermediate-done directory for anything the HistoryServer has not yet moved.
 *
 * @param job the analytic job whose files are wanted
 * @return paths to the config and history files
 * @throws IOException if either file cannot be found in any location
 */
private DataFiles getHistoryFiles(AnalyticJob job) throws IOException {
    final String jobId = Utils.getJobIdFromApplicationId(job.getAppId());
    String confPath = null;
    String histPath = null;
    // First pass: scan the done-dir for both files, stopping early once both are found.
    final String doneDir = getHistoryDir(job);
    if (_fs.exists(new Path(doneDir))) {
        RemoteIterator<LocatedFileStatus> files = _fs.listFiles(new Path(doneDir), false);
        while ((confPath == null || histPath == null) && files.hasNext()) {
            String fileName = files.next().getPath().getName();
            if (!fileName.contains(jobId)) {
                continue;
            }
            if (fileName.endsWith("_conf.xml")) {
                confPath = doneDir + fileName;
            } else if (fileName.endsWith(".jhist")) {
                histPath = doneDir + fileName;
            }
        }
    }
    // Fallback: the HistoryServer may still hold the files in its intermediate-done-dir.
    final String intermediateDir = _intermediateHistoryLocation + File.separator + job.getUser() + File.separator;
    if (confPath == null) {
        // The config file name is fully deterministic, so probe for it directly.
        confPath = intermediateDir + jobId + "_conf.xml";
        if (!_fs.exists(new Path(confPath))) {
            throw new FileNotFoundException("Can't find config of " + jobId + " in neither " + doneDir + " nor " + intermediateDir);
        }
        logger.info("Found job config in intermediate dir: " + confPath);
    }
    if (histPath == null) {
        // The history file name has extra components, so it has to be searched for.
        try {
            RemoteIterator<LocatedFileStatus> files = _fs.listFiles(new Path(intermediateDir), false);
            while (files.hasNext()) {
                String fileName = files.next().getPath().getName();
                if (fileName.contains(jobId) && fileName.endsWith(".jhist")) {
                    histPath = intermediateDir + fileName;
                    logger.info("Found history file in intermediate dir: " + histPath);
                    break;
                }
            }
        } catch (FileNotFoundException e) {
            logger.error("Intermediate history directory " + intermediateDir + " not found");
        }
        if (histPath == null) {
            throw new FileNotFoundException("Can't find history file of " + jobId + " in neither " + doneDir + " nor " + intermediateDir);
        }
    }
    return new DataFiles(confPath, histPath);
}
Also used : Path(org.apache.hadoop.fs.Path) FileNotFoundException(java.io.FileNotFoundException) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus)

Example 30 with LocatedFileStatus

Use of org.apache.hadoop.fs.LocatedFileStatus in project stocator by CODAIT.

From the class ObjectStoreFileSystemTest, method listLocatedStatusTestNotFound2:

/**
 * Listing a located status for a non-existent path must raise FileNotFoundException.
 * The loop body only executes if the exception is (unexpectedly) not thrown, in
 * which case it verifies that nothing unrelated was listed.
 */
@Test(expected = FileNotFoundException.class)
public void listLocatedStatusTestNotFound2() throws Exception {
    Assume.assumeNotNull(getFs());
    RemoteIterator<LocatedFileStatus> iter = getFs().listLocatedStatus(new Path(fileName + "0"));
    int matched = 0;
    while (iter.hasNext()) {
        LocatedFileStatus status = iter.next();
        Assert.assertTrue(status.getPath().getName().startsWith("testFile0"));
        matched++;
    }
    // No entries are expected for the missing path.
    Assert.assertEquals(iterNum * 0, matched);
}
Also used : StocatorPath(com.ibm.stocator.fs.common.StocatorPath) Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) Test(org.junit.Test)

Aggregations

LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)139 Path (org.apache.hadoop.fs.Path)104 FileSystem (org.apache.hadoop.fs.FileSystem)55 ArrayList (java.util.ArrayList)43 Test (org.junit.Test)33 FileStatus (org.apache.hadoop.fs.FileStatus)29 IOException (java.io.IOException)27 Configuration (org.apache.hadoop.conf.Configuration)20 File (java.io.File)13 FileNotFoundException (java.io.FileNotFoundException)11 HashSet (java.util.HashSet)11 BlockLocation (org.apache.hadoop.fs.BlockLocation)9 RemoteIterator (org.apache.hadoop.fs.RemoteIterator)7 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)7 StocatorPath (com.ibm.stocator.fs.common.StocatorPath)6 HashMap (java.util.HashMap)6 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)6 Map (java.util.Map)5 Matcher (java.util.regex.Matcher)5 BufferedReader (java.io.BufferedReader)4