
Example 36 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class IntervalArgumentCollection method parseIntervals.

private void parseIntervals(final GenomeLocParser genomeLocParser) {
    // calling this method without any interval arguments at all is a programming error
    if (!intervalsSpecified()) {
        throw new GATKException("Cannot call parseIntervals() without specifying either intervals to include or exclude.");
    }
    GenomeLocSortedSet includeSortedSet;
    if (getIntervalStrings().isEmpty()) {
        // the -L argument isn't specified, which means that -XL was, since we checked intervalsSpecified()
        // therefore we set the include set to be the entire reference territory
        includeSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(genomeLocParser.getSequenceDictionary());
    } else {
        try {
            includeSortedSet = IntervalUtils.loadIntervals(getIntervalStrings(), intervalSetRule, intervalMerging, intervalPadding, genomeLocParser);
        } catch (UserException.EmptyIntersection e) {
            throw new CommandLineException.BadArgumentValue("-L, --interval_set_rule", getIntervalStrings() + "," + intervalSetRule, "The specified intervals had an empty intersection");
        }
    }
    final GenomeLocSortedSet excludeSortedSet = IntervalUtils.loadIntervals(excludeIntervalStrings, IntervalSetRule.UNION, intervalMerging, intervalExclusionPadding, genomeLocParser);
    if (excludeSortedSet.contains(GenomeLoc.UNMAPPED)) {
        throw new UserException("-XL unmapped is not currently supported");
    }
    GenomeLocSortedSet intervals;
    // if no exclude arguments, can return the included set directly
    if (excludeSortedSet.isEmpty()) {
        intervals = includeSortedSet;
    } else {
        // otherwise there are exclude arguments => subtract the excluded regions from the include set
        intervals = includeSortedSet.subtractRegions(excludeSortedSet);
        if (intervals.isEmpty()) {
            throw new CommandLineException.BadArgumentValue("-L,-XL", getIntervalStrings().toString() + ", " + excludeIntervalStrings.toString(), "The intervals specified for exclusion with -XL removed all territory specified by -L.");
        }
        // logging messages only printed when exclude (-XL) arguments are given
        final long toPruneSize = includeSortedSet.coveredSize();
        final long toExcludeSize = excludeSortedSet.coveredSize();
        final long intervalSize = intervals.coveredSize();
        logger.info(String.format("Initial include intervals span %d loci; exclude intervals span %d loci", toPruneSize, toExcludeSize));
        logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)", toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
    }
    logger.info(String.format("Processing %d bp from intervals", intervals.coveredSize()));
    // Separate out requests for unmapped records from the rest of the intervals.
    boolean traverseUnmapped = false;
    if (intervals.contains(GenomeLoc.UNMAPPED)) {
        traverseUnmapped = true;
        intervals.remove(GenomeLoc.UNMAPPED);
    }
    traversalParameters = new TraversalParameters(IntervalUtils.convertGenomeLocsToSimpleIntervals(intervals.toList()), traverseUnmapped);
}
Also used : TraversalParameters(org.broadinstitute.hellbender.engine.TraversalParameters) UserException(org.broadinstitute.hellbender.exceptions.UserException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException)
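
The snippet above illustrates the exception convention used throughout GATK: a GATKException flags a programming error (calling parseIntervals() with no interval arguments at all), while UserException and CommandLineException.BadArgumentValue report problems with the user-supplied -L/-XL arguments. Below is a minimal sketch of that convention; the class and method names are hypothetical and not part of GATK.

import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;

// Hypothetical helper, not part of GATK: shows which exception type fits which failure mode.
final class IntervalErrorConvention {

    static void checkIntervalArguments(final boolean intervalsSpecified,
                                       final boolean emptyIntersection,
                                       final String intervalSpec) {
        if (!intervalsSpecified) {
            // The calling code violated the method's contract: an internal error, not the user's fault.
            throw new GATKException("checkIntervalArguments() called without -L or -XL arguments");
        }
        if (emptyIntersection) {
            // The user supplied arguments that leave nothing to process: report a bad argument value.
            throw new CommandLineException.BadArgumentValue("-L", intervalSpec,
                    "The specified intervals had an empty intersection");
        }
    }

    public static void main(String[] args) {
        try {
            checkIntervalArguments(true, true, "chr1:1-100");
        } catch (final UserException | CommandLineException e) {
            // User-facing errors are reported as a message rather than a stack trace.
            System.err.println("A USER ERROR has occurred: " + e.getMessage());
        }
    }
}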

Example 37 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReferenceAPISource method getReferenceBases.

/**
       * Query the Google Genomics API for reference bases spanning the specified interval from the specified
       * reference name.
       *
       * @param pipelineOptions - used to get the credentials necessary to call the Genomics API
       * @param interval - the range of bases to retrieve
       * @param pageSize - the maximum number of bases to request per API call
       * @return the reference bases specified by interval, fetched via the Google Genomics API
       */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        byte[] received = result.getSequence().getBytes();
        byte[] bases = received;
        if (received.length < interval.size()) {
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                blobs.add(result.getSequence().getBytes());
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
Also used : Genomics(com.google.api.services.genomics.Genomics) IOException(java.io.IOException) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GATKGCSOptions(org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions) UserException(org.broadinstitute.hellbender.exceptions.UserException)
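
The paging loop above collects each partial response and joins the pieces with Guava's Bytes.concat. A stripped-down sketch of that accumulate-then-concatenate pattern is shown below; fetchPage is a hypothetical stand-in for listRequest.execute(), not a Genomics API call.

import com.google.common.primitives.Bytes;
import java.util.ArrayList;
import java.util.List;

// Illustrative only: mimics the page-and-concatenate pattern used in getReferenceBases().
final class PagedBytesSketch {

    // Stand-in for one page of results; returns null when there are no more pages.
    private static byte[] fetchPage(final int pageIndex) {
        final byte[][] pages = { "ACGT".getBytes(), "TTAA".getBytes(), "GGCC".getBytes() };
        return pageIndex < pages.length ? pages[pageIndex] : null;
    }

    public static void main(String[] args) {
        final List<byte[]> blobs = new ArrayList<>();
        byte[] page;
        int pageIndex = 0;
        while ((page = fetchPage(pageIndex++)) != null) {
            blobs.add(page);
        }
        // Same join step as in the example above: flatten the list into one contiguous array.
        final byte[] bases = Bytes.concat(blobs.toArray(new byte[blobs.size()][]));
        System.out.println(new String(bases)); // prints ACGTTTAAGGCC
    }
}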

Example 38 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReferenceAPISource method getReferenceNameToReferenceTable.

/**
     * getReferenceNameToReferenceTable produces a table from reference contig name
     * to the corresponding Reference object. The table is produced via a query to get the mapping. The query
     * currently takes ~10 seconds, so this table should be cached and not re-produced for every reference-bases query.
     * This is clumsy and should be refactored with better Genomics API use (issue 643).
     * @param pipelineOptions - used to get the credentials necessary to call the Genomics API
     * @param referenceSetID - the ID of the reference set to use
     * @return a mapping from reference contig name to the corresponding Reference object
     */
public Map<String, Reference> getReferenceNameToReferenceTable(final PipelineOptions pipelineOptions, final String referenceSetID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetID);
    fillGenomicsService(pipelineOptions);
    final Map<String, Reference> ret = new LinkedHashMap<>();
    try {
        final SearchReferencesRequest content = new SearchReferencesRequest();
        content.setReferenceSetId(referenceSetID);
        final SearchReferencesResponse found = genomicsService.references().search(content).execute();
        for (Reference r : found.getReferences()) {
            ret.put(r.getName(), r);
        }
    } catch (IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetID, e);
    }
    return ret;
}
Also used : IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)
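
As the javadoc notes, the search behind this method takes roughly ten seconds, so its result should be cached rather than recomputed for every reference-bases query. Below is a minimal sketch of one way to do that; ReferenceTableCache is a hypothetical wrapper, not a GATK class.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

// Hypothetical caching wrapper (not part of GATK): memoizes the expensive per-reference-set lookup
// so the slow search runs at most once per reference set ID.
final class ReferenceTableCache<T> {

    private final Map<String, Map<String, T>> cache = new ConcurrentHashMap<>();
    private final Function<String, Map<String, T>> loader;

    ReferenceTableCache(final Function<String, Map<String, T>> loader) {
        this.loader = loader;
    }

    Map<String, T> get(final String referenceSetID) {
        // computeIfAbsent invokes the loader only on the first request for a given reference set.
        return cache.computeIfAbsent(referenceSetID, loader);
    }
}

A caller could construct the cache once, for example new ReferenceTableCache<>(id -> source.getReferenceNameToReferenceTable(options, id)) where source and options are the caller's own objects, and reuse it for every subsequent lookup.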

Example 39 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReadsSparkSource method getHeader.

/**
     * Loads the header using Hadoop-BAM.
     * @param filePath path to the BAM.
     * @param referencePath Reference path or null if not available. Reference is required for CRAM files.
     * @return the header for the BAM.
     */
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // GCS case
    if (BucketUtils.isCloudStorageUrl(filePath)) {
        try (ReadsDataSource readsDataSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return readsDataSource.getHeader();
        }
    }
    // local file or HDFS case
    try {
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(ctx.hadoopConfiguration());
        if (fs.isDirectory(path)) {
            FileStatus[] bamFiles = fs.listStatus(path, new PathFilter() {

                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(HADOOP_PART_PREFIX);
                }
            });
            if (bamFiles.length == 0) {
                throw new UserException("No BAM files to load header from in: " + path);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            path = bamFiles[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(path, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) UserException(org.broadinstitute.hellbender.exceptions.UserException) IOException(java.io.IOException)
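
When the input path is a directory of Spark output shards, the method above picks the first part file because Hadoop-BAM writes the same header to every shard. The sketch below shows the same "take the first shard" idea with plain java.nio instead of the Hadoop FileSystem API; the class name is illustrative and the shard prefix "part-" is assumed to be the conventional Hadoop one.

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

// Illustrative only: pick one shard from a directory of "part-*" files to read shared metadata from.
final class FirstShardSketch {

    static Path firstShard(final Path dir) throws IOException {
        try (DirectoryStream<Path> shards = Files.newDirectoryStream(dir, "part-*")) {
            for (final Path shard : shards) {
                // Every shard carries the same header, so any single match is enough.
                return shard;
            }
        }
        throw new IOException("No shard files to load a header from in: " + dir);
    }

    public static void main(String[] args) throws IOException {
        System.out.println(firstShard(Paths.get(args[0])));
    }
}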

Example 40 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SparkContextFactory method determineDefaultSparkMaster.

/**
     * Creates the default Spark master string and determines the number of cores it should use. Applicable to Spark tests only.
     *   Reads the specification from the environment variable GATK_TEST_SPARK_CORES:
     *      If the environment variable is not set, use all available cores as in "local[*]"
     *      If the value is a positive integer, use that many cores
     *      If the value is invalid (a non-numeric string, empty, etc.), throw a UserException
     *      If the value is a negative integer or zero, throw a UserException
     */
private static String determineDefaultSparkMaster() {
    final String defaultSparkMasterString = "local[*]";
    String sparkMasterString;
    String sparkSpecFromEnvironment = System.getenv(SPARK_CORES_ENV_VARIABLE);
    if (null == sparkSpecFromEnvironment) {
        sparkMasterString = defaultSparkMasterString;
    } else {
        int numSparkCoresFromEnv = 0;
        try {
            numSparkCoresFromEnv = Integer.parseInt(sparkSpecFromEnvironment);
        } catch (NumberFormatException e) {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Positive integers only");
        }
        if (numSparkCoresFromEnv > 0) {
            sparkMasterString = String.format("local[%d]", numSparkCoresFromEnv);
        } else {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Number of cores must be positive");
        }
    }
    return sparkMasterString;
}
Also used : UserException(org.broadinstitute.hellbender.exceptions.UserException)
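
The master string produced here ("local[*]" or "local[N]") is what a test ultimately hands to Spark. The following is a minimal sketch, assuming a Spark dependency on the classpath, of how such a string is consumed when a local context is built; the class name is illustrative.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

// Illustrative only: consuming a "local[*]" / "local[N]" master string such as the one
// returned by determineDefaultSparkMaster().
final class LocalSparkContextSketch {
    public static void main(String[] args) {
        final String master = args.length > 0 ? args[0] : "local[*]";
        final SparkConf conf = new SparkConf()
                .setMaster(master)
                .setAppName("spark-master-sketch");
        try (final JavaSparkContext ctx = new JavaSparkContext(conf)) {
            System.out.println("Running with master: " + ctx.sc().master());
        }
    }
}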

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException) 100
File (java.io.File) 30
IOException (java.io.IOException) 30
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary) 14
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 11
SamReader (htsjdk.samtools.SamReader) 10
VariantContext (htsjdk.variant.variantcontext.VariantContext) 10
ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger) 10
SAMRecord (htsjdk.samtools.SAMRecord) 9
IntervalList (htsjdk.samtools.util.IntervalList) 9
List (java.util.List) 9
SAMFileHeader (htsjdk.samtools.SAMFileHeader) 8
ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker) 8
SamLocusIterator (htsjdk.samtools.util.SamLocusIterator) 8
LogManager (org.apache.logging.log4j.LogManager) 8
Logger (org.apache.logging.log4j.Logger) 8
GATKException (org.broadinstitute.hellbender.exceptions.GATKException) 7
MetricsFile (htsjdk.samtools.metrics.MetricsFile) 6
SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter) 5
FileNotFoundException (java.io.FileNotFoundException) 5