Search in sources :

Example 11 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReferenceAPISource method getReferenceBases.

/**
 * Query the Google Genomics API for the reference bases spanning the specified interval.
 *
 * If no genomics service has been created yet, one is created first: from {@code pipelineOptions}
 * when it is non-null, otherwise from the saved API key.
 *
 * @param pipelineOptions used to create the genomics service when none exists yet; may be null, in which
 *                        case the saved apiKey is used instead
 * @param interval the range of bases to retrieve; must not be null and its contig must be a known reference name
 * @param pageSize the page size to set on the list request sent to the Genomics API
 * @return the reference bases covering {@code interval}
 * @throws UserException if the contig is unknown, if the service returns no bases or an unexpected number of
 *                       bases, or if the query fails with an IOException
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        // Decode with an explicit charset so the byte count does not depend on the platform default.
        byte[] received = result.getSequence().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] bases = received;
        if (received.length < interval.size()) {
            // The first page was short: keep following page tokens and stitch the pages together.
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                final String pageSequence = result.getSequence();
                if (pageSequence == null) {
                    // Guard against a follow-up page without a sequence instead of failing with a NullPointerException.
                    throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
                }
                blobs.add(pageSequence.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
Also used : Genomics(com.google.api.services.genomics.Genomics) IOException(java.io.IOException) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GATKGCSOptions(org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 12 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReferenceAPISource method getReferenceNameToReferenceTable.

/**
 * getReferenceNameToReferenceTable produces a table from reference contig name
 * to the corresponding Reference object. The table is produced via a query to get the mapping. The query
 * currently takes ~10 seconds, so this table should be cached and not produced per query to get the reference bases.
 * This is clumsy and should be refactored with better Genomics API use (issue 643).
 *
 * All result pages are followed, so reference sets with more references than fit in a single
 * response page are returned completely.
 *
 * @param pipelineOptions - are used to get the credentials necessary to call the Genomics API; must not be null
 * @param referenceSetID - the ID of the reference set to use; must not be null
 * @return a mapping from reference name to the corresponding Reference object, in the order returned by the search
 * @throws UserException if the search request fails with an IOException
 */
public Map<String, Reference> getReferenceNameToReferenceTable(final PipelineOptions pipelineOptions, final String referenceSetID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetID);
    fillGenomicsService(pipelineOptions);
    final Map<String, Reference> ret = new LinkedHashMap<>();
    try {
        final SearchReferencesRequest content = new SearchReferencesRequest();
        content.setReferenceSetId(referenceSetID);
        String pageToken = null;
        do {
            if (pageToken != null) {
                content.setPageToken(pageToken);
            }
            final SearchReferencesResponse found = genomicsService.references().search(content).execute();
            // A response with no matches may carry no reference list at all.
            if (found.getReferences() != null) {
                for (Reference r : found.getReferences()) {
                    ret.put(r.getName(), r);
                }
            }
            pageToken = found.getNextPageToken();
        } while (pageToken != null && !pageToken.isEmpty());
    } catch (IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetID, e);
    }
    return ret;
}
Also used : IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 13 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReadsSparkSource method getHeader.

/**
 * Reads the SAM header for the given input.
 *
 * Cloud storage URLs are read through a {@link ReadsDataSource}; everything else is read through the
 * Hadoop file system. When the path is a directory, the header is taken from the first entry whose
 * name starts with {@code HADOOP_PART_PREFIX}.
 *
 * @param filePath path to the bam.
 * @param referencePath Reference path or null if not available. Reference is required for CRAM files.
 * @return the header for the bam.
 * @throws UserException if a directory contains no part files, or if reading the header fails
 */
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // Cloud storage input: delegate to ReadsDataSource and return immediately.
    final boolean isCloudInput = BucketUtils.isCloudStorageUrl(filePath);
    if (isCloudInput) {
        try (ReadsDataSource cloudSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return cloudSource.getHeader();
        }
    }
    // Local file or HDFS input.
    try {
        final Path requestedPath = new Path(filePath);
        final FileSystem fileSystem = requestedPath.getFileSystem(ctx.hadoopConfiguration());
        Path headerPath = requestedPath;
        if (fileSystem.isDirectory(requestedPath)) {
            final PathFilter partFileFilter = new PathFilter() {

                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path candidate) {
                    return candidate.getName().startsWith(HADOOP_PART_PREFIX);
                }
            };
            final FileStatus[] shards = fileSystem.listStatus(requestedPath, partFileFilter);
            if (shards.length == 0) {
                throw new UserException("No BAM files to load header from in: " + requestedPath);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            headerPath = shards[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(headerPath, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) UserException(org.broadinstitute.hellbender.exceptions.UserException) IOException(java.io.IOException)

Example 14 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SparkContextFactory method determineDefaultSparkMaster.

/**
 * Create the default Spark master, determines the number of cores it should use. Applicable to Spark test only.
 *   Read the specification from the environment variable named by SPARK_CORES_ENV_VARIABLE
 *      If the environment variable is not set, use all available cores as in "local[*]"
 *      If the value is a positive integer, use the value
 *      If the value is invalid (strings, empty, etc), throw a UserException
 *      If the value is a negative integer or zero, throw a UserException
 *
 * @return the Spark master string, either "local[*]" or "local[N]" for a positive core count N
 * @throws UserException if the environment variable is set but is not a positive integer
 */
private static String determineDefaultSparkMaster() {
    final String sparkSpecFromEnvironment = System.getenv(SPARK_CORES_ENV_VARIABLE);
    if (sparkSpecFromEnvironment == null) {
        return "local[*]";
    }
    int numSparkCoresFromEnv;
    try {
        numSparkCoresFromEnv = Integer.parseInt(sparkSpecFromEnvironment);
    } catch (NumberFormatException e) {
        // Keep the parse failure as the cause so the offending value is visible in the stack trace.
        throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Positive integers only", e);
    }
    if (numSparkCoresFromEnv <= 0) {
        throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Number of cores must be positive");
    }
    // Plain concatenation instead of String.format("%d"): the formatter localizes digits using the
    // default locale, which could produce a master string with non-ASCII digits.
    return "local[" + numSparkCoresFromEnv + "]";
}
Also used : UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 15 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReplaceSamHeader method standardReheader.

/**
 * Rewrites every record of INPUT to OUTPUT with the given replacement header attached.
 *
 * The reader over INPUT is closed in all cases, including when the sort orders disagree
 * or when writing fails part-way through.
 *
 * @param replacementHeader the header to set on every record and to write to OUTPUT; its sort order
 *                          must equal the sort order in INPUT's header
 * @throws UserException if the sort orders of INPUT and the replacement header differ
 */
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    try {
        if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
            throw new UserException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() + ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }
        try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, replacementHeader, true)) {
            final ProgressLogger progress = new ProgressLogger(logger);
            for (final SAMRecord rec : recordReader) {
                rec.setHeaderStrict(replacementHeader);
                writer.addAlignment(rec);
                progress.record(rec);
            }
        }
    } finally {
        // Release the reader on every exit path, not only after a successful rewrite.
        CloserUtil.close(recordReader);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException)100 File (java.io.File)30 IOException (java.io.IOException)30 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)11 SamReader (htsjdk.samtools.SamReader)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)10 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)10 SAMRecord (htsjdk.samtools.SAMRecord)9 IntervalList (htsjdk.samtools.util.IntervalList)9 List (java.util.List)9 SAMFileHeader (htsjdk.samtools.SAMFileHeader)8 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)8 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)8 LogManager (org.apache.logging.log4j.LogManager)8 Logger (org.apache.logging.log4j.Logger)8 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 MetricsFile (htsjdk.samtools.metrics.MetricsFile)6 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)5 FileNotFoundException (java.io.FileNotFoundException)5