use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class IntervalArgumentCollection method parseIntervals.
private void parseIntervals(final GenomeLocParser genomeLocParser) {
    // fail fast if no interval arguments were given at all
    if (!intervalsSpecified()) {
        throw new GATKException("Cannot call parseIntervals() without specifying either intervals to include or exclude.");
    }
    GenomeLocSortedSet includeSortedSet;
    if (getIntervalStrings().isEmpty()) {
        // the -L argument isn't specified, which means that -XL was, since we checked intervalsSpecified()
        // therefore we set the include set to be the entire reference territory
        includeSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(genomeLocParser.getSequenceDictionary());
    } else {
        try {
            includeSortedSet = IntervalUtils.loadIntervals(getIntervalStrings(), intervalSetRule, intervalMerging, intervalPadding, genomeLocParser);
        } catch (UserException.EmptyIntersection e) {
            throw new CommandLineException.BadArgumentValue("-L, --interval_set_rule", getIntervalStrings() + "," + intervalSetRule, "The specified intervals had an empty intersection");
        }
    }
    final GenomeLocSortedSet excludeSortedSet = IntervalUtils.loadIntervals(excludeIntervalStrings, IntervalSetRule.UNION, intervalMerging, intervalExclusionPadding, genomeLocParser);
    if (excludeSortedSet.contains(GenomeLoc.UNMAPPED)) {
        throw new UserException("-XL unmapped is not currently supported");
    }
    GenomeLocSortedSet intervals;
    // if there are no exclude arguments, we can return the include set directly
    if (excludeSortedSet.isEmpty()) {
        intervals = includeSortedSet;
    } else {
        // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
        intervals = includeSortedSet.subtractRegions(excludeSortedSet);
        if (intervals.isEmpty()) {
            throw new CommandLineException.BadArgumentValue("-L,-XL", getIntervalStrings().toString() + ", " + excludeIntervalStrings.toString(), "The intervals specified for exclusion with -XL removed all territory specified by -L.");
        }
        // logging messages are only printed when exclude (-XL) arguments are given
        final long toPruneSize = includeSortedSet.coveredSize();
        final long toExcludeSize = excludeSortedSet.coveredSize();
        final long intervalSize = intervals.coveredSize();
        logger.info(String.format("Initial include intervals span %d loci; exclude intervals span %d loci", toPruneSize, toExcludeSize));
        logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)", toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
    }
    logger.info(String.format("Processing %d bp from intervals", intervals.coveredSize()));
    // Separate out requests for unmapped records from the rest of the intervals.
    boolean traverseUnmapped = false;
    if (intervals.contains(GenomeLoc.UNMAPPED)) {
        traverseUnmapped = true;
        intervals.remove(GenomeLoc.UNMAPPED);
    }
    traversalParameters = new TraversalParameters(IntervalUtils.convertGenomeLocsToSimpleIntervals(intervals.toList()), traverseUnmapped);
}
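To make the include/exclude arithmetic above concrete, here is a minimal sketch of subtractRegions(), the call at the heart of the -L/-XL handling. The contig name and coordinates are hypothetical, and it assumes a GenomeLocParser named parser built from the reference's sequence dictionary:

// Hypothetical example; 'parser' and the coordinates are assumptions for illustration.
final GenomeLocSortedSet include = new GenomeLocSortedSet(parser);
include.add(parser.createGenomeLoc("chr1", 1, 1000));
final GenomeLocSortedSet exclude = new GenomeLocSortedSet(parser);
exclude.add(parser.createGenomeLoc("chr1", 401, 600));
// Subtraction leaves chr1:1-400 and chr1:601-1000: 800 of the original 1000 loci.
final GenomeLocSortedSet remaining = include.subtractRegions(exclude);
assert remaining.coveredSize() == 800;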
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReferenceAPISource method getReferenceBases.
/**
 * Query the Google Genomics API for reference bases spanning the specified interval from the specified
 * reference name.
 *
 * @param pipelineOptions - used to get the credentials necessary to call the Genomics API
 * @param interval - the range of bases to retrieve
 * @param pageSize - the maximum number of bases to return per page of results
 * @return the reference bases specified by interval and apiData (using the Google Genomics API)
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        byte[] received = result.getSequence().getBytes();
        byte[] bases = received;
        if (received.length < interval.size()) {
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                blobs.add(result.getSequence().getBytes());
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
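A hypothetical call site for the method above; the interval and page size are invented for illustration, and referenceSource stands in for an already-constructed ReferenceAPISource:

// Hypothetical usage; the interval and page size are illustrative only.
final SimpleInterval interval = new SimpleInterval("1", 50000, 50999);
final ReferenceBases result = referenceSource.getReferenceBases(pipelineOptions, interval, 1000000);
// On success, the length check in the method guarantees a full-length result.
assert result.getBases().length == interval.size();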
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReferenceAPISource method getReferenceNameToReferenceTable.
/**
 * getReferenceNameToReferenceTable produces a table from reference contig name
 * to the corresponding Reference object. The table is produced via a query to get the mapping. The query
 * currently takes ~10 seconds, so this table should be cached and not produced per query to get the reference bases.
 * This is clumsy and should be refactored with better Genomics API use (issue 643).
 * @param pipelineOptions - used to get the credentials necessary to call the Genomics API
 * @param referenceSetID - the ID of the reference set to use
 * @return a mapping from reference contig name to the corresponding Reference object
 */
public Map<String, Reference> getReferenceNameToReferenceTable(final PipelineOptions pipelineOptions, final String referenceSetID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetID);
    fillGenomicsService(pipelineOptions);
    final Map<String, Reference> ret = new LinkedHashMap<>();
    try {
        final SearchReferencesRequest content = new SearchReferencesRequest();
        content.setReferenceSetId(referenceSetID);
        final SearchReferencesResponse found = genomicsService.references().search(content).execute();
        for (Reference r : found.getReferences()) {
            ret.put(r.getName(), r);
        }
    } catch (IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetID, e);
    }
    return ret;
}
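Because the javadoc notes the lookup takes roughly 10 seconds and should be cached rather than repeated per query, a caching wrapper might look like the following sketch; the field and method names are hypothetical:

// Hypothetical caching wrapper around the slow lookup above.
private Map<String, Reference> cachedReferenceTable;

private Map<String, Reference> referenceTable(final PipelineOptions options, final String referenceSetID) {
    if (cachedReferenceTable == null) {
        cachedReferenceTable = getReferenceNameToReferenceTable(options, referenceSetID);
    }
    return cachedReferenceTable;
}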
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReadsSparkSource method getHeader.
/**
 * Loads the header using Hadoop-BAM.
 * @param filePath path to the BAM.
 * @param referencePath reference path, or null if not available. A reference is required for CRAM files.
 * @return the header for the BAM.
 */
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // GCS case
    if (BucketUtils.isCloudStorageUrl(filePath)) {
        try (ReadsDataSource readsDataSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return readsDataSource.getHeader();
        }
    }
    // local file or HDFS case
    try {
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(ctx.hadoopConfiguration());
        if (fs.isDirectory(path)) {
            FileStatus[] bamFiles = fs.listStatus(path, new PathFilter() {
                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(HADOOP_PART_PREFIX);
                }
            });
            if (bamFiles.length == 0) {
                throw new UserException("No BAM files to load header from in: " + path);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            path = bamFiles[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(path, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
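A hypothetical caller of getHeader(); the paths are invented, and the single-argument ReadsSparkSource constructor is an assumption about this version of the class:

// Hypothetical usage; the HDFS paths are assumptions for illustration.
final ReadsSparkSource readsSource = new ReadsSparkSource(ctx);
final SAMFileHeader header = readsSource.getHeader("hdfs://namenode/data/sample.bam", null);
// A CRAM input would need the reference instead of null:
// readsSource.getHeader("hdfs://namenode/data/sample.cram", "hdfs://namenode/ref/human_g1k_v37.fasta");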
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class SparkContextFactory method determineDefaultSparkMaster.
/**
 * Creates the default Spark master and determines the number of cores it should use. Applicable to Spark tests only.
 * Reads the specification from the environment variable GATK_TEST_SPARK_CORES:
 * if the environment variable is not set, all available cores are used, as in "local[*]";
 * if the value is a positive integer, that many cores are used;
 * if the value is not a valid integer (a string, empty, etc.), a UserException is thrown;
 * if the value is zero or a negative integer, a UserException is thrown.
 */
private static String determineDefaultSparkMaster() {
    final String defaultSparkMasterString = "local[*]";
    String sparkMasterString;
    String sparkSpecFromEnvironment = System.getenv(SPARK_CORES_ENV_VARIABLE);
    if (null == sparkSpecFromEnvironment) {
        sparkMasterString = defaultSparkMasterString;
    } else {
        int numSparkCoresFromEnv = 0;
        try {
            numSparkCoresFromEnv = Integer.parseInt(sparkSpecFromEnvironment);
        } catch (NumberFormatException e) {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Positive integers only");
        }
        if (numSparkCoresFromEnv > 0) {
            sparkMasterString = String.format("local[%d]", numSparkCoresFromEnv);
        } else {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Number of cores must be positive");
        }
    }
    return sparkMasterString;
}
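For context, a sketch of how the resulting master string would typically be consumed when building a test context. SparkConf and JavaSparkContext are standard Spark APIs; the app name is made up, and since determineDefaultSparkMaster() is private, this would have to live inside SparkContextFactory itself:

// Hypothetical consumer of the master string, from within SparkContextFactory.
final SparkConf conf = new SparkConf()
        .setAppName("gatk-spark-test")              // illustrative name
        .setMaster(determineDefaultSparkMaster());  // "local[*]", or e.g. "local[4]" if GATK_TEST_SPARK_CORES=4
final JavaSparkContext ctx = new JavaSparkContext(conf);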