use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReferenceAPISource method getReferenceBases.
/**
* Query the Google Genomics API for reference bases spanning the specified interval from the specified
* reference name.
*
* @param pipelineOptions -- used to get the credentials necessary to call the Genomics API
* @param interval - the range of bases to retrieve.
* @param pageSize - the maximum number of bases returned per API page; larger intervals are fetched across multiple pages.
* @return the reference bases specified by interval (retrieved via the Google Genomics API).
*/
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        byte[] received = result.getSequence().getBytes();
        byte[] bases = received;
        if (received.length < interval.size()) {
            // The response was paged: follow the page tokens and concatenate the pieces.
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                blobs.add(result.getSequence().getBytes());
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
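A minimal caller sketch (imports omitted, as in the snippets on this page): it assumes a ReferenceAPISource named source has already been constructed for a reference set whose contig names include "1", and that pipelineOptions carries valid Google Genomics credentials. The interval and page size are illustrative only.

private ReferenceBases fetchExample(final ReferenceAPISource source, final PipelineOptions pipelineOptions) {
    // Hypothetical interval; the contig name must be one of the reference names known to the source.
    final SimpleInterval interval = new SimpleInterval("1", 1_000_000, 1_000_999);
    // A page size of 1_000_000 keeps a small request on a single page; larger intervals are paged transparently.
    final ReferenceBases bases = source.getReferenceBases(pipelineOptions, interval, 1_000_000);
    // getReferenceBases guarantees bases.getBases().length == interval.size(), or throws a UserException.
    return bases;
}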
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReferenceAPISource method getReferenceNameToReferenceTable.
/**
* getReferenceNameToReferenceTable produces a table from reference contig name
* to the corresponding Reference object. The table is produced via a query to get the mapping. The query
* currently takes ~10 seconds, so this table should be cached and not produced per query to get the reference bases.
* This is clumsy and should be refactored with better Genomics API use (issue 643).
* @param pipelineOptions - used to get the credentials necessary to call the Genomics API
* @param referenceSetID - the ID of the reference set to use
* @return a mapping from reference contig name to the corresponding Reference object.
*/
public Map<String, Reference> getReferenceNameToReferenceTable(final PipelineOptions pipelineOptions, final String referenceSetID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetID);
    fillGenomicsService(pipelineOptions);
    final Map<String, Reference> ret = new LinkedHashMap<>();
    try {
        final SearchReferencesRequest content = new SearchReferencesRequest();
        content.setReferenceSetId(referenceSetID);
        final SearchReferencesResponse found = genomicsService.references().search(content).execute();
        for (Reference r : found.getReferences()) {
            ret.put(r.getName(), r);
        }
    } catch (IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetID, e);
    }
    return ret;
}
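Since the search currently takes on the order of ten seconds, a caller would typically build the table once and answer all later lookups from it. A sketch of such a cache (the field and method names here are hypothetical, not part of ReferenceAPISource):

private Map<String, Reference> cachedReferenceTable;  // hypothetical cache field

private Reference lookUpContig(final PipelineOptions pipelineOptions, final String referenceSetID, final String contig) {
    if (cachedReferenceTable == null) {
        // Pay the ~10 second query cost once, then serve lookups from the cached map.
        cachedReferenceTable = getReferenceNameToReferenceTable(pipelineOptions, referenceSetID);
    }
    final Reference reference = cachedReferenceTable.get(contig);
    if (reference == null) {
        throw new UserException("Contig " + contig + " is not part of reference set " + referenceSetID);
    }
    return reference;
}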
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReadsSparkSource method getHeader.
/**
* Loads the header using Hadoop-BAM.
* @param filePath path to the bam.
* @param referencePath Reference path or null if not available. Reference is required for CRAM files.
* @return the header for the bam.
*/
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // GCS case
    if (BucketUtils.isCloudStorageUrl(filePath)) {
        try (ReadsDataSource readsDataSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return readsDataSource.getHeader();
        }
    }
    // local file or HDFS case
    try {
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(ctx.hadoopConfiguration());
        if (fs.isDirectory(path)) {
            FileStatus[] bamFiles = fs.listStatus(path, new PathFilter() {
                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(HADOOP_PART_PREFIX);
                }
            });
            if (bamFiles.length == 0) {
                throw new UserException("No BAM files to load header from in: " + path);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            path = bamFiles[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(path, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
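A short usage sketch (imports omitted; the input path is hypothetical): readsSource is an already-constructed ReadsSparkSource, and the reference path is null because the input is a BAM rather than a CRAM.

private void checkSortOrderExample(final ReadsSparkSource readsSource) {
    // Works for GCS (gs://), HDFS, or local paths; pass a reference path instead of null for CRAM inputs.
    final SAMFileHeader header = readsSource.getHeader("hdfs://namenode/data/sample.bam", null);
    if (header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Expected a coordinate-sorted input, but sort order was " + header.getSortOrder());
    }
}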
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class SparkContextFactory method determineDefaultSparkMaster.
/**
* Creates the default Spark master and determines the number of cores it should use. Applicable to Spark tests only.
* Reads the specification from the environment variable GATK_TEST_SPARK_CORES:
* if the environment variable is not set, all available cores are used, as in "local[*]";
* if the value is a positive integer, that many cores are used;
* if the value is invalid (a non-numeric string, empty, etc.), a UserException is thrown;
* if the value is a negative integer or zero, a UserException is thrown.
*/
private static String determineDefaultSparkMaster() {
    final String defaultSparkMasterString = "local[*]";
    String sparkMasterString;
    String sparkSpecFromEnvironment = System.getenv(SPARK_CORES_ENV_VARIABLE);
    if (null == sparkSpecFromEnvironment) {
        sparkMasterString = defaultSparkMasterString;
    } else {
        int numSparkCoresFromEnv = 0;
        try {
            numSparkCoresFromEnv = Integer.parseInt(sparkSpecFromEnvironment);
        } catch (NumberFormatException e) {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Positive integers only");
        }
        if (numSparkCoresFromEnv > 0) {
            sparkMasterString = String.format("local[%d]", numSparkCoresFromEnv);
        } else {
            throw new UserException("Illegal number of cores specified in " + SPARK_CORES_ENV_VARIABLE + ". Number of cores must be positive");
        }
    }
    return sparkMasterString;
}
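The same rules, restated as a self-contained helper for illustration (the method name is hypothetical; the real implementation above reads GATK_TEST_SPARK_CORES directly and folds the variable name into its error messages):

static String sparkMasterFromSpec(final String spec) {
    if (spec == null) {
        return "local[*]";                               // variable unset: use all available cores
    }
    final int cores;
    try {
        cores = Integer.parseInt(spec);                  // e.g. "4"
    } catch (NumberFormatException e) {
        throw new UserException("Illegal number of cores: " + spec + ". Positive integers only");   // e.g. "four", ""
    }
    if (cores <= 0) {
        throw new UserException("Illegal number of cores: " + cores + ". Number of cores must be positive");
    }
    return String.format("local[%d]", cores);            // e.g. 4 -> "local[4]"
}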
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class ReplaceSamHeader method standardReheader.
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
        throw new UserException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() + ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
    }
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, replacementHeader, true)) {
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord rec : recordReader) {
            rec.setHeaderStrict(replacementHeader);
            writer.addAlignment(rec);
            progress.record(rec);
        }
    }
    CloserUtil.close(recordReader);
}
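For orientation, a stripped-down htsjdk-only sketch of the same flow (the file paths and helper name are hypothetical; the real tool also checks that the sort orders of the input and the replacement header agree, as shown above):

private void reheaderExample() throws IOException {
    final SAMFileHeader replacementHeader;
    try (final SamReader headerReader = SamReaderFactory.makeDefault().open(new File("header_source.bam"))) {
        replacementHeader = headerReader.getFileHeader();
    }
    try (final SamReader reader = SamReaderFactory.makeDefault()
                 .validationStringency(ValidationStringency.SILENT).open(new File("input.bam"));
         final SAMFileWriter writer = new SAMFileWriterFactory()
                 .makeBAMWriter(replacementHeader, true, new File("output.bam"))) {   // true: records are already sorted
        for (final SAMRecord rec : reader) {
            rec.setHeaderStrict(replacementHeader);   // throws if the record's reference name is absent from the new header
            writer.addAlignment(rec);
        }
    }
}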