use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class GenomeLocParser method createGenomeLocAtStop.
/**
* Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
* @param loc The original loc
* @param maxBasePairs The maximum number of basePairs
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
*/
public GenomeLoc createGenomeLocAtStop(final GenomeLoc loc, final int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int contigLength = contig.getSequenceLength();
int start = loc.getStop() + 1;
int stop = loc.getStop() + maxBasePairs;
if (start > contigLength)
return null;
if (stop > contigLength)
stop = contigLength;
return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class GenomeLocSortedSet method createSetFromSequenceDictionary.
/**
* create a list of genomic locations, given a reference sequence
*
* @param dict the sequence dictionary to create a collection from
*
* @return the GenomeLocSet of all references sequences as GenomeLoc's
*/
public static GenomeLocSortedSet createSetFromSequenceDictionary(final SAMSequenceDictionary dict) {
final GenomeLocParser parser = new GenomeLocParser(dict);
final GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser);
for (final SAMSequenceRecord sequence : dict.getSequences()) {
returnSortedSet.add(parser.createOverEntireContig(sequence.getSequenceName()));
}
return returnSortedSet;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class ReferenceMemorySourceTest method init.
@BeforeTest
public void init() {
List<SAMSequenceRecord> l = new ArrayList<>();
l.add(new SAMSequenceRecord("chr1", 1000000));
SAMSequenceDictionary seqDir = new SAMSequenceDictionary(l);
memorySource = new ReferenceMemorySource(ref1, seqDir);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class ReorderSam method buildSequenceDictionaryMap.
/**
* Constructs a mapping from read sequence records index -> new sequence dictionary index for use in
* reordering the reference index and mate reference index in each read. -1 means unmapped.
*/
private Map<Integer, Integer> buildSequenceDictionaryMap(final SAMSequenceDictionary refDict, final SAMSequenceDictionary readsDict) {
Map<Integer, Integer> newOrder = new HashMap<>();
logger.info("Reordering SAM/BAM file:");
for (final SAMSequenceRecord refRec : refDict.getSequences()) {
final SAMSequenceRecord readsRec = readsDict.getSequence(refRec.getSequenceName());
if (readsRec != null) {
if (refRec.getSequenceLength() != readsRec.getSequenceLength()) {
String msg = String.format("Discordant contig lengths: read %s LN=%d, ref %s LN=%d", readsRec.getSequenceName(), readsRec.getSequenceLength(), refRec.getSequenceName(), refRec.getSequenceLength());
if (ALLOW_CONTIG_LENGTH_DISCORDANCE) {
logger.warn(msg);
} else {
throw new UserException(msg);
}
}
logger.info(String.format(" Reordering read contig %s [index=%d] to => ref contig %s [index=%d]%n", readsRec.getSequenceName(), readsRec.getSequenceIndex(), refRec.getSequenceName(), refRec.getSequenceIndex()));
newOrder.put(readsRec.getSequenceIndex(), refRec.getSequenceIndex());
}
}
for (SAMSequenceRecord readsRec : readsDict.getSequences()) {
if (!newOrder.containsKey(readsRec.getSequenceIndex())) {
if (ALLOW_INCOMPLETE_DICT_CONCORDANCE)
newOrder.put(readsRec.getSequenceIndex(), -1);
else
throw new UserException("New reference sequence does not contain a matching contig for " + readsRec.getSequenceName());
}
}
return newOrder;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFile method getSubsequenceAt.
/**
* Gets the subsequence of the contig in the range [start,stop]
*
* Uses the sequence cache if possible, or updates the cache to handle the request. If the range
* is larger than the cache itself, just loads the sequence directly, not changing the cache at all
*
* @param contig Contig whose subsequence to retrieve.
* @param start inclusive, 1-based start of region.
* @param stop inclusive, 1-based stop of region.
* @return The partial reference sequence associated with this range. If preserveCase is false, then
* all of the bases in the ReferenceSequence returned by this method will be upper cased.
*/
@Override
public ReferenceSequence getSubsequenceAt(final String contig, long start, final long stop) {
final ReferenceSequence result;
if ((stop - start) >= cacheSize) {
cacheMisses++;
result = super.getSubsequenceAt(contig, start, stop);
if (!preserveCase)
StringUtil.toUpperCase(result.getBases());
if (!preserveIUPAC)
BaseUtils.convertIUPACtoN(result.getBases(), true, start < 1);
} else {
// todo -- potential optimization is to check if contig.name == contig, as this in general will be true
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
if (contigInfo == null) {
throw new UserException.MissingContigInSequenceDictionary(contig, super.getSequenceDictionary());
}
if (stop > contigInfo.getSequenceLength())
throw new SAMException("Query asks for data past end of contig. Query contig " + contig + " start:" + start + " stop:" + stop + " contigLength:" + contigInfo.getSequenceLength());
if (start < cache.start || stop > cache.stop || cache.seq == null || cache.seq.getContigIndex() != contigInfo.getSequenceIndex()) {
cacheMisses++;
cache.start = Math.max(start - cacheMissBackup, 0);
cache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength());
cache.seq = super.getSubsequenceAt(contig, cache.start, cache.stop);
// convert all of the bases in the sequence to upper case if we aren't preserving cases
if (!preserveCase)
StringUtil.toUpperCase(cache.seq.getBases());
if (!preserveIUPAC)
BaseUtils.convertIUPACtoN(cache.seq.getBases(), true, cache.start == 0);
} else {
cacheHits++;
}
// at this point we determine where in the cache we want to extract the requested subsequence
final int cacheOffsetStart = (int) (start - cache.start);
final int cacheOffsetStop = (int) (stop - start + cacheOffsetStart + 1);
try {
result = new ReferenceSequence(cache.seq.getName(), cache.seq.getContigIndex(), Arrays.copyOfRange(cache.seq.getBases(), cacheOffsetStart, cacheOffsetStop));
} catch (ArrayIndexOutOfBoundsException e) {
throw new GATKException(String.format("BUG: bad array indexing. Cache start %d and end %d, request start %d end %d, offset start %d and end %d, base size %d", cache.start, cache.stop, start, stop, cacheOffsetStart, cacheOffsetStop, cache.seq.getBases().length), e);
}
}
// for debugging -- print out our efficiency if requested
if (PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0)
printEfficiency(Level.INFO);
return result;
}
Aggregations