use of htsjdk.samtools.SAMFileHeader in project gatk-protected by broadinstitute.
the class ReadThreadingGraphUnitTest method testNsInReadsAreNotUsedForGraph.
@Test(enabled = !DEBUG)
public void testNsInReadsAreNotUsedForGraph() {
final int length = 100;
final byte[] ref = Utils.dupBytes((byte) 'A', length);
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(25);
rtgraph.addSequence("ref", ref, true);
// add reads with Ns at any position
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
for (int i = 0; i < length; i++) {
final byte[] bases = ref.clone();
bases[i] = 'N';
final GATKRead read = ArtificialReadUtils.createArtificialRead(bases, Utils.dupBytes((byte) 30, length), length + "M");
rtgraph.addRead(read, header);
}
rtgraph.buildGraphIfNecessary();
final SeqGraph graph = rtgraph.toSequenceGraph();
Assert.assertEquals(new KBestHaplotypeFinder(graph, graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex()).size(), 1);
}
use of htsjdk.samtools.SAMFileHeader in project gatk-protected by broadinstitute.
the class ReadThreadingGraphUnitTest method testCyclesInGraph.
@Test(enabled = !DEBUG)
public void testCyclesInGraph() {
// b37 20:12655200-12655850
final String ref = "CAATTGTCATAGAGAGTGACAAATGTTTCAAAAGCTTATTGACCCCAAGGTGCAGCGGTGCACATTAGAGGGCACCTAAGACAGCCTACAGGGGTCAGAAAAGATGTCTCAGAGGGACTCACACCTGAGCTGAGTTGTGAAGGAAGAGCAGGATAGAATGAGCCAAAGATAAAGACTCCAGGCAAAAGCAAATGAGCCTGAGGGAAACTGGAGCCAAGGCAAGAGCAGCAGAAAAGAGCAAAGCCAGCCGGTGGTCAAGGTGGGCTACTGTGTATGCAGAATGAGGAAGCTGGCCAAGTAGACATGTTTCAGATGATGAACATCCTGTATACTAGATGCATTGGAACTTTTTTCATCCCCTCAACTCCACCAAGCCTCTGTCCACTCTTGGTACCTCTCTCCAAGTAGACATATTTCAGATCATGAACATCCTGTGTACTAGATGCATTGGAAATTTTTTCATCCCCTCAACTCCACCCAGCCTCTGTCCACACTTGGTACCTCTCTCTATTCATATCTCTGGCCTCAAGGAGGGTATTTGGCATTAGTAAATAAATTCCAGAGATACTAAAGTCAGATTTTCTAAGACTGGGTGAATGACTCCATGGAAGAAGTGAAAAAGAGGAAGTTGTAATAGGGAGACCTCTTCGG";
// SNP at 20:12655528 creates a cycle for small kmers
final String alt = "CAATTGTCATAGAGAGTGACAAATGTTTCAAAAGCTTATTGACCCCAAGGTGCAGCGGTGCACATTAGAGGGCACCTAAGACAGCCTACAGGGGTCAGAAAAGATGTCTCAGAGGGACTCACACCTGAGCTGAGTTGTGAAGGAAGAGCAGGATAGAATGAGCCAAAGATAAAGACTCCAGGCAAAAGCAAATGAGCCTGAGGGAAACTGGAGCCAAGGCAAGAGCAGCAGAAAAGAGCAAAGCCAGCCGGTGGTCAAGGTGGGCTACTGTGTATGCAGAATGAGGAAGCTGGCCAAGTAGACATGTTTCAGATGATGAACATCCTGTGTACTAGATGCATTGGAACTTTTTTCATCCCCTCAACTCCACCAAGCCTCTGTCCACTCTTGGTACCTCTCTCCAAGTAGACATATTTCAGATCATGAACATCCTGTGTACTAGATGCATTGGAAATTTTTTCATCCCCTCAACTCCACCCAGCCTCTGTCCACACTTGGTACCTCTCTCTATTCATATCTCTGGCCTCAAGGAGGGTATTTGGCATTAGTAAATAAATTCCAGAGATACTAAAGTCAGATTTTCTAAGACTGGGTGAATGACTCCATGGAAGAAGTGAAAAAGAGGAAGTTGTAATAGGGAGACCTCTTCGG";
final List<GATKRead> reads = new ArrayList<>();
for (int index = 0; index < alt.length() - 100; index += 20) {
reads.add(ArtificialReadUtils.createArtificialRead(Arrays.copyOfRange(alt.getBytes(), index, index + 100), Utils.dupBytes((byte) 30, 100), 100 + "M"));
}
// test that there are cycles detected for small kmer
final ReadThreadingGraph rtgraph25 = new ReadThreadingGraph(25);
rtgraph25.addSequence("ref", ref.getBytes(), true);
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
for (final GATKRead read : reads) {
rtgraph25.addRead(read, header);
}
rtgraph25.buildGraphIfNecessary();
Assert.assertTrue(rtgraph25.hasCycles());
// test that there are no cycles detected for large kmer
final ReadThreadingGraph rtgraph75 = new ReadThreadingGraph(75);
rtgraph75.addSequence("ref", ref.getBytes(), true);
for (final GATKRead read : reads) {
rtgraph75.addRead(read, header);
}
rtgraph75.buildGraphIfNecessary();
Assert.assertFalse(rtgraph75.hasCycles());
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class LocusWalker method traverse.
/**
* Implementation of locus-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
*
* The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
* for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
* and including deletions only if {@link #includeDeletions()} returns {@code true}.
*/
@Override
public void traverse() {
final SAMFileHeader header = getHeaderForReads();
// get the samples from the read groups
final Set<String> samples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
final CountingReadFilter countedFilter = makeReadFilter();
// get the filter and transformed iterator
final Iterator<GATKRead> readIterator = getTransformedReadStream(countedFilter).iterator();
// get the LIBS
final LocusIteratorByState libs = new LocusIteratorByState(readIterator, getDownsamplingInfo(), keepUniqueReadListInLibs(), samples, header, includeDeletions(), includeNs());
final Iterator<AlignmentContext> iterator = createAlignmentContextIterator(header, libs);
// iterate over each alignment, and apply the function
final Spliterator<AlignmentContext> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
StreamSupport.stream(spliterator, false).forEach(alignmentContext -> {
final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
apply(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(features, alignmentInterval));
progressMeter.update(alignmentInterval);
});
logger.info(countedFilter.getSummaryLine());
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReplaceSamHeader method doWork.
/**
* Do the work after command line has been parsed.
* RuntimeException may be thrown by this method, and are reported appropriately.
*
* @return program exit status.
*/
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(HEADER);
IOUtil.assertFileIsWritable(OUTPUT);
final SAMFileHeader replacementHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(HEADER);
if (BamFileIoUtils.isBamFile(INPUT)) {
blockCopyReheader(replacementHeader);
} else {
standardReheader(replacementHeader);
}
return null;
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class RevertOriginalBaseQualitiesAndAddMateCigar method doWork.
@Override
public Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
boolean foundPairedMappedReads = false;
// Check if we can skip this file since it does not have OQ tags and the mate cigar tag is already there.
final CanSkipSamFile skipSamFile = RevertOriginalBaseQualitiesAndAddMateCigar.canSkipSAMFile(INPUT, MAX_RECORDS_TO_EXAMINE, RESTORE_ORIGINAL_QUALITIES, REFERENCE_SEQUENCE);
logger.info(skipSamFile.getMessage(MAX_RECORDS_TO_EXAMINE));
if (skipSamFile.canSkip())
return null;
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).enable(SamReaderFactory.Option.EAGERLY_DECODE).open(INPUT);
final SAMFileHeader inHeader = in.getFileHeader();
// Build the output writer based on the correct sort order
final SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(inHeader);
// same as the input
if (null == SORT_ORDER)
this.SORT_ORDER = inHeader.getSortOrder();
outHeader.setSortOrder(SORT_ORDER);
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
// Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname
final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
final ProgressLogger revertingProgress = new ProgressLogger(logger, 1000000, " reverted OQs");
int numOriginalQualitiesRestored = 0;
for (final SAMRecord record : in) {
// Clean up reads that map off the end of the reference
AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record);
if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) {
// revert the original base qualities
record.setBaseQualities(record.getOriginalBaseQualities());
record.setOriginalBaseQualities(null);
numOriginalQualitiesRestored++;
}
if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag())
foundPairedMappedReads = true;
revertingProgress.record(record);
sorter.add(record);
}
CloserUtil.close(in);
logger.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records");
/**
* Iterator through sorting collection output
* 1. Set mate cigar string and mate information
* 2. push record into SAMFileWriter to the output
*/
try (final SamPairUtil.SetMateInfoIterator sorterIterator = new SamPairUtil.SetMateInfoIterator(sorter.iterator(), true)) {
final ProgressLogger sorterProgress = new ProgressLogger(logger, 1000000, " mate cigars added");
while (sorterIterator.hasNext()) {
final SAMRecord record = sorterIterator.next();
out.addAlignment(record);
sorterProgress.record(record);
}
CloserUtil.close(out);
logger.info("Updated " + sorterIterator.getNumMateCigarsAdded() + " records with mate cigar");
if (!foundPairedMappedReads)
logger.info("Did not find any paired mapped reads.");
}
}
return null;
}
Aggregations