use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.
the class AlignedAssemblyOrExcuse method toSAMStreamForOneContig.
/**
 * Builds a lazily-evaluated stream of SAM records, one per alignment of a single assembled contig.
 * <p>
 * Every record shares the read name produced by {@code formatContigName(assemblyId, contigIdx)} and is
 * created through {@code BwaMemAlignmentUtils.applyAlignment}. When
 * {@code BwaMemAlignmentUtils.createSATags} yields a non-null tag for an alignment, that tag is stored
 * on the record under the {@code "SA"} attribute.
 *
 * @param header         header handed to {@code applyAlignment} for each record
 * @param refNames       reference contig names, used both for the SA tags and for {@code applyAlignment}
 * @param assemblyId     assembly identifier, used to build the read name
 * @param contigIdx      index of the contig within the assembly, used to build the read name
 * @param contigSequence bases of the contig, handed to {@code applyAlignment}
 * @param alignments     alignments of the contig; must not be null
 * @return an empty stream when {@code alignments} is empty, otherwise one record per alignment in list order
 */
public static Stream<SAMRecord> toSAMStreamForOneContig(final SAMFileHeader header, final List<String> refNames, final int assemblyId, final int contigIdx, final byte[] contigSequence, final List<BwaMemAlignment> alignments) {
    // Short-circuit: no name or tag computation is needed when there is nothing to convert.
    if (alignments.isEmpty()) {
        return Stream.empty();
    }

    final String contigName = formatContigName(assemblyId, contigIdx);
    final Map<BwaMemAlignment, String> tagByAlignment = BwaMemAlignmentUtils.createSATags(alignments, refNames);

    return alignments.stream().map(bwaAlignment -> {
        // NOTE(review): the two nulls and two false flags are positional arguments whose meaning
        // is defined by BwaMemAlignmentUtils.applyAlignment — confirm against that method.
        final SAMRecord sam = BwaMemAlignmentUtils.applyAlignment(contigName, contigSequence, null, null, bwaAlignment, refNames, header, false, false);
        final String tag = tagByAlignment.get(bwaAlignment);
        if (tag != null) {
            sam.setAttribute("SA", tag);
        }
        return sam;
    });
}
use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.
the class BwaMemTestUtils method assertCorrectSingleReadAlignment.
/**
 * Aligns one fixed 101-base read with a {@link BwaMemAligner} built on the supplied index and asserts
 * that exactly one alignment comes back, on contig {@code "20"}, with cigar {@code 101M}, mapping
 * quality 60, reference start 9999996 and zero mismatches.
 *
 * @param index the index to align against; the aligner created on it is closed before returning
 */
public static void assertCorrectSingleReadAlignment(final BwaMemIndex index) {
    try (final BwaMemAligner aligner = new BwaMemAligner(index)) {
        //real read taken from src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam
        // Encode with an explicit charset so the bytes given to the aligner never depend on the
        // platform default; the literal contains only ASCII base characters.
        final byte[] seq = "GTTTTGTTTACTACAGCTTTGTAGTAAATTTTGAACTCTAAAGTGTTAGTTCTCTAACTTTGTTTGTTTTTCAAGAGTGTTTTGACTCTTCTTACTGCATC".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
        final List<List<BwaMemAlignment>> allAlignments = aligner.alignSeqs(Collections.singletonList(seq));
        // One input sequence must yield exactly one list of alignments ...
        Assert.assertEquals(allAlignments.size(), 1);
        final List<BwaMemAlignment> alignments = allAlignments.get(0);
        // ... and that list must hold a single alignment.
        Assert.assertEquals(alignments.size(), 1);
        final BwaMemAlignment alignment = alignments.get(0);
        // The alignment stores a reference id; translate it to a contig name through the index.
        Assert.assertEquals(index.getReferenceContigNames().get(alignment.getRefId()), "20");
        Assert.assertEquals(alignment.getCigar(), "101M");
        Assert.assertEquals(alignment.getMapQual(), 60);
        Assert.assertEquals(alignment.getRefStart(), 9999996);
        Assert.assertEquals(alignment.getNMismatches(), 0);
    }
}
use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.
the class AlignedAssemblyUnitTest method createInputsAndExpectedResults_BwaMemAlignmentConstruction.
/**
 * These alignment records are supposed to be associated with the 4 possible types of evidence we could see for an inversion,
 * where the chr1:101-200 bases are inverted. The eight records form four consecutive pairs, each pair being
 * the two chimeric alignments of one contig (contig lengths 8, 10, 12 and 14):
 * <ul>
 *   <li>INV55, where the lower contig coordinate is on the forward strand at the lower reference coordinate, and
 *       the higher contig coordinate is on the negative strand at the higher reference coordinate;</li>
 *   <li>INV55, where the lower contig coordinate is on the forward strand at the higher reference coordinate, and
 *       the higher contig coordinate is on the negative strand at the lower reference coordinate;</li>
 *   <li>INV33, where the lower contig coordinate is on the negative strand at the lower reference coordinate, and
 *       the higher contig coordinate is on the forward strand at the higher reference coordinate;</li>
 *   <li>INV33, a second negative-then-forward pair with the same reference starts as the previous pair
 *       but a longer contig.</li>
 * </ul>
 * Finally, one must be aware of the fact that BWA always outputs CIGAR with a '+'-strand representation,
 * therefore we must use such in constructing the {@link BwaMemAlignment}s; for negative-strand records the
 * contig coordinates given to the constructor are mirrored against the contig length, and the expected
 * cigar is the inverted one.
 *
 * @return an array of arrays, each composed of
 *          [0] {@link BwaMemAlignment} object,
 *          [1] expected reference interval,
 *          [2] expected cigar,
 *          [3] expected strandedness,
 *          [4] expected start in assembled contig, 1-based, inclusive
 *          [5] expected end in assembled contig, 1-based, inclusive
 *          [6] expected contig length,
 *          [7] mapping quality exactly as given to the {@link BwaMemAlignment} (may be -1),
 *          [8] expected {@link AlignedAssembly.AlignmentInterval} object (generated manually with all fields explicitly spelled out and given to
 *              {@link AlignedAssembly.AlignmentInterval#AlignmentInterval(SimpleInterval, int, int, Cigar, boolean, int, int)}
 *              intended to be used for testing concordance between the two constructors)
 */
@DataProvider(name = "AlignmentIntervalCtorTestForSimpleInversion")
private Object[][] createInputsAndExpectedResults_BwaMemAlignmentConstruction() {
    // Parallel arrays: position k in every array describes test case k.
    // Each different number in the cigars marks a different contig's pair of chimeric alignments.
    final String[] cigarTexts = { "4M4S", "4M4H", "5M5S", "5M5H", "6S6M", "6H6M", "7S7M", "7H7M" };
    final boolean[] forwardStrand = { true, false, true, false, false, true, false, true };
    final int[] refStarts0Based = { 96, 196, 195, 95, 101, 201, 101, 201 };
    final int[] tigStarts0BasedInclusive = { 0, 4, 0, 5, 0, 6, 0, 7 };
    final int[] tigEnds0BasedExclusive = { 4, 8, 5, 10, 6, 12, 7, 14 };
    final int[] contigLengths = { 8, 8, 10, 10, 12, 12, 14, 14 };
    final int[] rawMapQuals = { -1, 0, 10, 20, 30, 40, 50, 60 };

    final int nCases = cigarTexts.length;
    final Cigar[] parsedCigars = new Cigar[nCases];
    for (int k = 0; k < nCases; ++k) {
        parsedCigars[k] = TextCigarCodec.decode(cigarTexts[k]);
    }

    final Object[][] rows = new Object[nCases][];
    for (int k = 0; k < nCases; ++k) {
        final boolean isForward = forwardStrand[k];
        final Cigar plusStrandCigar = parsedCigars[k];

        // NOTE(review): the BwaMemAlignment constructor is positional; the local names below reflect
        // the presumed meaning of each slot — confirm against the constructor's declaration.
        final int flag = isForward ? 0 : SAMFlag.READ_REVERSE_STRAND.intValue();
        final int startOnRef = refStarts0Based[k];
        final int endOnRef = startOnRef + plusStrandCigar.getReferenceLength();
        // Negative-strand records get their contig span mirrored against the contig length.
        final int startForBwa = isForward ? tigStarts0BasedInclusive[k] : contigLengths[k] - tigEnds0BasedExclusive[k];
        final int endForBwa = isForward ? tigEnds0BasedExclusive[k] : contigLengths[k] - tigStarts0BasedInclusive[k];
        final BwaMemAlignment bwaMemAlignment = new BwaMemAlignment(flag, 0, startOnRef, endOnRef, startForBwa, endForBwa, rawMapQuals[k], 0, 1, 1, cigarTexts[k], null, null, 0, Integer.MIN_VALUE, Integer.MAX_VALUE);

        final SimpleInterval referenceInterval = new SimpleInterval(refNames.get(0), startOnRef + 1, bwaMemAlignment.getRefEnd());

        // Expected contig coordinates are 1-based inclusive: shift the 0-based start, keep the exclusive end.
        final int expectedTigStart = tigStarts0BasedInclusive[k] + 1;
        final int expectedTigEnd = tigEnds0BasedExclusive[k];

        // The hand-built interval clamps the mapping quality from below at SAMRecord.NO_MAPPING_QUALITY.
        final int clampedMapQual = Math.max(SAMRecord.NO_MAPPING_QUALITY, bwaMemAlignment.getMapQual());
        final AlignedAssembly.AlignmentInterval alignmentInterval = new AlignedAssembly.AlignmentInterval(referenceInterval, expectedTigStart, expectedTigEnd, isForward ? plusStrandCigar : CigarUtils.invertCigar(plusStrandCigar), isForward, clampedMapQual, bwaMemAlignment.getNMismatches());

        rows[k] = new Object[] {
                bwaMemAlignment,
                referenceInterval,
                isForward ? plusStrandCigar : CigarUtils.invertCigar(plusStrandCigar),
                isForward,
                expectedTigStart,
                expectedTigEnd,
                contigLengths[k],
                rawMapQuals[k],
                alignmentInterval
        };
    }
    return rows;
}
use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.
the class AlignedAssemblyOrExcuse method serialize.
/**
 * Writes this object's state to {@code output}.
 * <p>
 * Layout, in order: the assembly id, the error message (possibly null), and, only when the error
 * message is null:
 * <ol>
 *   <li>the number of contigs followed by every contig (via {@code writeContig});</li>
 *   <li>for every contig, its connection count followed by each connection (via {@code writeConnection});</li>
 *   <li>for every entry of {@code contigAlignments}, its alignment count followed by each alignment
 *       (via {@code writeAlignment}).</li>
 * </ol>
 *
 * @param kryo   the Kryo instance; not used by this method
 * @param output the sink that receives the serialized fields
 */
private void serialize(final Kryo kryo, final Output output) {
    output.writeInt(assemblyId);
    output.writeString(errorMessage);

    // When an error message is present, only the id and the message are written.
    if (errorMessage != null) {
        return;
    }

    final int contigCount = assembly.getNContigs();
    output.writeInt(contigCount);

    // Remember the index each contig was written at; the map is handed to writeConnection below,
    // so all contigs must be written before any connection.
    final Map<Contig, Integer> indexOfContig = new HashMap<>();
    for (int i = 0; i != contigCount; ++i) {
        final Contig current = assembly.getContig(i);
        writeContig(current, output);
        indexOfContig.put(current, i);
    }

    for (final Contig owner : assembly.getContigs()) {
        final List<Connection> links = owner.getConnections();
        output.writeInt(links.size());
        for (final Connection link : links) {
            writeConnection(link, indexOfContig, output);
        }
    }

    for (final List<BwaMemAlignment> perContig : contigAlignments) {
        output.writeInt(perContig.size());
        for (final BwaMemAlignment bwaAlignment : perContig) {
            writeAlignment(bwaAlignment, output);
        }
    }
}
use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.
the class ContigAligner method alignContigs.
/**
 * Takes a collection of assembled contigs and aligns them to the reference with bwa-mem. Alignments
 * flagged as not-primary (secondary) or as unmapped are filtered out, preserving the primary and
 * supplementary alignments.
 * Within the output, contigs appear in the order in which they were passed in, and the alignments of
 * each contig are sorted by their start position on the contig.
 *
 * @param assemblyId An identifier for the assembly or set of contigs
 * @param contigsCollection The assembled contigs to align. Each contig's ID string must begin with a
 *                          token that, once {@code "contig"}, {@code "-"} and {@code ">"} are removed,
 *                          parses as an integer contig index.
 * @return the assembly together with, for every contig, its canonical (primary or supplementary) alignments
 * @throws NumberFormatException if a contig ID does not yield an integer index
 */
AlignedAssembly alignContigs(final int assemblyId, final ContigsCollection contigsCollection) {
    final List<Tuple2<ContigsCollection.ContigID, ContigsCollection.ContigSequence>> contents = contigsCollection.getContents();
    final List<AlignedContig> alignedContigs = new ArrayList<>(contents.size());
    // An alignment is kept only when neither of these SAM flag bits is set.
    final int rejectedFlags = SAMFlag.NOT_PRIMARY_ALIGNMENT.intValue() | SAMFlag.READ_UNMAPPED.intValue();
    final BwaMemIndex index = BwaMemIndexSingleton.getInstance(indexImageFile);
    try (final BwaMemAligner aligner = new BwaMemAligner(index)) {
        final List<String> refNames = index.getReferenceContigNames();
        // NOTE(review): getBytes() uses the platform default charset; presumably contig sequences
        // are plain ASCII bases, in which case this is harmless — confirm before pinning a charset.
        final List<byte[]> seqs = contents.stream().map(contigInfo -> contigInfo._2.toString().getBytes()).collect(Collectors.toList());
        final List<List<BwaMemAlignment>> allAlignments = aligner.alignSeqs(seqs);
        for (int contigIdx = 0; contigIdx < seqs.size(); ++contigIdx) {
            final byte[] contigSeq = seqs.get(contigIdx);
            final int contigLen = contigSeq.length;
            // filter out secondary and unmapped alignments, convert to AlignmentInterval objects and sort by alignment start pos
            final List<AlignedAssembly.AlignmentInterval> alignmentIntervals = allAlignments.get(contigIdx).stream()
                    .filter(a -> (a.getSamFlag() & rejectedFlags) == 0)
                    .map(a -> new AlignedAssembly.AlignmentInterval(a, refNames, contigLen))
                    .sorted(Comparator.comparingInt(a -> a.startInAssembledContig))
                    .collect(Collectors.toList());
            // The contig index is recovered from the first space-delimited token of the contig ID.
            final String idToken = contents.get(contigIdx)._1.toString().split(" ")[0];
            final int parsedContigIdx = Integer.parseInt(idToken.replace("contig", "").replace("-", "").replace(">", ""));
            final String contigName = AlignedAssemblyOrExcuse.formatContigName(assemblyId, parsedContigIdx);
            alignedContigs.add(new AlignedContig(contigName, contigSeq, alignmentIntervals));
        }
    }
    return new AlignedAssembly(assemblyId, alignedContigs);
}
Aggregations