Search in sources :

Example 6 with BwaMemAlignment

use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.

the class ContigAligner method alignContigs.

/**
     * Takes a collection of assembled contigs and aligns them to the reference with bwa-mem. Non-canonical
     * (secondary) alignments are filtered out, preserving the primary and supplementary alignments.
     * Within the output list, alignments are sorted first by contig (based on the order in which
     * the contigs were passed in, and then by their start position on the contig).
     * @param assemblyId An identifier for the assembly or set of contigs
     * @param contigsCollection The set of all canonical (primary or supplementary) alignments for the contigs.*/
AlignedAssembly alignContigs(final int assemblyId, final ContigsCollection contigsCollection) {
    final List<AlignedContig> alignedContigs = new ArrayList<>(contigsCollection.getContents().size());
    final BwaMemIndex index = BwaMemIndexSingleton.getInstance(indexImageFile);
    try (final BwaMemAligner aligner = new BwaMemAligner(index)) {
        final List<String> refNames = index.getReferenceContigNames();
        final List<Tuple2<ContigsCollection.ContigID, ContigsCollection.ContigSequence>> contents = contigsCollection.getContents();
        final List<byte[]> seqs = contents.stream().map(contigInfo -> contigInfo._2.toString().getBytes()).collect(Collectors.toList());
        final List<List<BwaMemAlignment>> allAlignments = aligner.alignSeqs(seqs);
        for (int contigIdx = 0; contigIdx < seqs.size(); ++contigIdx) {
            final int contigLen = seqs.get(contigIdx).length;
            // filter out secondary alignments, convert to AlignmentInterval objects and sort by alignment start pos
            final List<AlignedAssembly.AlignmentInterval> alignmentIntervals = allAlignments.get(contigIdx).stream().filter(a -> (a.getSamFlag() & SAMFlag.NOT_PRIMARY_ALIGNMENT.intValue()) == 0).filter(a -> (a.getSamFlag() & SAMFlag.READ_UNMAPPED.intValue()) == 0).map(a -> new AlignedAssembly.AlignmentInterval(a, refNames, contigLen)).sorted(Comparator.comparing(a -> a.startInAssembledContig)).collect(Collectors.toList());
            final String contigName = AlignedAssemblyOrExcuse.formatContigName(assemblyId, Integer.valueOf(contents.get(contigIdx)._1.toString().split(" ")[0].replace("contig", "").replace("-", "").replace(">", "")));
            alignedContigs.add(new AlignedContig(contigName, seqs.get(contigIdx), alignmentIntervals));
        }
    }
    return new AlignedAssembly(assemblyId, alignedContigs);
}
Also used : BwaMemAligner(org.broadinstitute.hellbender.utils.bwa.BwaMemAligner) BwaMemIndexSingleton(org.broadinstitute.hellbender.utils.bwa.BwaMemIndexSingleton) java.util(java.util) SAMFlag(htsjdk.samtools.SAMFlag) BwaMemIndex(org.broadinstitute.hellbender.utils.bwa.BwaMemIndex) Tuple2(scala.Tuple2) BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) Collectors(java.util.stream.Collectors) Tuple2(scala.Tuple2) BwaMemIndex(org.broadinstitute.hellbender.utils.bwa.BwaMemIndex) BwaMemAligner(org.broadinstitute.hellbender.utils.bwa.BwaMemAligner)

Example 7 with BwaMemAlignment

use of org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment in project gatk by broadinstitute.

the class BwaMemTestUtils method assertCorrectChimericContigAlignment.

public static void assertCorrectChimericContigAlignment(final BwaMemIndex index) {
    try (final BwaMemAligner aligner = new BwaMemAligner(index)) {
        // synthetic contig generated by extracting 300 bases from two different positions on chr20 in the reference
        final byte[] seq = "GTGGGAGAGAACTGGAACAAGAACCCAGTGCTCTTTCTGCTCTACCCACTGACCCATCCTCTCACGCATCATACACCCATACTCCCATCCACCCACCTTCCCATTCATGCATTCACCCATTCACCCACCTTCCATCCATCTACCATCCACCACGTACCTACACTCCCATCTACCATCCAACCACATTTCCATTCACCCATCCTCCCATCCATCAACCCTCCAATCCACCACCCACAGACCTTCCCATCCATTCATTTACCCATCCACATATTCACCCACCCTCCCATCCATCCATCTACTTGAAGCAGAATGAGGGAGGAAACCCAAGCCAGCTCGGGCTTGATCATGGCAGGCTTTGTCAGTCACTGTGAGTAACCAGACTTTATTTCAAGTGAGTTGGCCAACAGTGTCTCCCCAGTAGTAGAGTGATTGCTCGTCTGCCAGAAAAGGGGCACAGAGCTCTGGACATCAATACTTGCCGATCTCTCCTTCATCAGCCACCCAACCCTGGCAACAGTTTTCAATTACACCTGTGAAAACTTGTGGGTTAAACAGCTGAGATCCATGCCTCAGCTTCTATAGGTGATAAGCCTGTCCTTC".getBytes();
        final List<List<BwaMemAlignment>> allAlignments = aligner.alignSeqs(Collections.singletonList(seq));
        Assert.assertEquals(allAlignments.size(), 1);
        final List<BwaMemAlignment> alignments = allAlignments.get(0);
        Assert.assertEquals(alignments.size(), 2);
        final BwaMemAlignment alignment1;
        final BwaMemAlignment alignment2;
        if (alignments.get(0).getRefStart() < alignments.get(1).getRefStart()) {
            alignment1 = alignments.get(0);
            alignment2 = alignments.get(1);
        } else {
            alignment1 = alignments.get(1);
            alignment2 = alignments.get(0);
        }
        Assert.assertEquals(index.getReferenceContigNames().get(alignment1.getRefId()), "20");
        Assert.assertEquals(alignment1.getCigar(), "300M300S");
        Assert.assertEquals(alignment1.getMapQual(), 60);
        Assert.assertEquals(alignment1.getRefStart(), 999999);
        Assert.assertEquals(index.getReferenceContigNames().get(alignment2.getRefId()), "20");
        Assert.assertEquals(alignment2.getCigar(), "300S300M");
        Assert.assertEquals(alignment2.getMapQual(), 60);
        Assert.assertEquals(alignment2.getRefStart(), 1000999);
    }
}
Also used : BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) List(java.util.List) BwaMemAligner(org.broadinstitute.hellbender.utils.bwa.BwaMemAligner)

Aggregations

BwaMemAlignment (org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment)7 BwaMemAligner (org.broadinstitute.hellbender.utils.bwa.BwaMemAligner)3 Cigar (htsjdk.samtools.Cigar)2 SAMRecord (htsjdk.samtools.SAMRecord)2 java.util (java.util)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)2 DataProvider (org.testng.annotations.DataProvider)2 Tuple2 (scala.Tuple2)2 Iterables (com.google.common.collect.Iterables)1 SAMFlag (htsjdk.samtools.SAMFlag)1 TextCigarCodec (htsjdk.samtools.TextCigarCodec)1 Stream (java.util.stream.Stream)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)1 SparkContextFactory (org.broadinstitute.hellbender.engine.spark.SparkContextFactory)1 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)1