Search in sources :

Example 1 with FermiLiteAssembly

Use of org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly in project gatk by broadinstitute.

The method testConvertAlignedAssemblyOrExcuseToAlignedContigsDirectAndConcordanceWithSAMRoute of the class AlignedContigGeneratorUnitTest.

/**
 * Exercises the in-memory alignment parser on a hand-built assembly of four contigs
 * (unmapped, ambiguously mapped, cleanly mapped, mapped with gaps) and checks that:
 * <ul>
 *   <li>a failed assembly (an "excuse") yields no aligned contigs;</li>
 *   <li>unfiltered extraction returns all four contigs, the first two without alignment intervals;</li>
 *   <li>direct conversion keeps exactly the two contigs that have alignment intervals;</li>
 *   <li>direct conversion and conversion via the SAM route produce equal results.</li>
 * </ul>
 */
@Test(groups = "sv")
public void testConvertAlignedAssemblyOrExcuseToAlignedContigsDirectAndConcordanceWithSAMRoute() {

    // ---- a "failed" assembly must not produce anything ----
    final AlignedAssemblyOrExcuse failedAssembly = new AlignedAssemblyOrExcuse(1, "justATest");
    final List<AlignedContig> contigsFromFailedAssembly =
            StructuralVariationDiscoveryPipelineSpark.InMemoryAlignmentParser.filterAndConvertToAlignedContigDirect(
                    Collections.singletonList(failedAssembly), refNames, null);
    Assert.assertTrue(contigsFromFailedAssembly.isEmpty());

    // ---- fixture: four contigs sharing the same dummy bases, quals and (empty) connections ----
    final byte[] bases = SVDiscoveryTestDataProvider.makeDummySequence(1000, (byte) 'T');
    final byte[] quals = SVDiscoveryTestDataProvider.makeDummySequence(1000, (byte) 'A');
    final List<FermiLiteAssembly.Connection> noConnections = Collections.emptyList();

    // the supporting-read count of 100 is an arbitrary choice
    final FermiLiteAssembly.Contig contigUnmapped = new FermiLiteAssembly.Contig(bases, quals, 100);
    final FermiLiteAssembly.Contig contigAmbiguous = new FermiLiteAssembly.Contig(bases, quals, 100);
    final FermiLiteAssembly.Contig contigClean = new FermiLiteAssembly.Contig(bases, quals, 100);
    final FermiLiteAssembly.Contig contigGapped = new FermiLiteAssembly.Contig(bases, quals, 100);
    contigUnmapped.setConnections(noConnections);
    contigAmbiguous.setConnections(noConnections);
    contigClean.setConnections(noConnections);
    contigGapped.setConnections(noConnections);

    // ---- fixture: alignments, one list per contig, in the same order as the contigs ----
    final BwaMemAlignment alnUnmapped =
            new BwaMemAlignment(4, -1, -1, -1, -1, -1, -1, -1, 0, 0, "", "", "", -1, -1, 0);
    // the two ambiguous records are technically not correct, but that does not matter for this case
    final BwaMemAlignment alnAmbiguousFirst =
            new BwaMemAlignment(256, dummyRefId, 1000000, 1001000, 0, 1000, 0, 20, 100, 100, "800M50I100M50D50M", "", "", -1, -1, 0);
    final BwaMemAlignment alnAmbiguousSecond =
            new BwaMemAlignment(272, dummyRefId, 2000000, 2001000, 0, 1000, 0, 50, 100, 100, "700M50I200M50D50M", "", "", -1, -1, 0);
    final BwaMemAlignment alnClean =
            new BwaMemAlignment(0, dummyRefId, 1000000, 1001000, 0, 1000, 60, 0, 100, 0, "1000M", "", "", -1, -1, 0);
    final BwaMemAlignment alnGapped =
            new BwaMemAlignment(0, dummyRefId, 1000000, 1001000, 0, 1000, 60, 0, 100, 0, "700M50I200M50D50M", "", "", -1, -1, 0);

    final List<FermiLiteAssembly.Contig> contigs =
            Arrays.asList(contigUnmapped, contigAmbiguous, contigClean, contigGapped);
    final List<List<BwaMemAlignment>> alignmentsPerContig = Arrays.asList(
            Collections.singletonList(alnUnmapped),
            Arrays.asList(alnAmbiguousFirst, alnAmbiguousSecond),
            Collections.singletonList(alnClean),
            Collections.singletonList(alnGapped));
    final AlignedAssemblyOrExcuse alignedAssembly =
            new AlignedAssemblyOrExcuse(1, new FermiLiteAssembly(contigs), alignmentsPerContig);

    // ---- unfiltered extraction: every contig comes back, including unmapped / ambiguous ones ----
    final Iterable<AlignedContig> allContigs =
            StructuralVariationDiscoveryPipelineSpark.InMemoryAlignmentParser.getAlignedContigsInOneAssembly(
                    alignedAssembly, refNames, null);
    Assert.assertEquals(Iterables.size(allContigs), 4);

    final Iterator<AlignedContig> contigIterator = allContigs.iterator();
    // contigs 1 and 2 (unmapped, ambiguous) are expected to carry no alignment intervals
    Assert.assertTrue(contigIterator.next().alignmentIntervals.isEmpty());
    Assert.assertTrue(contigIterator.next().alignmentIntervals.isEmpty());

    // contig 3 (clean) is expected to carry exactly one interval spanning the whole contig
    final List<AlignedAssembly.AlignmentInterval> cleanIntervals = contigIterator.next().alignmentIntervals;
    Assert.assertEquals(cleanIntervals.size(), 1);
    final AlignedAssembly.AlignmentInterval expectedCleanInterval = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval(dummyRefName, 1000001, 1001000), 1, 1000, TextCigarCodec.decode("1000M"), true, 60, 0);
    Assert.assertEquals(cleanIntervals.get(0), expectedCleanInterval);

    // contig 4 (gapped) is expected to yield three intervals
    final List<AlignedAssembly.AlignmentInterval> gappedIntervals = contigIterator.next().alignmentIntervals;
    Assert.assertEquals(gappedIntervals.size(), 3);

    // ---- direct conversion (essentially the filtering step) ----
    final List<AlignedContig> viaDirectRoute =
            StructuralVariationDiscoveryPipelineSpark.InMemoryAlignmentParser.filterAndConvertToAlignedContigDirect(
                    Collections.singleton(alignedAssembly), refNames, null);
    Assert.assertEquals(viaDirectRoute.size(), 2);
    final List<AlignedContig> contigsWithIntervals = Utils.stream(allContigs)
            .filter(contig -> !contig.alignmentIntervals.isEmpty())
            .collect(Collectors.toList());
    Assert.assertTrue(viaDirectRoute.containsAll(contigsWithIntervals));

    // ---- concordance with the results obtained via the SAM route ----
    final List<AlignedContig> viaSAMRoute =
            StructuralVariationDiscoveryPipelineSpark.InMemoryAlignmentParser.filterAndConvertToAlignedContigViaSAM(
                    Collections.singletonList(alignedAssembly), hg19Header, SparkContextFactory.getTestSparkContext(), null)
                    .collect();
    Assert.assertEquals(viaDirectRoute, viaSAMRoute);
}
Also used : TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) Iterables(com.google.common.collect.Iterables) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FermiLiteAssembly(org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) CigarUtils(org.broadinstitute.hellbender.utils.read.CigarUtils) Path(org.apache.hadoop.fs.Path) Tuple2(scala.Tuple2) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) Stream(java.util.stream.Stream) SparkUtils(org.broadinstitute.hellbender.utils.spark.SparkUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) MiniClusterUtils(org.broadinstitute.hellbender.utils.test.MiniClusterUtils) FermiLiteAssembly(org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly) BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Iterables (com.google.common.collect.Iterables)1 Cigar (htsjdk.samtools.Cigar)1 SAMRecord (htsjdk.samtools.SAMRecord)1 TextCigarCodec (htsjdk.samtools.TextCigarCodec)1 java.util (java.util)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)1 SparkContextFactory (org.broadinstitute.hellbender.engine.spark.SparkContextFactory)1 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)1 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)1 Utils (org.broadinstitute.hellbender.utils.Utils)1 BwaMemAlignment (org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment)1 FermiLiteAssembly (org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly)1 ArtificialReadUtils (org.broadinstitute.hellbender.utils.read.ArtificialReadUtils)1 CigarUtils (org.broadinstitute.hellbender.utils.read.CigarUtils)1 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)1