Search in sources :

Example 71 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class GetBayesianHetCoverageIntegrationTest method testNormalJob.

/**
 * Runs the command line twice against the same normal BAM, both times writing to
 * the same temporary output file, and after each run compares a hand-built
 * {@code Pulldown} with the one read back from that file. The two runs differ
 * only in the het-calling stringency argument (15.0, then 2.0).
 */
@Test
public void testNormalJob() {
    final File outputFile = createTempFile("normal-test", ".tsv");
    final String minReadDepth = Integer.toString(10);

    /* test 1: tight calling stringency */
    final String[] tightArgs = {
        "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, outputFile.getAbsolutePath(),
        "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, minReadDepth,
        "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(15.0)
    };
    runCommandLine(tightArgs);

    // only a single site is expected at this stringency
    final Pulldown tightExpected = new Pulldown(normalHeader);
    tightExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.99));
    final Pulldown tightResult = new Pulldown(outputFile, normalHeader);
    Assert.assertEquals(tightExpected, tightResult);

    /* test 2: loose calling stringency */
    final String[] looseArgs = {
        "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(),
        "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, outputFile.getAbsolutePath(),
        "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, minReadDepth,
        "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(2.0)
    };
    runCommandLine(looseArgs);

    // five sites are expected here, including the one from the tight run
    final Pulldown looseExpected = new Pulldown(normalHeader);
    looseExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A, 11, 18.60));
    looseExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 29.29));
    looseExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.99));
    looseExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.60));
    looseExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.99));
    final Pulldown looseResult = new Pulldown(outputFile, normalHeader);
    Assert.assertEquals(looseExpected, looseResult);
}
Also used : Pulldown(org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 72 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class ReferenceConfidenceVariantContextMergerUnitTest method makeReferenceConfidenceMergeData.

/**
 * Builds the rows for the "referenceConfidenceMergeData" data provider.
 *
 * Each row is an {@code Object[]} of six elements, in this order: a test label
 * ("test00" .. "test11"), the list of input {@code VariantContext}s, the locus
 * ({@code loc}, a single base at position 10 on contig "20"), two boolean flags,
 * and the expected {@code VariantContext} (or {@code null}, as in "test08").
 *
 * NOTE(review): going by the inline comments below, the first flag appears to select
 * whether ref-only contexts are wanted (true only in "test09") and the second whether
 * sample names are uniquified (true only in "test10"/"test11") -- confirm against the
 * consuming test method, which is not part of this block.
 *
 * The alleles {@code Aref}, {@code C}, {@code G}, {@code ATC} and {@code del} are not
 * declared in this method; they come from the enclosing class.
 */
@DataProvider(name = "referenceConfidenceMergeData")
public Object[][] makeReferenceConfidenceMergeData() {
    final List<Object[]> tests = new ArrayList<>();
    final int start = 10;
    final SimpleInterval loc = new SimpleInterval("20", start, start);
    // Template contexts: VCbase and VCbase2 sit at 'start' and differ only in their first
    // constructor argument ("test" vs "test2"); VCprevBase sits one base earlier.
    final VariantContext VCbase = new VariantContextBuilder("test", "20", start, start, Arrays.asList(Aref)).make();
    final VariantContext VCbase2 = new VariantContextBuilder("test2", "20", start, start, Arrays.asList(Aref)).make();
    final VariantContext VCprevBase = new VariantContextBuilder("test", "20", start - 1, start - 1, Arrays.asList(Aref)).make();
    // PL vectors shared by several genotypes; reorderedSecondAllelePLs holds the same six
    // values as standardPLs in a different order.
    final int[] standardPLs = new int[] { 30, 20, 10, 71, 72, 73 };
    final int[] reorderedSecondAllelePLs = new int[] { 30, 71, 73, 20, 72, 10 };
    // every genotype built below is given this pair of no-call alleles
    final List<Allele> noCalls = new ArrayList<>(2);
    noCalls.add(Allele.NO_CALL);
    noCalls.add(Allele.NO_CALL);
    // Naming convention used from here on: a List<Allele> is named after its alleles
    // (with _ALT standing for NON_REF_SYMBOLIC_ALLELE), gXXX is a genotype for sample "XXX",
    // and vcXXX is a VariantContext carrying that allele list and genotype.
    final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[] { 0, 100, 1000 }).alleles(noCalls).make();
    final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make();
    // two-base reference allele, used with VCprevBase (the position before 'start')
    final Allele AAref = Allele.create("AA", true);
    final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[] { 0, 80, 800 }).alleles(noCalls).make();
    final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make();
    final List<Allele> A_C = Arrays.asList(Aref, C);
    final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[] { 30, 20, 10 }).alleles(noCalls).make();
    final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make();
    // note: vcA_C is built from VCbase2 and pairs the three-allele list A_C_ALT with the
    // three-PL genotype gA_C; it is only used in "test10"
    final VariantContext vcA_C = new VariantContextBuilder(VCbase2).alleles(A_C_ALT).genotypes(gA_C).make();
    final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make();
    final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make();
    final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make();
    final List<Allele> A_C_G = Arrays.asList(Aref, C, G);
    final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[] { 40, 20, 30, 20, 10, 30 }).alleles(noCalls).make();
    final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[] { 40, 20, 30, 20, 10, 30, 71, 72, 73, 74 }).alleles(noCalls).make();
    // note: like vcA_C, vcA_C_G is built from VCbase2 and pairs the longer allele list
    // (A_C_G_ALT) with the shorter-PL genotype (gA_C_G); it is only used in "test11"
    final VariantContext vcA_C_G = new VariantContextBuilder(VCbase2).alleles(A_C_G_ALT).genotypes(gA_C_G).make();
    final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make();
    final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make();
    final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make();
    // non-reference "A", paired with the "AA" reference on VCprevBase (the "spanning
    // deletion" input of test06)
    final Allele A = Allele.create("A", false);
    final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make();
    final VariantContext vcAA_A_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_A_ALT).genotypes(gAA_A_ALT).make();
    // expected allele list for test06
    final List<Allele> A_C_del = Arrays.asList(Aref, C, del);
    // first test the case of a single record
    tests.add(new Object[] { "test00", Arrays.asList(vcA_C_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C).make() });
    // now, test pairs:
    // a SNP with another SNP
    tests.add(new Object[] { "test01", Arrays.asList(vcA_C_ALT, vcA_G_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, new GenotypeBuilder("A_G").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make() });
    // a SNP with an indel
    tests.add(new Object[] { "test02", Arrays.asList(vcA_C_ALT, vcA_ATC_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, ATC)).genotypes(gA_C_ALT, new GenotypeBuilder("A_ATC").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make() });
    // a SNP with 2 SNPs
    tests.add(new Object[] { "test03", Arrays.asList(vcA_C_ALT, vcA_C_G_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, gA_C_G).make() });
    // a SNP with a ref record
    tests.add(new Object[] { "test04", Arrays.asList(vcA_C_ALT, vcA_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gA_ALT).make() });
    // spanning records:
    // a SNP with a spanning ref record
    tests.add(new Object[] { "test05", Arrays.asList(vcA_C_ALT, vcAA_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gAA_ALT).make() });
    // a SNP with a spanning deletion
    tests.add(new Object[] { "test06", Arrays.asList(vcA_C_ALT, vcAA_A_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(A_C_del).genotypes(new GenotypeBuilder("A_C").PL(new int[] { 30, 20, 10, 71, 72, 73 }).alleles(noCalls).make(), new GenotypeBuilder("AA_A").PL(new int[] { 30, 71, 73, 20, 72, 10 }).alleles(noCalls).make()).make() });
    // combination of all
    // (seven inputs; the expected context lists five alleles and each expected genotype carries 15 PLs)
    tests.add(new Object[] { "test07", Arrays.asList(vcA_C_ALT, vcA_G_ALT, vcA_ATC_ALT, vcA_C_G_ALT, vcA_ALT, vcAA_ALT, vcAA_A_ALT), loc, false, false, new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, G, ATC, del)).genotypes(new GenotypeBuilder("A_C").PL(new int[] { 30, 20, 10, 71, 72, 73, 71, 72, 73, 73, 71, 72, 73, 73, 73 }).alleles(noCalls).make(), new GenotypeBuilder("A_G").PL(new int[] { 30, 71, 73, 20, 72, 10, 71, 73, 72, 73, 71, 73, 72, 73, 73 }).alleles(noCalls).make(), new GenotypeBuilder("A_ATC").PL(new int[] { 30, 71, 73, 71, 73, 73, 20, 72, 72, 10, 71, 73, 73, 72, 73 }).alleles(noCalls).make(), new GenotypeBuilder("A_C_G").PL(new int[] { 40, 20, 30, 20, 10, 30, 71, 72, 73, 74, 71, 72, 73, 74, 74 }).alleles(noCalls).make(), new GenotypeBuilder("A").PL(new int[] { 0, 100, 1000, 100, 1000, 1000, 100, 1000, 1000, 1000, 100, 1000, 1000, 1000, 1000 }).alleles(noCalls).make(), new GenotypeBuilder("AA").PL(new int[] { 0, 80, 800, 80, 800, 800, 80, 800, 800, 800, 80, 800, 800, 800, 800 }).alleles(noCalls).make(), new GenotypeBuilder("AA_A").PL(new int[] { 30, 71, 73, 71, 73, 73, 71, 73, 73, 73, 20, 72, 72, 72, 10 }).alleles(noCalls).make()).make() });
    // just spanning ref contexts, trying both instances where we want/do not want ref-only contexts
    // (test08 passes false for the first flag and expects null; test09 passes true and expects a ref-only context)
    tests.add(new Object[] { "test08", Arrays.asList(vcAA_ALT), loc, false, false, null });
    tests.add(new Object[] { "test09", Arrays.asList(vcAA_ALT), loc, true, false, new VariantContextBuilder(VCbase).alleles(Arrays.asList(Allele.create("A", true))).genotypes(new GenotypeBuilder("AA").PL(new int[] { 0 }).alleles(noCalls).make()).make() });
    // test uniquification of sample names
    // (both inputs carry the same sample name; the expected genotypes are named "<sample>.test2" and "<sample>.test")
    tests.add(new Object[] { "test10", Arrays.asList(vcA_C, vcA_C_ALT), loc, false, true, new VariantContextBuilder(VCbase).alleles(A_C).genotypes(new GenotypeBuilder("A_C.test2").PL(new int[] { 30, 20, 10 }).alleles(noCalls).make(), new GenotypeBuilder("A_C.test").PL(new int[] { 30, 20, 10 }).alleles(noCalls).make()).make() });
    tests.add(new Object[] { "test11", Arrays.asList(vcA_C_G, vcA_C_G_ALT), loc, false, true, new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(new GenotypeBuilder("A_C_G.test2").PL(new int[] { 40, 20, 30, 20, 10, 30 }).alleles(noCalls).make(), new GenotypeBuilder("A_C_G.test").PL(new int[] { 40, 20, 30, 20, 10, 30 }).alleles(noCalls).make()).make() });
    return tests.toArray(new Object[][] {});
}
Also used : ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) DataProvider(org.testng.annotations.DataProvider)

Example 73 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class ReadThreadingAssemblerUnitTest method makeAssembleIntervalsData.

/**
 * Supplies the rows for the "AssembleIntervalsData" data provider.
 *
 * Walks contig "1" from 100000 up to (but not including) 200001 in steps of 200 and,
 * for each start, emits a row of { a fresh {@code ReadThreadingAssembler}, the interval
 * [start, start + 100], 5 }.
 */
@DataProvider(name = "AssembleIntervalsData")
public Object[][] makeAssembleIntervalsData() {
    final String contig = "1";
    final int firstStart = 100000;
    final int stopBefore = 200001;
    final int windowSize = 100;
    final int stepSize = 200;
    final int nReadsToUse = 5;

    final List<Object[]> rows = new ArrayList<>();
    int windowStart = firstStart;
    while (windowStart < stopBefore) {
        final SimpleInterval window = new SimpleInterval(contig, windowStart, windowStart + windowSize);
        // every row gets its own assembler instance
        rows.add(new Object[] { new ReadThreadingAssembler(), window, nReadsToUse });
        windowStart += stepSize;
    }
    return rows.toArray(new Object[rows.size()][]);
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) DataProvider(org.testng.annotations.DataProvider)

Example 74 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class PileupUnitTest method testCreateVerboseOutput.

/**
 * Checks the string produced by {@code Pileup.createVerboseOutput} for a pileup
 * holding two artificial reads (one of them placed on a deletion), for each read
 * on its own, and for an empty pileup.
 */
@Test
public void testCreateVerboseOutput() throws Exception {
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader();
    final SimpleInterval locus = new SimpleInterval("1:2");

    // first read: its pileup element is built directly on cigar element 1 of "1M1D9M" (the deletion)
    final String deletionReadVerbose = "read1@1@10@20";
    final GATKRead deletionRead = ArtificialReadUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    deletionRead.setMappingQuality(20);
    deletionRead.setCigar("1M1D9M");
    final PileupElement deletionElement = new PileupElement(deletionRead, 1, deletionRead.getCigar().getCigarElement(1), 1, 0);

    // second read: a plain 50M read with no deletion
    final String plainReadVerbose = "read2@1@50@10";
    final GATKRead plainRead = ArtificialReadUtils.createArtificialRead(samHeader, "read2", 0, 2, 50);
    plainRead.setMappingQuality(10);
    plainRead.setCigar("50M");
    final PileupElement plainElement = PileupElement.createPileupForReadAndOffset(plainRead, 1);

    final ReadPileup bothReads = new ReadPileup(locus, Arrays.asList(deletionElement, plainElement));

    // each read alone: the leading number is 1 for the deletion read and 0 for the plain one
    Assert.assertEquals(
            Pileup.createVerboseOutput(bothReads.makeFilteredPileup(element -> element.getRead().getName().equals("read1"))),
            "1 " + deletionReadVerbose);
    Assert.assertEquals(
            Pileup.createVerboseOutput(bothReads.makeFilteredPileup(element -> element.getRead().getName().equals("read2"))),
            "0 " + plainReadVerbose);

    // both reads together, comma separated
    Assert.assertEquals(
            Pileup.createVerboseOutput(bothReads),
            "1 " + deletionReadVerbose + "," + plainReadVerbose);

    // a pileup with no elements
    Assert.assertEquals(Pileup.createVerboseOutput(new ReadPileup(locus)), "0 ");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) PileupElement(org.broadinstitute.hellbender.utils.pileup.PileupElement) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 75 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class IntervalAlignmentContextIteratorUnitTest method getAlignmentContexts.

/**
 * Reads the BAM at {@code bamPath} and returns, as a list, every alignment context
 * produced by an {@code IntervalAlignmentContextIterator} over the requested loci.
 *
 * Reads are filtered with {@code WellformedReadFilter} combined with
 * {@code MappedReadFilter} before being fed to the {@code LocusIteratorByState},
 * which is created with {@code NO_DOWNSAMPLING} and the single sample name "NA12878".
 *
 * @param locusIntervals intervals to traverse; when {@code null}, the intervals returned by
 *                       {@code IntervalUtils.getAllIntervalsForReference} for the BAM's
 *                       sequence dictionary are used instead
 * @param bamPath        location of the reads file, resolved through {@code IOUtils.getPath}
 * @return the alignment contexts in iteration order, fully materialized
 */
private List<AlignmentContext> getAlignmentContexts(final List<SimpleInterval> locusIntervals, final String bamPath) {
    final List<String> sampleNames = Collections.singletonList("NA12878");
    // The data source holds open readers on the BAM. Previously it was never closed; scoping it
    // with try-with-resources releases it once the contexts have been collected. This is safe
    // because collect() drains the iterator completely before the try block exits.
    try (final ReadsDataSource gatkReads = new ReadsDataSource(IOUtils.getPath(bamPath))) {
        final SAMFileHeader header = gatkReads.getHeader();
        final Stream<GATKRead> filteredReads = Utils.stream(gatkReads).filter(new WellformedReadFilter(header).and(new ReadFilterLibrary.MappedReadFilter()));
        final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
        final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);
        // fall back to every interval in the dictionary when the caller gave none
        final List<SimpleInterval> relevantIntervals = (locusIntervals != null)
                ? locusIntervals
                : IntervalUtils.getAllIntervalsForReference(dictionary);
        final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(relevantIntervals.iterator());
        final IntervalAlignmentContextIterator intervalAlignmentContextIterator = new IntervalAlignmentContextIterator(locusIteratorByState, intervalLocusIterator, dictionary);
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(intervalAlignmentContextIterator, Spliterator.ORDERED), false).collect(Collectors.toList());
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 545, Test (org.testng.annotations.Test): 287, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 202, File (java.io.File): 102, ArrayList (java.util.ArrayList): 66, DataProvider (org.testng.annotations.DataProvider): 64, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 60, Collectors (java.util.stream.Collectors): 53, java.util (java.util): 41, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 40, AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount): 40, UserException (org.broadinstitute.hellbender.exceptions.UserException): 39, VariantContext (htsjdk.variant.variantcontext.VariantContext): 36, IntStream (java.util.stream.IntStream): 34, Target (org.broadinstitute.hellbender.tools.exome.Target): 34, IOException (java.io.IOException): 32, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 28, Assert (org.testng.Assert): 27, Locatable (htsjdk.samtools.util.Locatable): 26, List (java.util.List): 26