Search in sources :

Example 1 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSet method haplotypeList.

public List<Haplotype> haplotypeList() {
    if (haplotypeList == null) {
        final List<Haplotype> result = new ArrayList<>(haplotypeCigars.length);
        final String reference = this.reference;
        for (final String cigar : haplotypeCigars) {
            if (cigar.matches("^Civar:.*$")) {
                stringRepresentation = cigar.substring(6);
                result.addAll(expandAllHaplotypeCombinations(cigar.substring(6), reference));
            } else if (cigar.matches("^.*\\d+.*$")) {
                result.add(cigarToHaplotype(reference, cigar, 0, true));
            } else {
                final Haplotype h = new Haplotype(cigar.getBytes());
                h.setGenomeLocation(genomeLocParser.createGenomeLoc("chr1", 1, reference.length()));
                result.add(h);
            }
        }
        haplotypeList = result;
    }
    return haplotypeList;
}
Also used : ArrayList(java.util.ArrayList) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 2 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSet method cigarToHaplotype.

private Haplotype cigarToHaplotype(final String reference, final String cigar, final int offset, final boolean global) {
    final String sequence = applyCigar(reference, cigar, offset, global);
    final Haplotype haplotype = new Haplotype(sequence.getBytes(), reference.equals(sequence));
    haplotype.setGenomeLocation(genomeLocParser.createGenomeLoc("chr1", 1, reference.length()));
    haplotype.setCigar(Civar.fromCharSequence(cigar).toCigar(reference.length()));
    return haplotype;
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 3 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSet method assemblyResultSet.

public AssemblyResultSet assemblyResultSet() {
    if (assemblyResultSet == null) {
        final ReadThreadingGraph rtg = new ReadThreadingGraph(kmerSize);
        rtg.addSequence("anonymous", getReference().getBytes(), true);
        for (final String haplotype : haplotypesStrings()) {
            rtg.addSequence("anonymous", haplotype.getBytes(), false);
        }
        rtg.buildGraphIfNecessary();
        if (rtg.hasCycles())
            throw new RuntimeException("there is cycles in the reference with kmer size " + kmerSize + ". Don't use this size for the benchmark or change the reference");
        List<Haplotype> haplotypeList = haplotypeList();
        assemblyResultSet = new AssemblyResultSet();
        final AssemblyResult ar = new AssemblyResult((haplotypeList.size() > 1 ? AssemblyResult.Status.ASSEMBLED_SOME_VARIATION : AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE), rtg.toSequenceGraph(), rtg);
        for (final Haplotype h : haplotypeList) assemblyResultSet.add(h, ar);
    }
    return assemblyResultSet;
}
Also used : ReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 4 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSetUnitTest method testAssemblyRegionsDataSet.

@Test(dataProvider = "activeRegionTestDataSets")
@SuppressWarnings("fallthrough")
public void testAssemblyRegionsDataSet(final AssemblyRegionTestDataSet as, final int kmerSize, final int readLength, final String variation, final int readCount, final int regionSize, final byte bq, final byte iq, final byte dq) {
    Assert.assertNotNull(as);
    Assert.assertEquals(as.assemblyResultSet().getMaximumKmerSize(), kmerSize);
    final List<GATKRead> reads = as.readList();
    Assert.assertEquals(reads.size(), readCount);
    for (final GATKRead r : reads) {
        Assert.assertEquals(r.getLength(), readLength);
    }
    final List<Haplotype> haplotypes = as.haplotypeList();
    final List<Civar> haplotypeCivars = Civar.fromCharSequence(variation).optionalizeAll().unroll();
    Assert.assertEquals(haplotypes.size(), haplotypeCivars.size());
    Assert.assertTrue(haplotypeCivars.size() > 1);
    int variants = 0;
    for (int i = 0; i < variation.length(); i++) {
        final char c = variation.charAt(i);
        switch(c) {
            case 'W':
            case 'T':
            case 'C':
            case 'D':
            case 'I':
                variants++;
            default:
        }
    }
    Assert.assertEquals(haplotypes.size(), (int) Math.pow(2, variants));
    final Map<String, Integer> haplotypeNumberByString = new HashMap<>();
    for (int i = 0; i < haplotypes.size(); i++) {
        final Haplotype hap = haplotypes.get(i);
        final Civar civar = haplotypeCivars.get(i);
        Assert.assertEquals(hap.getBaseString(), civar.applyTo(as.getReference()));
        if (i == 0) {
            Assert.assertEquals(hap.getBaseString(), as.getReference());
        } else {
            Assert.assertNotEquals(hap.getBaseString(), as.getReference());
        }
        Assert.assertFalse(haplotypeNumberByString.containsKey(hap.getBaseString()));
        haplotypeNumberByString.put(hap.getBaseString(), i);
    }
    final int[] hapReadsNotInReference = new int[haplotypes.size()];
    for (int i = 0; i < readCount; i++) {
        final GATKRead r = as.readList().get(i);
        final int hapNumber = i % haplotypes.size();
        final int offset = i % (haplotypes.get(hapNumber).length() - readLength);
        Assert.assertEquals(getReadString(r), haplotypes.get(hapNumber).getBaseString().substring(offset, offset + readLength));
        if (!as.getReference().contains(getReadString(r))) {
            hapReadsNotInReference[hapNumber]++;
        }
    }
    Assert.assertEquals(hapReadsNotInReference[0], 0);
    for (int i = 1; i < hapReadsNotInReference.length; i++) {
        Assert.assertNotEquals(hapReadsNotInReference[i], 0);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 5 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class HaplotypeCallerGenotypingEngineUnitTest method makeConstructPhaseSetMappingData.

@DataProvider(name = "ConstructPhaseSetMappingProvider")
public Object[][] makeConstructPhaseSetMappingData() {
    List<Object[]> tests = new ArrayList<Object[]>();
    final Allele ref = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final Allele altT = Allele.create("T", false);
    final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
    final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
    final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
    final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
    final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
    final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
    pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
    pos1.getEventMap().put(1, vc1);
    final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
    pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
    pos2.getEventMap().put(2, vc2);
    final Haplotype pos3 = new Haplotype("AACAA".getBytes());
    pos3.setEventMap(new EventMap(Arrays.asList(vc3)));
    pos3.getEventMap().put(3, vc3);
    final Haplotype pos4 = new Haplotype("AAACA".getBytes());
    pos4.setEventMap(new EventMap(Arrays.asList(vc4)));
    pos4.getEventMap().put(4, vc4);
    final Haplotype pos24 = new Haplotype("ACACA".getBytes());
    pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4)));
    pos24.getEventMap().put(2, vc2);
    pos24.getEventMap().put(4, vc4);
    final Haplotype pos34 = new Haplotype("AACCA".getBytes());
    pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4)));
    pos34.getEventMap().put(3, vc3);
    pos34.getEventMap().put(4, vc4);
    final Haplotype pos234 = new Haplotype("ACCCA".getBytes());
    pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4)));
    pos234.getEventMap().put(2, vc2);
    pos234.getEventMap().put(3, vc3);
    pos234.getEventMap().put(4, vc4);
    final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
    // test no phased variants #1
    final Set<Haplotype> haplotypes2 = new HashSet<>();
    haplotypes2.add(pos2);
    haplotypeMap.put(vc2, haplotypes2);
    tests.add(new Object[] { Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0 });
    // test no phased variants #2
    final Set<Haplotype> haplotypes3 = new HashSet<>();
    haplotypes3.add(pos3);
    haplotypeMap.put(vc3, haplotypes3);
    tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
    // test opposite phase
    tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1 });
    // test no phased variants #3
    final Set<Haplotype> haplotypes4 = new HashSet<>();
    haplotypes4.add(pos4);
    haplotypeMap.put(vc4, haplotypes4);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
    // test mixture
    final Set<Haplotype> haplotypes24 = new HashSet<>();
    haplotypes24.add(pos24);
    haplotypeMap.put(vc2, haplotypes24);
    haplotypeMap.put(vc4, haplotypes24);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1 });
    // test 2 hets
    haplotypeMap.remove(vc3);
    tests.add(new Object[] { Arrays.asList(vc2, vc4), new HashMap<>(haplotypeMap), 1, 2, 1, 2, 0 });
    // test 2 with opposite phase
    final Set<Haplotype> haplotypes1 = new HashSet<>();
    haplotypes1.add(pos1);
    haplotypeMap.put(vc1, haplotypes1);
    tests.add(new Object[] { Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2 });
    // test homs around a het
    final Set<Haplotype> haplotypes2hom = new HashSet<>();
    haplotypes2hom.add(pos24);
    haplotypes2hom.add(pos234);
    final Set<Haplotype> haplotypes4hom = new HashSet<>();
    haplotypes4hom.add(pos24);
    haplotypes4hom.add(pos234);
    final Set<Haplotype> haplotypes3het = new HashSet<>();
    haplotypes3het.add(pos234);
    haplotypeMap.put(vc2, haplotypes2hom);
    haplotypeMap.put(vc3, haplotypes3het);
    haplotypeMap.put(vc4, haplotypes4hom);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
    // test hets around a hom
    final Set<Haplotype> haplotypes2het = new HashSet<>();
    haplotypes2het.add(pos234);
    final Set<Haplotype> haplotypes4het = new HashSet<>();
    haplotypes4het.add(pos234);
    final Set<Haplotype> haplotypes3hom = new HashSet<>();
    haplotypes3hom.add(pos3);
    haplotypes3hom.add(pos234);
    haplotypeMap.put(vc2, haplotypes2het);
    haplotypeMap.put(vc3, haplotypes3hom);
    haplotypeMap.put(vc4, haplotypes4het);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
    // test no phased variants around a hom
    final Set<Haplotype> haplotypes2incomplete = new HashSet<>();
    haplotypes2incomplete.add(pos24);
    final Set<Haplotype> haplotypes3incomplete = new HashSet<>();
    haplotypes3incomplete.add(pos34);
    final Set<Haplotype> haplotypes4complete = new HashSet<>();
    haplotypes4complete.add(pos24);
    haplotypes4complete.add(pos34);
    haplotypes4complete.add(pos234);
    haplotypeMap.put(vc2, haplotypes2incomplete);
    haplotypeMap.put(vc3, haplotypes3incomplete);
    haplotypeMap.put(vc4, haplotypes4complete);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 0, 0, 0, 0, 0 });
    return tests.toArray(new Object[][] {});
}
Also used : EventMap(org.broadinstitute.hellbender.utils.haplotype.EventMap) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)88 Test (org.testng.annotations.Test)31 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 DataProvider (org.testng.annotations.DataProvider)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)8 ArrayList (java.util.ArrayList)7 Cigar (htsjdk.samtools.Cigar)6 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)6 HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel)6 IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel)6 PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel)6 ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph)6 SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList)6 EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap)6 CigarElement (htsjdk.samtools.CigarElement)4 Allele (htsjdk.variant.variantcontext.Allele)4 File (java.io.File)4