Use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph in project gatk-protected by broadinstitute.
Class AssemblyResultSetUnitTest, method trimmingData.
/**
 * Supplies the single test case served by the "trimmingData" data provider.
 *
 * <p>The case pairs a map, in which every haplotype of the generated data set points at one
 * shared {@link AssemblyResult}, with the assembly region whose extended span was assigned to
 * those haplotypes as their genome location.
 *
 * @return an iterator over exactly one {@code Object[]} holding the haplotype-to-result map
 *         followed by the assembly region.
 */
@DataProvider(name = "trimmingData")
public Iterator<Object[]> trimmingData() {
    final SimpleInterval regionInterval = new SimpleInterval("1", 1000, 1100);
    final AssemblyRegion region = new AssemblyRegion(regionInterval, 25, header);
    final int extendedLength = region.getExtendedSpan().size();

    // Keep the data reproducible by fixing the random seed (lucky 13).
    final RandomDNA randomDna = new RandomDNA(13);
    final String randomBases = new String(randomDna.nextBases(extendedLength));
    final AssemblyRegionTestDataSet dataSet = new AssemblyRegionTestDataSet(
            10,
            randomBases,
            new String[] { "Civar:*1T*" },
            new String[0],
            new byte[0],
            new byte[0],
            new byte[0]);

    // Every haplotype is placed on the extended span of the region.
    final List<Haplotype> haplotypeList = dataSet.haplotypeList();
    for (final Haplotype haplotype : haplotypeList) {
        haplotype.setGenomeLocation(region.getExtendedSpan());
    }

    // Thread all haplotypes into one graph and derive the sequence graph from it.
    final ReadThreadingGraph threadingGraph = new ReadThreadingGraph(10);
    for (final Haplotype haplotype : haplotypeList) {
        final String sequenceName = "seq-" + Math.abs(haplotype.hashCode());
        threadingGraph.addSequence(sequenceName, haplotype.getBases(), haplotype.isReference());
    }
    final SeqGraph sequenceGraph = threadingGraph.toSequenceGraph();

    // All haplotypes share the same assembly result instance.
    final AssemblyResult sharedResult = new AssemblyResult(
            AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, sequenceGraph, threadingGraph);
    final Map<Haplotype, AssemblyResult> resultByHaplotype = new HashMap<>();
    for (final Haplotype haplotype : haplotypeList) {
        resultByHaplotype.put(haplotype, sharedResult);
    }

    final Object[] testCase = { resultByHaplotype, region };
    return Collections.singleton(testCase).iterator();
}
Use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph in project gatk by broadinstitute.
Class ReadThreadingGraphUnitTest, method testDanglingHeads.
/**
 * Checks dangling-head recovery on a graph built from one reference sequence and one read.
 *
 * <p>The test first verifies that the graph has exactly one non-reference source vertex, then
 * compares the cigar produced for that vertex against the expected one, merges the dangling
 * head, and finally counts the haplotype paths found in the resulting sequence graph.
 *
 * @param ref            sequence added to the graph as the reference.
 * @param alt            bases of the single artificial read added to the graph.
 * @param cigar          expected string form of the cigar in the merge helper.
 * @param shouldBeMerged whether the dangling head is expected to merge; two paths are expected
 *                       when {@code true}, one otherwise.
 */
@Test(dataProvider = "DanglingHeads")
public void testDanglingHeads(final String ref, final String alt, final String cigar, final boolean shouldBeMerged) {
    final int kmerSize = 5;

    // Build the graph from the reference plus one artificial read carrying the alt bases.
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
    rtgraph.addSequence("ref", ref.getBytes(), true);
    final byte[] altBases = alt.getBytes();
    final byte[] altQuals = Utils.dupBytes((byte) 30, alt.length());
    final String altCigar = alt.length() + "M";
    final GATKRead read = ArtificialReadUtils.createArtificialRead(altBases, altQuals, altCigar);
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    rtgraph.addRead(read, header);
    rtgraph.setMaxMismatchesInDanglingHead(10);
    rtgraph.buildGraphIfNecessary();

    // There must be exactly one source vertex that is not a reference node.
    MultiDeBruijnVertex danglingSource = null;
    for (final MultiDeBruijnVertex vertex : rtgraph.vertexSet()) {
        if (!rtgraph.isSource(vertex) || rtgraph.isReferenceNode(vertex)) {
            continue;
        }
        Assert.assertTrue(danglingSource == null, "We found more than one non-reference source");
        danglingSource = vertex;
    }
    Assert.assertTrue(danglingSource != null, "We did not find a non-reference source");

    // The SW alignment has to agree with the expected cigar; no helper at all means no merge.
    final ReadThreadingGraph.DanglingChainMergeHelper mergeHelper =
            rtgraph.generateCigarAgainstUpwardsReferencePath(danglingSource, 0, 1);
    if (mergeHelper == null) {
        Assert.assertFalse(shouldBeMerged);
        return;
    }
    final String actualCigar = mergeHelper.cigar.toString();
    Assert.assertTrue(cigar.equals(actualCigar), "SW generated cigar = " + actualCigar);

    // Merging the dangling head must report a positive result whenever a merge is expected.
    final int mergeResult = rtgraph.mergeDanglingHead(mergeHelper);
    final boolean mergeOutcomeAcceptable = mergeResult > 0 || !shouldBeMerged;
    Assert.assertTrue(mergeOutcomeAcceptable);

    // The extra path (bubble) must show up in the sequence graph only if a merge was expected.
    rtgraph.cleanNonRefPaths();
    final SeqGraph seqGraph = rtgraph.toSequenceGraph();
    final List<KBestHaplotype> paths = new KBestHaplotypeFinder(
            seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex());
    final int expectedPathCount = shouldBeMerged ? 2 : 1;
    Assert.assertEquals(paths.size(), expectedPathCount);
}
Use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph in project gatk by broadinstitute.
Class ReadThreadingAssemblerUnitTest, method testMismatchInFirstKmer.
/**
 * Assembles a reference and a non-reference sequence that differ near their start and checks
 * the shape of the simplified graph: one source, one sink, non-null reference source and sink
 * vertices, and exactly one haplotype path.
 */
@Test
public void testMismatchInFirstKmer() {
    final String refSequence = "ACAACTGA";
    final String altSequence = "AGCTGA";

    final TestAssembler assembler = new TestAssembler(3);
    assembler.addSequence(refSequence.getBytes(), true);
    assembler.addSequence(altSequence.getBytes(), false);

    // Simplify the assembled graph and drop orphaned single vertices before inspecting it.
    final SeqGraph assembled = assembler.assemble();
    assembled.simplifyGraph();
    assembled.removeSingletonOrphanVertices();

    // Exactly one entry point and one exit point must remain.
    Assert.assertEquals(assembled.getSources().size(), 1);
    Assert.assertEquals(assembled.getSinks().size(), 1);

    // The reference source and sink must still be identifiable.
    Assert.assertNotNull(assembled.getReferenceSourceVertex());
    Assert.assertNotNull(assembled.getReferenceSinkVertex());

    // Only a single haplotype path is expected through the graph.
    final List<KBestHaplotype> haplotypePaths = new KBestHaplotypeFinder(assembled);
    Assert.assertEquals(haplotypePaths.size(), 1);
}
Use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph in project gatk by broadinstitute.
Class ReadThreadingAssemblerUnitTest, method assertLinearGraph.
/**
 * Asserts that the assembler produces a graph which simplifies to a single vertex whose
 * sequence string equals the given one.
 *
 * @param assembler assembler whose {@code assemble()} result is checked.
 * @param seq       sequence string the single remaining vertex is expected to carry.
 */
private void assertLinearGraph(final TestAssembler assembler, final String seq) {
    final SeqGraph simplified = assembler.assemble();
    simplified.simplifyGraph();

    // The graph must consist of exactly one vertex.
    final int vertexCount = simplified.vertexSet().size();
    Assert.assertEquals(vertexCount, 1);

    // That vertex must carry the expected sequence.
    final String onlyVertexSequence = simplified.vertexSet().iterator().next().getSequenceString();
    Assert.assertEquals(onlyVertexSequence, seq);
}
Use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph in project gatk-protected by broadinstitute.
Class ReadThreadingGraphUnitTest, method testNsInReadsAreNotUsedForGraph.
/**
 * Adds an all-'A' reference plus one read per position, each a copy of the reference with a
 * single 'N' at that position, and checks that the sequence graph still yields exactly one
 * haplotype path between the reference source and sink.
 */
@Test(enabled = !DEBUG)
public void testNsInReadsAreNotUsedForGraph() {
    final int length = 100;
    final byte[] reference = Utils.dupBytes((byte) 'A', length);

    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(25);
    rtgraph.addSequence("ref", reference, true);

    // Add one read per position, each with an N at that position.
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final String fullMatchCigar = length + "M";
    for (int nPosition = 0; nPosition < length; nPosition++) {
        final byte[] readBases = reference.clone();
        readBases[nPosition] = 'N';
        // A fresh quality array is built for every read, so no array is shared between reads.
        final byte[] readQuals = Utils.dupBytes((byte) 30, length);
        final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases, readQuals, fullMatchCigar);
        rtgraph.addRead(read, header);
    }
    rtgraph.buildGraphIfNecessary();

    // Only the reference path is expected in the resulting sequence graph.
    final SeqGraph seqGraph = rtgraph.toSequenceGraph();
    final int pathCount = new KBestHaplotypeFinder(
            seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex()).size();
    Assert.assertEquals(pathCount, 1);
}
Aggregations