use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class AssemblyRegionTestDataSet method haplotypeList.
public List<Haplotype> haplotypeList() {
if (haplotypeList == null) {
final List<Haplotype> result = new ArrayList<>(haplotypeCigars.length);
final String reference = this.reference;
for (final String cigar : haplotypeCigars) {
if (cigar.matches("^Civar:.*$")) {
stringRepresentation = cigar.substring(6);
result.addAll(expandAllHaplotypeCombinations(cigar.substring(6), reference));
} else if (cigar.matches("^.*\\d+.*$")) {
result.add(cigarToHaplotype(reference, cigar, 0, true));
} else {
final Haplotype h = new Haplotype(cigar.getBytes());
h.setGenomeLocation(genomeLocParser.createGenomeLoc("chr1", 1, reference.length()));
result.add(h);
}
}
haplotypeList = result;
}
return haplotypeList;
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class AssemblyRegionTestDataSet method cigarToHaplotype.
private Haplotype cigarToHaplotype(final String reference, final String cigar, final int offset, final boolean global) {
final String sequence = applyCigar(reference, cigar, offset, global);
final Haplotype haplotype = new Haplotype(sequence.getBytes(), reference.equals(sequence));
haplotype.setGenomeLocation(genomeLocParser.createGenomeLoc("chr1", 1, reference.length()));
haplotype.setCigar(Civar.fromCharSequence(cigar).toCigar(reference.length()));
return haplotype;
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class AssemblyRegionTestDataSet method assemblyResultSet.
public AssemblyResultSet assemblyResultSet() {
if (assemblyResultSet == null) {
final ReadThreadingGraph rtg = new ReadThreadingGraph(kmerSize);
rtg.addSequence("anonymous", getReference().getBytes(), true);
for (final String haplotype : haplotypesStrings()) {
rtg.addSequence("anonymous", haplotype.getBytes(), false);
}
rtg.buildGraphIfNecessary();
if (rtg.hasCycles())
throw new RuntimeException("there is cycles in the reference with kmer size " + kmerSize + ". Don't use this size for the benchmark or change the reference");
List<Haplotype> haplotypeList = haplotypeList();
assemblyResultSet = new AssemblyResultSet();
final AssemblyResult ar = new AssemblyResult((haplotypeList.size() > 1 ? AssemblyResult.Status.ASSEMBLED_SOME_VARIATION : AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE), rtg.toSequenceGraph(), rtg);
for (final Haplotype h : haplotypeList) assemblyResultSet.add(h, ar);
}
return assemblyResultSet;
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class AssemblyRegionTestDataSetUnitTest method testAssemblyRegionsDataSet.
@Test(dataProvider = "activeRegionTestDataSets")
@SuppressWarnings("fallthrough")
public void testAssemblyRegionsDataSet(final AssemblyRegionTestDataSet as, final int kmerSize, final int readLength, final String variation, final int readCount, final int regionSize, final byte bq, final byte iq, final byte dq) {
Assert.assertNotNull(as);
Assert.assertEquals(as.assemblyResultSet().getMaximumKmerSize(), kmerSize);
final List<GATKRead> reads = as.readList();
Assert.assertEquals(reads.size(), readCount);
for (final GATKRead r : reads) {
Assert.assertEquals(r.getLength(), readLength);
}
final List<Haplotype> haplotypes = as.haplotypeList();
final List<Civar> haplotypeCivars = Civar.fromCharSequence(variation).optionalizeAll().unroll();
Assert.assertEquals(haplotypes.size(), haplotypeCivars.size());
Assert.assertTrue(haplotypeCivars.size() > 1);
int variants = 0;
for (int i = 0; i < variation.length(); i++) {
final char c = variation.charAt(i);
switch(c) {
case 'W':
case 'T':
case 'C':
case 'D':
case 'I':
variants++;
default:
}
}
Assert.assertEquals(haplotypes.size(), (int) Math.pow(2, variants));
final Map<String, Integer> haplotypeNumberByString = new HashMap<>();
for (int i = 0; i < haplotypes.size(); i++) {
final Haplotype hap = haplotypes.get(i);
final Civar civar = haplotypeCivars.get(i);
Assert.assertEquals(hap.getBaseString(), civar.applyTo(as.getReference()));
if (i == 0) {
Assert.assertEquals(hap.getBaseString(), as.getReference());
} else {
Assert.assertNotEquals(hap.getBaseString(), as.getReference());
}
Assert.assertFalse(haplotypeNumberByString.containsKey(hap.getBaseString()));
haplotypeNumberByString.put(hap.getBaseString(), i);
}
final int[] hapReadsNotInReference = new int[haplotypes.size()];
for (int i = 0; i < readCount; i++) {
final GATKRead r = as.readList().get(i);
final int hapNumber = i % haplotypes.size();
final int offset = i % (haplotypes.get(hapNumber).length() - readLength);
Assert.assertEquals(getReadString(r), haplotypes.get(hapNumber).getBaseString().substring(offset, offset + readLength));
if (!as.getReference().contains(getReadString(r))) {
hapReadsNotInReference[hapNumber]++;
}
}
Assert.assertEquals(hapReadsNotInReference[0], 0);
for (int i = 1; i < hapReadsNotInReference.length; i++) {
Assert.assertNotEquals(hapReadsNotInReference[i], 0);
}
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class HaplotypeCallerGenotypingEngineUnitTest method makeConstructPhaseSetMappingData.
@DataProvider(name = "ConstructPhaseSetMappingProvider")
public Object[][] makeConstructPhaseSetMappingData() {
List<Object[]> tests = new ArrayList<Object[]>();
final Allele ref = Allele.create("A", true);
final Allele altC = Allele.create("C", false);
final Allele altT = Allele.create("T", false);
final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
pos1.getEventMap().put(1, vc1);
final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
pos2.getEventMap().put(2, vc2);
final Haplotype pos3 = new Haplotype("AACAA".getBytes());
pos3.setEventMap(new EventMap(Arrays.asList(vc3)));
pos3.getEventMap().put(3, vc3);
final Haplotype pos4 = new Haplotype("AAACA".getBytes());
pos4.setEventMap(new EventMap(Arrays.asList(vc4)));
pos4.getEventMap().put(4, vc4);
final Haplotype pos24 = new Haplotype("ACACA".getBytes());
pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4)));
pos24.getEventMap().put(2, vc2);
pos24.getEventMap().put(4, vc4);
final Haplotype pos34 = new Haplotype("AACCA".getBytes());
pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4)));
pos34.getEventMap().put(3, vc3);
pos34.getEventMap().put(4, vc4);
final Haplotype pos234 = new Haplotype("ACCCA".getBytes());
pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4)));
pos234.getEventMap().put(2, vc2);
pos234.getEventMap().put(3, vc3);
pos234.getEventMap().put(4, vc4);
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
// test no phased variants #1
final Set<Haplotype> haplotypes2 = new HashSet<>();
haplotypes2.add(pos2);
haplotypeMap.put(vc2, haplotypes2);
tests.add(new Object[] { Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0 });
// test no phased variants #2
final Set<Haplotype> haplotypes3 = new HashSet<>();
haplotypes3.add(pos3);
haplotypeMap.put(vc3, haplotypes3);
tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
// test opposite phase
tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1 });
// test no phased variants #3
final Set<Haplotype> haplotypes4 = new HashSet<>();
haplotypes4.add(pos4);
haplotypeMap.put(vc4, haplotypes4);
tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
// test mixture
final Set<Haplotype> haplotypes24 = new HashSet<>();
haplotypes24.add(pos24);
haplotypeMap.put(vc2, haplotypes24);
haplotypeMap.put(vc4, haplotypes24);
tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1 });
// test 2 hets
haplotypeMap.remove(vc3);
tests.add(new Object[] { Arrays.asList(vc2, vc4), new HashMap<>(haplotypeMap), 1, 2, 1, 2, 0 });
// test 2 with opposite phase
final Set<Haplotype> haplotypes1 = new HashSet<>();
haplotypes1.add(pos1);
haplotypeMap.put(vc1, haplotypes1);
tests.add(new Object[] { Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2 });
// test homs around a het
final Set<Haplotype> haplotypes2hom = new HashSet<>();
haplotypes2hom.add(pos24);
haplotypes2hom.add(pos234);
final Set<Haplotype> haplotypes4hom = new HashSet<>();
haplotypes4hom.add(pos24);
haplotypes4hom.add(pos234);
final Set<Haplotype> haplotypes3het = new HashSet<>();
haplotypes3het.add(pos234);
haplotypeMap.put(vc2, haplotypes2hom);
haplotypeMap.put(vc3, haplotypes3het);
haplotypeMap.put(vc4, haplotypes4hom);
tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
// test hets around a hom
final Set<Haplotype> haplotypes2het = new HashSet<>();
haplotypes2het.add(pos234);
final Set<Haplotype> haplotypes4het = new HashSet<>();
haplotypes4het.add(pos234);
final Set<Haplotype> haplotypes3hom = new HashSet<>();
haplotypes3hom.add(pos3);
haplotypes3hom.add(pos234);
haplotypeMap.put(vc2, haplotypes2het);
haplotypeMap.put(vc3, haplotypes3hom);
haplotypeMap.put(vc4, haplotypes4het);
tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
// test no phased variants around a hom
final Set<Haplotype> haplotypes2incomplete = new HashSet<>();
haplotypes2incomplete.add(pos24);
final Set<Haplotype> haplotypes3incomplete = new HashSet<>();
haplotypes3incomplete.add(pos34);
final Set<Haplotype> haplotypes4complete = new HashSet<>();
haplotypes4complete.add(pos24);
haplotypes4complete.add(pos34);
haplotypes4complete.add(pos234);
haplotypeMap.put(vc2, haplotypes2incomplete);
haplotypeMap.put(vc3, haplotypes3incomplete);
haplotypeMap.put(vc4, haplotypes4complete);
tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 0, 0, 0, 0, 0 });
return tests.toArray(new Object[][] {});
}
Aggregations