use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleInversionFromLongCtg1WithStrangeLeftBreakpoint.
/**
 * Builds the test data for a simple inversion, called from a long contig, whose left breakpoint is unusual.
 *
 * The two alignment intervals are hard-coded; the novel adjacency is derived from the first chimeric
 * alignment that {@code ChimericAlignment.fromSplitAlignments} produces for the contig.
 *
 * @return a tuple of (first alignment interval, second alignment interval, novel adjacency, contig name)
 */
private static Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String> forSimpleInversionFromLongCtg1WithStrangeLeftBreakpoint() throws IOException {
    final String contigName = "asm702700:tig00001";
    final byte[] contigBases = LONG_CONTIG1.getBytes();

    // First alignment: spans contig positions 1 .. (length - 1986); its CIGAR starts with a 1986-base soft clip.
    final SimpleInterval firstRefSpan = new SimpleInterval(chrForLongContig1, 20138007, 20142231);
    final int firstEndOnContig = contigBases.length - 1986;
    final AlignedAssembly.AlignmentInterval firstAlignment =
            new AlignedAssembly.AlignmentInterval(firstRefSpan, 1, firstEndOnContig,
                    TextCigarCodec.decode("1986S236M2D1572M1I798M5D730M1I347M4I535M"), false, 60, 36);

    // Second alignment: spans contig positions 3604 .. length; its CIGAR starts with a 3603-base hard clip.
    final SimpleInterval secondRefSpan = new SimpleInterval(chrForLongContig1, 20152030, 20154634);
    final AlignedAssembly.AlignmentInterval secondAlignment =
            new AlignedAssembly.AlignmentInterval(secondRefSpan, 3604, contigBases.length,
                    TextCigarCodec.decode("3603H24M1I611M1I1970M"), true, 60, 36);

    final AlignedContig contig = new AlignedContig(contigName, contigBases, Arrays.asList(firstAlignment, secondAlignment));

    // Only the first chimeric alignment found on the contig is used to infer the breakpoints.
    final NovelAdjacencyReferenceLocations novelAdjacency = new NovelAdjacencyReferenceLocations(
            ChimericAlignment.fromSplitAlignments(contig, SVConstants.DiscoveryStepConstants.DEFAULT_MIN_ALIGNMENT_LENGTH).get(0),
            contigBases);

    return new Tuple4<>(firstAlignment, secondAlignment, novelAdjacency, contigName);
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forComplexTandemDuplication.
/**
 * This test data is based on a real observation of a locally-assembled contig
 * "TGCCAGGTTACATGGCAAAGAGGGTAGATATGGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAAGTCTCCACAGGAGGGCAAGGTGGACAATCTGTCCCATAGGAGGGGGATTCATGAGGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAAGTCTCCACAGGAGGGCAAGGTGGACAATCTGTCCCATAGGAGGGGGATTCAGGAGGGCAGCTGTGGATGGTGCAAATGCCATTTATGCTCCTCTCCACCCATATCC"
 * with two alignment records chr18:312579-312718 140M135S
 *                            chr18:312610-312757 127S148M
 * for a tandem repeat expansion event from 1 copy to 2 copies that also carries a pseudo-homology.
 * (The test intervals below are deliberately placed on a different chromosome, "20".)
 *
 * Returns a list of eight entries, i.e. the positive and reverse strand representations, in that order, for:
 * 1. expansion from 1 unit to 2 units with pseudo-homology
 * 2. contraction from 2 units to 1 unit with pseudo-homology
 * 3. contraction from 3 units to 2 units without pseudo-homology
 * 4. expansion from 2 units to 3 units without pseudo-homology
 *
 * The reference sequence of each scenario is never consumed by the returned data, so only the contig
 * sequences are materialized. The contig of an expansion doubles as the (implicit) reference of the
 * matching contraction and vice versa.
 *
 * @return the eight (first alignment, second alignment, novel adjacency, contig name) tuples
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forComplexTandemDuplication() throws IOException {
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> result = new ArrayList<>();
    final String contigName = "asm000001:tig00001";

    // Sequence building blocks; the trailing number is the length in bases.
    final String leftRefFlank = "TGCCAGGTTACATGGCAAAGAGGGTAGATAT"; // 31
    final String rightRefFlank = "TGGTGCAAATGCCATTTATGCTCCTCTCCACCCATATCC"; // 39
    final String firstRepeat = "GGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAAGTCTCCACAGGAGGGCAAGGTGGACAATCTGTCCCATAGGAGGGGGATTCATGA"; // 96
    final String secondRepeat = "GGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAAGTCTCCACAGGAGGGCAAGGTGGACAATCTGTCCCATAGGAGGGGGATTCAGGA"; // 96
    final String pseudoHomology = "GGGCAGCTGTGGA"; // 13

    // 179 bases: reference of the 1st scenario, contig of the 2nd.
    final byte[] oneUnitWithPseudoHomology = String.format("%s%s%s%s", leftRefFlank, firstRepeat, pseudoHomology, rightRefFlank).getBytes();
    // 275 bases: contig of the 1st scenario, reference of the 2nd.
    final byte[] twoUnitsWithPseudoHomology = String.format("%s%s%s%s%s", leftRefFlank, firstRepeat, secondRepeat, pseudoHomology, rightRefFlank).getBytes();
    // 358 bases: reference of the 3rd scenario, contig of the 4th.
    final byte[] threeUnitsNoPseudoHomology = String.format("%s%s%s%s%s", leftRefFlank, firstRepeat, secondRepeat, firstRepeat, rightRefFlank).getBytes();
    // 262 bases: contig of the 3rd scenario, reference of the 4th.
    final byte[] twoUnitsNoPseudoHomology = String.format("%s%s%s%s", leftRefFlank, firstRepeat, secondRepeat, rightRefFlank).getBytes();

    // first test (the original observed event, but assigned to a different chromosome): expansion from 1 unit to 2 units with pseudo-homology
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312718), 1, 140, TextCigarCodec.decode("140M135S"), true, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312610, 312757), 128, 275, TextCigarCodec.decode("127S148M"), true, 60, 0),
            twoUnitsWithPseudoHomology, contigName));
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312610, 312757), 1, 148, TextCigarCodec.decode("148M127S"), false, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312718), 136, 275, TextCigarCodec.decode("135S140M"), false, 60, 0),
            reverseComplementedCopy(twoUnitsWithPseudoHomology), contigName));

    // second test: contraction from 2 units to 1 unit with pseudo-homology
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312718), 1, 140, TextCigarCodec.decode("140M39S"), true, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312706, 312853), 32, 179, TextCigarCodec.decode("31S148M"), true, 60, 0),
            oneUnitWithPseudoHomology, contigName));
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312706, 312853), 1, 148, TextCigarCodec.decode("148M31S"), false, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312718), 40, 179, TextCigarCodec.decode("39S140M"), false, 60, 0),
            reverseComplementedCopy(oneUnitWithPseudoHomology), contigName));

    // third test: contraction from 3 units to 2 units without pseudo-homology
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312801), 1, 223, TextCigarCodec.decode("223M39S"), true, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312706, 312936), 32, 262, TextCigarCodec.decode("31S231M"), true, 60, 0),
            twoUnitsNoPseudoHomology, contigName));
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312706, 312936), 1, 231, TextCigarCodec.decode("231M31S"), false, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312801), 40, 262, TextCigarCodec.decode("39S223M"), false, 60, 0),
            reverseComplementedCopy(twoUnitsNoPseudoHomology), contigName));

    // fourth test: expansion from 2 units to 3 units without pseudo-homology
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312801), 1, 223, TextCigarCodec.decode("223M135S"), true, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312610, 312840), 128, 358, TextCigarCodec.decode("127S231M"), true, 60, 0),
            threeUnitsNoPseudoHomology, contigName));
    result.add(tandemDupTestEntry(
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312610, 312840), 1, 231, TextCigarCodec.decode("231M127S"), false, 60, 0),
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("20", 312579, 312801), 136, 358, TextCigarCodec.decode("135S223M"), false, 60, 0),
            reverseComplementedCopy(threeUnitsNoPseudoHomology), contigName));

    return result;
}

/**
 * Wraps two alignment intervals of one contig into a test tuple, computing the novel adjacency from a
 * chimeric alignment that is built directly from the two intervals (with no insertion mappings).
 */
private static Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String> tandemDupTestEntry(
        final AlignedAssembly.AlignmentInterval region1,
        final AlignedAssembly.AlignmentInterval region2,
        final byte[] contigSeq,
        final String contigName) throws IOException {
    final NovelAdjacencyReferenceLocations breakpoints =
            new NovelAdjacencyReferenceLocations(new ChimericAlignment(region1, region2, Collections.emptyList(), contigName), contigSeq);
    return new Tuple4<>(region1, region2, breakpoints, contigName);
}

/** Returns a reverse-complemented copy of {@code bases}; the input array itself is left untouched. */
private static byte[] reverseComplementedCopy(final byte[] bases) {
    final byte[] copy = Arrays.copyOf(bases, bases.length);
    SequenceUtil.reverseComplement(copy);
    return copy;
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleTandemDuplicationExpansion.
/**
 * Test data for a simple tandem duplication expansion:
 * 40-'A' + 10-'C' + 40-'G' is expanded to 40-'A' + 20-'C' + 40-'G' (forward strand representation).
 *
 * @param outputStream scratch buffer used to assemble the contig sequences; it is reset before each use,
 *                     so any content it held on entry is discarded
 * @return a list of two entries: the positive strand representation followed by the reverse strand one
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forSimpleTandemDuplicationExpansion(final ByteArrayOutputStream outputStream) throws IOException {
    final String contigName = "asm000001:tig00001";
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> testCases = new ArrayList<>();

    final byte[] flankA = makeDummySequence(40, (byte) 'A');
    final byte[] flankG = makeDummySequence(40, (byte) 'G');
    final byte[] expandedRepeat = makeDummySequence(20, (byte) 'C');

    // '+' strand representation: left flank, expanded repeat, right flank
    outputStream.reset();
    for (final byte[] piece : new byte[][]{flankA, expandedRepeat, flankG}) {
        outputStream.write(piece);
    }
    final byte[] forwardContig = outputStream.toByteArray();
    final AlignedAssembly.AlignmentInterval forwardFirst =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1, 50, TextCigarCodec.decode("50M50S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval forwardSecond =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100041, 100090), 51, 100, TextCigarCodec.decode("50S50M"), true, 60, 0);
    final ChimericAlignment forwardChimera = new ChimericAlignment(forwardFirst, forwardSecond, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(forwardFirst, forwardSecond, new NovelAdjacencyReferenceLocations(forwardChimera, forwardContig), contigName));

    // '-' strand representation: each piece is reverse-complemented in place, then written in the opposite order
    SequenceUtil.reverseComplement(flankA);
    SequenceUtil.reverseComplement(flankG);
    SequenceUtil.reverseComplement(expandedRepeat);
    outputStream.reset();
    for (final byte[] piece : new byte[][]{flankG, expandedRepeat, flankA}) {
        outputStream.write(piece);
    }
    final byte[] reverseContig = outputStream.toByteArray();
    final AlignedAssembly.AlignmentInterval reverseFirst =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100041, 100090), 1, 50, TextCigarCodec.decode("50M50S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseSecond =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100001, 100050), 51, 100, TextCigarCodec.decode("50S50M"), false, 60, 0);
    final ChimericAlignment reverseChimera = new ChimericAlignment(reverseFirst, reverseSecond, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(reverseFirst, reverseSecond, new NovelAdjacencyReferenceLocations(reverseChimera, reverseContig), contigName));

    return testCases;
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleDeletion.
/**
 * Test data for a simple deletion:
 * 40-'A' + 10-'C'+10-'T' + 40-'G' where the segment 10-'C'+10-'T' is deleted (forward strand representation description).
 *
 * @param outputStream scratch buffer used to assemble the contig sequences; it is reset before each use,
 *                     so any content it held on entry is discarded
 * @return a list of two entries: the positive strand representation followed by the reverse strand one
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forSimpleDeletion(final ByteArrayOutputStream outputStream) throws IOException {
    final String contigName = "asm000001:tig00001";
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> testCases = new ArrayList<>();

    final byte[] flankA = makeDummySequence(40, (byte) 'A');
    final byte[] flankG = makeDummySequence(40, (byte) 'G');

    // '+' strand representation: the contig is just the two flanks joined, the deleted segment being absent
    outputStream.reset();
    for (final byte[] piece : new byte[][]{flankA, flankG}) {
        outputStream.write(piece);
    }
    final byte[] forwardContig = outputStream.toByteArray();
    final AlignedAssembly.AlignmentInterval forwardFirst =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100001, 100040), 1, 40, TextCigarCodec.decode("40M40S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval forwardSecond =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100061, 100100), 41, 80, TextCigarCodec.decode("40S40M"), true, 60, 0);
    final ChimericAlignment forwardChimera = new ChimericAlignment(forwardFirst, forwardSecond, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(forwardFirst, forwardSecond, new NovelAdjacencyReferenceLocations(forwardChimera, forwardContig), contigName));

    // '-' strand representation: flanks are reverse-complemented in place and written in the opposite order
    SequenceUtil.reverseComplement(flankA);
    SequenceUtil.reverseComplement(flankG);
    outputStream.reset();
    for (final byte[] piece : new byte[][]{flankG, flankA}) {
        outputStream.write(piece);
    }
    final byte[] reverseContig = outputStream.toByteArray();
    final AlignedAssembly.AlignmentInterval reverseFirst =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100061, 100100), 1, 40, TextCigarCodec.decode("40M40S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseSecond =
            new AlignedAssembly.AlignmentInterval(new SimpleInterval("21", 100001, 100040), 41, 80, TextCigarCodec.decode("40S40M"), false, 60, 0);
    final ChimericAlignment reverseChimera = new ChimericAlignment(reverseFirst, reverseSecond, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(reverseFirst, reverseSecond, new NovelAdjacencyReferenceLocations(reverseChimera, reverseContig), contigName));

    return testCases;
}
Aggregations