use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleTandemDuplicationContraction.
/**
 * Test data for a simple tandem duplication contraction:
 * 40-'A' + 20-'C' + 40-'G' is shrunk to 40-'A' + 10-'C' + 40-'G' (forward strand representation).
 * <p>
 * The 90-base contig is described by two 50-base alignments (CIGARs 50M40S and 40S50M) that
 * overlap on contig positions 41-50.
 *
 * @return a list of two entries, the '+' strand representation first and the '-' strand
 *         representation second; each entry holds the two alignment intervals, the
 *         {@code NovelAdjacencyReferenceLocations} built from them, and the contig name
 * @throws IOException kept from the original signature for callers that already handle it
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forSimpleTandemDuplicationContraction() throws IOException {
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> result = new ArrayList<>();
    final String contigName = "asm000001:tig00001";

    // simple tandem duplication contraction '+' strand representation
    final byte[] leftRefFlank = makeDummySequence(40, (byte) 'A');
    final byte[] rightRefFlank = makeDummySequence(40, (byte) 'G');
    final byte[] doubleDup = makeDummySequence(20, (byte) 'C');
    final byte[] contigSeq = new byte[90];
    System.arraycopy(leftRefFlank, 0, contigSeq, 0, 40);
    // only 10 of the 20 'C' bases survive the contraction
    System.arraycopy(doubleDup, 0, contigSeq, 40, 10);
    System.arraycopy(rightRefFlank, 0, contigSeq, 50, 40);
    final AlignedAssembly.AlignmentInterval forwardRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100050), 1, 50, TextCigarCodec.decode("50M40S"), true, 60, 0);
    // 40S50M on a 90-base contig spans contig positions 41-90 (the end used to be given as 100)
    final AlignedAssembly.AlignmentInterval forwardRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100051, 100100), 41, 90, TextCigarCodec.decode("40S50M"), true, 60, 0);
    final NovelAdjacencyReferenceLocations breakpoints = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(forwardRegion1, forwardRegion2, Collections.emptyList(), contigName), contigSeq);
    result.add(new Tuple4<>(forwardRegion1, forwardRegion2, breakpoints, contigName));

    // simple tandem duplication contraction '-' strand representation
    SequenceUtil.reverseComplement(leftRefFlank);
    SequenceUtil.reverseComplement(rightRefFlank);
    SequenceUtil.reverseComplement(doubleDup);
    // use a separate array so the bytes already handed to the '+' strand object are never overwritten
    final byte[] contigSeqReverseStrand = new byte[contigSeq.length];
    System.arraycopy(rightRefFlank, 0, contigSeqReverseStrand, 0, 40);
    System.arraycopy(doubleDup, 0, contigSeqReverseStrand, 40, 10);
    System.arraycopy(leftRefFlank, 0, contigSeqReverseStrand, 50, 40);
    final AlignedAssembly.AlignmentInterval reverseRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100051, 100100), 1, 50, TextCigarCodec.decode("50M40S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100050), 41, 90, TextCigarCodec.decode("40S50M"), false, 60, 0);
    final NovelAdjacencyReferenceLocations breakpointsDetectedFromReverseStrand = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(reverseRegion1, reverseRegion2, Collections.emptyList(), contigName), contigSeqReverseStrand);
    result.add(new Tuple4<>(reverseRegion1, reverseRegion2, breakpointsDetectedFromReverseStrand, contigName));

    return result;
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forDeletionWithHomology.
/**
 * Test data for a deletion whose breakpoints share a 4-base homology:
 * 40-'C' + 'ATCG' + 34 bases of unique sequence + 'ATCG' + 40-'T' collapses to
 * 40-'C' + 'ATCG' + 40-'T' (forward strand representation).
 *
 * @param outputStream scratch buffer used to concatenate the contig bases; it is reset before each
 *                     contig is assembled, so on return it holds the '-' strand contig bytes
 * @return a list of two entries, the '+' strand representation first and the '-' strand
 *         representation second
 * @throws IOException declared because {@code outputStream.write(byte[])} declares it
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forDeletionWithHomology(final ByteArrayOutputStream outputStream) throws IOException {
    final String contigName = "asm000001:tig00001";
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> testCases = new ArrayList<>();

    final byte[] upstreamFlank = makeDummySequence(40, (byte) 'C');
    final byte[] downstreamFlank = makeDummySequence(40, (byte) 'T');
    final byte[] sharedBases = new byte[] { 'A', 'T', 'C', 'G' };

    // '+' strand: upstream flank, homology, downstream flank (84 bases in total)
    outputStream.reset();
    outputStream.write(upstreamFlank);
    outputStream.write(sharedBases);
    outputStream.write(downstreamFlank);
    final byte[] forwardContig = outputStream.toByteArray();

    final AlignedAssembly.AlignmentInterval forwardHead = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100044), 1, 44, TextCigarCodec.decode("44M40S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval forwardTail = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100079, 100122), 41, 84, TextCigarCodec.decode("40S44M"), true, 60, 0);
    final ChimericAlignment forwardChimera =
            new ChimericAlignment(forwardHead, forwardTail, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(forwardHead, forwardTail,
            new NovelAdjacencyReferenceLocations(forwardChimera, forwardContig), contigName));

    // '-' strand: every piece is reverse-complemented in place and the pieces are written in the opposite order
    SequenceUtil.reverseComplement(upstreamFlank);
    SequenceUtil.reverseComplement(downstreamFlank);
    SequenceUtil.reverseComplement(sharedBases);
    outputStream.reset();
    outputStream.write(downstreamFlank);
    outputStream.write(sharedBases);
    outputStream.write(upstreamFlank);
    final byte[] reverseContig = outputStream.toByteArray();

    final AlignedAssembly.AlignmentInterval reverseHead = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100079, 100122), 1, 44, TextCigarCodec.decode("44M40S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseTail = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100044), 41, 84, TextCigarCodec.decode("40S44M"), false, 60, 0);
    final ChimericAlignment reverseChimera =
            new ChimericAlignment(reverseHead, reverseTail, Collections.emptyList(), contigName);
    testCases.add(new Tuple4<>(reverseHead, reverseTail,
            new NovelAdjacencyReferenceLocations(reverseChimera, reverseContig), contigName));

    return testCases;
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forLongRangeSubstitution.
/**
 * Test data for a long range substitution:
 * 50-'A' + 50-'G' where the middle 10-'A' + 10-'G' is substituted with 10-'C'
 * (forward strand representation), giving a 90-base contig of 40-'A' + 10-'C' + 40-'G'.
 *
 * @return a list of two entries, the '+' strand representation first and the '-' strand
 *         representation second; each entry holds the two alignment intervals, the
 *         {@code NovelAdjacencyReferenceLocations} built from them, and the contig name
 * @throws IOException kept from the original signature for callers that already handle it
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forLongRangeSubstitution() throws IOException {
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> result = new ArrayList<>();
    final String contigName = "asm000001:tig00001";

    // long range substitution '+' strand representation
    final byte[] leftRefFlank = makeDummySequence(50, (byte) 'A');
    final byte[] rightRefFlank = makeDummySequence(50, (byte) 'G');
    final byte[] substitution = makeDummySequence(10, (byte) 'C');
    final byte[] contigSeq = new byte[leftRefFlank.length + rightRefFlank.length - 10];
    // only 40 bases of each 50-base flank remain; the 10-base substitution sits between them
    System.arraycopy(leftRefFlank, 0, contigSeq, 0, 40);
    System.arraycopy(substitution, 0, contigSeq, 40, substitution.length);
    System.arraycopy(rightRefFlank, 0, contigSeq, 50, 40);
    final AlignedAssembly.AlignmentInterval forwardRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100040), 1, 40, TextCigarCodec.decode("40M50S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval forwardRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100061, 100100), 51, 90, TextCigarCodec.decode("50S40M"), true, 60, 0);
    final NovelAdjacencyReferenceLocations breakpoints = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(forwardRegion1, forwardRegion2, Collections.emptyList(), contigName), contigSeq);
    result.add(new Tuple4<>(forwardRegion1, forwardRegion2, breakpoints, contigName));

    // long range substitution '-' strand representation
    SequenceUtil.reverseComplement(leftRefFlank);
    SequenceUtil.reverseComplement(rightRefFlank);
    SequenceUtil.reverseComplement(substitution);
    // use a separate array so the bytes already handed to the '+' strand object are never overwritten
    final byte[] contigSeqReverseStrand = new byte[contigSeq.length];
    System.arraycopy(rightRefFlank, 0, contigSeqReverseStrand, 0, 40);
    System.arraycopy(substitution, 0, contigSeqReverseStrand, 40, substitution.length);
    System.arraycopy(leftRefFlank, 0, contigSeqReverseStrand, 40 + substitution.length, 40);
    final AlignedAssembly.AlignmentInterval reverseRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100061, 100100), 1, 40, TextCigarCodec.decode("40M50S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100040), 51, 90, TextCigarCodec.decode("50S40M"), false, 60, 0);
    final NovelAdjacencyReferenceLocations breakpointsDetectedFromReverseStrand = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(reverseRegion1, reverseRegion2, Collections.emptyList(), contigName), contigSeqReverseStrand);
    result.add(new Tuple4<>(reverseRegion1, reverseRegion2, breakpointsDetectedFromReverseStrand, contigName));

    return result;
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleInversionWithNovelInsertion_leftFlankingForwardStrandOnly.
/**
 * Test data for an inversion with an inserted base between the two alignments.
 * <p>
 * The 197-base contig is 146-'A' + a single 'T' + 50-'C'. The first alignment covers contig
 * positions 1-146 (CIGAR 146M51S) and the second covers 148-197 (CIGAR 147S50M); the 'T' at
 * position 147 is covered by neither. The two intervals are built with opposite values of the
 * boolean constructor argument (true, then false) -- presumably the forward-strand flag, as the
 * '+'/'-' comments in sibling providers suggest.
 *
 * @return the two alignment intervals, the {@code NovelAdjacencyReferenceLocations} built from
 *         them, and the contig name
 * @throws IOException kept from the original signature for callers that already handle it
 */
private static Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String> forSimpleInversionWithNovelInsertion_leftFlankingForwardStrandOnly() throws IOException {
    final byte[] headFlank = makeDummySequence(146, (byte) 'A');
    final byte[] tailFlankRC = makeDummySequence(50, (byte) 'C');

    // lay out: head flank, one inserted 'T', tail flank
    final int insertedBaseIndex = headFlank.length;
    final int tailOffset = insertedBaseIndex + 1;
    final byte[] contigBases = new byte[tailOffset + tailFlankRC.length];
    System.arraycopy(headFlank, 0, contigBases, 0, headFlank.length);
    contigBases[insertedBaseIndex] = (byte) 'T';
    System.arraycopy(tailFlankRC, 0, contigBases, tailOffset, tailFlankRC.length);

    final AlignedAssembly.AlignmentInterval headAlignment = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 108569149, 108569294), 1, 146, TextCigarCodec.decode("146M51S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval tailAlignment = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 108569315, 108569364), 148, 197, TextCigarCodec.decode("147S50M"), false, 60, 0);

    // the name and sequence are read back from the AlignedContig rather than from the locals
    final AlignedContig contig =
            new AlignedContig("asm000001:tig00001", contigBases, Arrays.asList(headAlignment, tailAlignment));
    final ChimericAlignment chimera =
            new ChimericAlignment(headAlignment, tailAlignment, Collections.emptyList(), contig.contigName);
    final NovelAdjacencyReferenceLocations novelAdjacency =
            new NovelAdjacencyReferenceLocations(chimera, contig.contigSequence);

    return new Tuple4<>(headAlignment, tailAlignment, novelAdjacency, "asm000001:tig00001");
}
use of scala.Tuple4 in project gatk by broadinstitute.
the class SVDiscoveryTestDataProvider method forSimpleInsertion.
/**
 * Test data for a simple insertion:
 * 100-'A' + 100-'T' with 50 bases of 'C' inserted at the A->T junction point
 * (forward strand description), giving a 250-base contig.
 * <p>
 * The first alignment covers contig positions 1-100 and the second covers 151-250, so each
 * CIGAR soft-clips the remaining 150 bases of the contig.
 *
 * @param outputStream scratch buffer used to concatenate the contig bases; it is reset before each
 *                     contig is assembled, so on return it holds the '-' strand contig bytes
 * @return a list of two entries, the '+' strand representation first and the '-' strand
 *         representation second
 * @throws IOException declared because {@code outputStream.write(byte[])} declares it
 */
private static List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> forSimpleInsertion(final ByteArrayOutputStream outputStream) throws IOException {
    final List<Tuple4<AlignedAssembly.AlignmentInterval, AlignedAssembly.AlignmentInterval, NovelAdjacencyReferenceLocations, String>> result = new ArrayList<>();
    final String contigName = "asm000001:tig00001";

    // simple insertion '+' strand representation
    final byte[] leftRefFlank = makeDummySequence(100, (byte) 'A');
    final byte[] insertedSeq = makeDummySequence(50, (byte) 'C');
    final byte[] rightRefFlank = makeDummySequence(100, (byte) 'T');
    outputStream.reset();
    outputStream.write(leftRefFlank);
    outputStream.write(insertedSeq);
    outputStream.write(rightRefFlank);
    final byte[] contigSeq = outputStream.toByteArray();
    // CIGARs must account for all 250 contig bases: 100 aligned + 150 clipped (previously only 100 were clipped)
    final AlignedAssembly.AlignmentInterval forwardRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100100), 1, 100, TextCigarCodec.decode("100M150S"), true, 60, 0);
    final AlignedAssembly.AlignmentInterval forwardRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100101, 100200), 151, 250, TextCigarCodec.decode("150S100M"), true, 60, 0);
    final NovelAdjacencyReferenceLocations breakpoints = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(forwardRegion1, forwardRegion2, Collections.emptyList(), contigName), contigSeq);
    result.add(new Tuple4<>(forwardRegion1, forwardRegion2, breakpoints, contigName));

    // simple insertion '-' strand representation
    SequenceUtil.reverseComplement(leftRefFlank);
    SequenceUtil.reverseComplement(rightRefFlank);
    SequenceUtil.reverseComplement(insertedSeq);
    outputStream.reset();
    outputStream.write(rightRefFlank);
    outputStream.write(insertedSeq);
    outputStream.write(leftRefFlank);
    final byte[] contigSeqReverseStrand = outputStream.toByteArray();
    final AlignedAssembly.AlignmentInterval reverseRegion1 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100101, 100200), 1, 100, TextCigarCodec.decode("100M150S"), false, 60, 0);
    final AlignedAssembly.AlignmentInterval reverseRegion2 = new AlignedAssembly.AlignmentInterval(
            new SimpleInterval("21", 100001, 100100), 151, 250, TextCigarCodec.decode("150S100M"), false, 60, 0);
    final NovelAdjacencyReferenceLocations breakpointsDetectedFromReverseStrand = new NovelAdjacencyReferenceLocations(
            new ChimericAlignment(reverseRegion1, reverseRegion2, Collections.emptyList(), contigName), contigSeqReverseStrand);
    result.add(new Tuple4<>(reverseRegion1, reverseRegion2, breakpointsDetectedFromReverseStrand, contigName));

    return result;
}
Aggregations