Search in sources :

Example 21 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method getOutieMode.

/**
     * Calculates the mode for outward-facing pairs, using the first SAMPLE_FOR_MODE
     * outward-facing pairs found in INPUT
     */
private double getOutieMode() {
    int samplePerFile = SAMPLE_FOR_MODE / INPUT.size();
    Histogram<Integer> histo = new Histogram<>();
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        int sampled = 0;
        for (Iterator<SAMRecord> it = reader.iterator(); it.hasNext() && sampled < samplePerFile; ) {
            SAMRecord sam = it.next();
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // If we get here we've hit the end of the aligned reads
            if (sam.getReadUnmappedFlag() && sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                break;
            } else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
                continue;
            } else {
                if ((sam.getAttribute(SAMTag.MQ.name()) == null || sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) && sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY && sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() && sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                    if (SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
                        histo.increment(Math.abs(sam.getInferredInsertSize()));
                        sampled++;
                    }
                }
            }
        }
        CloserUtil.close(reader);
    }
    return histo.size() > 0 ? histo.getMode() : 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) Histogram(htsjdk.samtools.util.Histogram) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 22 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class ReadsSparkSinkUnitTest method readsSinkADAMTest.

@Test(enabled = false, dataProvider = "loadReadsADAM", groups = "spark")
public void readsSinkADAMTest(String inputBam, String outputDirectoryName) throws IOException {
    // Since the test requires that we not create the actual output directory in advance,
    // we instead create its parent directory and mark it for deletion on exit. This protects
    // us from naming collisions across multiple instances of the test suite.
    final File outputParentDirectory = createTempDir(outputDirectoryName + "_parent");
    final File outputDirectory = new File(outputParentDirectory, outputDirectoryName);
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, null).filter(// filter out unmapped reads (see comment below)
    r -> !r.isUnmapped());
    SAMFileHeader header = readSource.getHeader(inputBam, null);
    ReadsSparkSink.writeReads(ctx, outputDirectory.getAbsolutePath(), null, rddParallelReads, header, ReadsWriteFormat.ADAM);
    JavaRDD<GATKRead> rddParallelReads2 = readSource.getADAMReads(outputDirectory.getAbsolutePath(), null, header);
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
    // Test the round trip
    //make a mutable copy for sort
    List<GATKRead> samList = new ArrayList<>(rddParallelReads.collect());
    //make a mutable copy for sort
    List<GATKRead> adamList = new ArrayList<>(rddParallelReads2.collect());
    Comparator<GATKRead> comparator = new ReadCoordinateComparator(header);
    samList.sort(comparator);
    adamList.sort(comparator);
    for (int i = 0; i < samList.size(); i++) {
        SAMRecord expected = samList.get(i).convertToSAMRecord(header);
        SAMRecord observed = adamList.get(i).convertToSAMRecord(header);
        // manually test equality of some fields, as there are issues with roundtrip BAM -> ADAM -> BAM
        // see https://github.com/bigdatagenomics/adam/issues/823
        Assert.assertEquals(observed.getReadName(), expected.getReadName(), "readname");
        Assert.assertEquals(observed.getAlignmentStart(), expected.getAlignmentStart(), "getAlignmentStart");
        Assert.assertEquals(observed.getAlignmentEnd(), expected.getAlignmentEnd(), "getAlignmentEnd");
        Assert.assertEquals(observed.getFlags(), expected.getFlags(), "getFlags");
        Assert.assertEquals(observed.getMappingQuality(), expected.getMappingQuality(), "getMappingQuality");
        Assert.assertEquals(observed.getMateAlignmentStart(), expected.getMateAlignmentStart(), "getMateAlignmentStart");
        Assert.assertEquals(observed.getCigar(), expected.getCigar(), "getCigar");
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) ReadCoordinateComparator(org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 23 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class SAMRecordSerializerUnitTest method testChangingContigsOnHeaderlessSAMRecord.

@Test
public void testChangingContigsOnHeaderlessSAMRecord() {
    final SparkConf conf = new SparkConf().set("spark.kryo.registrator", "org.broadinstitute.hellbender.engine.spark.SAMRecordSerializerUnitTest$TestGATKRegistrator");
    final SAMRecord read = ((SAMRecordToGATKReadAdapter) ArtificialReadUtils.createHeaderlessSamBackedRead("read1", "1", 100, 50)).getEncapsulatedSamRecord();
    final SAMRecord roundTrippedRead = SparkTestUtils.roundTripInKryo(read, SAMRecord.class, conf);
    Assert.assertEquals(roundTrippedRead, read, "\nActual read: " + roundTrippedRead.getSAMString() + "\nExpected read: " + read.getSAMString());
    read.setReferenceName("2");
    read.setAlignmentStart(1);
    final SAMRecord roundTrippedRead2 = SparkTestUtils.roundTripInKryo(read, SAMRecord.class, conf);
    Assert.assertEquals(roundTrippedRead2, read, "\nActual read: " + roundTrippedRead2.getSAMString() + "\nExpected read: " + read.getSAMString());
}
Also used : SAMRecordToGATKReadAdapter(org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter) SAMRecord(htsjdk.samtools.SAMRecord) SparkConf(org.apache.spark.SparkConf) Test(org.testng.annotations.Test)

Example 24 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class SAMRecordSerializerUnitTest method testSerializerRoundTripHeaderlessSAMRecord.

@Test
public void testSerializerRoundTripHeaderlessSAMRecord() {
    SparkConf conf = new SparkConf().set("spark.kryo.registrator", "org.broadinstitute.hellbender.engine.spark.SAMRecordSerializerUnitTest$TestGATKRegistrator");
    // check round trip with no header
    final SAMRecord read = ((SAMRecordToGATKReadAdapter) ArtificialReadUtils.createHeaderlessSamBackedRead("read1", "1", 100, 50)).getEncapsulatedSamRecord();
    final SAMRecord roundTrippedRead = SparkTestUtils.roundTripInKryo(read, SAMRecord.class, conf);
    Assert.assertEquals(roundTrippedRead, read, "\nActual read: " + roundTrippedRead.getSAMString() + "\nExpected read: " + read.getSAMString());
}
Also used : SAMRecordToGATKReadAdapter(org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter) SAMRecord(htsjdk.samtools.SAMRecord) SparkConf(org.apache.spark.SparkConf) Test(org.testng.annotations.Test)

Example 25 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class AlignedContigGeneratorUnitTest method testConvertGATKReadsToAlignedContig.

@Test(groups = "sv")
public void testConvertGATKReadsToAlignedContig() throws Exception {
    final String read1Seq = "ACACACACACACACACACACACACACACCAGAAGAAAAATTCTGGTAAAACTTATTTGTTCTTAAACATAAAACTAGAGGTGCAAAATAACATTAGTGTATGGTTAATATAGGAAAGATAAGCAATTTCATCTTCTTTGTACCCCACTTATTGGTACTCAACAAGTTTTGAATAAATTCGTGAAATAAAGAAAAAAACCAACAAGTTCATATCTCCAAGTAGTCTTGGTAATTTAAACACTTTCAAGTATCTTCATACCCTGTTAGTACTTCTTTCTGCTCTGTGTCAACAGAAGTATTCTCAACACTCTTGTGGTTAATTTGGTTAAAACTCATTACAAAGAACCCTAAGTTATCCGTCACAGCTGCTAAACCCATTAGTACAGGAGACTTTAAGGCCATAATGTGTCCATTTTCAGGATATAATTGAAGAGAGGCAAATGATACATGGTTTTCCAAAAATATTGGACCAGGGAGCCTCTTCAAGAAAGAATCCCTGATTCGGGAGTTCTTATACTTTTTCAAGAA";
    final byte[] read1Quals = new byte[read1Seq.length()];
    Arrays.fill(read1Quals, (byte) 'A');
    final String read1Cigar = "527M2755H";
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(read1Seq.getBytes(), read1Quals, read1Cigar);
    read1.setPosition("chrX", 6218482);
    read1.setIsReverseStrand(true);
    read1.setIsSupplementaryAlignment(true);
    final String read2Seq = "ATATATATATATATATATATATATATAATATAAGAAGGAAAAATGTGGTTTTCCATTATTTTCTTTTTCTTATCCTCATCATTCACCAAATCTATATTAAACAACTCATAACATCTGGCCTGGGTAATAGAGTGAGACCCCAACTCCACAAAGAAACAAAAATTAAAAACAAATTAGCCGGGCCTGATGGCAAGTACCTGTGGTTCCAGCTGAGGTGGGAGGATCACTTGAGCCCAGGAGTTCAGGGCTGCAGTGAGCTATGATTACGCCAGTGTACTCCAGCCTGGGAGACAGAGCAAGACCCTATCTCTAAAAATATAAATACATAAATAAATAAATAATAAATAAAAATAGAAAATGTACAATGAAAGTTATAAAGTTGGCCAGGCGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCGAATCACCTGAGGTCAGTAGTTCAAGACTAGCCTGGCCAACATGGCGAAATCCTGTCTCTACTAAAAATACAAAAACTAGCTGGGTGTGGTGGTGTGTGCCTGTAATCCCAGCTATACAGGAGGCTGAGGCCGGAGAATTGCTTGAACCTGGGAGGGGGAGGTTGCAGTGAGCCAAGATCGTGCCATTGCACTGTAGCTTGGGTGACAGAGCGAGACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAAGTTATAAAGTTACCTATGATGGGTCTGGATGTACTCCTTATTTAGGAGTGAAGACATTCGTTAACATGAGACCTAAGTAAGTAGAAAGTATGTGTTTAAGGGACAGGTGTCCATTTTCTCTAGGTCTCCTGGAAGCTTTTTTTTTCTAATTTGAGTACTAGTTCCAAAAAAGGTGTTACCGCCTATGTTTATAGTGAAACTATCTATGTGTGACAAAATTCTACCCTCTCTTGTCCATCAATATTGTGCAATGTTGTGTACTTGTATGGAGAAATAGACAACTTTTACAAAGATCAAACTAGGCACCCTTTACCAACGCTAAACTCATAACCCTTTTATCTGCCTTTGTAGAAGATTCTCACCTTTATTTCTCTTGGTCCCCAAAGGCTTCGCTGAGAGACTGTTCCCATTGAAGCCTTGTGGCAAAGTCAGTAGAGCATTGTATCAGCCCAGCTTCAGAAACTCTGCATTCACTTTTAATAAATATGGAGAAGTTTGAAGTCACAAAAGCTGAGAACCTCATTACAGTCTCTTTATGTTCTTGGGTCCCTCTCTTCCTGCAGACTTCCTTGAATTCAAATTTAATGTGCGCTACTCATTCACATTGCACCTACTGAAAAGCATTGACAATTCCAGGTGACGACAGGAGCAAAAGGGAATGAATGCTAGAGGGTTTCTCATTTAGGTTGTTCACTCACCACTGTGCACAGGTTCTTTAGAATTCTACCTACACATGAAATAATATATAGTAAAATCCATATACATCTATAATAAATATATTCCAATGTATATATATTGTATATATATAGTTCTTGTTCTTTTGTATGAAAAGTACATGCAAATTTTATATGTATATACATATATACGTATATATATGTATGTGTGTGTGTGTATATATATATATACACACACACAAACCATATATATAGTTTGACATGCATTTTCCAAAAGAAACTATATTGTATGTGGTGAATGAACTTCCAGGCTGGTTCACAGGAGCTATTGTTAGTTAATGGTGCACAGAAACCACAGCTTCCTTCCCTCTTCCCTTCCTTTCTTCTTTCCTTCCTTCCTTCCCTCCTTCCTTCCTTCTTTTTCTCTTCTTTCTTTCCTTCCTTCTTTCCTTCATTCCCTCCCTCTCTCCTTCCTACCTTTTTCTCTTCTTTCTTTCCTTCCTTTCTTCTTTCCCTCCTTCCCTCCTTCTCTCCTTCCCTTCGATCTATCATCCTTCTCCCTCCCTCCCTGCCTCCTTTCTTCCTTCCTTTTCTCTTTTCTTTCTTTCTTCCTTCCTTCCTTCCTCCTTTCCTTCCCTTCCTTCCTCCCTTCGTTCCTCCATCCCTTCCTTCTTCCTTCTGTCCTTTCTTCCTTCCTTCTTCCCTTCCTTCTTTCTTCCCTCATTCCTTTATTCTATTTGGATGAGTGACAAGGTCTCAGGGATCCTTAAAGTATTAAAGAGAAAATGAAGATAATTCTATAGAATAACAGATCAAATAGGAAAGAGTAATGAAAAGAAAGACTCCTGGAATGACAAAGAAGCATGTTCATGACAATTTAATAGAATTTCCCACATGCCATATTTGTATAGCTGGTTATATTTTAAAACTTTGTCAAACCCTATATTTATAAAACCTTGATTTTGATTTTAGTAAATTCTATGAATAAATGCATACATAAAGCAGATTACTTTGATTCGACAAAAACATACAGGCACCTGTGAGCTTTTTTTTTTTTTATGTGGTAGTGGAAACAGCCTCATTAATTAATGGCTATTTCCTATATTCTTTTTAAAATTAATAAGCAACAGTCAGATATTTCAGCATTTTTAAGGCATGAACAATACCTTGGATTCTAATATATTTTAGTCTAAAATATCTGGTAACATCACAGATAATTGTTTCAAACTCTACCACCCCTCACACAATTTATTTTTCAACAATTTAAAGTCTTTTAGAATAATGAGATAACTTTAAAAATAGACAACATTACTCATACTGGTGAGCTAGGATTGCTTAAAGATGGGGCATAGATTGCATTGTCTCAGAGGAAAATATTTTATGAAGATTCTTGAAAAAGTATAAGAACTCCCGAATCAGGGATTCTTTCTTGAAGAGGCTCCCTGGTCCAATATTTTTGGAAAACCATGTATCATTTGCCTCTCTTCAATTATATCCTGAAAATGGACACATTATGGCCTTAAAGTCTCCTGTACTAATGGGTTTAGCAGCTGTGACGGATAACTTAGGGTTCTTTGTAATGAGTTTTAACCAAATTAACCACAAGAGTGTTGAGAATACTTCTGTTGACACAGAGCAGAAAGAAGTACTAACAGGGTATGAAGATACTTGAAAGTGTTTAAATTACCAAGACTACTTGGAGATATGAACTTGTTGGTTTTTTTCTTTATTTCACGAATTTATTCAAAACTTGTTGAGTACCAATAAGTGGGGTACAAAGAAGATGAAATTGCTTATCTTTCCTATATTAACCATACACTAATGTTATTTTGCACCTCTAGTTTTATGTTTAAGAACAAATAAGTTTTACCAGAATTTTTCTTCTGGTGTGTGTGTGTGTGTGTGTGTGTGTGT";
    final byte[] read2Quals = new byte[read2Seq.length()];
    Arrays.fill(read2Quals, (byte) 'A');
    final String read2Cigar = "1429S377M8D34M4I120M16D783M535S";
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(read2Seq.getBytes(), read2Quals, read2Cigar);
    read2.setPosition("chrX", 6219006);
    final String read3Seq = "GGGACCAAGAGAAATAAAGGTGAGAATCTTCTACAAAGGCAGATAAAAGGGTTATGAGTTTAGCGTTGGTAAAGGGTGCCTAGTTTGATCTTTGTAAAAGTTGTCTATTTCTCCATACAAGTACACAACATTGCACAATATTGATGGACAAGAGAGGGTAGAATTTTGTCACACATAGATAGTTTCACTATAAACATAGGCGGTAACACCTTTTTTGGAACTAGTACTCAAATTAGAAAAAAAAAGCTTCCAGGAGACCTAGAGAAAATGGACACCTGTCCCTTAAACACATACTTTCTACTTACTTAGGTCTCATGTTAACGAATGTCTTCACTCCTAAATAAGGAGTACATCCAGACCCATCATAGGTAACTTTATAACTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTCGCTCTGTCACCCAAGCTACAGTGCAATGGCACGATCTTGGCTCACTGCAACCTCCCCCTCCCAGGTTCAAGCAATTCTCCGGCCTCAGCCTCCTGTATAGCTGGGATTACAGGCACACACCACCACACCCAGCTAGTTTTTGTATTTTTAGTAGAGACAGGATTTCGCCATGTTGGCCAGGCTAGTCTTGAACTACTGACCTCAGGTGATTCGCCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCTGGCCAACTTTATAACTTTCATTGTACATTTTCTATTTTTATTTATTATTTATTTATTTATGTATTTATATTTTTAGAGATAGGGTCTTGCTCTGTCTCCCAGGCTGGAGTACACTGGCGTAATCATAGCTCACTGCAGCCCTGAACTCCTGGGCTCAAGTGATCCTCCCACCTCAGCTGGAACCACAGGTACTTGCCATCAGGCCCGGCTAATTTGTTTTTAATTTTTGTTTCTTTGTGGAGTTGGGGTCTCACTCTATTACCCAGGCCAGATGTTATGAGTTGTTTAATATAGATTTGGTGAATGATGAGGATAAGAAAAAGAAAATAATGGAAAACCACATTTTTCCTTCTTATATTATATATATATATATATATATATATAT";
    final byte[] read3Quals = new byte[read3Seq.length()];
    Arrays.fill(read3Quals, (byte) 'A');
    final String read3Cigar = "2207H385M6I684M";
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(read3Seq.getBytes(), read3Quals, read3Cigar);
    read3.setIsReverseStrand(true);
    read3.setIsSupplementaryAlignment(true);
    final List<SAMRecord> reads = Stream.of(read1, read2, read3).map(read -> read.convertToSAMRecord(null)).collect(Collectors.toList());
    final AlignedContig alignedContig = DiscoverVariantsFromContigAlignmentsSAMSpark.SAMFormattedContigAlignmentParser.parseReadsAndBreakGaps(reads, null, SVConstants.DiscoveryStepConstants.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY, null);
    assertEquals(alignedContig.contigSequence, read2.getBases());
    assertEquals(alignedContig.alignmentIntervals.size(), 3);
    final byte[] read4Bytes = SVDiscoveryTestDataProvider.LONG_CONTIG1.getBytes();
    final String read4Seq = new String(read4Bytes);
    final byte[] read4Quals = new byte[read4Seq.length()];
    Arrays.fill(read4Quals, (byte) 'A');
    final String read4Cigar = "1986S236M2D1572M1I798M5D730M1I347M4I535M";
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(read4Seq.getBytes(), read4Quals, read4Cigar);
    read4.setIsReverseStrand(true);
    read4.setIsSupplementaryAlignment(false);
    final String read5Seq = "TTTCTTTTTTCTTTTTTTTTTTTAGTTGATCATTCTTGGGTGTTTCTCGCAGAGGGGGATTTGGCAGGGTCATAGGACAACAGTGGAGGGAAGGTCAGCAGATAAACAAGTGAACAAAGGTCTCTGGTTTTCCTAGGCAGAGGACCCTGCGGCCTTCCGCAGTGTTTGTGTCCCCGGGTACTTGAGATTAGGGAGTGGTGATGACTCTTAACGAGCATGCTGCCTTCAAGCATCTGTTTAACAAAGCACATCTTGCACCGCCCTTAATCCATTTAACCCTGAGTGGACACAGCACATGTTTCAGAGAGCACAGGGTTGGGGGTAAGGTCATAGGTCAACAGGATCCCAAGGCAGAAGAATTTTTCTTAGTACAGAACAAAATGAAGTCTCCCATGTCTACCTCTTTCTACACAGACACAGCAACCATCCGATTTCTCAATCTTTTCCCCACCTTTCCCCCGTTTCTATTCCACAAAACTGCCATTGTCATCATGGCCCGTTCTCAATGAGCTGTTGGGTACACCTCCCAGACGGGGTGGTGGCCGGGCAGAGGGGCTCCTCACTTCCCAGTAGGGGCGGCCGGGCAGAGGCACCCCCCACCTCCCGGACGGGGCGGCTGGCTGGGCGGGGGGCTGACCCCCCCCACCTCCCTCCCGGACGGGGCGTCAGCCCAGCGTTTCTGATTGGATAATGCTTAAGGCCCCCGCCCCCTCAGGCCCTGAGTAACAGAAAATGTGATCAGGACTGAGTGAAGAAAAAGTCACAGCCTAAGCTGCAGCGTTTTTCAGGCAGGGCTTCCTCCCTGAGCTAAGCCAGGCCCAACCCAGATCATGGGAAAATTCTATCTTTTTTTTTACTCTCTCTCTTTTTGAATATATTCAAAAGGTGAACAGAAGTATTTTGCTGTCATATTAATAATACATAAAATTTTTTTCAAGAGAAAATCAGCTTTTACTTTGGTAATAGTGTATTATCAATACTAAAGCTAATTTTAATAAACCTTATAAATAAATCAAATTTGTCATTTTTGACCACTCCGGTTTTACATGTATATTTTGTAATCTCTTGTAATTTTTAAAAACTGTTTACATTTTATTTTTATCCAAATTCTTTTTATTTTTTCAATTTGAAACCACCTTTAAGTAATTTCAAATTGTTATAGGAGATAGAAAGAAGTCATTTAGGGCCAGGTTCACTGGCAAGTGCCTGTAATCGCAACACTTTGGGAGGCCAAGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCATTTCTACTAAAAGTACAAATAACTAGCTGGGTGTGGTGCTGCACACCTGTAGACCCAGCTAGTCCGGAGGCTGAGGCAGGAGAATTGCTTAAACCCAGAAGGCGGAGGTTGTGTAACTGCCCAAGGGGTTCACCTTGCCCTATGTCTAGACAGAGCTGATTCATCAAGACTGGGTAATTTGTGGAGGAAAAGTTAAATACTAAATTTGAACTTAATTGAACGTGGACAAACTCAGTAGTCACCAAGTTCTCAAACAGGTTGTGTGAGGCCCTTGAGGCATTCATTCAGCGCTGTTTCAGAGAAATCTCTATTTCAATCTATTTCTATATATTAGTTGTTGAAAAACAATAGACAATCGCAAAAACAAGTTGACAGTTTTGTGTTCCTTGACCCCAGTTGCAAATGGCCCTCATGACTGGGCCTTATGCCAAACAACTCGTTACAAAAGAGCTAGGGTATCAGATTGTGCTGAAGCTTCATTAGACCCTCCTCATCTGTGCAGGAATGAGTAGCTGACTCTGGAGCCCAAGCTGTTGCTTCCCAGTCTGGTGGTGAATCCTCCATAGTCTGGTGAGTGTAAATATATATATCTCTTTTCCTTTCTCCTCTTCCCATTGCAATTTGCTTATTATATCAACATGCTTATTATATCAATCTGGTTATTACATTGATTTGCTTATTATATAATTTGCTAATTATATCTGCATTTCCATTTACGTGGCACAAAGCTTATTTACCCTTAAAGGTATTGTGTGTGTGTCTTTTCTTCTCCCCTTGAATGTTTCCCACACAGAATATTTTTGGCGTCACGAACAGGATTCAAAAACCAAACTGTGCCACTTTTTGGCCACAAGGACAGGGCTGGAAACTCGAGGAAATCCCAAATCCAGGGATGGGAACTCCCCAATATCACTGTCAATTCCTACAGGATTGAACGAAGGGGACGAATGCAGAAATGAAGACAAAGACAAAAGATTTGTTTTAAAAGAAGGGGTCAGGCAGGGCGCAGTGGCTCAGGCCTGTAATCCCAGCACTTTGAGAGGCCGAGGTGGGCGGATCGCGAGGTCAGGAGAGCGAAACCATCTTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAATTTAGCCAGGTGTGGTGGCAGACACCTGTAGTCCCAGCTACCTGGGGGGGTGGGGGGGTGGGGCTGAGGCAGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCAGTAAGCCAAGATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGATTCGGTCTCAGAAAAAAAAAAAAAAA";
    final byte[] read5Quals = new byte[read5Seq.length()];
    Arrays.fill(read5Quals, (byte) 'A');
    final String read5Cigar = "3603H24M1I611M1I1970M";
    final GATKRead read5 = ArtificialReadUtils.createArtificialRead(read5Seq.getBytes(), read5Quals, read5Cigar);
    read5.setIsReverseStrand(false);
    read5.setIsSupplementaryAlignment(true);
    List<SAMRecord> reads2 = Stream.of(read4, read5).map(read -> read.convertToSAMRecord(null)).collect(Collectors.toList());
    final AlignedContig alignedContig2 = DiscoverVariantsFromContigAlignmentsSAMSpark.SAMFormattedContigAlignmentParser.parseReadsAndBreakGaps(reads2, null, SVConstants.DiscoveryStepConstants.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY, null);
    // these should be the reverse complements of each other
    assertEquals(alignedContig2.contigSequence.length, read4.getBases().length);
    final List<AlignedAssembly.AlignmentInterval> alignmentIntervals2 = alignedContig2.alignmentIntervals;
    assertEquals(alignmentIntervals2.size(), 2);
    final AlignedAssembly.AlignmentInterval alignmentInterval4 = alignmentIntervals2.get(0);
    assertEquals(alignmentInterval4.startInAssembledContig, 1);
    assertEquals(alignmentInterval4.endInAssembledContig, read4Seq.length() - 1986);
    final AlignedAssembly.AlignmentInterval alignmentInterval5 = alignmentIntervals2.get(1);
    assertEquals(alignmentInterval5.startInAssembledContig, 3604);
    assertEquals(alignmentInterval5.endInAssembledContig, read4Seq.length());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) Iterables(com.google.common.collect.Iterables) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FermiLiteAssembly(org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) CigarUtils(org.broadinstitute.hellbender.utils.read.CigarUtils) Path(org.apache.hadoop.fs.Path) Tuple2(scala.Tuple2) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) Stream(java.util.stream.Stream) SparkUtils(org.broadinstitute.hellbender.utils.spark.SparkUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) MiniClusterUtils(org.broadinstitute.hellbender.utils.test.MiniClusterUtils) SAMRecord(htsjdk.samtools.SAMRecord) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMRecord (htsjdk.samtools.SAMRecord)53 SamReader (htsjdk.samtools.SamReader)31 File (java.io.File)20 Test (org.testng.annotations.Test)15 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)12 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)11 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)9 MetricsFile (htsjdk.samtools.metrics.MetricsFile)9 ArrayList (java.util.ArrayList)8 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)6 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SamReaderFactory (htsjdk.samtools.SamReaderFactory)4 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)4 SAMRecordToGATKReadAdapter (org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3 Histogram (htsjdk.samtools.util.Histogram)3