Search in sources :

Example 1 with ReferenceLookup

use of au.edu.wehi.idsv.picard.ReferenceLookup in project gridss by PapenfussLab.

the class ExtractSVReadsTest method regression_should_extract_split_read_alignments_as_group_2.

// @Test
@Category(Hg38Tests.class)
public void regression_should_extract_split_read_alignments_as_group_2() throws IOException {
    File ref = Hg38Tests.findHg38Reference();
    ReferenceLookup lookup = new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref));
    Files.copy(new File("src/test/resources/sa.split/test2.sam"), input);
    ExtractSVReads extract = new ExtractSVReads();
    // new ProcessingContext(getFSContext(), ref, lookup, null, getConfig());
    extract.setReference(lookup);
    extract.MIN_CLIP_LENGTH = 4;
    extract.INSERT_SIZE_METRICS = new File("src/test/resources/sa.split/test.insert_size_metrics");
    extract.READ_PAIR_CONCORDANCE_METHOD = ReadPairConcordanceMethod.PERCENTAGE;
    extract.OUTPUT = output;
    extract.INPUT = input;
    try (SamReader reader = SamReaderFactory.make().open(input)) {
        extract.setup(reader.getFileHeader(), input);
    }
    List<SAMRecord> records = getRecords(input);
    List<Boolean> result = records.stream().map(r -> extract.shouldExtract(ImmutableList.of(r), lookup)[0]).collect(Collectors.toList());
    /*
		for (int i = 0; i < records.size(); i++) {
			SAMRecord r = records.get(i);
			System.out.print(r.getSupplementaryAlignmentFlag() ? "S" : " ");
			System.out.print(r.getFirstOfPairFlag() ? "1" : "2");
			System.out.print(" Extracted=" + (result.get(i) ? "y" : "n"));
			System.out.print(" HasConcPair=" + (ExtractSVReads.hasReadPairingConsistentWithReference(extract.getReadPairConcordanceCalculator(), ImmutableList.of(r)) ? "y" : "n"));
			boolean[] ra = ExtractSVReads.hasReadAlignmentConsistentWithReference(ImmutableList.of(r));
			System.out.print(" HasConcRead=" + (ra[0] ? "y" : "n") + (ra[1] ? "y" : "n"));
			System.out.println(" " + new ChimericAlignment(r).toString());
		}*/
    // primary read pair alignment implies an unexpected library fragment size = extract them all
    Assert.assertEquals(ImmutableList.of(true, true, true, true), result);
    lookup.close();
}
Also used : FixedSizeReadPairConcordanceCalculator(au.edu.wehi.idsv.FixedSizeReadPairConcordanceCalculator) Iterables(com.google.common.collect.Iterables) ReadPairConcordanceMethod(au.edu.wehi.idsv.ReadPairConcordanceMethod) Iterators(com.google.common.collect.Iterators) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) StructuralVariantReadMetrics(gridss.analysis.StructuralVariantReadMetrics) ChimericAlignment(au.edu.wehi.idsv.sam.ChimericAlignment) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) IntermediateFilesTest(au.edu.wehi.idsv.IntermediateFilesTest) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category) SamReader(htsjdk.samtools.SamReader) MetricsFile(htsjdk.samtools.metrics.MetricsFile) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) Hg38Tests(au.edu.wehi.idsv.Hg38Tests) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Assert.assertFalse(org.junit.Assert.assertFalse) Assert(org.junit.Assert) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Assert.assertEquals(org.junit.Assert.assertEquals) SamReader(htsjdk.samtools.SamReader) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category)

Example 2 with ReferenceLookup

use of au.edu.wehi.idsv.picard.ReferenceLookup in project gridss by PapenfussLab.

the class ByReadNameSinglePassSamProgram method makeItSo.

public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<ByReadNameSinglePassSamProgram> programs) throws FileNotFoundException {
    // Setup the standard inputs
    IOUtil.assertFileIsReadable(input);
    SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    // Optionally load up the reference sequence and double check sequence dictionaries
    final ReferenceLookup lookup;
    if (referenceSequence == null) {
        lookup = null;
    } else {
        IOUtil.assertFileIsReadable(referenceSequence);
        lookup = new TwoBitBufferedReferenceSequenceFile(new IndexedFastaSequenceFile(referenceSequence));
        if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
            SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), lookup.getSequenceDictionary());
        }
    }
    // Check on the sort order of the BAM file
    final SortOrder sort = in.getFileHeader().getSortOrder();
    if (sort != SortOrder.queryname) {
        if (assumeSorted) {
            log.warn("File reports sort order '" + sort + "', assuming it's queryname sorted anyway.");
        } else {
            throw new PicardException("File " + input.getAbsolutePath() + " should be queryname sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be queryname sorted you may pass ASSUME_SORTED=true");
        }
    }
    for (final ByReadNameSinglePassSamProgram program : programs) {
        program.setReference(lookup);
        program.setup(in.getFileHeader(), input);
    }
    final ProgressLogger progress = new ProgressLogger(log);
    final SAMRecordIterator rawit = in.iterator();
    final CloseableIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(rawit, "ByReadNameSinglePassSamProgram " + input.getName());
    try {
        List<SAMRecord> currentRecords = new ArrayList<>();
        String currentReadName = null;
        while (it.hasNext()) {
            SAMRecord r = it.next();
            String readname = r.getReadName();
            // if read name we have to just treat it as a single read
            if (readname == null || !readname.equals(currentReadName)) {
                if (currentRecords.size() > 0) {
                    for (final ByReadNameSinglePassSamProgram program : programs) {
                        program.acceptFragment(currentRecords, lookup);
                    }
                }
                currentRecords.clear();
                currentReadName = readname;
                if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                    break;
                }
            }
            currentRecords.add(r);
            progress.record(r);
        }
        if (currentRecords.size() > 0) {
            for (final ByReadNameSinglePassSamProgram program : programs) {
                program.acceptFragment(currentRecords, lookup);
            }
        }
    } finally {
        CloserUtil.close(it);
        CloserUtil.close(rawit);
        CloserUtil.close(in);
    }
    for (final ByReadNameSinglePassSamProgram program : programs) {
        program.finish();
    }
}
Also used : TwoBitBufferedReferenceSequenceFile(au.edu.wehi.idsv.picard.TwoBitBufferedReferenceSequenceFile) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) ProgressLogger(htsjdk.samtools.util.ProgressLogger) PicardException(picard.PicardException) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) SAMRecord(htsjdk.samtools.SAMRecord)

Example 3 with ReferenceLookup

use of au.edu.wehi.idsv.picard.ReferenceLookup in project gridss by PapenfussLab.

the class SingleReadEvidence method withExactHomology.

private BreakendSummary withExactHomology(BreakendSummary location) {
    if (!isBreakendExact())
        return location;
    // even though this could be an inexact homolog
    if (location.end - location.start != 0)
        return location;
    // tandem repeats can have both inserted sequence and sequence homology.
    if (untemplated.length() > 0)
        return location;
    if (location instanceof BreakpointSummary) {
        if (source != null && source.getContext() != null && source.getContext().getReference() != null) {
            ReferenceLookup lookup = source.getContext().getReference();
            BreakpointSummary bp = (BreakpointSummary) location;
            int localBasesMatchingRemoteReference;
            int remoteBasesMatchingLocalReference;
            if (bp.direction == BreakendDirection.Forward) {
                // anchor -> breakend
                remoteBasesMatchingLocalReference = homologyLength(lookup, bp.referenceIndex, bp.nominal + 1, 1, breakendBases, 0, 1);
                if (bp.direction2 == BreakendDirection.Backward) {
                    localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 - 1, -1, anchorBases, anchorBases.length - 1, -1);
                } else {
                    localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 + 1, 1, anchorBases, anchorBases.length - 1, -1);
                }
            } else {
                remoteBasesMatchingLocalReference = homologyLength(lookup, bp.referenceIndex, bp.nominal - 1, -1, breakendBases, breakendBases.length - 1, -1);
                if (bp.direction2 == BreakendDirection.Forward) {
                    localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 + 1, 1, anchorBases, 0, 1);
                } else {
                    localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 - 1, -1, anchorBases, 0, 1);
                }
            }
            BreakpointSummary adjusted = bp.adjustPosition(localBasesMatchingRemoteReference, remoteBasesMatchingLocalReference, false);
            // push the bounds back in if we overrun our contigs bounds to the homology
            if (adjusted.start <= 0 && localBasesMatchingRemoteReference > 0)
                localBasesMatchingRemoteReference--;
            if (adjusted.start2 <= 0 && remoteBasesMatchingLocalReference > 0)
                remoteBasesMatchingLocalReference--;
            if (adjusted.end > lookup.getSequenceDictionary().getSequence(adjusted.referenceIndex).getSequenceLength() && localBasesMatchingRemoteReference > 0)
                localBasesMatchingRemoteReference--;
            if (adjusted.end2 > lookup.getSequenceDictionary().getSequence(adjusted.referenceIndex2).getSequenceLength() && remoteBasesMatchingLocalReference > 0)
                remoteBasesMatchingLocalReference--;
            BreakpointSummary readjusted = bp.adjustPosition(localBasesMatchingRemoteReference, remoteBasesMatchingLocalReference, false);
            return readjusted;
        } else {
            unableToCalculateHomology = true;
            return location;
        }
    } else {
        // and is already covered by SAMRecordUtil.unclipExactReferenceMatches()
        return location;
    }
}
Also used : ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup)

Example 4 with ReferenceLookup

use of au.edu.wehi.idsv.picard.ReferenceLookup in project gridss by PapenfussLab.

the class ExtractSVReadsTest method regression_should_extract_split_read_alignments_as_group.

// @Test
@Category(Hg38Tests.class)
public void regression_should_extract_split_read_alignments_as_group() throws IOException {
    File ref = Hg38Tests.findHg38Reference();
    ReferenceLookup lookup = new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref));
    Files.copy(new File("src/test/resources/sa.split/test1.sam"), input);
    ExtractSVReads extract = new ExtractSVReads();
    // new ProcessingContext(getFSContext(), ref, lookup, null, getConfig());
    extract.setReference(lookup);
    extract.MIN_CLIP_LENGTH = 4;
    extract.INSERT_SIZE_METRICS = new File("src/test/resources/sa.split/test.insert_size_metrics");
    extract.READ_PAIR_CONCORDANCE_METHOD = ReadPairConcordanceMethod.PERCENTAGE;
    extract.OUTPUT = output;
    extract.INPUT = input;
    try (SamReader reader = SamReaderFactory.make().open(input)) {
        extract.setup(reader.getFileHeader(), input);
    }
    List<SAMRecord> records = getRecords(input);
    List<Boolean> result = records.stream().map(r -> extract.shouldExtract(ImmutableList.of(r), lookup)[0]).collect(Collectors.toList());
    /*
		for (int i = 0; i < records.size(); i++) {
			SAMRecord r = records.get(i);
			System.out.print(r.getSupplementaryAlignmentFlag() ? "S" : " ");
			System.out.print(r.getFirstOfPairFlag() ? "1" : "2");
			System.out.print(" Extracted=" + (result.get(i) ? "y" : "n"));
			System.out.print(" HasConcPair=" + (ExtractSVReads.hasReadPairingConsistentWithReference(extract.getReadPairConcordanceCalculator(), ImmutableList.of(r)) ? "y" : "n"));
			boolean[] ra = ExtractSVReads.hasReadAlignmentConsistentWithReference(ImmutableList.of(r));
			System.out.print(" HasConcRead=" + (ra[0] ? "y" : "n") + (ra[1] ? "y" : "n"));
			System.out.println(" " + new ChimericAlignment(r).toString());
		}
		*/
    Assert.assertEquals(records.stream().map(r -> r.getSecondOfPairFlag()).collect(Collectors.toList()), result);
    lookup.close();
}
Also used : FixedSizeReadPairConcordanceCalculator(au.edu.wehi.idsv.FixedSizeReadPairConcordanceCalculator) Iterables(com.google.common.collect.Iterables) ReadPairConcordanceMethod(au.edu.wehi.idsv.ReadPairConcordanceMethod) Iterators(com.google.common.collect.Iterators) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) StructuralVariantReadMetrics(gridss.analysis.StructuralVariantReadMetrics) ChimericAlignment(au.edu.wehi.idsv.sam.ChimericAlignment) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) IntermediateFilesTest(au.edu.wehi.idsv.IntermediateFilesTest) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category) SamReader(htsjdk.samtools.SamReader) MetricsFile(htsjdk.samtools.metrics.MetricsFile) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) Hg38Tests(au.edu.wehi.idsv.Hg38Tests) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Assert.assertFalse(org.junit.Assert.assertFalse) Assert(org.junit.Assert) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Assert.assertEquals(org.junit.Assert.assertEquals) SamReader(htsjdk.samtools.SamReader) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category)

Aggregations

ReferenceLookup (au.edu.wehi.idsv.picard.ReferenceLookup)4 SAMRecord (htsjdk.samtools.SAMRecord)3 SamReader (htsjdk.samtools.SamReader)3 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)3 FixedSizeReadPairConcordanceCalculator (au.edu.wehi.idsv.FixedSizeReadPairConcordanceCalculator)2 Hg38Tests (au.edu.wehi.idsv.Hg38Tests)2 IntermediateFilesTest (au.edu.wehi.idsv.IntermediateFilesTest)2 ReadPairConcordanceMethod (au.edu.wehi.idsv.ReadPairConcordanceMethod)2 SynchronousReferenceLookupAdapter (au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter)2 ChimericAlignment (au.edu.wehi.idsv.sam.ChimericAlignment)2 ImmutableList (com.google.common.collect.ImmutableList)2 Iterables (com.google.common.collect.Iterables)2 Iterators (com.google.common.collect.Iterators)2 Lists (com.google.common.collect.Lists)2 Files (com.google.common.io.Files)2 StructuralVariantReadMetrics (gridss.analysis.StructuralVariantReadMetrics)2 SamReaderFactory (htsjdk.samtools.SamReaderFactory)2 MetricsFile (htsjdk.samtools.metrics.MetricsFile)2 File (java.io.File)2 IOException (java.io.IOException)2