Use of au.edu.wehi.idsv.picard.ReferenceLookup in the project gridss by PapenfussLab:
class ExtractSVReadsTest, method regression_should_extract_split_read_alignments_as_group_2.
// @Test
// NOTE(review): @Test is commented out, so JUnit never runs this regression test even
// though it is categorised as an Hg38 test — confirm this is intentional.
@Category(Hg38Tests.class)
public void regression_should_extract_split_read_alignments_as_group_2() throws IOException {
    // Regression test: every alignment record of the split-read group in test2.sam must be
    // extracted, because the primary read pair alignment implies an unexpected (discordant)
    // library fragment size. Requires a local hg38 reference (Hg38Tests.findHg38Reference()).
    File ref = Hg38Tests.findHg38Reference();
    ReferenceLookup lookup = new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref));
    try {
        Files.copy(new File("src/test/resources/sa.split/test2.sam"), input);
        ExtractSVReads extract = new ExtractSVReads();
        extract.setReference(lookup);
        extract.MIN_CLIP_LENGTH = 4;
        extract.INSERT_SIZE_METRICS = new File("src/test/resources/sa.split/test.insert_size_metrics");
        extract.READ_PAIR_CONCORDANCE_METHOD = ReadPairConcordanceMethod.PERCENTAGE;
        extract.OUTPUT = output;
        extract.INPUT = input;
        // setup() only needs the header; close the reader before re-reading the records.
        try (SamReader reader = SamReaderFactory.make().open(input)) {
            extract.setup(reader.getFileHeader(), input);
        }
        List<SAMRecord> records = getRecords(input);
        // Per-record extraction decision; index [0] is the verdict for the record itself.
        List<Boolean> result = records.stream()
                .map(r -> extract.shouldExtract(ImmutableList.of(r), lookup)[0])
                .collect(Collectors.toList());
        // primary read pair alignment implies an unexpected library fragment size = extract them all
        Assert.assertEquals(ImmutableList.of(true, true, true, true), result);
    } finally {
        // Close in a finally block so the reference file handle is released even when the
        // assertion (or any earlier step) fails — previously a failure leaked the lookup.
        lookup.close();
    }
}
Use of au.edu.wehi.idsv.picard.ReferenceLookup in the project gridss by PapenfussLab:
class ByReadNameSinglePassSamProgram, method makeItSo.
/**
 * Runs every supplied program over {@code input} in a single pass, grouping consecutive
 * records that share a read name into one "fragment" and delivering each fragment to
 * every program via {@code acceptFragment}.
 *
 * @param input             queryname-sorted SAM/BAM/CRAM to read
 * @param referenceSequence reference FASTA, or null for no reference lookup
 * @param assumeSorted      if true, only warn (rather than fail) when the header does not
 *                          report queryname sort order
 * @param stopAfter         stop after roughly this many records (checked at fragment
 *                          boundaries); {@code <= 0} means no limit
 * @param programs          programs to set up, feed fragments to, and finish
 * @throws FileNotFoundException if the reference cannot be opened
 * @throws PicardException       if the file is not queryname sorted and assumeSorted is false
 */
public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<ByReadNameSinglePassSamProgram> programs) throws FileNotFoundException {
// Setup the standard inputs
IOUtil.assertFileIsReadable(input);
SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
// Optionally load up the reference sequence and double check sequence dictionaries
final ReferenceLookup lookup;
if (referenceSequence == null) {
lookup = null;
} else {
IOUtil.assertFileIsReadable(referenceSequence);
// Buffered 2-bit-packed wrapper around the indexed FASTA for fast repeated lookups.
// NOTE(review): lookup is never closed in this method — confirm ownership is handed to
// the programs or that process exit is relied on to release the file handle.
lookup = new TwoBitBufferedReferenceSequenceFile(new IndexedFastaSequenceFile(referenceSequence));
if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), lookup.getSequenceDictionary());
}
}
// Check on the sort order of the BAM file
final SortOrder sort = in.getFileHeader().getSortOrder();
if (sort != SortOrder.queryname) {
if (assumeSorted) {
log.warn("File reports sort order '" + sort + "', assuming it's queryname sorted anyway.");
} else {
throw new PicardException("File " + input.getAbsolutePath() + " should be queryname sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be queryname sorted you may pass ASSUME_SORTED=true");
}
}
// Give every program the reference and the header before any records are streamed.
for (final ByReadNameSinglePassSamProgram program : programs) {
program.setReference(lookup);
program.setup(in.getFileHeader(), input);
}
final ProgressLogger progress = new ProgressLogger(log);
final SAMRecordIterator rawit = in.iterator();
// Read ahead on a background thread so parsing overlaps with program processing.
final CloseableIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(rawit, "ByReadNameSinglePassSamProgram " + input.getName());
try {
List<SAMRecord> currentRecords = new ArrayList<>();
String currentReadName = null;
while (it.hasNext()) {
SAMRecord r = it.next();
String readname = r.getReadName();
// A missing read name, or a name different from the current group, marks a fragment
// boundary: flush the accumulated records (a null name is thus treated as a
// single-record fragment, since null never equals the next name).
if (readname == null || !readname.equals(currentReadName)) {
if (currentRecords.size() > 0) {
for (final ByReadNameSinglePassSamProgram program : programs) {
program.acceptFragment(currentRecords, lookup);
}
}
currentRecords.clear();
currentReadName = readname;
// stopAfter is only honoured at fragment boundaries so fragments are never truncated;
// the just-cleared list guarantees no double flush after the loop.
if (stopAfter > 0 && progress.getCount() >= stopAfter) {
break;
}
}
currentRecords.add(r);
progress.record(r);
}
// Flush the final fragment (the loop only flushes when the *next* name differs).
if (currentRecords.size() > 0) {
for (final ByReadNameSinglePassSamProgram program : programs) {
program.acceptFragment(currentRecords, lookup);
}
}
} finally {
CloserUtil.close(it);
CloserUtil.close(rawit);
CloserUtil.close(in);
}
for (final ByReadNameSinglePassSamProgram program : programs) {
program.finish();
}
}
Use of au.edu.wehi.idsv.picard.ReferenceLookup in the project gridss by PapenfussLab:
class SingleReadEvidence, method withExactHomology.
/**
 * Widens an exact breakpoint call to the interval of positions consistent with sequence
 * homology between the two breakends, by extending base-by-base into each reference.
 * Returns the location unchanged when the call is inexact, already an interval, has
 * untemplated inserted sequence, is a plain breakend, or no reference is available.
 */
private BreakendSummary withExactHomology(BreakendSummary location) {
if (!isBreakendExact())
return location;
// even though this could be an inexact homolog
if (location.end - location.start != 0)
return location;
// tandem repeats can have both inserted sequence and sequence homology.
if (untemplated.length() > 0)
return location;
if (location instanceof BreakpointSummary) {
// Homology can only be computed when a reference lookup is available.
if (source != null && source.getContext() != null && source.getContext().getReference() != null) {
ReferenceLookup lookup = source.getContext().getReference();
BreakpointSummary bp = (BreakpointSummary) location;
int localBasesMatchingRemoteReference;
int remoteBasesMatchingLocalReference;
// The four direction combinations differ only in which way we walk each reference
// (step +1/-1) and from which end of the read bases we start comparing.
if (bp.direction == BreakendDirection.Forward) {
// anchor -> breakend
remoteBasesMatchingLocalReference = homologyLength(lookup, bp.referenceIndex, bp.nominal + 1, 1, breakendBases, 0, 1);
if (bp.direction2 == BreakendDirection.Backward) {
localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 - 1, -1, anchorBases, anchorBases.length - 1, -1);
} else {
localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 + 1, 1, anchorBases, anchorBases.length - 1, -1);
}
} else {
remoteBasesMatchingLocalReference = homologyLength(lookup, bp.referenceIndex, bp.nominal - 1, -1, breakendBases, breakendBases.length - 1, -1);
if (bp.direction2 == BreakendDirection.Forward) {
localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 + 1, 1, anchorBases, 0, 1);
} else {
localBasesMatchingRemoteReference = homologyLength(lookup, bp.referenceIndex2, bp.nominal2 - 1, -1, anchorBases, 0, 1);
}
}
// First pass: widen the breakpoint by the homology lengths on each side.
BreakpointSummary adjusted = bp.adjustPosition(localBasesMatchingRemoteReference, remoteBasesMatchingLocalReference, false);
// push the bounds back in if we overrun our contigs bounds to the homology
// NOTE(review): each clamp pairs a local-side bound (start/end) with the
// localBasesMatchingRemoteReference count and the remote side with the remote count,
// and trims by at most one base per check — confirm a one-base trim is always
// sufficient to bring the interval back inside the contig.
if (adjusted.start <= 0 && localBasesMatchingRemoteReference > 0)
localBasesMatchingRemoteReference--;
if (adjusted.start2 <= 0 && remoteBasesMatchingLocalReference > 0)
remoteBasesMatchingLocalReference--;
if (adjusted.end > lookup.getSequenceDictionary().getSequence(adjusted.referenceIndex).getSequenceLength() && localBasesMatchingRemoteReference > 0)
localBasesMatchingRemoteReference--;
if (adjusted.end2 > lookup.getSequenceDictionary().getSequence(adjusted.referenceIndex2).getSequenceLength() && remoteBasesMatchingLocalReference > 0)
remoteBasesMatchingLocalReference--;
// Second pass: re-widen from the original breakpoint with the clamped counts.
BreakpointSummary readjusted = bp.adjustPosition(localBasesMatchingRemoteReference, remoteBasesMatchingLocalReference, false);
return readjusted;
} else {
// No reference available: record that homology could not be computed.
unableToCalculateHomology = true;
return location;
}
} else {
// and is already covered by SAMRecordUtil.unclipExactReferenceMatches()
return location;
}
}
Use of au.edu.wehi.idsv.picard.ReferenceLookup in the project gridss by PapenfussLab:
class ExtractSVReadsTest, method regression_should_extract_split_read_alignments_as_group.
// @Test
// NOTE(review): @Test is commented out, so JUnit never runs this regression test even
// though it is categorised as an Hg38 test — confirm this is intentional.
@Category(Hg38Tests.class)
public void regression_should_extract_split_read_alignments_as_group() throws IOException {
    // Regression test: for the split-read group in test1.sam the extraction verdict of each
    // record is expected to match its second-of-pair flag. Requires a local hg38 reference
    // (Hg38Tests.findHg38Reference()).
    File ref = Hg38Tests.findHg38Reference();
    ReferenceLookup lookup = new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref));
    try {
        Files.copy(new File("src/test/resources/sa.split/test1.sam"), input);
        ExtractSVReads extract = new ExtractSVReads();
        extract.setReference(lookup);
        extract.MIN_CLIP_LENGTH = 4;
        extract.INSERT_SIZE_METRICS = new File("src/test/resources/sa.split/test.insert_size_metrics");
        extract.READ_PAIR_CONCORDANCE_METHOD = ReadPairConcordanceMethod.PERCENTAGE;
        extract.OUTPUT = output;
        extract.INPUT = input;
        // setup() only needs the header; close the reader before re-reading the records.
        try (SamReader reader = SamReaderFactory.make().open(input)) {
            extract.setup(reader.getFileHeader(), input);
        }
        List<SAMRecord> records = getRecords(input);
        // Per-record extraction decision; index [0] is the verdict for the record itself.
        List<Boolean> result = records.stream()
                .map(r -> extract.shouldExtract(ImmutableList.of(r), lookup)[0])
                .collect(Collectors.toList());
        // Expected pattern: extraction flag equals each record's second-of-pair flag.
        Assert.assertEquals(records.stream().map(r -> r.getSecondOfPairFlag()).collect(Collectors.toList()), result);
    } finally {
        // Close in a finally block so the reference file handle is released even when the
        // assertion (or any earlier step) fails — previously a failure leaked the lookup.
        lookup.close();
    }
}
Aggregations