use of au.edu.wehi.idsv.util.AsyncBufferedIterator in project gridss by PapenfussLab.
the class SequentialCoverageAnnotator method createLookup.
/**
 * Builds one {@link SequentialReferenceCoverageLookup} per evidence source.
 *
 * <p>For every source the backing file is opened, its iterator is asserted to be coordinate
 * sorted, and the records are read through an {@link AsyncBufferedIterator} (one read-ahead
 * thread per input file) wrapped in a progress-logging iterator. The async iterator, the raw
 * iterator and the reader are appended to {@code toclose}, in that order, and each lookup is
 * registered with the context under the source file name.
 *
 * @param context processing context supplying the category count, the duplicate filtering flag
 *        and the buffer registry
 * @param sources evidence sources to create lookups for
 * @param windowSize window size passed through to each lookup
 * @return the lookups, in the same order as {@code sources}
 */
private List<ReferenceCoverageLookup> createLookup(ProcessingContext context, List<SAMEvidenceSource> sources, int windowSize) {
    final List<ReferenceCoverageLookup> lookups = new ArrayList<>();
    for (final SAMEvidenceSource source : sources) {
        assert (source.getSourceCategory() >= 0);
        assert (source.getSourceCategory() < context.getCategoryCount());

        final SamReader samReader = SamReaderFactory.makeDefault().open(source.getFile());
        final SAMRecordIterator sortedIterator = samReader.iterator();
        sortedIterator.assertSorted(SortOrder.coordinate);

        // one read-ahead thread per input file
        final CloseableIterator<SAMRecord> asyncIterator =
                new AsyncBufferedIterator<SAMRecord>(sortedIterator, source.getFile().getName() + "-Coverage");

        // Registration order matters: the async iterator is registered first so that it is
        // closed first, preventing async reading from an already closed stream.
        toclose.add(asyncIterator);
        toclose.add(sortedIterator);
        toclose.add(samReader);

        final CloseableIterator<SAMRecord> loggingIterator =
                new ProgressLoggingSAMRecordIterator(asyncIterator, new ProgressLogger(log, 10000000));
        final SequentialReferenceCoverageLookup lookup = new SequentialReferenceCoverageLookup(
                loggingIterator,
                source.getMetrics().getIdsvMetrics(),
                source.getReadPairConcordanceCalculator(),
                windowSize,
                source.getSourceCategory(),
                context.isFilterDuplicates());
        context.registerBuffer(source.getFile().getName(), lookup);
        lookups.add(lookup);
    }
    return lookups;
}
use of au.edu.wehi.idsv.util.AsyncBufferedIterator in project gridss by PapenfussLab.
the class SplitReadRealigner method createSupplementaryAlignments.
/**
 * Streams every record of {@code input} through {@code processInputRecord}, draining the
 * aligner's available alignment records after each input record, then repeatedly flushes the
 * aligner until it produces no further alignment records.
 *
 * <p>The output is written with a copy of the input header whose sort order is set to
 * {@code unsorted}.
 *
 * @param aligner streaming aligner that realignments are sent to and read back from
 * @param input file to read records from
 * @param output file the writer is created for
 * @throws IOException if reading the input or writing the output fails
 */
public void createSupplementaryAlignments(StreamingAligner aligner, File input, File output) throws IOException {
    final SplitReadFastqExtractor rootExtractor = new SplitReadFastqExtractor(
            false, minSoftClipLength, minSoftClipQuality, isProcessSecondaryAlignments(), eidgen);
    final SplitReadFastqExtractor recursiveExtractor = new SplitReadFastqExtractor(
            true, minSoftClipLength, minSoftClipQuality, false, eidgen);
    final Map<String, SplitReadRealignmentInfo> pending = new HashMap<>();

    try (SamReader reader = readerFactory.open(input)) {
        final SAMFileHeader outputHeader = reader.getFileHeader().clone();
        outputHeader.setSortOrder(SortOrder.unsorted);
        // Resources are closed in reverse order: async iterator, then writer, then reader.
        try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(outputHeader, true, output);
                AsyncBufferedIterator<SAMRecord> records = new AsyncBufferedIterator<>(reader.iterator(), input.getName())) {
            while (records.hasNext()) {
                processInputRecord(aligner, rootExtractor, pending, writer, records.next());
                while (aligner.hasAlignmentRecord()) {
                    processAlignmentRecord(aligner, recursiveExtractor, pending, writer);
                }
            }
            // flush out all realignments
            aligner.flush();
            while (aligner.hasAlignmentRecord()) {
                // perform nested realignment: processing a record may queue further work,
                // so flush again after each drain and re-check
                while (aligner.hasAlignmentRecord()) {
                    processAlignmentRecord(aligner, recursiveExtractor, pending, writer);
                }
                aligner.flush();
            }
        }
    }
    assert (pending.isEmpty());
}
use of au.edu.wehi.idsv.util.AsyncBufferedIterator in project gridss by PapenfussLab.
the class VariantCaller method callChunk.
/**
 * Calls variants for a single chunk and writes them to {@code output}.
 *
 * <p>Calls are read from a {@link VariantCallIterator} through an {@link AsyncBufferedIterator}
 * and written when their breakend quality reaches the configured minimum score or when filtered
 * calls are being written. When {@code gridss.Defaults.OUTPUT_TO_TEMP_FILE} is set, output goes
 * to a working file that is moved over {@code output} once writing has finished.
 *
 * @param output destination file
 * @param es evidence source passed to the call iterator
 * @param chunkNumber index of the chunk, used for the iterator and for log messages
 * @param chunk intervals making up the chunk; the first and last entries are used to describe
 *        the chunk in log messages
 * @throws RuntimeException wrapping the {@link IOException} if moving the working file fails
 */
private void callChunk(File output, AggregateEvidenceSource es, int chunkNumber, QueryInterval[] chunk) {
    final QueryInterval first = chunk[0];
    final QueryInterval last = chunk[chunk.length - 1];
    final String chunkMsg = String.format("chunk %d (%s:%d-%s:%d)",
            chunkNumber,
            processContext.getDictionary().getSequence(first.referenceIndex).getSequenceName(),
            first.start,
            processContext.getDictionary().getSequence(last.referenceIndex).getSequenceName(),
            last.end);
    final String msg = "calling maximal cliques in " + chunkMsg;

    final File tmp;
    if (gridss.Defaults.OUTPUT_TO_TEMP_FILE) {
        tmp = FileSystemContext.getWorkingFileFor(output);
    } else {
        tmp = output;
    }
    // Identity comparison is intentional: tmp is the very same object when no working file is used.
    final boolean usingWorkingFile = tmp != output;

    try (VariantCallIterator rawit = new VariantCallIterator(es, chunk, chunkNumber);
            VariantContextWriter vcfWriter = processContext.getVariantContextWriter(tmp, false)) {
        log.info("Start ", msg);
        try (AsyncBufferedIterator<VariantContextDirectedBreakpoint> calls = new AsyncBufferedIterator<>(rawit, "VariantCaller " + chunkMsg)) {
            while (calls.hasNext()) {
                final VariantContextDirectedBreakpoint call = calls.next();
                // If we're under min score with all possible evidence allocated, we're definitely going to fail
                // when we restrict evidence to single breakpoint support
                final boolean meetsMinScore = call.getBreakendQual() >= processContext.getVariantCallingParameters().minScore;
                if (!meetsMinScore && !processContext.getVariantCallingParameters().writeFiltered) {
                    continue;
                }
                vcfWriter.add(call);
            }
        }
    }

    if (usingWorkingFile) {
        try {
            FileHelper.move(tmp, output, true);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    log.info("Complete ", msg);

    if (gridss.Defaults.DELETE_TEMPORARY_FILES && usingWorkingFile) {
        try {
            FileHelper.delete(tmp, true);
        } catch (IOException e) {
            // Cleanup is best effort: a leftover intermediate file is only worth a warning.
            log.warn(e, "Error removing intermediate file ", tmp.getAbsolutePath());
        }
    }

    if (gridss.Defaults.DEFENSIVE_GC) {
        log.debug("Requesting defensive GC to ensure OS file handles are closed");
        System.gc();
        System.runFinalization();
    }
}
use of au.edu.wehi.idsv.util.AsyncBufferedIterator in project gridss by PapenfussLab.
the class ComputeSamTags method compute.
/**
 * Streams records from {@code rawit} through the requested tag-computing iterators and writes
 * every resulting record to {@code writer}.
 *
 * <p>The pipeline is: an asynchronous "raw" stage; then, if the NM or SA tag is requested, an
 * asynchronous "nm" stage feeding a {@code NmTagIterator}; then, if any template tag is
 * requested or {@code softenHardClips} is set, a {@code TemplateTagsIterator} feeding an
 * asynchronous "tags" stage.
 *
 * <p>Every {@link AsyncBufferedIterator} stage created here is closed when this method exits,
 * whether normally or because of an exception. Stages are closed downstream-first ("tags",
 * then "nm", then "raw") so that an upstream stage is not closed while a downstream stage
 * could still be reading from it.
 *
 * @param rawit source of the records to process
 * @param writer destination for the processed records; it is not closed by this method
 * @param reference reference lookup handed to the {@code NmTagIterator}
 * @param tags names of the SAM tags to compute
 * @param softenHardClips passed to the {@code TemplateTagsIterator}; also enables that stage
 * @param fixMates passed to the {@code TemplateTagsIterator}
 * @param recalculateSupplementary passed to the {@code TemplateTagsIterator}
 * @param threadprefix prefix of the description given to each asynchronous stage
 * @throws IOException declared as part of the existing method signature
 */
public static void compute(Iterator<SAMRecord> rawit, SAMFileWriter writer, ReferenceLookup reference, Set<String> tags, boolean softenHardClips, boolean fixMates, boolean recalculateSupplementary, String threadprefix) throws IOException {
    ProgressLogger progress = new ProgressLogger(log);
    try (CloseableIterator<SAMRecord> aysncit = new AsyncBufferedIterator<SAMRecord>(rawit, threadprefix + "raw")) {
        final boolean computeNm = tags.contains(SAMTag.NM.name()) || tags.contains(SAMTag.SA.name());
        // A null resource is legal in try-with-resources and is simply skipped on close,
        // so a disabled stage costs nothing.
        try (CloseableIterator<SAMRecord> nmAsync = computeNm
                ? new AsyncBufferedIterator<SAMRecord>(aysncit, threadprefix + "nm")
                : null) {
            Iterator<SAMRecord> it = aysncit;
            if (computeNm) {
                it = new NmTagIterator(nmAsync, reference);
            }
            final boolean computeTemplateTags = !Sets.intersection(tags, SAMRecordUtil.TEMPLATE_TAGS).isEmpty() || softenHardClips;
            try (CloseableIterator<SAMRecord> tagsAsync = computeTemplateTags
                    ? new AsyncBufferedIterator<SAMRecord>(
                            new TemplateTagsIterator(it, softenHardClips, fixMates, recalculateSupplementary, tags),
                            threadprefix + "tags")
                    : null) {
                if (computeTemplateTags) {
                    it = tagsAsync;
                }
                while (it.hasNext()) {
                    SAMRecord r = it.next();
                    writer.addAlignment(r);
                    progress.record(r);
                }
            }
        }
    }
}
use of au.edu.wehi.idsv.util.AsyncBufferedIterator in project gridss by PapenfussLab.
the class ByReadNameSinglePassSamProgram method makeItSo.
/**
 * Runs every program over {@code input} in a single pass, handing each program the records
 * grouped by read name.
 *
 * <p>Consecutive records sharing a read name form one fragment and are passed to
 * {@code acceptFragment} of every program. A record whose read name is {@code null} always
 * starts a new fragment. After the input has been closed, {@code finish()} is called on every
 * program.
 *
 * <p>The input reader is closed on every exit path, including failures during validation or
 * program setup.
 *
 * @param input file to read; must be readable
 * @param referenceSequence reference file, or {@code null} to run without a reference lookup
 * @param assumeSorted when {@code true}, a header sort order other than queryname only produces
 *        a warning instead of an exception
 * @param stopAfter when positive, processing stops at the first read name boundary at which the
 *        number of records recorded so far is at least this value
 * @param programs programs to set up, feed and finish
 * @throws FileNotFoundException declared for the construction of the reference sequence file
 * @throws PicardException if the header sort order is not queryname and {@code assumeSorted}
 *         is {@code false}
 */
public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<ByReadNameSinglePassSamProgram> programs) throws FileNotFoundException {
    // Setup the standard inputs
    IOUtil.assertFileIsReadable(input);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    try {
        // Optionally load up the reference sequence and double check sequence dictionaries
        final ReferenceLookup lookup;
        if (referenceSequence == null) {
            lookup = null;
        } else {
            IOUtil.assertFileIsReadable(referenceSequence);
            lookup = new TwoBitBufferedReferenceSequenceFile(new IndexedFastaSequenceFile(referenceSequence));
            if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
                SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), lookup.getSequenceDictionary());
            }
        }
        // Check on the sort order of the BAM file
        final SortOrder sort = in.getFileHeader().getSortOrder();
        if (sort != SortOrder.queryname) {
            if (assumeSorted) {
                log.warn("File reports sort order '" + sort + "', assuming it's queryname sorted anyway.");
            } else {
                throw new PicardException("File " + input.getAbsolutePath() + " should be queryname sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be queryname sorted you may pass ASSUME_SORTED=true");
            }
        }
        for (final ByReadNameSinglePassSamProgram program : programs) {
            program.setReference(lookup);
            program.setup(in.getFileHeader(), input);
        }
        final ProgressLogger progress = new ProgressLogger(log);
        final SAMRecordIterator rawit = in.iterator();
        final CloseableIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(rawit, "ByReadNameSinglePassSamProgram " + input.getName());
        try {
            List<SAMRecord> currentRecords = new ArrayList<>();
            String currentReadName = null;
            while (it.hasNext()) {
                SAMRecord r = it.next();
                String readname = r.getReadName();
                // if the read name is null we have to just treat it as a single read
                if (readname == null || !readname.equals(currentReadName)) {
                    if (currentRecords.size() > 0) {
                        for (final ByReadNameSinglePassSamProgram program : programs) {
                            program.acceptFragment(currentRecords, lookup);
                        }
                    }
                    currentRecords.clear();
                    currentReadName = readname;
                    // The limit is only checked at a read name boundary so that a fragment
                    // is never split; the record that triggered the boundary is dropped.
                    if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                        break;
                    }
                }
                currentRecords.add(r);
                progress.record(r);
            }
            // Emit the final fragment, which has no following read name change to trigger it.
            if (currentRecords.size() > 0) {
                for (final ByReadNameSinglePassSamProgram program : programs) {
                    program.acceptFragment(currentRecords, lookup);
                }
            }
        } finally {
            // Close the async iterator before the iterator it reads from.
            CloserUtil.close(it);
            CloserUtil.close(rawit);
        }
    } finally {
        // Also reached when validation or program setup fails before iteration starts.
        CloserUtil.close(in);
    }
    for (final ByReadNameSinglePassSamProgram program : programs) {
        program.finish();
    }
}
Aggregations