Search in sources :

Example 16 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class SplitReadRealigner method createSupplementaryAlignments.

public void createSupplementaryAlignments(StreamingAligner aligner, File input, File output) throws IOException {
    SplitReadFastqExtractor rootExtractor = new SplitReadFastqExtractor(false, minSoftClipLength, minSoftClipQuality, isProcessSecondaryAlignments(), eidgen);
    SplitReadFastqExtractor recursiveExtractor = new SplitReadFastqExtractor(true, minSoftClipLength, minSoftClipQuality, false, eidgen);
    Map<String, SplitReadRealignmentInfo> realignments = new HashMap<>();
    try (SamReader reader = readerFactory.open(input)) {
        SAMFileHeader header = reader.getFileHeader().clone();
        header.setSortOrder(SortOrder.unsorted);
        try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, output)) {
            try (AsyncBufferedIterator<SAMRecord> bufferedIt = new AsyncBufferedIterator<>(reader.iterator(), input.getName())) {
                while (bufferedIt.hasNext()) {
                    SAMRecord r = bufferedIt.next();
                    processInputRecord(aligner, rootExtractor, realignments, writer, r);
                    while (aligner.hasAlignmentRecord()) {
                        processAlignmentRecord(aligner, recursiveExtractor, realignments, writer);
                    }
                }
                // flush out all realignments
                aligner.flush();
                while (aligner.hasAlignmentRecord()) {
                    // perform nested realignment
                    while (aligner.hasAlignmentRecord()) {
                        processAlignmentRecord(aligner, recursiveExtractor, realignments, writer);
                    }
                    aligner.flush();
                }
            }
        }
    }
    assert (realignments.size() == 0);
}
Also used : SamReader(htsjdk.samtools.SamReader) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 17 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class SmithWatermanFastqAligner method align.

@Override
public void align(File fastq, File output, File reference, int threads) throws IOException {
    try (ReferenceSequenceFile ref = new IndexedFastaSequenceFile(reference)) {
        SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        byte[] bases = ref.getSequence(ref.getSequenceDictionary().getSequence(referenceIndex).getSequenceName()).getBases();
        try (FastqReader reader = new FastqReader(fastq)) {
            try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
                for (FastqRecord fqr : reader) {
                    Alignment aln = aligner.align_smith_waterman(fqr.getReadString().getBytes(), bases);
                    SAMRecord r = new SAMRecord(header);
                    r.setReadName(fqr.getReadName());
                    r.setReferenceIndex(referenceIndex);
                    r.setAlignmentStart(aln.getStartPosition() + 1);
                    r.setCigarString(aln.getCigar());
                    r.setReadBases(fqr.getReadString().getBytes());
                    r.setBaseQualities(SAMUtils.fastqToPhred(fqr.getBaseQualityString()));
                    writer.addAlignment(r);
                }
            }
        }
    }
}
Also used : FastqReader(htsjdk.samtools.fastq.FastqReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) FastqRecord(htsjdk.samtools.fastq.FastqRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 18 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class ComputeSamTags method doWork.

@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    SamReaderFactory readerFactory = SamReaderFactory.make();
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
    try {
        try (SamReader reader = readerFactory.open(INPUT)) {
            SAMFileHeader header = reader.getFileHeader();
            if (!ASSUME_SORTED) {
                if (header.getSortOrder() != SortOrder.queryname) {
                    log.error("INPUT is not sorted by queryname. " + "ComputeSamTags requires that reads with the same name be sorted together. " + "If the input file satisfies this constraint (the output from many aligners do)," + " this check can be disabled with the ASSUME_SORTED option.");
                    return -1;
                }
            }
            try (SAMRecordIterator it = reader.iterator()) {
                File tmpoutput = gridss.Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(OUTPUT, "gridss.tmp.ComputeSamTags.") : OUTPUT;
                try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, tmpoutput)) {
                    compute(it, writer, getReference(), TAGS, SOFTEN_HARD_CLIPS, FIX_MATE_INFORMATION, RECALCULATE_SA_SUPPLEMENTARY, INPUT.getName() + "-");
                }
                if (tmpoutput != OUTPUT) {
                    FileHelper.move(tmpoutput, OUTPUT, true);
                }
            }
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 19 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class AssemblyEvidenceSource method assembleChunk.

private void assembleChunk(File output, int chunkNumber, QueryInterval[] qi) throws IOException {
    AssemblyIdGenerator assemblyNameGenerator = new SequentialIdGenerator(String.format("asm%d-", chunkNumber));
    String chuckName = String.format("chunk %d (%s:%d-%s:%d)", chunkNumber, getContext().getDictionary().getSequence(qi[0].referenceIndex).getSequenceName(), qi[0].start, getContext().getDictionary().getSequence(qi[qi.length - 1].referenceIndex).getSequenceName(), qi[qi.length - 1].end);
    log.info(String.format("Starting assembly on %s", chuckName));
    Stopwatch timer = Stopwatch.createStarted();
    SAMFileHeader header = getContext().getBasicSamHeader();
    // TODO: add assembly @PG header
    File filteredout = FileSystemContext.getWorkingFileFor(output, "filtered.");
    File tmpout = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.");
    try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, tmpout)) {
        if (getContext().getAssemblyParameters().writeFiltered) {
            try (SAMFileWriter filteredWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, filteredout)) {
                for (BreakendDirection direction : BreakendDirection.values()) {
                    assembleChunk(writer, filteredWriter, chunkNumber, qi, direction, assemblyNameGenerator);
                }
            }
        } else {
            for (BreakendDirection direction : BreakendDirection.values()) {
                assembleChunk(writer, null, chunkNumber, qi, direction, assemblyNameGenerator);
            }
        }
    } catch (Exception e) {
        log.error(e, "Error assembling ", chuckName);
        if (getContext().getConfig().terminateOnFirstError) {
            System.exit(1);
        }
        throw e;
    } finally {
        timer.stop();
        log.info(String.format("Completed assembly on %s in %ds (%s)", chuckName, timer.elapsed(TimeUnit.SECONDS), timer.toString()));
    }
    SAMFileUtil.sort(getContext().getFileSystemContext(), tmpout, output, SortOrder.coordinate);
    if (gridss.Defaults.DELETE_TEMPORARY_FILES) {
        tmpout.delete();
        filteredout.delete();
    }
    if (gridss.Defaults.DEFENSIVE_GC) {
        log.debug("Requesting defensive GC to ensure OS file handles are closed");
        System.gc();
        System.runFinalization();
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) Stopwatch(com.google.common.base.Stopwatch) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) IOException(java.io.IOException)

Example 20 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class Biostar154220 method doWork.

@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        List<SAMRecord> buffer = new ArrayList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // no more record or
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else // dump buffer
            {
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            throw new IOException("Cigar missing in " + rec.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consumme this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8