Search in sources :

Example 46 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamLiftOver method doWork.

@Override
public int doWork(final List<String> args) {
    final double minMatch = (this.userMinMatch <= 0.0 ? LiftOver.DEFAULT_LIFTOVER_MINMATCH : this.userMinMatch);
    if (this.liftOverFile == null) {
        LOG.error("LiftOver file is undefined.");
        return -1;
    }
    if (this.faidx == null) {
        LOG.error("New Sequence Dictionary file is undefined.");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        LOG.info("Reading " + liftOverFile);
        LiftOver liftOver = new LiftOver(liftOverFile);
        liftOver.setLiftOverMinMatch(minMatch);
        final SAMSequenceDictionary newDict = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
        sfr = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader headerIn = sfr.getFileHeader();
        final SAMFileHeader headerOut = headerIn.clone();
        headerOut.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, headerOut, true);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            final SAMRecord copy = (SAMRecord) rec.clone();
            copy.setHeader(headerOut);
            final StringBuilder sb = new StringBuilder();
            if (!rec.getReadUnmappedFlag()) {
                final String chrom = rec.getReferenceName();
                int pos = rec.getAlignmentStart();
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getReadNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getReadNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setReferenceName(ssr.getSequenceName());
                    copy.setReferenceIndex(ssr.getSequenceIndex());
                    copy.setAlignmentStart(interval.getStart());
                    copy.setReadNegativeStrandFlag(interval.isNegativeStrand());
                    if (rec.getReadNegativeStrandFlag() != copy.getReadNegativeStrandFlag()) {
                        copy.setReadString(AcidNucleics.reverseComplement(rec.getReadString()));
                        byte[] qual = rec.getBaseQualities();
                        byte[] quals2 = new byte[qual.length];
                        for (int i = 0; i < qual.length; ++i) {
                            quals2[i] = qual[(qual.length - 1) - i];
                        }
                        copy.setBaseQualities(quals2);
                    }
                } else {
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                sb.append("/");
                String chrom = rec.getMateReferenceName();
                int pos = rec.getMateAlignmentStart();
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getMateNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getMateNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setMateReferenceName(ssr.getSequenceName());
                    copy.setMateReferenceIndex(ssr.getSequenceIndex());
                    copy.setMateAlignmentStart(interval.getStart());
                    copy.setMateNegativeStrandFlag(interval.isNegativeStrand());
                    if (!copy.getReadUnmappedFlag() && copy.getReferenceIndex() == copy.getMateReferenceIndex()) // && copy.getReadNegativeStrandFlag()!=copy.getMateNegativeStrandFlag()
                    {
                    // don't change ?
                    } else {
                        copy.setProperPairFlag(false);
                        copy.setInferredInsertSize(0);
                    }
                } else {
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            if (sb.length() > 0)
                copy.setAttribute("LO", sb.toString());
            sfw.addAlignment(copy);
        }
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 47 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SamSlop method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // /remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // /append + soft clip 5'
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // preprend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // /remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 48 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SamToJson method call.

private int call(final String inputName) throws Exception {
    PrintWriter out = null;
    SamReader sfr = null;
    final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
    SAMFileWriter swf = null;
    try {
        sfr = super.openSamReader(inputName);
        out = super.openFileOrStdoutAsPrintWriter(this.output);
        swf = factory.open(sfr.getFileHeader(), out);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext() && !out.checkError()) {
            swf.addAlignment(iter.next());
        }
        iter.close();
        out.flush();
        swf.close();
        swf = null;
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(swf);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamJsonWriterFactory(com.github.lindenb.jvarkit.util.samtools.SamJsonWriterFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) PrintWriter(java.io.PrintWriter)

Example 49 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SortSamRefName method doWork.

@Override
public int doWork(List<String> args) {
    SamReader in = null;
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    CloseableIterator<SAMRecord> iter2 = null;
    SortingCollection<SAMRecord> sorter = null;
    try {
        in = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = in.getFileHeader();
        final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(header);
        final RefNameComparator refNameComparator = new RefNameComparator();
        sorter = SortingCollection.newInstance(SAMRecord.class, bamRecordCodec, refNameComparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorter.setDestructiveIteration(true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = in.iterator();
        while (iter.hasNext()) {
            sorter.add(progress.watch(iter.next()));
        }
        in.close();
        in = null;
        sorter.doneAdding();
        final SAMFileHeader header2 = header.clone();
        header2.addComment(getProgramName() + " " + getVersion() + " " + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        out = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
        iter2 = sorter.iterator();
        while (iter2.hasNext()) {
            out.addAlignment(iter2.next());
        }
        out.close();
        out = null;
        sorter.cleanup();
        progress.finish();
        LOG.info("done");
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter2);
        CloserUtil.close(iter);
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IOException(java.io.IOException)

Example 50 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Sam2Tsv method scan.

private void scan(final SamReader r) {
    final Row row = new Row();
    SAMRecordIterator iter = null;
    try {
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader());
        iter = r.iterator();
        while (iter.hasNext()) {
            row.rec = progress.watch(iter.next());
            printAln(row);
            if (this.out.checkError())
                break;
        }
        progress.finish();
    } catch (final Exception err) {
        LOG.error("scan error:", err);
        throw new RuntimeException(String.valueOf(err.getMessage()), err);
    } finally {
        CloserUtil.close(iter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14