Search in sources :

Example 31 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamAddPI method doWork.

@Override
public int doWork(final List<String> args) {
    final Map<String, List<Integer>> rg2insertsize = new HashMap<>();
    SamReader sfr = null;
    SamReader sfrTmp = null;
    SAMFileWriter sfw = null;
    File tmpBam = null;
    SAMFileWriter tmpBamWriter = null;
    SAMFileWriter outWriter = null;
    CloseableIterator<SAMRecord> iter = null;
    CloseableIterator<SAMRecord> iterTmp = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            if (!overwrite_existing && rg.getPredictedMedianInsertSize() != null) {
                continue;
            }
            rg2insertsize.put(rg.getId(), new ArrayList<>(num_read_to_test < 1L ? 10000 : num_read_to_test));
        }
        tmpBam = File.createTempFile("__addpi", ".bam");
        tmpBamWriter = this.writingBamArgs.openSAMFileWriter(tmpBam, header, true);
        iter = sfr.iterator();
        int n_processed = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext() && (this.num_read_to_test < 0 || n_processed < this.num_read_to_test)) {
            final SAMRecord rec = progress.watch(iter.next());
            tmpBamWriter.addAlignment(rec);
            final SAMReadGroupRecord rg = rec.getReadGroup();
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null)
                continue;
            if (rec.getReadUnmappedFlag())
                continue;
            if (!rec.getReadPairedFlag())
                continue;
            if (!rec.getFirstOfPairFlag())
                continue;
            if (rec.getMateUnmappedFlag())
                continue;
            if (this.samRecordFilter.filterOut(rec))
                continue;
            final int len = rec.getInferredInsertSize();
            if (len == 0)
                continue;
            insertlist.add(Math.abs(len));
            ++n_processed;
        }
        tmpBamWriter.close();
        tmpBamWriter = null;
        // reopen tmp file
        sfrTmp = super.createSamReaderFactory().open(tmpBam);
        iterTmp = sfrTmp.iterator();
        // update dMedianInsertSize
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null || insertlist.isEmpty())
                continue;
            rg.setPredictedMedianInsertSize((int) Percentile.median().evaluate(insertlist.stream().mapToDouble(I -> I.doubleValue())));
        }
        header.addComment("Processed with " + getClass().getSimpleName() + " " + getProgramCommandLine());
        outWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        while (iterTmp.hasNext()) {
            outWriter.addAlignment(iterTmp.next());
        }
        iterTmp.close();
        iterTmp = null;
        sfrTmp.close();
        sfrTmp = null;
        tmpBam.delete();
        // finish writing original input
        while (iter.hasNext()) {
            outWriter.addAlignment(progress.watch(iter.next()));
        }
        progress.finish();
        iter.close();
        iter = null;
        sfr.close();
        sfr = null;
        outWriter.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(tmpBamWriter);
        if (tmpBam != null)
            tmpBam.delete();
        CloserUtil.close(outWriter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 32 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class ConcatSam method doWork.

@Override
public int doWork(final List<String> args) {
    SAMFileWriter out = null;
    ConcatSamIterator iter = null;
    try {
        final Factory factory = new Factory().setConcatenate(!this.merging);
        if (!StringUtil.isBlank(this.region_str)) {
            factory.addInterval(region_str);
        }
        iter = factory.open(args);
        out = this.writingBamArgs.openSAMFileWriter(outputFile, iter.getFileHeader(), true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(iter.getFileHeader()).logger(LOG);
        while (iter.hasNext()) {
            out.addAlignment(progress.watch(iter.next()));
        }
        iter.close();
        iter = null;
        out.close();
        out = null;
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(iter);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMException(htsjdk.samtools.SAMException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Example 33 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class BamLiftOver method doWork.

@Override
public int doWork(final List<String> args) {
    final double minMatch = (this.userMinMatch <= 0.0 ? LiftOver.DEFAULT_LIFTOVER_MINMATCH : this.userMinMatch);
    if (this.liftOverFile == null) {
        LOG.error("LiftOver file is undefined.");
        return -1;
    }
    if (this.faidx == null) {
        LOG.error("New Sequence Dictionary file is undefined.");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        LOG.info("Reading " + liftOverFile);
        LiftOver liftOver = new LiftOver(liftOverFile);
        liftOver.setLiftOverMinMatch(minMatch);
        final SAMSequenceDictionary newDict = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
        sfr = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader headerIn = sfr.getFileHeader();
        final SAMFileHeader headerOut = headerIn.clone();
        headerOut.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, headerOut, true);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            final SAMRecord copy = (SAMRecord) rec.clone();
            copy.setHeader(headerOut);
            final StringBuilder sb = new StringBuilder();
            if (!rec.getReadUnmappedFlag()) {
                final String chrom = rec.getReferenceName();
                int pos = rec.getAlignmentStart();
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getReadNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getReadNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setReferenceName(ssr.getSequenceName());
                    copy.setReferenceIndex(ssr.getSequenceIndex());
                    copy.setAlignmentStart(interval.getStart());
                    copy.setReadNegativeStrandFlag(interval.isNegativeStrand());
                    if (rec.getReadNegativeStrandFlag() != copy.getReadNegativeStrandFlag()) {
                        copy.setReadString(AcidNucleics.reverseComplement(rec.getReadString()));
                        byte[] qual = rec.getBaseQualities();
                        byte[] quals2 = new byte[qual.length];
                        for (int i = 0; i < qual.length; ++i) {
                            quals2[i] = qual[(qual.length - 1) - i];
                        }
                        copy.setBaseQualities(quals2);
                    }
                } else {
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                sb.append("/");
                String chrom = rec.getMateReferenceName();
                int pos = rec.getMateAlignmentStart();
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getMateNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getMateNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setMateReferenceName(ssr.getSequenceName());
                    copy.setMateReferenceIndex(ssr.getSequenceIndex());
                    copy.setMateAlignmentStart(interval.getStart());
                    copy.setMateNegativeStrandFlag(interval.isNegativeStrand());
                    if (!copy.getReadUnmappedFlag() && copy.getReferenceIndex() == copy.getMateReferenceIndex()) // && copy.getReadNegativeStrandFlag()!=copy.getMateNegativeStrandFlag()
                    {
                    // don't change ?
                    } else {
                        copy.setProperPairFlag(false);
                        copy.setInferredInsertSize(0);
                    }
                } else {
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            if (sb.length() > 0)
                copy.setAttribute("LO", sb.toString());
            sfw.addAlignment(copy);
        }
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 34 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamSlop method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // /remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // /append + soft clip 5'
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // preprend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // /remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 35 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamToJson method call.

private int call(final String inputName) throws Exception {
    PrintWriter out = null;
    SamReader sfr = null;
    final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
    SAMFileWriter swf = null;
    try {
        sfr = super.openSamReader(inputName);
        out = super.openFileOrStdoutAsPrintWriter(this.output);
        swf = factory.open(sfr.getFileHeader(), out);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext() && !out.checkError()) {
            swf.addAlignment(iter.next());
        }
        iter.close();
        out.flush();
        swf.close();
        swf = null;
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(swf);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamJsonWriterFactory(com.github.lindenb.jvarkit.util.samtools.SamJsonWriterFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) PrintWriter(java.io.PrintWriter)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8