Search in sources :

Example 76 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SamMaskAlignedBases method doWork.

@Override
public int doWork(List<String> args) {
    final byte RESET_CHAR = (byte) 'N';
    final byte RESET_QUAL = (byte) SAMUtils.fastqToPhred('#');
    long nRecords = 0L;
    long nRecordMasked = 0L;
    long nBasesMasked = 0L;
    long nBases = 0L;
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            ++nRecords;
            nBases += rec.getReadLength();
            if (rec.getReadUnmappedFlag()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            if (rec.isSecondaryOrSupplementary()) {
                continue;
            }
            final Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            byte[] quals = rec.getBaseQualities();
            if (cigar == null || cigar.isEmpty()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            int readpos = 0;
            for (final CigarElement ce : cigar) {
                final CigarOperator op = ce.getOperator();
                if (op.consumesReadBases()) {
                    if (op.consumesReferenceBases()) {
                        for (int x = 0; x < ce.getLength(); ++x) {
                            if (bases != null)
                                bases[readpos + x] = RESET_CHAR;
                            if (quals != null)
                                quals[readpos + x] = RESET_QUAL;
                            ++nBasesMasked;
                        }
                    }
                    readpos += ce.getLength();
                }
            }
            ++nRecordMasked;
            SAMUtils.makeReadUnmapped(rec);
            sfw.addAlignment(rec);
        }
        iter.close();
        sfw.close();
        progress.finish();
        LOG.info("done : reads masked " + nRecordMasked + "/" + nRecords + " Bases masked:" + nBasesMasked + "/" + nBases);
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 77 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SamRetrieveSeqAndQual method doWork.

@Override
public int doWork(List<String> args) {
    FastqReader[] fastqReaders = null;
    SamReader samReader = null;
    SAMFileWriter samWriter = null;
    SAMRecordIterator iter = null;
    try {
        if (fastqFin == null) {
            LOG.error("undefined fastq file");
            return -1;
        } else {
            LOG.info("opening " + fastqFin);
            FastqReader r1 = new FastqReader(fastqFin);
            if (fastqRin == null) {
                fastqReaders = new FastqReader[] { r1 };
            } else {
                LOG.info("opening " + fastqRin);
                FastqReader r2 = new FastqReader(fastqRin);
                fastqReaders = new FastqReader[] { r1, r2 };
            }
        }
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
        if (sortOrder == null) {
            LOG.warning("undefined sort order read are in the sam order");
        } else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
            LOG.error("Bad Sort Order. Sort this input on read name");
            return -1;
        }
        SAMFileHeader header = samReader.getFileHeader().clone();
        SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getVersion());
        samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        FastqRecord[] currFastq = new FastqRecord[] { null, null };
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            String readName = rec.getReadName();
            int fastq_index = 0;
            if (rec.getReadPairedFlag()) {
                if (fastqReaders.length != 2) {
                    LOG.error("Not paired but number of fastq!=2");
                    return -1;
                }
                fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
            } else {
                if (fastqReaders.length != 1) {
                    LOG.error("Not paired but number of fastq!=1");
                    return -1;
                }
                fastq_index = 0;
            }
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
                    if (!fastqReaders[fastq_index].hasNext()) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                    currFastq[fastq_index] = fastqReaders[fastq_index].next();
                    if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                }
            } else {
                if (!fastqReaders[fastq_index].hasNext()) {
                    LOG.error("Read Missing for " + readName);
                    return -1;
                }
                currFastq[fastq_index] = fastqReaders[fastq_index].next();
            }
            if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
                LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
                return -1;
            }
            String fastqBases = currFastq[fastq_index].getReadString();
            String fastqQuals = currFastq[fastq_index].getBaseQualityString();
            /* handle orientation */
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                fastqBases = AcidNucleics.reverseComplement(fastqBases);
                StringBuilder sb = new StringBuilder(fastqQuals.length());
                for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
                fastqQuals = sb.toString();
            }
            /* remove hard clip */
            Cigar cigar = rec.getCigar();
            if (cigar != null) {
                List<CigarElement> ceList = cigar.getCigarElements();
                if (!ceList.isEmpty()) {
                    CigarElement ce = ceList.get(ceList.size() - 1);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
                        fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
                    }
                    ce = ceList.get(0);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(ce.getLength());
                        fastqQuals = fastqQuals.substring(ce.getLength());
                    }
                }
            }
            rec.setBaseQualityString(fastqQuals);
            rec.setReadString(fastqBases);
            samWriter.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqReaders != null)
            for (FastqReader r : fastqReaders) CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FastqRecord(htsjdk.samtools.fastq.FastqRecord) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) FastqReader(htsjdk.samtools.fastq.FastqReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 78 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SamToPsl method scan.

private void scan(final SamReader in) {
    final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing...");
    final SAMRecordIterator iter = in.iterator();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext() && !this.out.checkError()) {
        final SAMRecord rec = progress.watch(iter.next());
        if (rec.getReadUnmappedFlag())
            continue;
        for (final PslAlign a : makePslAlign(rec, dict)) {
            out.println(toString(a, rec));
        }
    }
    progress.finish();
    iter.close();
}
Also used : PslAlign(com.github.lindenb.jvarkit.util.ucsc.PslAlign) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 79 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class FindNewSpliceSites method scan.

private void scan(SamReader in) {
    SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing");
    SAMRecordIterator iter = in.iterator();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadUnmappedFlag())
            continue;
        if (rec.isSecondaryOrSupplementary())
            continue;
        progress.watch(rec);
        if (isWeird(rec, dict)) {
            this.weird.addAlignment(rec);
            continue;
        }
        for (CigarElement ce : rec.getCigar().getCigarElements()) {
            if (ce.getOperator().equals(CigarOperator.N)) {
                scanRead(rec, dict);
                break;
            }
        }
    }
    iter.close();
    progress.finish();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement)

Example 80 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class SAM4WebLogo method doWork.

@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.regionStr)) {
        LOG.error("Undefined interval.");
        return -1;
    }
    final IntervalParser intervalParser = new IntervalParser().setFixContigName(true);
    PrintWriter out = null;
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    try {
        out = super.openFileOrStdoutAsPrintWriter(outputFile);
        for (final String inputName : IOUtils.unrollFiles(args)) {
            samReader = openSamReader(inputName);
            final Interval interval = intervalParser.setDictionary(samReader.getFileHeader().getSequenceDictionary()).parse(this.regionStr);
            if (interval == null) {
                LOG.error("Bad interval " + this.regionStr);
                return -1;
            }
            iter = samReader.query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.SamRecordFilter.filterOut(rec))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.isEmpty())
                    continue;
                if (!rec.getReferenceName().equals(interval.getContig()))
                    continue;
                if (this.readEnd.apply(rec) < interval.getStart())
                    continue;
                if (this.readStart.apply(rec) > interval.getEnd())
                    continue;
                printRead(out, rec, interval);
            }
            iter.close();
            iter = null;
            samReader.close();
            samReader = null;
        }
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) Cigar(htsjdk.samtools.Cigar) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMRecord(htsjdk.samtools.SAMRecord) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14