Search in sources :

Example 31 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar76892 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        sfw = writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
        long nRecords = 0;
        final List<SAMRecord> buffer = new ArrayList<SAMRecord>();
        SAMRecordIterator iter = sfr.iterator();
        for (; ; ) {
            SAMRecord rec = null;
            // get next record
            if (iter.hasNext()) {
                rec = iter.next();
                ++nRecords;
                if (nRecords % 1000000 == 0)
                    LOG.info("records: " + nRecords);
                if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || rec.getProperPairFlag() || rec.getReferenceIndex() != rec.getMateReferenceIndex() || rec.getReadNegativeStrandFlag() == !rec.getMateNegativeStrandFlag()) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(rec);
                    continue;
                }
            }
            if (rec != null) {
                int i = 0;
                // cleanup buffer
                int mate_index = -1;
                while (i < buffer.size()) {
                    SAMRecord prev = buffer.get(i);
                    if (prev.getReferenceIndex() != rec.getReferenceIndex() || prev.getAlignmentEnd() + distance < rec.getAlignmentStart()) {
                        if (!onlySaveFixed)
                            sfw.addAlignment(prev);
                        buffer.remove(i);
                    } else if (prev.getReadName().equals(rec.getReadName()) && ((prev.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) || (rec.getFirstOfPairFlag() && prev.getSecondOfPairFlag()))) {
                        mate_index = i;
                        ++i;
                    } else {
                        ++i;
                    }
                }
                if (mate_index == -1) {
                    buffer.add(rec);
                } else {
                    final SAMRecord mate = buffer.get(mate_index);
                    buffer.remove(mate_index);
                    LOG.info("changing " + rec + " " + mate);
                    if (mate.getReadNegativeStrandFlag()) {
                        mate.setReadNegativeStrandFlag(false);
                        rec.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
                    } else {
                        rec.setReadNegativeStrandFlag(false);
                        mate.setMateNegativeStrandFlag(rec.getReadNegativeStrandFlag());
                    }
                    if (!mate.getReadFailsVendorQualityCheckFlag() && !rec.getReadFailsVendorQualityCheckFlag()) {
                        mate.setProperPairFlag(true);
                        rec.setProperPairFlag(true);
                    }
                    mate.setAttribute("rv", 1);
                    rec.setAttribute("rv", 1);
                    sfw.addAlignment(mate);
                    sfw.addAlignment(rec);
                }
            } else {
                for (final SAMRecord r : buffer) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(r);
                }
                break;
            }
        }
        LOG.info("done");
        sfw.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList)

Example 32 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar78400 method doWork.

@Override
public int doWork(List<String> args) {
    if (this.XML == null) {
        LOG.error("XML file missing");
        return -1;
    }
    final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
        final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
        if (rgl.flowcells.isEmpty()) {
            LOG.error("empty XML " + XML);
            return -1;
        }
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader().clone();
        header.addComment("Processed with " + getProgramName());
        final Set<String> seenids = new HashSet<String>();
        final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
        for (final FlowCell fc : rgl.flowcells) {
            final Map<Integer, String> lane2id = new HashMap<Integer, String>();
            for (final Lane lane : fc.lanes) {
                for (final ReadGroup rg : lane.readGroups) {
                    if (seenids.contains(rg.id)) {
                        LOG.error("Group id " + rg.id + " defined twice");
                        return -1;
                    }
                    seenids.add(rg.id);
                    // create the read group we'll be using
                    final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
                    rgrec.setLibrary(rg.library);
                    rgrec.setPlatform(rg.platform);
                    rgrec.setSample(rg.sample);
                    rgrec.setPlatformUnit(rg.platformunit);
                    if (rg.center != null)
                        rgrec.setSequencingCenter(rg.center);
                    if (rg.description != null)
                        rgrec.setDescription(rg.description);
                    lane2id.put(lane.id, rg.id);
                    samReadGroupRecords.add(rgrec);
                }
            }
            if (flowcell2lane2id.containsKey(fc.name)) {
                LOG.error("FlowCell id " + fc.name + " defined twice in XML");
                return -1;
            }
            flowcell2lane2id.put(fc.name, lane2id);
        }
        header.setReadGroups(samReadGroupRecords);
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Matcher matcher = readNameSignature.matcher(rec.getReadName());
            final String flowcellStr;
            final String laneStr;
            if (matcher.matches()) {
                flowcellStr = matcher.group(1);
                laneStr = matcher.group(2);
            } else {
                LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
                return -1;
            }
            String RGID = null;
            final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
            if (lane2id == null)
                throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
            try {
                RGID = lane2id.get(Integer.parseInt(laneStr));
            } catch (final Exception e) {
                LOG.error("bad lane-Id in " + rec.getReadName());
                return -1;
            }
            if (RGID == null) {
                throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
            }
            rec.setAttribute(SAMTag.RG.name(), RGID);
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) SamReader(htsjdk.samtools.SamReader) Unmarshaller(javax.xml.bind.Unmarshaller) HashSet(java.util.HashSet) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) StreamSource(javax.xml.transform.stream.StreamSource) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Example 33 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class ExtendReferenceWithReads method scanRegion.

/**
 *scanRegion
 * @param contig    Reference sequence of interest.
 * @param start     0-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
 * @param end       0-based, exclusive end of interval of interest. Zero implies end of the reference sequence.
 */
private Map<Integer, Counter<Byte>> scanRegion(final SAMSequenceRecord contig, final int chromStart, final int chromEnd, final Rescue rescue) {
    final Map<Integer, Counter<Byte>> pos2bases = new HashMap<>(1 + chromEnd - chromStart);
    LOG.info("Scanning :" + contig.getSequenceName() + ":" + chromStart + "-" + chromEnd);
    for (int side = 0; side < 2; ++side) {
        if (// 5' or 3'
        !rescue.equals(Rescue.CENTER) && side > 0) {
            // already done
            break;
        }
        for (final SamReader samReader : samReaders) {
            final SAMSequenceDictionary dict2 = samReader.getFileHeader().getSequenceDictionary();
            final SAMSequenceRecord ssr2 = dict2.getSequence(contig.getSequenceName());
            if (ssr2 == null || ssr2.getSequenceLength() != contig.getSequenceLength()) {
                LOG.info("No contig " + contig.getSequenceName() + " with L=" + contig.getSequenceLength() + " bases in " + samReader.getResourceDescription());
                continue;
            }
            int mappedPos = -1;
            switch(rescue) {
                case LEFT:
                    mappedPos = 1;
                    break;
                case RIGHT:
                    mappedPos = contig.getSequenceLength();
                    break;
                case CENTER:
                    mappedPos = (side == 0 ? chromStart + 1 : chromEnd);
                    break;
                default:
                    throw new IllegalStateException();
            }
            final SAMRecordIterator iter = samReader.query(contig.getSequenceName(), mappedPos, mappedPos, false);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filter.filterOut(rec))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                final byte[] bases = rec.getReadBases();
                if (bases == null || bases.length == 0)
                    continue;
                int refPos1 = rec.getUnclippedStart();
                int readpos = 0;
                for (final CigarElement ce : cigar) {
                    final CigarOperator op = ce.getOperator();
                    for (int L = 0; L < ce.getLength(); ++L) {
                        if (op.consumesReadBases()) {
                            Counter<Byte> count = pos2bases.get(refPos1 - 1);
                            if (count == null) {
                                count = new Counter<>();
                                pos2bases.put(refPos1 - 1, count);
                            }
                            count.incr((byte) Character.toLowerCase(bases[readpos]));
                            readpos++;
                        }
                        if (op.consumesReferenceBases()) {
                            refPos1++;
                        }
                    }
                }
            }
            iter.close();
        }
    }
    return pos2bases;
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord)

Example 34 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamToFastq method doWork.

@Override
public int doWork(List<String> args) {
    SamReader sfr = null;
    SortingCollection<MappedFastq> fastqCollection = null;
    try {
        boolean found_single = false;
        boolean found_paired = false;
        long non_primary_alignmaned_flag = 0L;
        sfr = super.openSamReader(oneFileOrNull(args));
        fastqCollection = SortingCollection.newInstance(MappedFastq.class, new MappedFastqCodec(), new MappedFastqComparator(), this.maxRecordsInRam, this.tmpDir.toPath());
        fastqCollection.setDestructiveIteration(true);
        SAMRecordIterator iter = sfr.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.isSecondaryOrSupplementary()) {
                if (non_primary_alignmaned_flag == 0) {
                    LOG.warn("SKIPPING NON-PRIMARY " + (non_primary_alignmaned_flag + 1) + " ALIGNMENTS");
                }
                non_primary_alignmaned_flag++;
                continue;
            }
            MappedFastq m = new MappedFastq();
            m.name = rec.getReadName();
            if (m.name == null)
                m.name = "";
            m.seq = rec.getReadString();
            if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING))
                m.seq = "";
            m.qual = rec.getBaseQualityString();
            if (m.qual.equals(SAMRecord.NULL_QUALS_STRING))
                m.qual = "";
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                m.seq = AcidNucleics.reverseComplement(m.seq);
                m.qual = new StringBuilder(m.qual).reverse().toString();
            }
            if (m.seq.length() != m.qual.length()) {
                LOG.error("length(seq)!=length(qual) in " + m.name);
                continue;
            }
            if (m.seq.isEmpty() && m.qual.isEmpty()) {
                m.seq = "N";
                m.qual = "#";
            }
            if (rec.getReadPairedFlag()) {
                found_paired = true;
                if (found_single) {
                    sfr.close();
                    throw new RuntimeException("input is a mix of paired/singled reads");
                }
                m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1);
            } else {
                found_single = true;
                if (found_paired) {
                    sfr.close();
                    throw new RuntimeException("input is a mix of paired/singled reads");
                }
                m.side = (byte) 0;
            }
            fastqCollection.add(m);
        }
        iter.close();
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        progress.finish();
        fastqCollection.doneAdding();
        LOG.info("Done reading.");
        if (found_paired) {
            FastqWriter fqw1 = null;
            FastqWriter fqw2 = null;
            if (forwardFile != null) {
                LOG.info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                LOG.info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
            }
            if (reverseFile != null) {
                LOG.info("Writing to " + reverseFile);
                fqw2 = new BasicFastqWriter(reverseFile);
            } else {
                LOG.info("Writing to interlaced stdout");
                fqw2 = fqw1;
            }
            List<MappedFastq> row = new ArrayList<MappedFastq>();
            CloseableIterator<MappedFastq> r = fastqCollection.iterator();
            for (; ; ) {
                MappedFastq curr = null;
                if (r.hasNext())
                    curr = r.next();
                if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) {
                    if (!row.isEmpty()) {
                        if (row.size() > 2) {
                            LOG.warn("WTF :" + row);
                        }
                        boolean found_F = false;
                        boolean found_R = false;
                        for (MappedFastq m : row) {
                            switch((int) m.side) {
                                case 1:
                                    if (found_F)
                                        throw new RuntimeException("two forward reads found for " + row.get(0).name);
                                    found_F = true;
                                    echo(fqw1, m);
                                    break;
                                case 2:
                                    if (found_R)
                                        throw new RuntimeException("two reverse reads found for " + row.get(0).name);
                                    found_R = true;
                                    echo(fqw2, m);
                                    break;
                                default:
                                    throw new IllegalStateException("uh???");
                            }
                        }
                        if (!found_F) {
                            if (this.repair_missing_read) {
                                LOG.warn("forward not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 1;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw1, pad);
                            } else {
                                throw new RuntimeException("forward not found for " + row);
                            }
                        }
                        if (!found_R) {
                            if (repair_missing_read) {
                                LOG.warn("reverse not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 2;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw2, pad);
                            } else {
                                throw new RuntimeException("reverse not found for " + row);
                            }
                        }
                    }
                    if (curr == null)
                        break;
                    row.clear();
                }
                row.add(curr);
            }
            r.close();
            fqw1.close();
            fqw2.close();
        } else if (found_single) {
            FastqWriter fqw1 = null;
            if (forwardFile != null) {
                LOG.info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                LOG.info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
            }
            final CloseableIterator<MappedFastq> r = fastqCollection.iterator();
            while (r.hasNext()) {
                echo(fqw1, r.next());
            }
            r.close();
            fqw1.close();
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqCollection != null)
            fastqCollection.cleanup();
    }
}
Also used : PrintStream(java.io.PrintStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter) FastqWriter(htsjdk.samtools.fastq.FastqWriter) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter)

Example 35 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamTreePack method scan.

private void scan(final SamReader sfr) {
    super.bindings.put("header", sfr.getFileHeader());
    final SAMRecordIterator iter = sfr.iterator();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader());
    while (iter.hasNext()) {
        final SAMRecord rec = progress.watch(iter.next());
        super.bindings.put("record", rec);
        super.nodeFactoryChain.watch(rootNode, rec);
    }
    progress.finish();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14