Search in sources :

Example 71 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class Biostar90204 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.suffix_length < 0) {
        LOG.error("Bad value of suffix_length:" + this.suffix_length);
        return -1;
    }
    if (this.record_per_file < 1L) {
        LOG.error("Bad value of record_per_file:" + this.record_per_file);
        return -1;
    }
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    SamReader samFileReader = null;
    PrintWriter manifest = new PrintWriter(new NullOuputStream());
    try {
        samFileReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samFileReader.getFileHeader();
        int split_file_number = 0;
        long nReads = 0L;
        iter = samFileReader.iterator();
        if (this.manifestFile != null) {
            manifest.close();
            manifest = new PrintWriter(manifestFile);
        }
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (this.samRecordFilter.filterOut(rec))
                continue;
            ++nReads;
            if (sfw == null) {
                split_file_number++;
                final String pathname = (this.prefix.isEmpty() ? "" : this.prefix + ".") + String.format("%0" + suffix_length + "d", split_file_number) + ".bam";
                final File out = new File(pathname);
                manifest.write(pathname);
                manifest.write("\t" + (nReads) + "\t");
                final SAMFileHeader header2 = header.clone();
                header2.addComment("SPLIT:" + split_file_number);
                header2.addComment("SPLIT:Starting from Read" + nReads);
                sfw = this.writingBamArgs.openSAMFileWriter(out, header2, true);
            }
            sfw.addAlignment(rec);
            if (nReads % record_per_file == 0) {
                sfw.close();
                manifest.write((nReads) + "\n");
                sfw = null;
            }
        }
        if (sfw != null) {
            sfw.close();
            manifest.write((nReads) + "\n");
        }
        manifest.flush();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(sfw);
        CloserUtil.close(iter);
        CloserUtil.close(samFileReader);
    }
    return 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 72 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class Biostar59647 method doWork.

@Override
public int doWork(final List<String> args) {
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SamReader samFileReader = null;
    PrintStream pout;
    try {
        GenomicSequence genomicSequence = null;
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
        samFileReader = null;
        final String bamFile = oneFileOrNull(args);
        samFileReader = super.openSamReader(bamFile);
        if (!SequenceUtil.areSequenceDictionariesEqual(indexedFastaSequenceFile.getSequenceDictionary(), samFileReader.getFileHeader().getSequenceDictionary())) {
            LOG.warning("Not the same sequence dictionaries");
        }
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        pout = (outputFile == null ? stdout() : new PrintStream(this.outputFile));
        final XMLStreamWriter w = xmlfactory.createXMLStreamWriter(pout, "UTF-8");
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("sam");
        w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile));
        w.writeAttribute("ref", refFile.getPath());
        w.writeComment(getProgramCommandLine());
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(samFileReader.getFileHeader().getSequenceDictionary());
        final SAMRecordIterator iter = samFileReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            progess.watch(rec);
            final byte[] readbases = rec.getReadBases();
            w.writeStartElement("read");
            w.writeStartElement("name");
            w.writeCharacters(rec.getReadName());
            w.writeEndElement();
            w.writeStartElement("sequence");
            w.writeCharacters(new String(readbases));
            w.writeEndElement();
            w.writeStartElement("flags");
            for (SAMFlag f : SAMFlag.values()) {
                w.writeAttribute(f.name(), String.valueOf(f.isSet(rec.getFlags())));
            }
            w.writeCharacters(String.valueOf(rec.getFlags()));
            // flags
            w.writeEndElement();
            if (!rec.getReadUnmappedFlag()) {
                w.writeStartElement("qual");
                w.writeCharacters(String.valueOf(rec.getMappingQuality()));
                w.writeEndElement();
                w.writeStartElement("chrom");
                w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
                w.writeCharacters(rec.getReferenceName());
                w.writeEndElement();
                w.writeStartElement("pos");
                w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
                w.writeEndElement();
                w.writeStartElement("cigar");
                w.writeCharacters(rec.getCigarString());
                w.writeEndElement();
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                w.writeStartElement("mate-chrom");
                w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
                w.writeCharacters(rec.getMateReferenceName());
                w.writeEndElement();
                w.writeStartElement("mate-pos");
                w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
                w.writeEndElement();
            }
            if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                    genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                }
                w.writeStartElement("align");
                int readIndex = 0;
                int refIndex = rec.getAlignmentStart();
                for (final CigarElement e : rec.getCigar().getCigarElements()) {
                    switch(e.getOperator()) {
                        // ignore hard clips
                        case H:
                            break;
                        // ignore pads
                        case P:
                            break;
                        // cont.
                        case I:
                        case S:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
                                    }
                                    readIndex++;
                                }
                                break;
                            }
                        // cont. -- reference skip
                        case N:
                        case D:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        w.writeAttribute("ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
                                    }
                                    refIndex++;
                                }
                                break;
                            }
                        case M:
                        case EQ:
                        case X:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    char baseRead = '\0';
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        baseRead = (char) (rec.getReadBases()[readIndex]);
                                        w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                        w.writeAttribute("read-base", String.valueOf(baseRead));
                                    }
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        char baseRef = genomicSequence.charAt(refIndex - 1);
                                        w.writeAttribute("ref-base", String.valueOf(baseRef));
                                        if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
                                            w.writeAttribute("mismatch", "true");
                                        }
                                    }
                                    refIndex++;
                                    readIndex++;
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
                    }
                }
                w.writeEndElement();
            }
            w.writeEndElement();
        }
        iter.close();
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        pout.flush();
        CloserUtil.close(w);
        CloserUtil.close(pout);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samFileReader);
        CloserUtil.close(indexedFastaSequenceFile);
    }
    return 0;
}
Also used : PrintStream(java.io.PrintStream) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMRecord(htsjdk.samtools.SAMRecord)

Example 73 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class Biostar76892 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        sfw = writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
        long nRecords = 0;
        final List<SAMRecord> buffer = new ArrayList<SAMRecord>();
        SAMRecordIterator iter = sfr.iterator();
        for (; ; ) {
            SAMRecord rec = null;
            // get next record
            if (iter.hasNext()) {
                rec = iter.next();
                ++nRecords;
                if (nRecords % 1000000 == 0)
                    LOG.info("records: " + nRecords);
                if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || rec.getProperPairFlag() || rec.getReferenceIndex() != rec.getMateReferenceIndex() || rec.getReadNegativeStrandFlag() == !rec.getMateNegativeStrandFlag()) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(rec);
                    continue;
                }
            }
            if (rec != null) {
                int i = 0;
                // cleanup buffer
                int mate_index = -1;
                while (i < buffer.size()) {
                    SAMRecord prev = buffer.get(i);
                    if (prev.getReferenceIndex() != rec.getReferenceIndex() || prev.getAlignmentEnd() + distance < rec.getAlignmentStart()) {
                        if (!onlySaveFixed)
                            sfw.addAlignment(prev);
                        buffer.remove(i);
                    } else if (prev.getReadName().equals(rec.getReadName()) && ((prev.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) || (rec.getFirstOfPairFlag() && prev.getSecondOfPairFlag()))) {
                        mate_index = i;
                        ++i;
                    } else {
                        ++i;
                    }
                }
                if (mate_index == -1) {
                    buffer.add(rec);
                } else {
                    final SAMRecord mate = buffer.get(mate_index);
                    buffer.remove(mate_index);
                    LOG.info("changing " + rec + " " + mate);
                    if (mate.getReadNegativeStrandFlag()) {
                        mate.setReadNegativeStrandFlag(false);
                        rec.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
                    } else {
                        rec.setReadNegativeStrandFlag(false);
                        mate.setMateNegativeStrandFlag(rec.getReadNegativeStrandFlag());
                    }
                    if (!mate.getReadFailsVendorQualityCheckFlag() && !rec.getReadFailsVendorQualityCheckFlag()) {
                        mate.setProperPairFlag(true);
                        rec.setProperPairFlag(true);
                    }
                    mate.setAttribute("rv", 1);
                    rec.setAttribute("rv", 1);
                    sfw.addAlignment(mate);
                    sfw.addAlignment(rec);
                }
            } else {
                for (final SAMRecord r : buffer) {
                    if (!onlySaveFixed)
                        sfw.addAlignment(r);
                }
                break;
            }
        }
        LOG.info("done");
        sfw.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList)

Example 74 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class Biostar78400 method doWork.

@Override
public int doWork(List<String> args) {
    if (this.XML == null) {
        LOG.error("XML file missing");
        return -1;
    }
    final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
        final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
        if (rgl.flowcells.isEmpty()) {
            LOG.error("empty XML " + XML);
            return -1;
        }
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader().clone();
        header.addComment("Processed with " + getProgramName());
        final Set<String> seenids = new HashSet<String>();
        final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
        for (final FlowCell fc : rgl.flowcells) {
            final Map<Integer, String> lane2id = new HashMap<Integer, String>();
            for (final Lane lane : fc.lanes) {
                for (final ReadGroup rg : lane.readGroups) {
                    if (seenids.contains(rg.id)) {
                        LOG.error("Group id " + rg.id + " defined twice");
                        return -1;
                    }
                    seenids.add(rg.id);
                    // create the read group we'll be using
                    final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
                    rgrec.setLibrary(rg.library);
                    rgrec.setPlatform(rg.platform);
                    rgrec.setSample(rg.sample);
                    rgrec.setPlatformUnit(rg.platformunit);
                    if (rg.center != null)
                        rgrec.setSequencingCenter(rg.center);
                    if (rg.description != null)
                        rgrec.setDescription(rg.description);
                    lane2id.put(lane.id, rg.id);
                    samReadGroupRecords.add(rgrec);
                }
            }
            if (flowcell2lane2id.containsKey(fc.name)) {
                LOG.error("FlowCell id " + fc.name + " defined twice in XML");
                return -1;
            }
            flowcell2lane2id.put(fc.name, lane2id);
        }
        header.setReadGroups(samReadGroupRecords);
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Matcher matcher = readNameSignature.matcher(rec.getReadName());
            final String flowcellStr;
            final String laneStr;
            if (matcher.matches()) {
                flowcellStr = matcher.group(1);
                laneStr = matcher.group(2);
            } else {
                LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
                return -1;
            }
            String RGID = null;
            final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
            if (lane2id == null)
                throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
            try {
                RGID = lane2id.get(Integer.parseInt(laneStr));
            } catch (final Exception e) {
                LOG.error("bad lane-Id in " + rec.getReadName());
                return -1;
            }
            if (RGID == null) {
                throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
            }
            rec.setAttribute(SAMTag.RG.name(), RGID);
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) SamReader(htsjdk.samtools.SamReader) Unmarshaller(javax.xml.bind.Unmarshaller) HashSet(java.util.HashSet) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) StreamSource(javax.xml.transform.stream.StreamSource) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Example 75 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class SoftClipAnnotator method initialize.

public void initialize() {
    final Set<VCFHeaderLine> hInfo = new HashSet<>();
    final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
    for (final String sample : samples) {
        this.sample2bam.put(sample, new ArrayList<>());
    }
    for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
        if (isUniqueHeaderLine(line, hInfo))
            hInfo.add(line);
    }
    VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    VCFHeader header2 = new VCFHeader(vcfHeader);
    header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
    header2.addMetaDataLine(this.filterHaveClipInVariant);
    vcfWriter.writeHeader(header2);
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
        logger.info("opening " + samFilename);
        final SamReader r = srf.open(samFilename);
        final Set<String> sampleset = new HashSet<>();
        for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
            if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
                continue;
            sampleset.add(g.getSample());
        }
        if (sampleset.isEmpty()) {
            logger.info("no VCF sample in " + samFilename);
            CloserUtil.close(r);
            continue;
        }
        this.samReaders.add(r);
        for (final String sample : sampleset) {
            if (!this.sample2bam.containsKey(sample))
                continue;
            this.sample2bam.get(sample).add(r);
        }
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26