Search in sources :

Example 76 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class ExtendReferenceWithReads method scanRegion.

/**
 *scanRegion
 * @param contig    Reference sequence of interest.
 * @param start     0-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
 * @param end       0-based, exclusive end of interval of interest. Zero implies end of the reference sequence.
 */
private Map<Integer, Counter<Byte>> scanRegion(final SAMSequenceRecord contig, final int chromStart, final int chromEnd, final Rescue rescue) {
    final Map<Integer, Counter<Byte>> pos2bases = new HashMap<>(1 + chromEnd - chromStart);
    LOG.info("Scanning :" + contig.getSequenceName() + ":" + chromStart + "-" + chromEnd);
    for (int side = 0; side < 2; ++side) {
        if (// 5' or 3'
        !rescue.equals(Rescue.CENTER) && side > 0) {
            // already done
            break;
        }
        for (final SamReader samReader : samReaders) {
            final SAMSequenceDictionary dict2 = samReader.getFileHeader().getSequenceDictionary();
            final SAMSequenceRecord ssr2 = dict2.getSequence(contig.getSequenceName());
            if (ssr2 == null || ssr2.getSequenceLength() != contig.getSequenceLength()) {
                LOG.info("No contig " + contig.getSequenceName() + " with L=" + contig.getSequenceLength() + " bases in " + samReader.getResourceDescription());
                continue;
            }
            int mappedPos = -1;
            switch(rescue) {
                case LEFT:
                    mappedPos = 1;
                    break;
                case RIGHT:
                    mappedPos = contig.getSequenceLength();
                    break;
                case CENTER:
                    mappedPos = (side == 0 ? chromStart + 1 : chromEnd);
                    break;
                default:
                    throw new IllegalStateException();
            }
            final SAMRecordIterator iter = samReader.query(contig.getSequenceName(), mappedPos, mappedPos, false);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filter.filterOut(rec))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                final byte[] bases = rec.getReadBases();
                if (bases == null || bases.length == 0)
                    continue;
                int refPos1 = rec.getUnclippedStart();
                int readpos = 0;
                for (final CigarElement ce : cigar) {
                    final CigarOperator op = ce.getOperator();
                    for (int L = 0; L < ce.getLength(); ++L) {
                        if (op.consumesReadBases()) {
                            Counter<Byte> count = pos2bases.get(refPos1 - 1);
                            if (count == null) {
                                count = new Counter<>();
                                pos2bases.put(refPos1 - 1, count);
                            }
                            count.incr((byte) Character.toLowerCase(bases[readpos]));
                            readpos++;
                        }
                        if (op.consumesReferenceBases()) {
                            refPos1++;
                        }
                    }
                }
            }
            iter.close();
        }
    }
    return pos2bases;
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord)

Example 77 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class ExtendReferenceWithReads method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("No REF defined");
        return -1;
    }
    this.samReaders.clear();
    PrintStream out = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null) {
            LOG.error("Reference file is missing a sequence dictionary (use picard)");
            return -1;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
        for (final String uri : IOUtils.unrollFiles(args)) {
            LOG.info("opening BAM " + uri);
            final SamReader sr = srf.open(SamInputResource.of(uri));
            /* doesn't work with remote ?? */
            if (!sr.hasIndex()) {
                LOG.error("file " + uri + " is not indexed");
                return -1;
            }
            final SAMFileHeader header = sr.getFileHeader();
            if (!header.getSortOrder().equals(SortOrder.coordinate)) {
                LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
                return -1;
            }
            final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
            if (dict2 == null) {
                LOG.error("file " + uri + " needs a sequence dictionary");
                return -1;
            }
            samReaders.add(sr);
        }
        if (samReaders.isEmpty()) {
            LOG.error("No BAM defined");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintStream(this.outputFile);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
            int chromStart = 0;
            int nPrinted = 0;
            out.print(">");
            out.print(ssr.getSequenceName());
            for (final Rescue side : Rescue.values()) {
                switch(side) {
                    case LEFT:
                        /* look before position 0 of chromosome */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
                            int newstart = 0;
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos >= 0)
                                    continue;
                                newstart = Math.min(newstart, pos);
                            }
                            while (newstart < 0) {
                                final Counter<Byte> count = pos2bases.get(newstart);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                newstart++;
                                ++nPrinted;
                            }
                            break;
                        }
                    case RIGHT:
                        /* look after position > length(chromosome) */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
                            int newend = ssr.getSequenceLength();
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos < ssr.getSequenceLength())
                                    continue;
                                newend = Math.max(newend, pos + 1);
                            }
                            for (int i = ssr.getSequenceLength(); i < newend; i++) {
                                final Counter<Byte> count = pos2bases.get(i);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                ++nPrinted;
                            }
                            break;
                        }
                    case CENTER:
                        /* 0 to chromosome-length */
                        {
                            chromStart = 0;
                            while (chromStart < genomic.length()) {
                                final char base = Character.toUpperCase(genomic.charAt(chromStart));
                                if (base != 'N') {
                                    progress.watch(ssr.getSequenceName(), chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    out.print(base);
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                                int chromEnd = chromStart + 1;
                                while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
                                    ++chromEnd;
                                }
                                if (chromEnd - chromStart < this.minLenNNNNContig) {
                                    while (chromStart < chromEnd) {
                                        progress.watch(ssr.getSequenceName(), chromStart);
                                        if (nPrinted % 60 == 0)
                                            out.println();
                                        out.print(base);
                                        ++chromStart;
                                        ++nPrinted;
                                    }
                                    continue;
                                }
                                final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
                                while (chromStart < chromEnd) {
                                    final Counter<Byte> count = pos2bases.get(chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    if (count == null) {
                                        out.print('N');
                                    } else {
                                        out.print(consensus(count));
                                    }
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                            }
                            break;
                        }
                }
            // end switch type
            }
            out.println();
        }
        progress.finish();
        out.flush();
        out.close();
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        for (final SamReader r : samReaders) CloserUtil.close(r);
        samReaders.clear();
    }
}
Also used : PrintStream(java.io.PrintStream) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Example 78 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamToFastq method doWork.

@Override
public int doWork(List<String> args) {
    SamReader sfr = null;
    SortingCollection<MappedFastq> fastqCollection = null;
    try {
        boolean found_single = false;
        boolean found_paired = false;
        long non_primary_alignmaned_flag = 0L;
        sfr = super.openSamReader(oneFileOrNull(args));
        fastqCollection = SortingCollection.newInstance(MappedFastq.class, new MappedFastqCodec(), new MappedFastqComparator(), this.maxRecordsInRam, this.tmpDir.toPath());
        fastqCollection.setDestructiveIteration(true);
        SAMRecordIterator iter = sfr.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.isSecondaryOrSupplementary()) {
                if (non_primary_alignmaned_flag == 0) {
                    LOG.warn("SKIPPING NON-PRIMARY " + (non_primary_alignmaned_flag + 1) + " ALIGNMENTS");
                }
                non_primary_alignmaned_flag++;
                continue;
            }
            MappedFastq m = new MappedFastq();
            m.name = rec.getReadName();
            if (m.name == null)
                m.name = "";
            m.seq = rec.getReadString();
            if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING))
                m.seq = "";
            m.qual = rec.getBaseQualityString();
            if (m.qual.equals(SAMRecord.NULL_QUALS_STRING))
                m.qual = "";
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                m.seq = AcidNucleics.reverseComplement(m.seq);
                m.qual = new StringBuilder(m.qual).reverse().toString();
            }
            if (m.seq.length() != m.qual.length()) {
                LOG.error("length(seq)!=length(qual) in " + m.name);
                continue;
            }
            if (m.seq.isEmpty() && m.qual.isEmpty()) {
                m.seq = "N";
                m.qual = "#";
            }
            if (rec.getReadPairedFlag()) {
                found_paired = true;
                if (found_single) {
                    sfr.close();
                    throw new RuntimeException("input is a mix of paired/singled reads");
                }
                m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1);
            } else {
                found_single = true;
                if (found_paired) {
                    sfr.close();
                    throw new RuntimeException("input is a mix of paired/singled reads");
                }
                m.side = (byte) 0;
            }
            fastqCollection.add(m);
        }
        iter.close();
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        progress.finish();
        fastqCollection.doneAdding();
        LOG.info("Done reading.");
        if (found_paired) {
            FastqWriter fqw1 = null;
            FastqWriter fqw2 = null;
            if (forwardFile != null) {
                LOG.info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                LOG.info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
            }
            if (reverseFile != null) {
                LOG.info("Writing to " + reverseFile);
                fqw2 = new BasicFastqWriter(reverseFile);
            } else {
                LOG.info("Writing to interlaced stdout");
                fqw2 = fqw1;
            }
            List<MappedFastq> row = new ArrayList<MappedFastq>();
            CloseableIterator<MappedFastq> r = fastqCollection.iterator();
            for (; ; ) {
                MappedFastq curr = null;
                if (r.hasNext())
                    curr = r.next();
                if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) {
                    if (!row.isEmpty()) {
                        if (row.size() > 2) {
                            LOG.warn("WTF :" + row);
                        }
                        boolean found_F = false;
                        boolean found_R = false;
                        for (MappedFastq m : row) {
                            switch((int) m.side) {
                                case 1:
                                    if (found_F)
                                        throw new RuntimeException("two forward reads found for " + row.get(0).name);
                                    found_F = true;
                                    echo(fqw1, m);
                                    break;
                                case 2:
                                    if (found_R)
                                        throw new RuntimeException("two reverse reads found for " + row.get(0).name);
                                    found_R = true;
                                    echo(fqw2, m);
                                    break;
                                default:
                                    throw new IllegalStateException("uh???");
                            }
                        }
                        if (!found_F) {
                            if (this.repair_missing_read) {
                                LOG.warn("forward not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 1;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw1, pad);
                            } else {
                                throw new RuntimeException("forward not found for " + row);
                            }
                        }
                        if (!found_R) {
                            if (repair_missing_read) {
                                LOG.warn("reverse not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 2;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw2, pad);
                            } else {
                                throw new RuntimeException("reverse not found for " + row);
                            }
                        }
                    }
                    if (curr == null)
                        break;
                    row.clear();
                }
                row.add(curr);
            }
            r.close();
            fqw1.close();
            fqw2.close();
        } else if (found_single) {
            FastqWriter fqw1 = null;
            if (forwardFile != null) {
                LOG.info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                LOG.info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(stdout()));
            }
            final CloseableIterator<MappedFastq> r = fastqCollection.iterator();
            while (r.hasNext()) {
                echo(fqw1, r.next());
            }
            r.close();
            fqw1.close();
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqCollection != null)
            fastqCollection.cleanup();
    }
}
Also used : PrintStream(java.io.PrintStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter) FastqWriter(htsjdk.samtools.fastq.FastqWriter) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter)

Example 79 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class SamScanSplitReads method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader r = null;
    SAMSequenceDictionary dic = null;
    final Set<String> sampleNames = new TreeSet<>();
    try {
        if (args.isEmpty()) {
            LOG.info("read stdin");
            r = openSamReader(null);
            sampleNames.addAll(samples(r.getFileHeader()));
            dic = r.getFileHeader().getSequenceDictionary();
            if (dic == null) {
                LOG.error("SAM input is missing a dictionary");
                return -1;
            }
            scanFile(r);
            r.close();
            r = null;
        } else
            for (final String filename : args) {
                LOG.info("read " + filename);
                r = openSamReader(filename);
                sampleNames.addAll(samples(r.getFileHeader()));
                final SAMSequenceDictionary dict2 = r.getFileHeader().getSequenceDictionary();
                if (dict2 == null) {
                    LOG.error("SAM input is missing a dictionary");
                    return -1;
                } else if (dic == null) {
                    dic = dict2;
                } else if (!SequenceUtil.areSequenceDictionariesEqual(dic, dict2)) {
                    LOG.error("incompatibles sequences dictionaries");
                    return -1;
                }
                scanFile(r);
                r.close();
                r = null;
            }
        if ("vcf".equalsIgnoreCase(this.outputFormat) || (this.outputFile != null && (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")))) {
            saveAsVcf(sampleNames, dic);
        } else {
            saveAsText();
        }
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) TreeSet(java.util.TreeSet) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException)

Example 80 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class TestNg01 method assertIsBam.

private static void assertIsBam(final File f) {
    boolean b = false;
    try {
        final SamReader r = SamReaderFactory.makeDefault().open(f);
        r.iterator().stream().forEach(R -> R.getReadName());
        r.close();
        b = true;
    } catch (final Exception e) {
        b = false;
    }
    Assert.assertTrue(b, "file " + f + " should be bam.");
}
Also used : SamReader(htsjdk.samtools.SamReader) IOException(java.io.IOException)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26