Search in sources :

Example 81 with SAMRecordIterator

Use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

The class SamFixCigar, method doWork.

/**
 * Rewrites the CIGAR string of every mapped read so that each 'M' operator is replaced by
 * '=' (read base equals the reference base, or the reference base is 'N') and 'X' (mismatch)
 * operators. All other operators are copied unchanged, and adjacent elements carrying the
 * same operator are merged.
 *
 * <p>Records that are unmapped, have no CIGAR, or carry no read bases are written unchanged.
 *
 * @param args command-line arguments; passed to {@code oneFileOrNull} to select the input
 * @return {@code RETURN_OK} on success, {@code -1} if no reference was specified or an error occurred
 */
@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        // Scratch list reused for every record.
        final List<CigarElement> newCigar = new ArrayList<CigarElement>();
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            final byte[] bases = rec.getReadBases();
            // A missing sequence is reported by htsjdk as an empty array (SAMRecord.NULL_SEQUENCE),
            // so the length must be tested too; otherwise bases[readPos0] below would overflow.
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null || bases.length == 0) {
                sfw.addAlignment(rec);
                continue;
            }
            // Reload the reference contig only when the record moves to another contig.
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            newCigar.clear();
            // 1-based position on the reference, 0-based position in the read.
            int refPos1 = rec.getAlignmentStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.M)) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        final char c1 = Character.toUpperCase((char) bases[readPos0]);
                        // '*' stands for "beyond the end of the reference contig".
                        final char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
                        final CigarOperator baseOp = (c2 == 'N' || c1 == c2) ? CigarOperator.EQ : CigarOperator.X;
                        appendMergingCigarElement(newCigar, baseOp, 1);
                        refPos1++;
                        readPos0++;
                    }
                } else {
                    appendMergingCigarElement(newCigar, op, ce.getLength());
                    if (op.consumesReadBases()) {
                        readPos0 += ce.getLength();
                    }
                    if (op.consumesReferenceBases()) {
                        refPos1 += ce.getLength();
                    }
                }
            }
            rec.setCigar(new Cigar(newCigar));
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}

/**
 * Appends {@code length} bases of operator {@code op} to {@code elements}, extending the last
 * element instead of adding a new one when it already carries the same operator.
 */
private static void appendMergingCigarElement(final List<CigarElement> elements, final CigarOperator op, final int length) {
    final int lastIndex = elements.size() - 1;
    if (lastIndex >= 0 && elements.get(lastIndex).getOperator() == op) {
        elements.set(lastIndex, new CigarElement(elements.get(lastIndex).getLength() + length, op));
    } else {
        elements.add(new CigarElement(length, op));
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 82 with SAMRecordIterator

Use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

The class BWAMemNOp, method doWork.

/**
 * Merges soft-clipped reads with a compatible alternative alignment into a single record whose
 * CIGAR joins both alignments with an 'N' operator.
 *
 * <p>For each mapped, non-supplementary read whose CIGAR starts or ends with a soft clip, the
 * alternative alignments returned by {@code OtherCanonicalAlignFactory.getXPAligns} are scanned.
 * A candidate is used when it lies on the same contig and strand, does not overlap the read on
 * the reference, and the facing soft clips are both at least {@code min_soft_clip_length} long.
 * Reads for which no merged record was produced are written unchanged unless
 * {@code print_only_spit_read} is set.
 *
 * @param args command-line arguments; passed to {@code oneFileOrNull} to select the input
 * @return {@code 0} on success, {@code -1} if an error occurred
 */
@Override
public int doWork(List<String> args) {
    SamReader r = null;
    SAMFileWriter w = null;
    SAMRecordIterator iter = null;
    try {
        r = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = r.getFileHeader();
        final OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
        w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
        iter = r.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getSupplementaryAlignmentFlag()) {
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                if (!print_only_spit_read) {
                    w.addAlignment(rec);
                }
                continue;
            }
            final Cigar cigar1 = rec.getCigar();
            // Only reads whose first or last CIGAR element is a soft clip are candidates.
            if (cigar1 == null || cigar1.isEmpty()
                    || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S)
                        || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) {
                if (!print_only_spit_read) {
                    w.addAlignment(rec);
                }
                continue;
            }
            final CigarElement first1 = cigar1.getCigarElement(0);
            final CigarElement last1 = cigar1.getCigarElement(cigar1.numCigarElements() - 1);
            final List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
            if (xps.isEmpty()) {
                if (!print_only_spit_read) {
                    w.addAlignment(rec);
                }
                continue;
            }
            boolean found_one = false;
            for (final OtherCanonicalAlign xp : xps) {
                if (!rec.getReferenceName().equals(xp.getReferenceName())) {
                    continue;
                }
                if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag()) {
                    continue;
                }
                final Cigar cigar2 = xp.getCigar();
                if (cigar2 == null || cigar2.isEmpty()) {
                    continue;
                }
                final CigarElement first2 = cigar2.getCigarElement(0);
                final CigarElement last2 = cigar2.getCigarElement(cigar2.numCigarElements() - 1);

                // Per-base events of the upstream (left) and downstream (right) alignment.
                final int mergedStart;
                final List<CigarEvt> leftEvents;
                final List<CigarEvt> rightEvents;
                if (last1.getOperator().equals(CigarOperator.S) && last1.getLength() >= this.min_soft_clip_length
                        && first2.getOperator().equals(CigarOperator.S) && first2.getLength() >= this.min_soft_clip_length
                        && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
                    // The read is upstream of the alternative alignment.
                    mergedStart = rec.getAlignmentStart();
                    leftEvents = cigarEvents(0, mergedStart, cigar1);
                    rightEvents = cigarEvents(0, xp.getAlignmentStart(), cigar2);
                } else if (last2.getOperator().equals(CigarOperator.S) && last2.getLength() >= this.min_soft_clip_length
                        && first1.getOperator().equals(CigarOperator.S) && first1.getLength() >= this.min_soft_clip_length
                        && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
                    // The alternative alignment is upstream of the read.
                    mergedStart = xp.getAlignmentStart();
                    leftEvents = cigarEvents(0, mergedStart, cigar2);
                    rightEvents = cigarEvents(0, rec.getAlignmentStart(), cigar1);
                } else {
                    continue;
                }

                // Drop the trailing clipped/deleted events of the left alignment ...
                while (!leftEvents.isEmpty() && (leftEvents.get(leftEvents.size() - 1).op.equals(CigarOperator.S)
                        || leftEvents.get(leftEvents.size() - 1).op.equals(CigarOperator.D)
                        || leftEvents.get(leftEvents.size() - 1).op.equals(CigarOperator.H))) {
                    leftEvents.remove(leftEvents.size() - 1);
                }
                if (leftEvents.isEmpty()) {
                    // Nothing left to anchor the merge on: skip this candidate instead of failing
                    // the whole run with an IndexOutOfBoundsException.
                    LOG.warning("Cannot merge " + rec.getReadName() + ": no event left in the upstream alignment");
                    continue;
                }
                // ... and the leading events of the right alignment covering read bases already used.
                final int lastLeftRead0 = leftEvents.get(leftEvents.size() - 1).read0;
                while (!rightEvents.isEmpty() && rightEvents.get(0).read0 <= lastLeftRead0) {
                    rightEvents.remove(0);
                }
                if (rightEvents.isEmpty()) {
                    LOG.warning("Cannot merge " + rec.getReadName() + ": no event left in the downstream alignment");
                    continue;
                }

                final SAMRecord newrec = samRecordFactory.createSAMRecord(header);
                newrec.setAlignmentStart(mergedStart);
                newrec.setFlags(rec.getFlags());
                newrec.setReadName(rec.getReadName());
                newrec.setReadBases(rec.getReadBases());
                newrec.setMappingQuality(rec.getMappingQuality());
                newrec.setReferenceIndex(rec.getReferenceIndex());
                newrec.setBaseQualities(rec.getBaseQualities());
                // Every merged record after the first one for this read is flagged as not primary.
                if (found_one) {
                    newrec.setNotPrimaryAlignmentFlag(true);
                }
                found_one = true;
                // Copy the attributes except the alternative-alignment tag and "NM".
                for (final SAMTagAndValue tav : rec.getAttributes()) {
                    if (tav.tag.equals(ocaf.getAttributeKey())) {
                        continue;
                    }
                    if (tav.tag.equals("NM")) {
                        continue;
                    }
                    newrec.setAttribute(tav.tag, tav.value);
                }
                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
                    newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
                    newrec.setInferredInsertSize(rec.getInferredInsertSize());
                }

                final List<CigarElement> cigarElements = new ArrayList<CigarElement>();
                // Run-length encode the left events.
                int i = 0;
                while (i < leftEvents.size()) {
                    int j = i + 1;
                    while (j < leftEvents.size() && leftEvents.get(i).op.equals(leftEvents.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, leftEvents.get(i).op));
                    i = j;
                }
                // add 'N': the reference gap between the two alignments
                cigarElements.add(new CigarElement((rightEvents.get(0).ref1 - leftEvents.get(leftEvents.size() - 1).ref1) - 1, CigarOperator.N));
                // Run-length encode the right events.
                i = 0;
                while (i < rightEvents.size()) {
                    int j = i + 1;
                    while (j < rightEvents.size() && rightEvents.get(i).op.equals(rightEvents.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, rightEvents.get(i).op));
                    i = j;
                }
                // cleanup : case where 'S' is close to 'N': an internal soft clip adjacent to the
                // 'N' is turned into 'X' (a leading or trailing soft clip is kept)
                i = 0;
                while (i + 1 < cigarElements.size()) {
                    final CigarElement ce1 = cigarElements.get(i);
                    final CigarElement ce2 = cigarElements.get(i + 1);
                    if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
                        cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
                    } else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
                        cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
                    }
                    i++;
                }
                newrec.setCigar(new Cigar(cigarElements));
                // Validation problems are reported but do not prevent the record from being written.
                final List<SAMValidationError> validations = newrec.isValid();
                if (validations != null) {
                    for (final SAMValidationError err : validations) {
                        LOG.warning(err.getType() + ":" + err.getMessage());
                    }
                }
                w.addAlignment(newrec);
            }
            if (!found_one) {
                if (!print_only_spit_read) {
                    w.addAlignment(rec);
                }
            }
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Closed here so the iterator is also released when an exception interrupts the loop.
        CloserUtil.close(iter);
        CloserUtil.close(r);
        CloserUtil.close(w);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Example 83 with SAMRecordIterator

Use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

The class FindMyVirus, method doWork.

/**
 * Dispatches every record of the input into one BAM file per {@code CAT} category, depending
 * on its flags and on whether the read, its mate and its alternative alignments are located on
 * a contig listed in {@code virusNames}.
 *
 * <p>One writer is opened per category, next to {@code outputFile}, and wrapped in a
 * {@code SAMFileWriterCount} that is given a companion count file.
 *
 * @param args command-line arguments; passed to {@code oneFileOrNull} to select the input
 * @return {@code 0} on success, {@code -1} if no virus name was given or an error occurred
 */
@Override
public int doWork(List<String> args) {
    if (virusNames.isEmpty()) {
        LOG.error("no virus name");
        return -1;
    }
    SamReader sfr = null;
    SAMRecordIterator iter = null;
    // One writer per category, indexed by the category ordinal.
    final SAMFileWriter[] sfwArray = new SAMFileWriter[CAT.values().length];
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        for (final CAT category : CAT.values()) {
            LOG.info("Opening " + category);
            final SAMFileHeader header2 = header.clone();
            header2.addComment("Category:" + category.name());
            header2.addComment("Description:" + category.getDescription());
            final SAMProgramRecord programRecord = header2.createProgramRecord();
            programRecord.setCommandLine(this.getProgramCommandLine());
            programRecord.setProgramName(getProgramName());
            programRecord.setProgramVersion(getVersion());
            programRecord.setAttribute("CAT", category.name());
            final File categoryBam = new File(this.outputFile.getParentFile(), this.outputFile.getName() + "." + category.name() + ".bam");
            LOG.info("Opening " + categoryBam);
            // The count file name is derived from the category BAM name (category appears twice).
            final File countFile = new File(categoryBam.getParentFile(), categoryBam.getName() + "." + category.name() + ".count.txt");
            final SAMFileWriter delegate = writingBamArgs.openSAMFileWriter(categoryBam, header2, true);
            sfwArray[category.ordinal()] = new SAMFileWriterCount(delegate, countFile, category);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        final OtherCanonicalAlignFactory xpAlignFactory = new OtherCanonicalAlignFactory(header);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            progress.watch(rec);
            CAT category = null;
            // Flag-based categories, first match wins.
            if (!rec.getReadPairedFlag()) {
                category = CAT.unpaired;
            } else if (rec.isSecondaryOrSupplementary()) {
                category = CAT.secondary;
            } else if (rec.getReadFailsVendorQualityCheckFlag()) {
                category = CAT.failsqual;
            } else if (rec.getDuplicateReadFlag()) {
                category = CAT.duplicate;
            } else if (rec.getReadUnmappedFlag()) {
                category = CAT.unmapped;
            }
            if (category == null) {
                // From here on the read is paired and mapped, so only the mate status and the
                // contigs of read, mate and alternative alignments remain to be examined.
                boolean xp_containsVirus = false;
                boolean xp_containsChrom = false;
                for (final OtherCanonicalAlign xpa : xpAlignFactory.getXPAligns(rec)) {
                    if (virusNames.contains(xpa.getReferenceName())) {
                        xp_containsVirus = true;
                    } else {
                        xp_containsChrom = true;
                    }
                }
                final boolean readOnVirus = virusNames.contains(rec.getReferenceName());
                if (rec.getMateUnmappedFlag()) {
                    /* pair(mapped, unmapped) */
                    if (!readOnVirus) {
                        category = xp_containsVirus ? CAT.ref_and_virus_spliced : CAT.ref_orphan;
                    } else {
                        category = xp_containsChrom ? CAT.ref_and_virus_spliced : CAT.virus_orphan;
                    }
                } else {
                    final boolean mateOnVirus = virusNames.contains(rec.getMateReferenceName());
                    if (!readOnVirus && !mateOnVirus) {
                        /* both reads mapped on ref */
                        category = xp_containsVirus ? CAT.ref_and_virus_spliced : CAT.both_ref;
                    } else if (readOnVirus && mateOnVirus) {
                        /* both reads mapped on virus */
                        category = xp_containsChrom ? CAT.ref_and_virus_spliced : CAT.both_virus;
                    } else {
                        /* one read on ref, the other on virus */
                        category = CAT.ref_and_virus;
                    }
                }
            }
            /* dispatch */
            if (category == null) {
                // Defensive fallback: the branches above assign a category to every record.
                LOG.warning("Not handled: " + rec);
                category = CAT.undetermined;
            }
            sfwArray[category.ordinal()].addAlignment(rec);
        }
        progress.finish();
        for (int i = 0; i < sfwArray.length; ++i) {
            final SAMFileWriter sfw = sfwArray[i];
            // Cleared first so that the finally block never closes the same writer twice.
            sfwArray[i] = null;
            LOG.info("Closing " + sfw);
            sfw.close();
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        LOG.info("Closing");
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        // Close the writers still open (error path) one by one: handing the array itself to
        // CloserUtil.close(Object) does not reliably close its elements.
        for (final SAMFileWriter sfw : sfwArray) {
            if (sfw == null) {
                continue;
            }
            try {
                sfw.close();
            } catch (final Exception closeErr) {
                LOG.error(closeErr);
            }
        }
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 84 with SAMRecordIterator

Use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

The class FindCorruptedFiles, method testBam.

/**
 * Checks a BAM file for corruption and prints its path on {@code stdout()} when a problem is
 * found.
 *
 * <p>The file is first tested for the BGZF terminator block; it is then opened with the
 * configured validation stringency and its records are read (all of them when {@code NUM} is
 * negative, otherwise at most {@code NUM}). A file from which no record was read is handed
 * to {@code emptyFile}.
 *
 * @param f the BAM file to test
 */
private void testBam(File f) {
    LOG.fine("Test BAM for " + f);
    // Step 1: the file must end with the BGZF end-of-file block.
    try {
        final BlockCompressedInputStream.FileTermination termination = BlockCompressedInputStream.checkTermination(f);
        if (termination != BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) {
            LOG.warning("bgz:" + termination + " for " + f);
            stdout().println(f);
            return;
        }
    } catch (IOException err) {
        LOG.warning("Error in " + f);
        stdout().println(f);
        return;
    }
    // Step 2: the header and the records must be readable.
    SamReader reader = null;
    SAMRecordIterator records = null;
    long seen = 0L;
    try {
        reader = super.createSamReaderFactory().validationStringency(this.validationStringency).open(f);
        reader.getFileHeader();
        records = reader.iterator();
        while (true) {
            if (!records.hasNext()) {
                break;
            }
            // A non-negative NUM caps the number of records that are read.
            if (NUM >= 0 && seen >= NUM) {
                break;
            }
            records.next();
            seen++;
        }
        if (seen == 0) {
            emptyFile(f);
        }
    } catch (final Exception e) {
        LOG.warning("Error in " + f);
        stdout().println(f);
    } finally {
        CloserUtil.close(records);
        CloserUtil.close(reader);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) IOException(java.io.IOException) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream) TribbleException(htsjdk.tribble.TribbleException) IOException(java.io.IOException)

Example 85 with SAMRecordIterator

Use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

The class TView, method paint.

void paint(final PrintStream out) {
    final Colorizer colorizer;
    switch(this.formatOut) {
        case html:
            colorizer = new HtmlColorizer(out);
            break;
        case tty:
            colorizer = new AnsiColorizer(out);
            break;
        case plain:
            colorizer = new Colorizer(out);
            break;
        default:
            throw new IllegalStateException();
    }
    if (interval == null) {
        LOG.warn("No interval defined");
        return;
    }
    final GenomicSequence contigSequence;
    final Function<Integer, Character> refPosToBase;
    if (indexedFastaSequenceFile != null) {
        final SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceFile);
        if (dict.getSequence(this.interval.getContig()) == null) {
            LOG.warn("No interval with contig " + interval + " in REF");
            return;
        }
        contigSequence = new GenomicSequence(indexedFastaSequenceFile, interval.getContig());
        refPosToBase = POS -> {
            if (POS < 0 || POS >= contigSequence.length())
                return 'N';
            return contigSequence.charAt(POS);
        };
    } else {
        contigSequence = null;
        refPosToBase = POS -> 'N';
    }
    /**
     * test if genomic position is in interval
     */
    final Predicate<Integer> testInInterval = new Predicate<Integer>() {

        @Override
        public boolean test(final Integer pos) {
            return interval.getStart() <= pos && pos <= interval.getEnd();
        }
    };
    final int pixelWidth = this.interval.length();
    final Map<Integer, Integer> genomicpos2insertlen = new TreeMap<>();
    final Map<String, List<SAMRecord>> group2record = new TreeMap<>();
    for (final SamReader samReader : this.samReaders) {
        SAMRecordIterator iter = samReader.query(this.interval.getContig(), this.interval.getStart(), this.interval.getEnd(), false);
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getReadUnmappedFlag())
                continue;
            if (rec.getCigar() == null)
                continue;
            if (getRecordFilter().filterOut(rec))
                continue;
            if (!rec.getContig().equals(interval.getContig()))
                continue;
            if (right().apply(rec) < this.interval.getStart())
                continue;
            if (this.interval.getEnd() < left().apply(rec))
                continue;
            String group = this.groupBy.getPartion(rec);
            if (group == null || group.isEmpty()) {
                group = "undefined_" + this.groupBy.name();
            }
            List<SAMRecord> records = group2record.get(group);
            if (records == null) {
                records = new ArrayList<>();
                group2record.put(group, records);
            }
            records.add(rec);
            // loop over cigar, get the longest insert
            int refpos = rec.getAlignmentStart();
            for (final CigarElement ce : rec.getCigar().getCigarElements()) {
                if (!this.showInsertions)
                    break;
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.I) && testInInterval.test(refpos)) {
                    final Integer longestInsert = genomicpos2insertlen.get(refpos);
                    if (longestInsert == null || longestInsert.compareTo(ce.getLength()) < 0) {
                        genomicpos2insertlen.put(refpos, ce.getLength());
                    }
                }
                if (op.consumesReferenceBases()) {
                    refpos += ce.getLength();
                }
                if (refpos > interval.getEnd())
                    break;
            }
        }
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
    }
    /**
     * compute where are the insertions
     */
    // LOG.debug(genomicpos2insertlen);
    final Predicate<Integer> insertIsPresentAtX = SCREENX -> {
        int x = 0;
        int ref = interval.getStart();
        while (x < pixelWidth) {
            if (x > SCREENX)
                return false;
            final Integer insertLen = genomicpos2insertlen.get(ref);
            if (insertLen == null) {
                ++x;
                ++ref;
            } else {
                if (x <= SCREENX && SCREENX < x + insertLen)
                    return true;
                // (+1) I DON'T UNDERSTAND WHY, BUT IT WORKS
                x += (insertLen + 1);
                ++ref;
            }
        }
        return false;
    };
    final Function<Character, AnsiColor> base2ansiColor = BASE -> {
        switch(Character.toUpperCase(BASE)) {
            case 'A':
                return AnsiColor.BLUE;
            case 'T':
                return AnsiColor.GREEN;
            case 'G':
                return AnsiColor.CYAN;
            case 'C':
                return AnsiColor.YELLOW;
            default:
                return null;
        }
    };
    /**
     * print interval title
     */
    out.println(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
    /**
     * paint base position
     */
    int ref = this.interval.getStart();
    int x = 0;
    out.print(margin("POS:"));
    while (x < pixelWidth) {
        if (insertIsPresentAtX.test(x)) {
            colorizer.pen(AnsiColor.RED).print("^");
            ++x;
        } else if ((ref - this.interval.getStart()) % 10 == 0) {
            final String f = String.format("%d", ref);
            for (int i = 0; i < f.length() && x < pixelWidth; ++i) {
                colorizer.pen(AnsiColor.GREEN).print(f.charAt(i));
                if (!insertIsPresentAtX.test(x))
                    ++ref;
                ++x;
            }
        } else {
            out.print(".");
            ++ref;
            ++x;
        }
    }
    out.println();
    /* paint ref base */
    out.print(margin("REF:"));
    ref = this.interval.getStart();
    x = 0;
    while (x < pixelWidth) {
        if (insertIsPresentAtX.test(x)) {
            colorizer.paper(AnsiColor.YELLOW).print("*");
            ++x;
        } else {
            char refBase = refPosToBase.apply(ref - 1);
            colorizer.pen(base2ansiColor.apply(refBase)).print(refBase);
            ++ref;
            ++x;
        }
    }
    out.println();
    /* loop over samples **/
    for (final String groupName : group2record.keySet()) {
        if (this.maxReadRowPerGroup == 0)
            continue;
        final ConsensusBuilder consensus = new ConsensusBuilder();
        int y_group = 0;
        final List<List<SAMRecord>> rows = new ArrayList<>();
        out.println(margin(""));
        switch(this.layoutReads) {
            case name:
                {
                    rows.addAll(group2record.get(groupName).stream().sorted((R1, R2) -> R1.getReadName().compareTo(R2.getReadName())).map(R -> Collections.singletonList(R)).collect(Collectors.toList()));
                    break;
                }
            default:
                {
                    /* pileup reads */
                    for (final SAMRecord rec : group2record.get(groupName)) {
                        int y = 0;
                        for (y = 0; y < rows.size(); ++y) {
                            final List<SAMRecord> row = rows.get(y);
                            final SAMRecord last = row.get(row.size() - 1);
                            if (right().apply(last) + this.distance_between_reads < left().apply(rec)) {
                                row.add(rec);
                                break;
                            }
                        }
                        if (y == rows.size()) {
                            final List<SAMRecord> row = new ArrayList<>();
                            row.add(rec);
                            rows.add(row);
                        }
                    }
                    break;
                }
        }
        // each row is only one read, so we need to print the groupName
        if (layoutReads == LayoutReads.name) {
            out.print(margin(groupName));
            out.println();
        }
        /* print each row */
        for (final List<SAMRecord> row : rows) {
            ++y_group;
            boolean print_this_line = (this.maxReadRowPerGroup < 0 || y_group <= this.maxReadRowPerGroup);
            if (print_this_line) {
                // each row is only one read, print the read name
                if (layoutReads == LayoutReads.name) {
                    String readName = row.get(0).getReadName();
                    if (row.get(0).getReadPairedFlag()) {
                        if (row.get(0).getFirstOfPairFlag()) {
                            readName += "/1";
                        }
                        if (row.get(0).getSecondOfPairFlag()) {
                            readName += "/2";
                        }
                    }
                    out.print(margin(readName));
                } else {
                    out.print(margin(y_group == 1 ? groupName : ""));
                }
            }
            ref = interval.getStart();
            x = 0;
            for (final SAMRecord rec : row) {
                int readRef = left().apply(rec);
                // pad before record
                while (x < pixelWidth && ref < readRef && testInInterval.test(ref)) {
                    if (!insertIsPresentAtX.test(x))
                        ++ref;
                    ++x;
                    if (print_this_line)
                        out.print(' ');
                    consensus.add(' ');
                }
                int readpos = 0;
                /* get read base function */
                final Function<Integer, Character> baseAt = new Function<Integer, Character>() {

                    @Override
                    public Character apply(final Integer readpos) {
                        final byte[] readBases = rec.getReadBases();
                        if (readBases == SAMRecord.NULL_SEQUENCE)
                            return 'N';
                        if (readpos < 0 || readpos >= rec.getReadLength())
                            return '?';
                        return (char) readBases[readpos];
                    }
                };
                for (final CigarElement ce : rec.getCigar()) {
                    final CigarOperator op = ce.getOperator();
                    if (op.equals(CigarOperator.PADDING))
                        continue;
                    /* IN INSERTION, only print if showInsertions is true */
                    if (this.showInsertions && op.equals(CigarOperator.I)) {
                        int cigarIdx = 0;
                        while (x < pixelWidth && cigarIdx < ce.getLength()) {
                            if (testInInterval.test(readRef)) {
                                final char readbase = baseAt.apply(readpos);
                                if (print_this_line)
                                    colorizer.paper(AnsiColor.RED).print(readbase);
                                consensus.add(readbase);
                                ++x;
                            }
                            ++cigarIdx;
                            ++readpos;
                        }
                        continue;
                    }
                    int cigarIdx = 0;
                    while (x < pixelWidth && cigarIdx < ce.getLength()) {
                        colorizer.clear();
                        // pad before base
                        while (x < pixelWidth && testInInterval.test(readRef) && (insertIsPresentAtX.test(x))) {
                            ++x;
                            if (print_this_line)
                                colorizer.paper(AnsiColor.YELLOW).print("*");
                            consensus.add(' ');
                            continue;
                        }
                        switch(op) {
                            case I:
                                {
                                    // if visible, processed above
                                    if (showInsertions)
                                        throw new IllegalStateException();
                                    readpos++;
                                    break;
                                }
                            case P:
                                break;
                            case H:
                                {
                                    if (showClip) {
                                        if (testInInterval.test(readRef)) {
                                            if (print_this_line)
                                                colorizer.paper(AnsiColor.YELLOW).print('N');
                                            // CLIPPED base not part of consensus
                                            consensus.add(' ');
                                            ++x;
                                        }
                                        ++readRef;
                                    }
                                    break;
                                }
                            case S:
                                {
                                    if (showClip) {
                                        if (testInInterval.test(readRef)) {
                                            final char readBase = baseAt.apply(readpos);
                                            if (print_this_line)
                                                colorizer.paper(AnsiColor.YELLOW).print(readBase);
                                            // CLIPPED base not part of consensus
                                            consensus.add(' ');
                                            ++x;
                                        }
                                        ++readpos;
                                        ++readRef;
                                    } else {
                                        readpos++;
                                    }
                                    break;
                                }
                            case D:
                            case N:
                                {
                                    if (testInInterval.test(readRef)) {
                                        if (print_this_line)
                                            colorizer.paper(AnsiColor.RED).print('-');
                                        // deletion not not part of consensus
                                        consensus.add(' ');
                                        ++x;
                                    }
                                    ++readRef;
                                    break;
                                }
                            case EQ:
                            case M:
                            case X:
                                {
                                    if (testInInterval.test(readRef)) {
                                        final char refBase = Character.toUpperCase(refPosToBase.apply(readRef - 1));
                                        char readBase = Character.toUpperCase(baseAt.apply(readpos));
                                        consensus.add(readBase);
                                        colorizer.pen(base2ansiColor.apply(readBase));
                                        if (op.equals(CigarOperator.X) || (refBase != 'N' && readBase != 'N' && readBase != refBase)) {
                                            colorizer.pen(AnsiColor.RED);
                                        } else if (hideBases) {
                                            if (rec.getReadNegativeStrandFlag()) {
                                                readBase = ',';
                                            } else {
                                                readBase = '.';
                                            }
                                        }
                                        if (showReadName) {
                                            final String readName = rec.getReadName();
                                            if (readpos < 0 || readpos >= readName.length()) {
                                                readBase = '_';
                                            } else {
                                                readBase = readName.charAt(readpos);
                                            }
                                        }
                                        if (rec.getReadNegativeStrandFlag()) {
                                            readBase = Character.toLowerCase(readBase);
                                        } else {
                                            readBase = Character.toUpperCase(readBase);
                                        }
                                        if (print_this_line)
                                            colorizer.print(readBase);
                                        ++x;
                                    }
                                    ++readpos;
                                    ++readRef;
                                    break;
                                }
                        }
                        ++cigarIdx;
                    }
                }
                // end of loop cigar
                ref = readRef;
            }
            // out.println( " "+ref+" "+row.get(0).getAlignmentStart()+" "+row.get(0).getCigarString()+" "+row.get(0).getReadString());
            while (x < pixelWidth) {
                if (print_this_line)
                    out.print(" ");
                ++x;
            }
            if (print_this_line)
                out.println();
            consensus.eol();
            if (out.checkError())
                break;
        }
        if (out.checkError())
            break;
        if (!this.hideConsensus && consensus.bases.stream().anyMatch(C -> C.getCoverage() > 0)) {
            out.print(margin(groupName + " CONSENSUS"));
            x = 0;
            ref = interval.getStart();
            while (x < consensus.bases.size()) {
                final char refBase = Character.toUpperCase(refPosToBase.apply(ref - 1));
                final char consensusBase = consensus.bases.get(x).getConsensus();
                if (Character.isWhitespace(consensusBase)) {
                // nothing
                } else if (refBase != 'N' && consensusBase != refBase) {
                    colorizer.pen(AnsiColor.RED);
                } else {
                    colorizer.pen(base2ansiColor.apply(consensusBase));
                }
                if (!insertIsPresentAtX.test(x))
                    ++ref;
                colorizer.print(consensusBase);
                ++x;
            }
            out.println();
        }
        if (this.numCoverageRows > 0) {
            int minCov = consensus.bases.stream().mapToInt(C -> C.getCoverage()).min().orElse(0);
            final int maxCov = consensus.bases.stream().mapToInt(C -> C.getCoverage()).max().orElse(0);
            for (int y = 0; maxCov > 0 && y < this.numCoverageRows; ++y) {
                if (minCov == maxCov)
                    minCov--;
                double fract = (maxCov - minCov) / ((double) this.numCoverageRows);
                int inverse_y = (this.numCoverageRows - 1) - y;
                int d0 = (int) ((fract) * inverse_y);
                // int d1 = (int)((fract) * (inverse_y+1));
                out.print(margin(y == 0 ? groupName + " " + maxCov : (y + 1 == this.numCoverageRows ? String.valueOf(minCov) : "")));
                for (x = 0; x < consensus.bases.size(); ++x) {
                    int depth = consensus.bases.get(x).getCoverage() - minCov;
                    colorizer.print(depth >= d0 ? BLACK_SQUARE : ' ');
                }
                out.println();
            }
        }
    }
    if (this.tabixKnownGene != null && this.indexedFastaSequenceFile != null) {
        final List<KnownGene> genes = this.tabixKnownGene.getItemsInInterval(this.interval);
        if (!genes.isEmpty()) {
            out.println(this.knownGeneUri);
            for (final KnownGene gene : genes) {
                final KnownGene.CodingRNA codingRna = gene.getCodingRNA(contigSequence);
                final KnownGene.Peptide peptide = codingRna.getPeptide();
                out.print(margin(gene.getName()));
                x = 0;
                int ref0 = this.interval.getStart() - 1;
                while (x < pixelWidth) {
                    if (insertIsPresentAtX.test(x)) {
                        out.print("*");
                        ++x;
                    } else {
                        char pepChar = ' ';
                        if (ref0 >= gene.getTxStart() && ref0 < gene.getTxEnd()) {
                            pepChar = (gene.isPositiveStrand() ? '>' : '<');
                            int pepIdx = peptide.convertGenomicToPeptideCoordinate(ref0);
                            if (pepIdx != -1) {
                                final String aa3 = GeneticCode.aminoAcidTo3Letters(peptide.charAt(pepIdx));
                                final int[] offset = peptide.convertToGenomicCoordinates(pepIdx);
                                if (offset != null && offset.length == 3 && aa3 != null && aa3.length() == 3) {
                                    if (offset[0] == ref0)
                                        pepChar = aa3.charAt(0);
                                    else if (offset[1] == ref0)
                                        pepChar = aa3.charAt(1);
                                    else if (offset[2] == ref0)
                                        pepChar = aa3.charAt(2);
                                    else
                                        pepChar = '?';
                                } else {
                                    pepChar = '?';
                                }
                            }
                        }
                        out.print(pepChar);
                        ++ref0;
                        ++x;
                    }
                }
                while (x < pixelWidth) {
                    out.print(" ");
                    ++x;
                }
                out.println();
            }
        }
        out.println();
    }
    /**
     * variant section
     */
    if (!this.vcfReaders.isEmpty() && !out.checkError()) {
        final Function<GenotypeType, Character> gTypeToSymbol = new Function<GenotypeType, Character>() {

            /**
             * Translates a genotype type into the single character used to draw it.
             *
             * @param gt the genotype type to translate
             * @return {@code '?'} for NO_CALL, {@code '0'} for HOM_REF, {@code '1'} for HET,
             *         {@code '2'} for HOM_VAR, {@code 'm'} for MIXED, {@code 'u'} for
             *         UNAVAILABLE and {@code '.'} for anything else
             */
            @Override
            public Character apply(final GenotypeType gt) {
                final char symbol;
                switch(gt) {
                    case NO_CALL:
                        symbol = '?';
                        break;
                    case HOM_REF:
                        symbol = '0';
                        break;
                    case HET:
                        symbol = '1';
                        break;
                    case HOM_VAR:
                        symbol = '2';
                        break;
                    case MIXED:
                        symbol = 'm';
                        break;
                    case UNAVAILABLE:
                        symbol = 'u';
                        break;
                    default:
                        // fallback for any type not listed above
                        symbol = '.';
                        break;
                }
                return symbol;
            }
        };
        out.println();
        for (final VcfSource r : this.vcfReaders) {
            if (out.checkError())
                break;
            final VCFHeader header = r.vcfFileReader.getFileHeader();
            final CloseableIterator<VariantContext> iter = r.vcfFileReader.query(this.interval.getContig(), interval.getStart(), interval.getEnd());
            final List<VariantContext> variants = new ArrayList<>();
            while (iter.hasNext()) {
                variants.add(iter.next());
            }
            iter.close();
            if (variants.isEmpty())
                continue;
            out.println(r.vcfFile.getPath());
            if (header.hasGenotypingData()) {
                for (final String sample : header.getSampleNamesInOrder()) {
                    if (!variants.stream().map(V -> V.getGenotype(sample)).filter(G -> !hideNoCall || (hideNoCall && !G.isNoCall())).filter(G -> !hideHomRef || (hideHomRef && !G.isHomRef())).findAny().isPresent()) {
                        continue;
                    }
                    out.print(margin(sample));
                    ref = this.interval.getStart();
                    x = 0;
                    while (x < pixelWidth) {
                        if (insertIsPresentAtX.test(x)) {
                            out.print("*");
                            ++x;
                        } else {
                            char refBase = ' ';
                            for (final VariantContext ctx : variants) {
                                if (ctx.getStart() == ref) {
                                    final Genotype g = ctx.getGenotype(sample);
                                    if (g.isNoCall() && this.hideNoCall)
                                        continue;
                                    if (g.isHomRef() && this.hideHomRef)
                                        continue;
                                    refBase = gTypeToSymbol.apply(g.getType());
                                    break;
                                }
                            }
                            out.print(refBase);
                            ++ref;
                            ++x;
                        }
                    }
                    out.println();
                }
            } else // no genotype
            {
                for (final VariantContext ctx : variants) {
                    out.print(margin(String.valueOf(ctx.getStart()) + ":" + ctx.getReference().getDisplayString() + "/" + ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
                    ref = this.interval.getStart();
                    x = 0;
                    while (x < pixelWidth) {
                        if (insertIsPresentAtX.test(x)) {
                            out.print("*");
                            ++x;
                        } else {
                            out.print(ctx.getStart() == ref ? '+' : ' ');
                            ++ref;
                            ++x;
                        }
                    }
                    out.println();
                }
            }
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintStream(java.io.PrintStream) Counter(com.github.lindenb.jvarkit.util.Counter) Predicate(java.util.function.Predicate) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) TabixKnownGeneFileReader(com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) 
SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Closeable(java.io.Closeable) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Predicate(java.util.function.Predicate) SamReader(htsjdk.samtools.SamReader) Function(java.util.function.Function) ArrayList(java.util.ArrayList) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) TreeMap(java.util.TreeMap) CigarElement(htsjdk.samtools.CigarElement) SAMRecord(htsjdk.samtools.SAMRecord) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14