Examples with Counter - com.github.lindenb.jvarkit.util.Counter

Example 1 with Counter

use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.

the class BamToSVG method printSamRecord.

private void printSamRecord(XMLStreamWriter w, final SAMRecord record, final Map<Integer, Counter<Character>> consensus) throws XMLStreamException {
    double mid_y = this.featureHeight / 2.0;
    final double y_h95 = this.featureHeight * 0.90;
    final double y_top5 = (this.featureHeight - y_h95) / 2.0;
    final double y_bot5 = y_top5 + y_h95;
    final double arrow_w = this.featureHeight / 3.0;
    w.writeStartElement(SVG.NS, "g");
    String title = record.getReadName();
    w.writeAttribute("title", title);
    /* print that sam record */
    final int unclipped_start = record.getUnclippedStart();
    Cigar cigar = record.getCigar();
    if (cigar == null)
        return;
    byte[] bases = record.getReadBases();
    if (bases == null)
        return;
    byte[] qualities = record.getBaseQualities();
    if (qualities == null)
        return;
    int readPos = 0;
    Map<Integer, String> pos2insertions = new HashMap<Integer, String>();
    List<CigarElement> cigarElements = cigar.getCigarElements();
    /* find position of arrow */
    int arrow_cigar_index = -1;
    for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
        CigarElement ce = cigarElements.get(cidx);
        CigarOperator op = ce.getOperator();
        switch(op) {
            // threw
            case H:
            // threw
            case S:
                if (!this.showClipping)
                    break;
            case M:
            case EQ:
            case X:
                {
                    arrow_cigar_index = cidx;
                }
            default:
                break;
        }
        if (record.getReadNegativeStrandFlag() && arrow_cigar_index != -1) {
            break;
        }
    }
    int refPos = unclipped_start;
    /* loop over cigar string */
    for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
        CigarElement ce = cigarElements.get(cidx);
        CigarOperator op = ce.getOperator();
        boolean in_clip = false;
        switch(ce.getOperator()) {
            case D:
            case N:
                {
                    int c_start = trim(refPos);
                    int c_end = trim(refPos + ce.getLength());
                    if (c_start < c_end) {
                        w.writeEmptyElement("line");
                        w.writeAttribute("class", "indel");
                        w.writeAttribute("title", op.name() + ce.getLength());
                        w.writeAttribute("x1", format(baseToPixel(c_start)));
                        w.writeAttribute("x2", format(baseToPixel(c_end)));
                        w.writeAttribute("y1", format(mid_y));
                        w.writeAttribute("y2", format(mid_y));
                    }
                    refPos += ce.getLength();
                    break;
                }
            case I:
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < ce.getLength(); ++i) {
                        sb.append((char) bases[readPos++]);
                    }
                    pos2insertions.put(refPos, sb.toString());
                    break;
                }
            case H:
                {
                    if (!this.showClipping) {
                        refPos += ce.getLength();
                        break;
                    }
                    in_clip = true;
                // NO break;
                }
            case S:
                {
                    if (!this.showClipping) {
                        readPos += ce.getLength();
                        refPos += ce.getLength();
                        break;
                    }
                    in_clip = true;
                // NO break;
                }
            case X:
            case EQ:
            case M:
                {
                    int match_start = refPos;
                    int match_end = refPos + ce.getLength();
                    // print sam background
                    StringBuilder sb = new StringBuilder();
                    if (record.getReadNegativeStrandFlag() && match_start >= this.interval.start && match_start <= this.interval.end && cidx == arrow_cigar_index) {
                        sb.append(" M ").append(format(baseToPixel(match_start) + arrow_w)).append(',').append(y_top5);
                        sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w)));
                        sb.append(" L ").append(format(baseToPixel(match_start))).append(',').append(mid_y);
                        sb.append(" Z");
                    } else if (!record.getReadNegativeStrandFlag() && match_end >= this.interval.start && match_end <= this.interval.end && cidx == arrow_cigar_index) {
                        sb.append(" M ").append(format(baseToPixel(match_end) - arrow_w)).append(',').append(y_top5);
                        sb.append(" h ").append(format(-(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w)));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w));
                        sb.append(" L ").append(format(baseToPixel(match_end))).append(',').append(mid_y);
                        sb.append(" Z");
                    } else {
                        sb.append(" M ").append(format(baseToPixel(trim(match_start)))).append(',').append(y_top5);
                        sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start))));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start)))));
                        sb.append(" Z");
                    }
                    w.writeEmptyElement("path");
                    w.writeAttribute("d", sb.toString().trim());
                    if (ce.getOperator() == CigarOperator.H || ce.getOperator() == CigarOperator.S) {
                        w.writeAttribute("style", "fill:yellow;");
                    } else {
                        w.writeAttribute("style", "fill:url(#f" + record.getFlags() + ");");
                    }
                    if (op.consumesReadBases()) {
                        for (int i = 0; i < ce.getLength(); ++i) {
                            char ca = (char) bases[readPos];
                            if (!in_clip) {
                                Counter<Character> count = consensus.get(refPos + i);
                                if (count == null) {
                                    count = new Counter<>();
                                    consensus.put(refPos + i, count);
                                }
                                count.incr(ca);
                            }
                            char cb = 'N';
                            if (genomicSequence != null) {
                                cb = Character.toUpperCase(genomicSequence.charAt(refPos + i - 1));
                            }
                            if (cb != 'N' && ca != 'N' && cb != ca && !in_clip) {
                                w.writeEmptyElement("use");
                                w.writeAttribute("x", format(baseToPixel(refPos + i)));
                                // w.writeAttribute("y",format(y_top));never needed
                                w.writeAttribute("xlink", XLINK.NS, "href", "#mut");
                            }
                            if (this.interval.contains(refPos + i) && this.featureWidth > 5) {
                                // int qual = qualities[readPos];
                                w.writeEmptyElement("use");
                                w.writeAttribute("x", format(baseToPixel(refPos + i)));
                                // w.writeAttribute("y",format(y_top));never needed
                                w.writeAttribute("xlink", XLINK.NS, "href", "#b" + ca);
                            }
                            readPos++;
                        }
                    }
                    refPos += ce.getLength();
                    break;
                }
            default:
                {
                    throw new RuntimeException("Unknown SAM op: " + ce.getOperator());
                }
        }
    }
    for (Integer pos : pos2insertions.keySet()) {
        if (pos < this.interval.start)
            continue;
        if (pos > this.interval.end)
            continue;
        String insertion = pos2insertions.get(pos);
        w.writeEmptyElement("line");
        double x = baseToPixel(pos);
        w.writeAttribute("title", "Insertion " + insertion);
        w.writeAttribute("class", "insert");
        w.writeAttribute("x1", format(x));
        w.writeAttribute("x2", format(x));
        w.writeAttribute("y1", format(y_top5));
        w.writeAttribute("y2", format(y_bot5));
    }
    // g
    w.writeEndElement();
}

Also used : HashMap(java.util.HashMap) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) Cigar(htsjdk.samtools.Cigar) Counter(com.github.lindenb.jvarkit.util.Counter)

Example 2 with Counter

use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.

the class BamToSVG method printSample.

private void printSample(XMLStreamWriter w, double top_y, final Sample sample) throws XMLStreamException {
    w.writeComment("START SAMPLE: " + sample.name);
    w.writeStartElement(SVG.NS, "g");
    w.writeAttribute("transform", "translate(0," + top_y + ")");
    double y = 0;
    /* write title */
    w.writeEmptyElement("path");
    w.writeAttribute("class", "samplename");
    w.writeAttribute("title", sample.name);
    w.writeAttribute("d", this.hershey.svgPath(sample.name, scaleRect(new Rectangle2D.Double(0, y, this.drawinAreaWidth, HEIGHT_SAMPLE_NAME))));
    y += HEIGHT_SAMPLE_NAME;
    /* write REFERENCE */
    w.writeComment("REFERENCE");
    for (int pos = this.interval.start; // ignore if too small
    this.featureWidth > 5 && pos <= this.interval.end; ++pos) {
        char c = (this.genomicSequence == null ? 'N' : this.genomicSequence.charAt(pos - 1));
        double x0 = baseToPixel(pos);
        w.writeEmptyElement("use");
        w.writeAttribute("x", format(x0));
        w.writeAttribute("y", format(y));
        w.writeAttribute("xlink", XLINK.NS, "href", "#b" + c);
    }
    y += featureHeight;
    /* skip line for later consensus */
    double consensus_y = y;
    y += featureHeight;
    Map<Integer, Counter<Character>> pos2consensus = new HashMap<Integer, Counter<Character>>();
    /* print variants */
    if (!pos2variant.isEmpty()) {
        for (VariantContext ctx : this.pos2variant) {
            Genotype g = ctx.getGenotype(sample.name);
            if (g == null || !g.isCalled() || g.isHomRef())
                continue;
            if (ctx.getEnd() < this.interval.start)
                continue;
            if (ctx.getStart() > this.interval.end)
                continue;
            double x0 = baseToPixel(ctx.getStart());
            w.writeEmptyElement("use");
            w.writeAttribute("x", format(x0));
            w.writeAttribute("y", format(y));
            w.writeAttribute("title", ctx.getAltAlleleWithHighestAlleleCount().getDisplayString());
            if (g.isHomVar()) {
                w.writeAttribute("xlink", XLINK.NS, "href", "#homvar");
            } else if (g.isHet()) {
                w.writeAttribute("xlink", XLINK.NS, "href", "#het");
            }
        }
        y += featureHeight;
    }
    /* print all lines */
    w.writeComment("Alignments");
    for (int nLine = 0; nLine < sample.lines.size(); ++nLine) {
        w.writeStartElement("g");
        w.writeAttribute("transform", "translate(0," + format(y + nLine * featureHeight) + ")");
        List<SAMRecord> line = sample.lines.get(nLine);
        // loop over records on that line
        for (SAMRecord record : line) {
            printSamRecord(w, record, pos2consensus);
        }
        // g
        w.writeEndElement();
    }
    /* write consensus */
    w.writeComment("Consensus");
    for (int pos = this.interval.start; // ignore if too small
    this.featureWidth > 5 && pos <= this.interval.end; ++pos) {
        Counter<Character> cons = pos2consensus.get(pos);
        if (cons == null)
            continue;
        char c = cons.getMostFrequent();
        double x0 = baseToPixel(pos);
        w.writeEmptyElement("use");
        w.writeAttribute("x", format(x0));
        w.writeAttribute("y", format(consensus_y));
        w.writeAttribute("xlink", XLINK.NS, "href", "#b" + c);
    }
    /* surrounding frame for that sample */
    w.writeEmptyElement("rect");
    w.writeAttribute("class", "frame");
    w.writeAttribute("x", format(0));
    w.writeAttribute("y", format(0));
    w.writeAttribute("width", format(drawinAreaWidth));
    w.writeAttribute("height", format(sample.getHeight()));
    // g for sample
    w.writeEndElement();
    w.writeComment("END SAMPLE: " + sample.name);
}

Also used : HashMap(java.util.HashMap) Rectangle2D(java.awt.geom.Rectangle2D) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecord(htsjdk.samtools.SAMRecord)

Example 3 with Counter

use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.

the class BamStats02 method run.

private void run(String filename, SamReader r, PrintWriter out) {
    final Counter<Category> counter = new Counter<>();
    SAMRecordIterator iter = null;
    try {
        iter = r.iterator();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord record = progress.watch(iter.next());
            final Category cat = new Category();
            cat.set(STRING_PROPS.filename, filename);
            cat.flag = record.getFlags();
            cat.set(INT_PROPS.inTarget, -1);
            final SAMReadGroupRecord g = record.getReadGroup();
            if (g != null) {
                cat.set(STRING_PROPS.samplename, g.getSample());
                cat.set(STRING_PROPS.platform, g.getPlatform());
                cat.set(STRING_PROPS.platformUnit, g.getPlatformUnit());
                cat.set(STRING_PROPS.library, g.getLibrary());
            }
            final ShortReadName readName = ShortReadName.parse(record);
            if (readName.isValid()) {
                cat.set(STRING_PROPS.instrument, readName.getInstrumentName());
                cat.set(STRING_PROPS.flowcell, readName.getFlowCellId());
                cat.set(INT_PROPS.lane, readName.getFlowCellLane());
                cat.set(INT_PROPS.run, readName.getRunId());
            }
            if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
                cat.set(STRING_PROPS.mate_chromosome, record.getMateReferenceName());
            }
            if (!record.getReadUnmappedFlag()) {
                cat.set(INT_PROPS.mapq, (int) (Math.ceil(record.getMappingQuality() / 10.0) * 10));
                cat.set(STRING_PROPS.chromosome, record.getReferenceName());
                if (this.intervals != null) {
                    if (this.intervals.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
                        cat.set(INT_PROPS.inTarget, 1);
                    } else {
                        cat.set(INT_PROPS.inTarget, 0);
                    }
                }
            }
            counter.incr(cat);
        }
        progress.finish();
        for (final Category cat : counter.keySetDecreasing()) {
            cat.print(out);
            out.print("\t");
            out.print(counter.count(cat));
            out.println();
        }
        out.flush();
    } finally {
        CloserUtil.close(iter);
    }
}

Also used : Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ShortReadName(com.github.lindenb.jvarkit.util.illumina.ShortReadName) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Interval(htsjdk.samtools.util.Interval)

Example 4 with Counter

use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.

the class Biostar154220 method doWork.

@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        List<SAMRecord> buffer = new ArrayList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // no more record or
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else // dump buffer
            {
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            throw new IOException("Cigar missing in " + rec.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consumme this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}

Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 5 with Counter

use of com.github.lindenb.jvarkit.util.Counter in project jvarkit by lindenb.

the class HaloplexParasite method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    SamReader samReader = null;
    final List<Mutation> mutations = new ArrayList<>();
    try {
        final Set<File> bamFiles = Files.lines(this.bamList.toPath()).filter(T -> !(T.isEmpty() || T.startsWith("#"))).map(T -> new File(T)).collect(Collectors.toSet());
        final VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(in.getHeader().getSequenceDictionary());
        final VCFFilterHeaderLine filter = new VCFFilterHeaderLine("HALOPLEXPARASITE", "fails Parasite Haloplex Sequence");
        final VCFInfoHeaderLine infoWord = new VCFInfoHeaderLine(filter.getID(), 1, VCFHeaderLineType.String, "Parasite Haloplex Sequence (Word|Count|Fraction)");
        super.addMetaData(header);
        out.writeHeader(header);
        header.addMetaDataLine(filter);
        header.addMetaDataLine(infoWord);
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, ctx.getContig(), ctx.getStart(), ctx.getEnd(), ctx.getAlleles());
            if (!(ctx.isIndel() || ctx.isMixed())) {
                out.add(vcb.make());
                continue;
            }
            if (!vcb.getAlleles().stream().filter(A -> !(A.isSymbolic() || A.isNoCall() || A.length() < this.minClipSize)).findAny().isPresent()) {
                out.add(vcb.make());
                continue;
            }
            final Mutation mut = new Mutation(ctx);
            mutations.add(mut);
        }
        final Counter<String> words = new Counter<>();
        for (final File bamFile : bamFiles) {
            LOG.info("Opening " + bamFile);
            samReader = createSamReaderFactory().open(bamFile);
            for (final Mutation mut : mutations) {
                // words seen in that mutation : don't use a Counter
                final Set<String> mutWords = new HashSet<>();
                /* loop over reads overlapping that mutation */
                final SAMRecordIterator sri = samReader.queryOverlapping(mut.contig, mut.start, mut.end);
                while (sri.hasNext()) {
                    final SAMRecord rec = sri.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getDuplicateReadFlag())
                        continue;
                    if (rec.getReadFailsVendorQualityCheckFlag())
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar.numCigarElements() == 1)
                        continue;
                    final byte[] bases = rec.getReadBases();
                    int refpos = rec.getUnclippedStart();
                    int readpos = 0;
                    /* scan cigar overlapping that mutation */
                    for (final CigarElement ce : cigar) {
                        final CigarOperator op = ce.getOperator();
                        final int ref_end = refpos + (op.consumesReferenceBases() || op.isClipping() ? ce.getLength() : 0);
                        final int read_end = readpos + (op.consumesReadBases() ? ce.getLength() : 0);
                        /* check clip is large enough */
                        if (op.equals(CigarOperator.S) && ce.getLength() >= this.minClipSize) {
                            /* check clip overlap mutation */
                            if (!(ref_end < mut.start || refpos > mut.end)) {
                                /* break read of soft clip into words */
                                for (int x = 0; x + this.minClipSize <= ce.getLength(); ++x) {
                                    final String substr = new String(bases, readpos + x, this.minClipSize);
                                    if (!substr.contains("N")) {
                                        final String revcomp = AcidNucleics.reverseComplement(substr);
                                        mutWords.add(substr);
                                        if (!revcomp.equals(substr))
                                            mutWords.add(revcomp);
                                    }
                                }
                            }
                        }
                        refpos = ref_end;
                        readpos = read_end;
                    }
                }
                sri.close();
                for (final String w : mutWords) {
                    words.incr(w);
                }
            }
            // end of for(mutations)
            samReader.close();
            samReader = null;
        }
        LOG.info("mutations:" + mutations.size());
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        for (final Mutation mut : mutations) {
            progress.watch(mut.contig, mut.start);
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, mut.contig, mut.start, mut.end, mut.alleles);
            String worstString = null;
            Double worstFraction = null;
            final double totalWords = words.getTotal();
            for (final Allele a : mut.alleles) {
                if (a.isSymbolic() || a.isNoCall() || a.length() < this.minClipSize)
                    continue;
                for (int x = 0; x + this.minClipSize <= a.length(); ++x) {
                    final String substr = new String(a.getBases(), x, this.minClipSize);
                    final long count = words.count(substr);
                    final double fraction = count / totalWords;
                    if (worstFraction == null || worstFraction < fraction) {
                        worstString = substr + "|" + count + "|" + fraction;
                        worstFraction = fraction;
                    }
                }
            }
            if (worstString != null) {
                vcb.attribute(infoWord.getID(), worstString);
            }
            if (worstFraction != null && worstFraction.doubleValue() >= this.tresholdFraction) {
                vcb.filter(filter.getID());
            }
            out.add(vcb.make());
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(samReader);
    }
}

Also used : Cigar(htsjdk.samtools.Cigar) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Aggregations

Counter (com.github.lindenb.jvarkit.util.Counter)17 SAMRecord (htsjdk.samtools.SAMRecord)8 ArrayList (java.util.ArrayList)8 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)7 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)7 Genotype (htsjdk.variant.variantcontext.Genotype)7 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 File (java.io.File)7 HashSet (java.util.HashSet)7 SamReader (htsjdk.samtools.SamReader)6 VCFHeader (htsjdk.variant.vcf.VCFHeader)6 List (java.util.List)6 Set (java.util.Set)6 Parameter (com.beust.jcommander.Parameter)5 Program (com.github.lindenb.jvarkit.util.jcommander.Program)5 Logger (com.github.lindenb.jvarkit.util.log.Logger)5 Cigar (htsjdk.samtools.Cigar)5 CigarElement (htsjdk.samtools.CigarElement)5 CigarOperator (htsjdk.samtools.CigarOperator)5 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)5