Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class CoverageServer, method printRaster.
/**
* Render the BAM over a small interval as an image, displaying individual reads.
*/
private void printRaster(final BamInput bam, final SimpleInterval midRegion, final SimpleInterval region, final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException {
final IntToDoubleFunction position2pixel = X -> ((X - region.getStart()) / (double) region.getLengthOnReference()) * (double) image_width;
final SamReaderFactory srf = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidxRef);
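/* two reads can share a pileup row only if, once projected to pixels, the left read's unclipped end plus a one-pixel gap lies strictly before the right read's unclipped start */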
final Pileup<SAMRecord> pileup = new Pileup<>((L, R) -> position2pixel.applyAsDouble(L.getUnclippedEnd() + 1) + 1 < position2pixel.applyAsDouble(R.getUnclippedStart()));
try (SamReader sr = srf.open(bam.bamPath)) {
try (CloseableIterator<SAMRecord> iter = sr.query(region.getContig(), // extend the window so clipped reads overlapping the edges are included
Math.max(0, region.getStart() - this.small_region_size), region.getEnd() + this.small_region_size, false)) {
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (!acceptRead(rec))
continue;
if (rec.getUnclippedEnd() < region.getStart())
continue;
if (rec.getUnclippedStart() > region.getEnd())
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
pileup.add(rec);
}
} // end iterator
} // end samreader
ReferenceSequence refInInterval = null;
try (ReferenceSequenceFile refseq = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidxRef)) {
final SAMSequenceRecord ssr = this.dictionary.getSequence(region.getContig());
if (region.getStart() <= ssr.getSequenceLength()) {
refInInterval = refseq.getSubsequenceAt(region.getContig(), region.getStart(), Math.min(region.getEnd(), ssr.getSequenceLength()));
}
}
final BufferedImage img = new BufferedImage(image_width, image_height, BufferedImage.TYPE_INT_RGB);
final Graphics2D g = img.createGraphics();
g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
g.setColor(new Color(240, 240, 240));
g.fillRect(0, 0, image_width + 1, image_height + 1);
final Stroke oldStroke = g.getStroke();
g.setStroke(new BasicStroke(0.5f));
g.setColor(Color.WHITE);
final double mid_start = position2pixel.applyAsDouble(midRegion.getStart());
final double mid_end = position2pixel.applyAsDouble(midRegion.getEnd() + 1);
g.fill(new Rectangle2D.Double(mid_start, 0, (mid_end - mid_start), this.image_height));
final int[] int_coverage = new int[region.getLengthOnReference()];
final int margin_top = 12;
final double featureHeight = Math.min(20, (this.image_height - margin_top) / Math.max(1.0, (double) pileup.getRowCount()));
double y = image_height - featureHeight;
for (final List<SAMRecord> row : pileup) {
final double h2 = Math.min(featureHeight * 0.9, featureHeight - 2);
for (final SAMRecord rec : row) {
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
/* fill coverage array */
int ref1 = rec.getAlignmentStart();
for (CigarElement ce : cigar.getCigarElements()) {
if (ref1 > region.getEnd())
break;
final CigarOperator op = ce.getOperator();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength(); ++x) {
int pos = ref1 + x;
if (pos < region.getStart())
continue;
if (pos > region.getEnd())
break;
int_coverage[pos - region.getStart()]++;
}
}
ref1 += ce.getLength();
}
}
/* draw rec itself */
final double midy = y + h2 / 2.0;
g.setColor(Color.DARK_GRAY);
g.draw(new Line2D.Double(position2pixel.applyAsDouble(rec.getUnclippedStart()), midy, position2pixel.applyAsDouble(rec.getUnclippedEnd()), midy));
ref1 = rec.getUnclippedStart();
final List<Double> insertions = new ArrayList<>();
for (final CigarElement ce : cigar.getCigarElements()) {
if (ref1 > region.getEnd())
break;
final CigarOperator op = ce.getOperator();
Shape shape = null;
Color fill = null;
Color stroke = Color.DARK_GRAY;
switch(op) {
case P:
break;
case M: // fall through
case X: // fall through
case EQ: // fall through
case S: // fall through
case H:
final double x1 = position2pixel.applyAsDouble(ref1);
shape = new Rectangle2D.Double(x1, y, position2pixel.applyAsDouble(ref1 + ce.getLength()) - x1, h2);
ref1 += ce.getLength();
switch(op) {
case H:
case S:
fill = Color.YELLOW;
break;
case X:
fill = Color.RED;
break;
case EQ:
case M:
fill = Color.LIGHT_GRAY;
break;
default:
break;
}
break;
case N: // fall through
case D:
shape = null;
fill = null;
stroke = null;
ref1 += ce.getLength();
break;
case I:
shape = null;
fill = null;
stroke = null;
insertions.add(position2pixel.applyAsDouble(ref1));
break;
default:
throw new IllegalStateException("" + op);
}
if (ref1 < region.getStart())
continue;
if (shape != null) {
if (fill != null) {
g.setColor(fill);
g.fill(shape);
}
if (stroke != null && h2 > 4) {
g.setColor(stroke);
g.draw(shape);
}
}
}
/* draw mismatched bases */
if (refInInterval != null && rec.getReadBases() != null && rec.getReadBases() != SAMRecord.NULL_SEQUENCE) {
final byte[] bases = rec.getReadBases();
final IntFunction<Character> baseRead = IDX -> IDX < 0 || IDX >= bases.length || bases == SAMRecord.NULL_SEQUENCE ? 'N' : (char) Character.toUpperCase(bases[IDX]);
int read0 = 0;
ref1 = rec.getAlignmentStart();
for (CigarElement ce : cigar.getCigarElements()) {
if (ref1 > region.getEnd())
break;
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case H:
break;
case D:
case N:
ref1 += ce.getLength();
break;
case S:
case I:
read0 += ce.getLength();
break;
case EQ:
case M:
case X:
{
for (int j = 0; j < ce.getLength(); j++) {
if (ref1 + j < region.getStart())
continue;
if (ref1 + j >= region.getStart() + refInInterval.length())
break;
final int ref_base_idx = ref1 - region.getStart() + j;
char ctgBase = (char) (ref_base_idx < 0 || ref_base_idx >= refInInterval.length() ? 'N' : Character.toUpperCase(refInInterval.getBases()[ref_base_idx]));
if (ctgBase == 'N')
continue;
char readBase = baseRead.apply(read0 + j);
if (readBase == 'N')
continue;
if (readBase == ctgBase)
continue;
g.setColor(Color.ORANGE);
final double x1 = position2pixel.applyAsDouble(ref1 + j);
final double x2 = position2pixel.applyAsDouble(ref1 + j + 1);
g.fill(new Rectangle2D.Double(x1, y, x2 - x1, h2));
}
read0 += ce.getLength();
ref1 += ce.getLength();
break;
}
default:
break;
}
}
}
for (double px : insertions) {
g.setColor(Color.RED);
g.draw(new Line2D.Double(px, y - 0.5, px, y + h2 + 0.5));
}
}
y -= featureHeight;
}
double max_cov = IntStream.of(int_coverage).max().orElse(0);
g.setColor(Color.DARK_GRAY);
g.drawString("Sample:" + bam.sample + " max-cov:" + (int) max_cov + " " + region.toNiceString(), 10, 10);
/* plot coverage */
final GeneralPath gp = new GeneralPath();
for (int i = 0; max_cov > 0 && i < int_coverage.length; ++i) {
final double x1 = position2pixel.applyAsDouble(region.getStart() + i);
final double x2 = position2pixel.applyAsDouble(region.getStart() + i + 1);
final double y1 = image_height - (int_coverage[i] / max_cov) * (image_height - margin_top);
if (i == 0)
gp.moveTo(x1, y1);
else
gp.lineTo(x1, y1);
gp.lineTo(x2, y1);
}
g.setStroke(new BasicStroke(0.5f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_BEVEL, 0f, new float[] { 1f, 2f, 1f }, 0f));
g.setColor(Color.BLUE);
g.draw(gp);
g.setStroke(oldStroke);
writeGenes(g, region);
writeKnownCnv(g, region);
g.setColor(Color.PINK);
g.draw(new Line2D.Double(mid_start, 0, mid_start, image_height));
g.draw(new Line2D.Double(mid_end, 0, mid_end, image_height));
writeImage(img, bam, region, response);
}
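The raster above is anchored on a linear genome-to-pixel mapping. The following standalone sketch (not jvarkit code; the region bounds and image width are hypothetical values) shows how positions inside the region project onto the image:

import java.util.function.IntToDoubleFunction;

public class Position2PixelDemo {
    public static void main(final String[] args) {
        // hypothetical region spanning 500 bases, drawn on a 1000-pixel-wide image
        final int regionStart = 1000;
        final int regionLength = 500; // getLengthOnReference()
        final int imageWidth = 1000;  // image_width
        final IntToDoubleFunction position2pixel = X -> ((X - regionStart) / (double) regionLength) * (double) imageWidth;
        System.out.println(position2pixel.applyAsDouble(1000)); // 0.0    : left edge
        System.out.println(position2pixel.applyAsDouble(1250)); // 500.0  : middle
        System.out.println(position2pixel.applyAsDouble(1500)); // 1000.0 : right edge
    }
}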
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class SamFindClippedRegions, method doWork.
@Override
public int doWork(final List<String> args) {
final int bad_mapq = 30;
try {
if (this.min_clip_depth > this.min_depth) {
LOG.error("this.min_clip_depth>this.min_depth");
return -1;
}
if (this.fraction < 0 || this.fraction > 1.0) {
LOG.error("bad ratio: " + fraction);
return -1;
}
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceFai);
final List<Path> inputs = IOUtils.unrollPaths(args);
if (inputs.isEmpty()) {
LOG.error("input is missing");
return -1;
}
final IntervalTreeMap<Interval> intronMap = new IntervalTreeMap<>();
if (this.gtfPath != null) {
try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
gtfReader.getAllGenes().stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.hasIntron()).flatMap(T -> T.getIntrons().stream()).map(I -> new Interval(I.getContig(), I.getStart(), I.getEnd(), I.isNegativeStrand(), I.getTranscript().getId())).forEach(I -> intronMap.put(I, I));
}
}
final List<String> sample_list = new ArrayList<>();
final QueryInterval[] queryIntervals;
if (this.bedPath == null) {
queryIntervals = null;
} else {
try (BedLineReader br = new BedLineReader(this.bedPath).setValidationStringency(ValidationStringency.LENIENT).setContigNameConverter(ContigNameConverter.fromOneDictionary(dict))) {
queryIntervals = br.optimizeIntervals(dict);
}
}
SortingCollection<Base> sortingCollection = SortingCollection.newInstance(Base.class, new BaseCodec(), (A, B) -> A.compare1(B), writingSortingCollection.getMaxRecordsInRam(), writingSortingCollection.getTmpPath());
sortingCollection.setDestructiveIteration(true);
final Predicate<Base> acceptBase = B -> {
return B.clip() > 0;
};
final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.referenceFai);
for (final Path path : inputs) {
try (SamReader sr = srf.open(path)) {
final SAMFileHeader header = sr.getFileHeader();
SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(header));
final String sample_name = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
if (sample_list.contains(sample_name)) {
LOG.error("duplicate sample " + sample_name + " in " + path);
return -1;
}
final int sample_idx = sample_list.size();
sample_list.add(sample_name);
int prev_tid = -1;
final SortedMap<Integer, Base> pos2base = new TreeMap<>();
/* get base in pos2base, create it if needed */
final BiFunction<Integer, Integer, Base> baseAt = (TID, POS) -> {
Base b = pos2base.get(POS);
if (b == null) {
b = new Base(sample_idx, TID, POS);
pos2base.put(POS, b);
}
return b;
};
try (CloseableIterator<SAMRecord> iter = queryIntervals == null ? sr.iterator() : sr.query(queryIntervals, false)) {
for (; ; ) {
final SAMRecord rec = (iter.hasNext() ? iter.next() : null);
if (rec != null && !SAMRecordDefaultFilter.accept(rec, this.min_mapq))
continue;
if (rec == null || prev_tid != rec.getReferenceIndex().intValue()) {
for (final Integer pos : pos2base.keySet()) {
final Base b = pos2base.get(pos);
if (acceptBase.test(b))
sortingCollection.add(b);
}
if (rec == null)
break;
pos2base.clear();
prev_tid = rec.getReferenceIndex().intValue();
}
/* flush and emit bases located before the current read */
for (Iterator<Integer> rpos = pos2base.keySet().iterator(); rpos.hasNext(); ) {
final Integer pos = rpos.next();
if (pos.intValue() + this.max_clip_length >= rec.getUnclippedStart())
break;
final Base b = pos2base.get(pos);
if (acceptBase.test(b))
sortingCollection.add(b);
rpos.remove();
}
final Cigar cigar = rec.getCigar();
int refPos = rec.getAlignmentStart();
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength(); ++x) {
final Base gt = baseAt.apply(prev_tid, refPos + x);
gt.noClip++;
gt.noClip_sum_mapq += rec.getMappingQuality();
}
} else if (op.equals(CigarOperator.D) || op.equals(CigarOperator.N)) {
baseAt.apply(prev_tid, refPos).del++;
baseAt.apply(prev_tid, refPos + ce.getLength() - 1).del++;
}
refPos += ce.getLength();
}
}
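/* record soft/hard clips of sufficient length at the base flanking each side of the alignment */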
CigarElement ce = cigar.getFirstCigarElement();
if (ce != null && ce.getOperator().isClipping() && ce.getLength() >= this.min_clip_operator_length) {
baseAt.apply(prev_tid, rec.getStart() - 1).leftClip++;
}
ce = cigar.getLastCigarElement();
if (ce != null && ce.getOperator().isClipping() && ce.getLength() >= this.min_clip_operator_length) {
baseAt.apply(prev_tid, rec.getEnd() + 1).rightClip++;
}
}
}
/* write last bases */
for (final Integer pos : pos2base.keySet()) {
final Base b = pos2base.get(pos);
if (acceptBase.test(b))
sortingCollection.add(b);
}
} // end open reader
} // end loop sam files
sortingCollection.doneAdding();
/* build VCF header */
final Allele reference_allele = Allele.create("N", true);
final Allele alt_allele = Allele.create("<CLIP>", false);
final Set<VCFHeaderLine> vcfHeaderLines = new HashSet<>();
VCFStandardHeaderLines.addStandardFormatLines(vcfHeaderLines, true, VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS);
VCFStandardHeaderLines.addStandardInfoLines(vcfHeaderLines, true, VCFConstants.DEPTH_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_FREQUENCY_KEY);
final VCFFormatHeaderLine leftClip = new VCFFormatHeaderLine("CL", 1, VCFHeaderLineType.Integer, "Left Clip");
vcfHeaderLines.add(leftClip);
final VCFFormatHeaderLine rightClip = new VCFFormatHeaderLine("RL", 1, VCFHeaderLineType.Integer, "Right Clip");
vcfHeaderLines.add(rightClip);
final VCFFormatHeaderLine totalCip = new VCFFormatHeaderLine("TL", 1, VCFHeaderLineType.Integer, "Total Clip");
vcfHeaderLines.add(totalCip);
final VCFFormatHeaderLine totalDel = new VCFFormatHeaderLine("DL", 1, VCFHeaderLineType.Integer, "Total Deletions");
vcfHeaderLines.add(totalDel);
final VCFFormatHeaderLine noClipMAPQ = new VCFFormatHeaderLine("MQ", 1, VCFHeaderLineType.Integer, "Average MAPQ for reads without clip at this position.");
vcfHeaderLines.add(noClipMAPQ);
final VCFInfoHeaderLine averageMAPQ = new VCFInfoHeaderLine("AVG_MAPQ", 1, VCFHeaderLineType.Integer, "Average MAPQ for called genotypes");
vcfHeaderLines.add(averageMAPQ);
final VCFInfoHeaderLine infoRetrogene = new VCFInfoHeaderLine("RETROGENE", 1, VCFHeaderLineType.String, "transcript name for Possible retrogene.");
vcfHeaderLines.add(infoRetrogene);
final VCFFilterHeaderLine filterRetrogene = new VCFFilterHeaderLine("POSSIBLE_RETROGENE", "Junction is a possible Retrogene.");
vcfHeaderLines.add(filterRetrogene);
final VCFFilterHeaderLine filterlowMapq = new VCFFilterHeaderLine("LOW_MAPQ", "Low average mapq (< " + bad_mapq + ")");
vcfHeaderLines.add(filterlowMapq);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample_list);
vcfHeader.setSequenceDictionary(dict);
JVarkitVersion.getInstance().addMetaData(this, vcfHeader);
this.writingVcfConfig.dictionary(dict);
try (VariantContextWriter w = this.writingVcfConfig.open(this.outputFile)) {
w.writeHeader(vcfHeader);
try (CloseableIterator<Base> r1 = sortingCollection.iterator()) {
try (EqualRangeIterator<Base> r2 = new EqualRangeIterator<>(r1, (A, B) -> A.compare2(B))) {
while (r2.hasNext()) {
final List<Base> array = r2.next();
final Base first = array.get(0);
if (first.pos < 1)
continue;
// skip if no sample has any clipped read at this position
if (array.stream().mapToInt(G -> G.clip()).sum() == 0)
continue;
if (array.stream().allMatch(G -> G.clip() < min_clip_depth))
continue;
if (array.stream().allMatch(G -> G.dp() < min_depth))
continue;
if (array.stream().allMatch(G -> G.ratio() < fraction))
continue;
final VariantContextBuilder vcb = new VariantContextBuilder();
final String chrom = dict.getSequence(first.tid).getSequenceName();
vcb.chr(chrom);
vcb.start(first.pos);
vcb.stop(first.pos);
vcb.alleles(Arrays.asList(reference_allele, alt_allele));
vcb.attribute(VCFConstants.DEPTH_KEY, array.stream().mapToInt(G -> G.dp()).sum());
final List<Genotype> genotypes = new ArrayList<>(array.size());
int AC = 0;
int AN = 0;
int max_clip = 1;
double sum_mapq = 0.0;
int count_mapq = 0;
for (final Base gt : array) {
final GenotypeBuilder gb = new GenotypeBuilder(sample_list.get(gt.sample_idx));
if (gt.clip() == 0 && gt.noClip == 0) {
gb.alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
} else if (gt.noClip == 0) {
gb.alleles(Arrays.asList(alt_allele, alt_allele));
AC += 2;
sum_mapq += gt.noClipMapq();
count_mapq++;
AN += 2;
} else if (gt.clip() == 0) {
gb.alleles(Arrays.asList(reference_allele, reference_allele));
AN += 2;
} else {
gb.alleles(Arrays.asList(reference_allele, alt_allele));
AC++;
sum_mapq += gt.noClipMapq();
count_mapq++;
AN += 2;
}
gb.DP(gt.dp());
gb.attribute(leftClip.getID(), gt.leftClip);
gb.attribute(rightClip.getID(), gt.rightClip);
gb.attribute(totalCip.getID(), gt.clip());
gb.attribute(totalDel.getID(), gt.del);
gb.attribute(noClipMAPQ.getID(), gt.noClipMapq());
gb.AD(new int[] { gt.noClip, gt.clip() });
genotypes.add(gb.make());
max_clip = Math.max(max_clip, gt.clip());
}
if (count_mapq > 0) {
final int avg_mapq = (int) (sum_mapq / count_mapq);
vcb.attribute(averageMAPQ.getID(), avg_mapq);
if (avg_mapq < bad_mapq)
vcb.filter(filterlowMapq.getID());
}
if (gtfPath != null) {
final Locatable bounds1 = new SimpleInterval(chrom, Math.max(1, first.pos - max_intron_distance), first.pos + max_intron_distance);
intronMap.getOverlapping(bounds1).stream().filter(I -> Math.abs(I.getStart() - first.pos) <= this.max_intron_distance || Math.abs(I.getEnd() - first.pos) <= this.max_intron_distance).map(I -> I.getName()).findFirst().ifPresent(transcript_id -> {
vcb.attribute(infoRetrogene.getID(), transcript_id);
vcb.filter(filterRetrogene.getID());
});
}
vcb.log10PError(max_clip / -10.0);
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, AC);
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, AN);
if (AN > 0)
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, AC / (float) AN);
vcb.genotypes(genotypes);
w.add(vcb.make());
} // end while r2
} // end r2
} // end r1
} // end writer
sortingCollection.cleanup();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
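The filters applied to each candidate site above keep a position only when at least one sample reaches each threshold, not necessarily the same sample for every criterion. Below is a minimal sketch of that logic; the SiteCount type, threshold values and sample data are invented for illustration:

import java.util.List;

public class ClipFilterDemo {
    /** hypothetical per-sample counts at one genomic position */
    static final class SiteCount {
        final int clip;
        final int depth;
        SiteCount(final int clip, final int depth) { this.clip = clip; this.depth = depth; }
        double ratio() { return this.depth == 0 ? 0.0 : this.clip / (double) this.depth; }
    }

    static boolean keepSite(final List<SiteCount> samples, final int minClipDepth, final int minDepth, final double minFraction) {
        if (samples.stream().mapToInt(S -> S.clip).sum() == 0) return false; // no clip at all
        if (samples.stream().allMatch(S -> S.clip < minClipDepth)) return false;
        if (samples.stream().allMatch(S -> S.depth < minDepth)) return false;
        return !samples.stream().allMatch(S -> S.ratio() < minFraction);
    }

    public static void main(final String[] args) {
        final List<SiteCount> site = List.of(new SiteCount(8, 30), new SiteCount(0, 25));
        System.out.println(keepSite(site, 5, 10, 0.2)); // true: the first sample reaches every threshold
    }
}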
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class ValidateCnv, method doWork.
@Override
public int doWork(final List<String> args) {
if (this.extendFactor <= 0) {
LOG.error("bad extend factor " + this.extendFactor);
return -1;
}
if (this.treshold < 0 || this.treshold >= 0.25) {
LOG.error("Bad treshold 0 < " + this.treshold + " >=0.25 ");
return -1;
}
final Map<String, BamInfo> sample2bam = new HashMap<>();
VariantContextWriter out = null;
Iterator<? extends Locatable> iterIn = null;
try {
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.rererencePath);
final CRAMReferenceSource cramReferenceSource = new ReferenceSource(this.rererencePath);
final List<Path> bamPaths = IOUtils.unrollPaths(this.bamFiles);
final String input = oneFileOrNull(args);
if (input == null) {
iterIn = IntervalListProvider.empty().dictionary(dict).skipUnknownContigs().fromInputStream(stdin(), "bed").iterator();
} else {
final IntervalListProvider ilp = IntervalListProvider.from(input).setVariantPredicate(CTX -> {
if (CTX.isSNP())
return false;
final String svType = CTX.getAttributeAsString(VCFConstants.SVTYPE, "");
if (svType != null && (svType.equals("INV") || svType.equals("BND")))
return false;
return true;
}).dictionary(dict).skipUnknownContigs();
iterIn = ilp.stream().iterator();
}
/* register each bam */
for (final Path p2 : bamPaths) {
final BamInfo bi = new BamInfo(p2, cramReferenceSource);
if (sample2bam.containsKey(bi.sampleName)) {
LOG.error("sample " + bi.sampleName + " specified twice.");
bi.close();
return -1;
}
sample2bam.put(bi.sampleName, bi);
}
if (sample2bam.isEmpty()) {
LOG.error("no bam was defined");
return -1;
}
final Set<VCFHeaderLine> metadata = new HashSet<>();
final VCFInfoHeaderLine infoSVSamples = new VCFInfoHeaderLine("N_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of Samples that could carry a SV");
metadata.add(infoSVSamples);
final VCFInfoHeaderLine infoSvLen = new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length");
metadata.add(infoSvLen);
final BiFunction<String, String, VCFFormatHeaderLine> makeFmt = (TAG, DESC) -> new VCFFormatHeaderLine(TAG, 1, VCFHeaderLineType.Integer, DESC);
final VCFFormatHeaderLine formatCN = new VCFFormatHeaderLine("CN", 1, VCFHeaderLineType.Float, "normalized copy-number. Threshold was " + this.treshold);
metadata.add(formatCN);
final VCFFormatHeaderLine nReadsSupportingSv = makeFmt.apply("RSD", "number of split reads supporting SV.");
metadata.add(nReadsSupportingSv);
final VCFFilterHeaderLine filterAllDel = new VCFFilterHeaderLine("ALL_DEL", "number of samples greater than 1 and all are deletions");
metadata.add(filterAllDel);
final VCFFilterHeaderLine filterAllDup = new VCFFilterHeaderLine("ALL_DUP", "number of samples greater than 1 and all are duplication");
metadata.add(filterAllDup);
final VCFFilterHeaderLine filterNoSV = new VCFFilterHeaderLine("NO_SV", "There is no DUP or DEL in this variant");
metadata.add(filterNoSV);
final VCFFilterHeaderLine filterHomDel = new VCFFilterHeaderLine("HOM_DEL", "There is one Homozygous deletion.");
metadata.add(filterHomDel);
final VCFFilterHeaderLine filterHomDup = new VCFFilterHeaderLine("HOM_DUP", "There is one Homozygous duplication.");
metadata.add(filterHomDup);
VCFStandardHeaderLines.addStandardFormatLines(metadata, true, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_FILTER_KEY, VCFConstants.GENOTYPE_QUALITY_KEY);
VCFStandardHeaderLines.addStandardInfoLines(metadata, true, VCFConstants.DEPTH_KEY, VCFConstants.END_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY, VCFConstants.ALLELE_NUMBER_KEY);
final VCFHeader header = new VCFHeader(metadata, sample2bam.keySet());
if (dict != null)
header.setSequenceDictionary(dict);
JVarkitVersion.getInstance().addMetaData(this, header);
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
out = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
out.writeHeader(header);
final Allele DUP_ALLELE = Allele.create("<DUP>", false);
final Allele DEL_ALLELE = Allele.create("<DEL>", false);
final Allele REF_ALLELE = Allele.create("N", true);
while (iterIn.hasNext()) {
final Locatable ctx = iterIn.next();
if (ctx == null)
continue;
final SAMSequenceRecord ssr = dict.getSequence(ctx.getContig());
if (ssr == null || ctx.getStart() >= ssr.getSequenceLength())
continue;
final int svLen = ctx.getLengthOnReference();
if (svLen < this.min_abs_sv_size)
continue;
if (svLen > this.max_abs_sv_size)
continue;
int n_samples_with_cnv = 0;
final SimplePosition breakPointLeft = new SimplePosition(ctx.getContig(), ctx.getStart());
final SimplePosition breakPointRight = new SimplePosition(ctx.getContig(), ctx.getEnd());
final int extend = 1 + (int) (svLen * this.extendFactor);
final int leftPos = Math.max(1, breakPointLeft.getPosition() - extend);
final int array_mid_start = breakPointLeft.getPosition() - leftPos;
final int array_mid_end = breakPointRight.getPosition() - leftPos;
final int rightPos = Math.min(breakPointRight.getPosition() + extend, ssr.getSequenceLength());
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(ctx.getContig());
vcb.start(ctx.getStart());
vcb.stop(ctx.getEnd());
vcb.attribute(VCFConstants.END_KEY, ctx.getEnd());
final Set<Allele> alleles = new HashSet<>();
alleles.add(REF_ALLELE);
int count_dup = 0;
int count_del = 0;
int an = 0;
final Counter<Allele> countAlleles = new Counter<>();
final List<Genotype> genotypes = new ArrayList<>(sample2bam.size());
Double badestGQ = null;
final double[] raw_coverage = new double[CoordMath.getLength(leftPos, rightPos)];
for (final String sampleName : sample2bam.keySet()) {
final BamInfo bi = sample2bam.get(sampleName);
Arrays.fill(raw_coverage, 0.0);
int n_reads_supporting_sv = 0;
try (CloseableIterator<SAMRecord> iter2 = bi.samReader.queryOverlapping(ctx.getContig(), leftPos, rightPos)) {
while (iter2.hasNext()) {
final SAMRecord rec = iter2.next();
if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
// does any clip support the CNV breakpoints?
boolean read_supports_cnv = false;
final int breakpoint_distance = 10;
// any clip on left ?
if (cigar.isLeftClipped() && rec.getUnclippedStart() < rec.getAlignmentStart() && new SimpleInterval(ctx.getContig(), rec.getUnclippedStart(), rec.getAlignmentStart()).withinDistanceOf(breakPointLeft, breakpoint_distance)) {
read_supports_cnv = true;
}
// any clip on right ?
if (!read_supports_cnv && cigar.isRightClipped() && rec.getAlignmentEnd() < rec.getUnclippedEnd() && new SimpleInterval(ctx.getContig(), rec.getAlignmentEnd(), rec.getUnclippedEnd()).withinDistanceOf(breakPointRight, breakpoint_distance)) {
read_supports_cnv = true;
}
if (read_supports_cnv) {
n_reads_supporting_sv++;
}
int ref = rec.getStart();
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength() && ref + x - leftPos < raw_coverage.length; ++x) {
final int p = ref + x - leftPos;
if (p < 0 || p >= raw_coverage.length)
continue;
raw_coverage[p]++;
}
}
ref += ce.getLength();
}
}
} // end while iter record
} // end try query for iterator
// test for a large difference between the left and right median depth
final OptionalDouble medianDepthLeft = Percentile.median().evaluate(raw_coverage, 0, array_mid_start);
final OptionalDouble medianDepthRight = Percentile.median().evaluate(raw_coverage, array_mid_end, raw_coverage.length - array_mid_end);
// skip if either flanking depth is too low
if (!medianDepthLeft.isPresent() || medianDepthLeft.getAsDouble() < this.min_depth || !medianDepthRight.isPresent() || medianDepthRight.getAsDouble() < this.min_depth) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("LowDp").make();
genotypes.add(gt2);
continue;
}
final double difference_factor = 2.0;
// skip if one side's depth, even halved, still exceeds the other side's
if (medianDepthLeft.getAsDouble() / difference_factor > medianDepthRight.getAsDouble() || medianDepthRight.getAsDouble() / difference_factor > medianDepthLeft.getAsDouble()) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("DiffLR").make();
genotypes.add(gt2);
continue;
}
// apply a running median to smooth the coverage signal
final double[] smoothed_cov = new RunMedian(RunMedian.getTurlachSize(raw_coverage.length)).apply(raw_coverage);
final double[] bounds_cov = IntStream.concat(IntStream.range(0, array_mid_start), IntStream.range(array_mid_end, smoothed_cov.length)).mapToDouble(IDX -> raw_coverage[IDX]).toArray();
final OptionalDouble optMedianBound = Percentile.median().evaluate(bounds_cov);
if (!optMedianBound.isPresent() || optMedianBound.getAsDouble() == 0) {
final Genotype gt2 = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("MedZero").make();
genotypes.add(gt2);
continue;
}
final double medianBound = optMedianBound.getAsDouble();
// normalize the mid-region coverage by the flanking median
final double[] normalized_mid_coverage = new double[array_mid_end - array_mid_start];
for (int i = 0; i < normalized_mid_coverage.length; ++i) {
normalized_mid_coverage[i] = smoothed_cov[array_mid_start + i] / medianBound;
}
final double normDepth = Percentile.median().evaluate(normalized_mid_coverage).getAsDouble();
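/* classify the normalized depth against the expected copy-number levels: ~0.0 hom-del, ~0.5 het-del, ~1.0 ref, ~1.5 het-dup, ~2.0 hom-dup */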
final boolean is_sv;
final boolean is_hom_deletion = Math.abs(normDepth - 0.0) <= this.treshold;
final boolean is_het_deletion = Math.abs(normDepth - 0.5) <= this.treshold || (!is_hom_deletion && normDepth <= 0.5);
final boolean is_hom_dup = Math.abs(normDepth - 2.0) <= this.treshold || normDepth > 2.0;
final boolean is_het_dup = Math.abs(normDepth - 1.5) <= this.treshold || (!is_hom_dup && normDepth >= 1.5);
final boolean is_ref = Math.abs(normDepth - 1.0) <= this.treshold;
final double theoritical_depth;
final GenotypeBuilder gb;
if (is_ref) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, REF_ALLELE));
is_sv = false;
theoritical_depth = 1.0;
an += 2;
} else if (is_het_deletion) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DEL_ALLELE));
alleles.add(DEL_ALLELE);
is_sv = true;
theoritical_depth = 0.5;
count_del++;
an += 2;
countAlleles.incr(DEL_ALLELE);
} else if (is_hom_deletion) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(DEL_ALLELE, DEL_ALLELE));
alleles.add(DEL_ALLELE);
vcb.filter(filterHomDel.getID());
is_sv = true;
theoritical_depth = 0.0;
count_del++;
an += 2;
countAlleles.incr(DEL_ALLELE, 2);
} else if (is_het_dup) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DUP_ALLELE));
alleles.add(DUP_ALLELE);
is_sv = true;
theoritical_depth = 1.5;
count_dup++;
an += 2;
countAlleles.incr(DUP_ALLELE);
} else if (is_hom_dup) {
gb = new GenotypeBuilder(sampleName, Arrays.asList(DUP_ALLELE, DUP_ALLELE));
alleles.add(DUP_ALLELE);
vcb.filter(filterHomDup.getID());
is_sv = true;
theoritical_depth = 2.0;
count_dup++;
an += 2;
countAlleles.incr(DUP_ALLELE, 2);
} else {
gb = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).filter("Ambigous");
is_sv = false;
theoritical_depth = 1.0;
}
if (is_sv) {
n_samples_with_cnv++;
}
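/* genotype quality: squared distance between expected and observed normalized depth, capped at 0.5 and rescaled to the range 0-99 */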
double gq = Math.abs(theoritical_depth - normDepth);
gq = Math.min(0.5, gq);
gq = gq * gq;
gq = gq / 0.25;
gq = 99 * (1.0 - gq);
gb.GQ((int) gq);
if (badestGQ == null || badestGQ.compareTo(gq) > 0) {
badestGQ = gq;
}
gb.attribute(formatCN.getID(), normDepth);
gb.attribute(nReadsSupportingSv.getID(), n_reads_supporting_sv);
genotypes.add(gb.make());
}
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, an);
final List<Allele> orderedAlleles = new ArrayList<>(alleles);
Collections.sort(orderedAlleles);
if (orderedAlleles.size() > 1) {
final List<Integer> acL = new ArrayList<>();
final List<Double> afL = new ArrayList<>();
for (int i = 1; i < orderedAlleles.size(); i++) {
final Allele a = orderedAlleles.get(i);
final int c = (int) countAlleles.count(a);
acL.add(c);
if (an > 0)
afL.add(c / (double) an);
}
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, acL);
if (an > 0)
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, afL);
}
// if(alleles.size()<=1) continue;
vcb.alleles(orderedAlleles);
vcb.noID();
vcb.genotypes(genotypes);
vcb.attribute(infoSVSamples.getID(), n_samples_with_cnv);
vcb.attribute(infoSvLen.getID(), svLen);
if (count_dup == sample2bam.size() && sample2bam.size() != 1) {
vcb.filter(filterAllDup.getID());
}
if (count_del == sample2bam.size() && sample2bam.size() != 1) {
vcb.filter(filterAllDel.getID());
}
if (n_samples_with_cnv == 0) {
vcb.filter(filterNoSV.getID());
}
if (badestGQ != null) {
vcb.log10PError(badestGQ / -10.0);
}
out.add(vcb.make());
}
progress.close();
out.close();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iterIn);
CloserUtil.close(out);
sample2bam.values().forEach(F -> CloserUtil.close(F));
}
}
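The genotype quality assigned above grows with how closely the observed normalized depth matches the depth expected for the called genotype. A standalone sketch of that formula (the example values are hypothetical):

public class CnvGqDemo {
    /** squared distance, capped at 0.5, rescaled so 0 maps to GQ 99 and >= 0.5 maps to GQ 0 */
    static int gq(final double expectedDepth, final double normDepth) {
        double gq = Math.abs(expectedDepth - normDepth);
        gq = Math.min(0.5, gq);
        gq = (gq * gq) / 0.25;
        return (int) (99 * (1.0 - gq));
    }

    public static void main(final String[] args) {
        System.out.println(gq(0.5, 0.5));  // 99 : perfect heterozygous deletion
        System.out.println(gq(0.5, 0.75)); // 74 : observed depth a quarter off
        System.out.println(gq(1.0, 2.0));  // 0  : half a copy or more off
    }
}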
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class DepthAnomaly, method doWork.
@Override
public int doWork(final List<String> args) {
PrintWriter pw = null;
try {
final IndexCovUtils indexCovUtils = new IndexCovUtils(this.treshold);
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(DepthAnomaly.this.refPath).validationStringency(ValidationStringency.LENIENT);
final List<Path> inputBams = IOUtils.unrollPaths(this.bamsPath);
if (inputBams.isEmpty()) {
LOG.error("input bam file missing.");
return -1;
}
Iterator<? extends Locatable> iter;
final String input = oneFileOrNull(args);
if (input == null) {
final BedLineCodec codec = new BedLineCodec();
final LineIterator liter = new LineIterator(stdin());
iter = new AbstractIterator<Locatable>() {
@Override
protected Locatable advance() {
while (liter.hasNext()) {
final String line = liter.next();
final BedLine bed = codec.decode(line);
if (bed == null) {
continue;
}
return bed;
}
liter.close();
return null;
}
};
} else {
iter = IntervalListProvider.from(input).dictionary(dict).stream().iterator();
}
pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
while (iter.hasNext()) {
final SimpleInterval locatable = new SimpleInterval(iter.next());
boolean found_anomaly_here = false;
if (this.min_anomaly_length * 3 >= locatable.getLengthOnReference()) {
LOG.warning("interval " + locatable.toNiceString() + " is too short. skipping.");
continue;
}
final int[] depth = new int[locatable.getLengthOnReference()];
final int[] copy = new int[depth.length];
for (final Path path : inputBams) {
try (SamReader sr = samReaderFactory.open(path)) {
final SAMFileHeader header = sr.getFileHeader();
final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
SequenceUtil.assertSequenceDictionariesEqual(dict, header.getSequenceDictionary());
Arrays.fill(depth, 0);
try (CloseableIterator<SAMRecord> siter = sr.queryOverlapping(locatable.getContig(), locatable.getStart(), locatable.getEnd())) {
while (siter.hasNext()) {
final SAMRecord rec = siter.next();
if (rec.getReadUnmappedFlag())
continue;
if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
continue;
int ref = rec.getStart();
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
for (CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
final int len = ce.getLength();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int i = 0; i < len; i++) {
final int pos = ref + i;
if (pos < locatable.getStart())
continue;
if (pos > locatable.getEnd())
break;
depth[pos - locatable.getStart()]++;
}
}
ref += len;
}
}
} // end loop cigar
} // end sam iterator
System.arraycopy(depth, 0, copy, 0, depth.length);
final double median = median(copy);
final List<CovInterval> anomalies = new ArrayList<>();
// int minDepth = Arrays.stream(depth).min().orElse(0);
int x0 = 0;
while (x0 < depth.length && median > 0.0) {
final int xi = x0;
double total = 0;
double count = 0;
IndexCovUtils.SvType prevType = null;
while (x0 < depth.length) {
final IndexCovUtils.SvType type;
final int depthHere = depth[x0];
final double normDepth = depthHere / (median == 0 ? 1.0 : median);
if (depthHere > this.max_depth) {
type = null;
} else {
type = indexCovUtils.getType(normDepth);
}
x0++;
if (type == null || !type.isVariant())
break;
if (prevType != null && !type.equals(prevType))
break;
if (prevType == null)
prevType = type;
total += depthHere;
count++;
}
if (prevType != null && count >= this.min_anomaly_length) {
anomalies.add(new CovInterval(locatable.getContig(), locatable.getStart() + xi, locatable.getStart() + x0 - 1, prevType, Collections.singletonList(total / count)));
}
}
if (!anomalies.isEmpty() || force_screen) {
int i = 0;
while (i + 1 < anomalies.size() && this.merge_intervals) {
final CovInterval loc1 = anomalies.get(i);
final CovInterval loc2 = anomalies.get(i + 1);
if (loc1.svtype.equals(loc2.svtype) && loc1.withinDistanceOf(loc2, this.min_anomaly_length)) {
final List<Double> newdepths = new ArrayList<>(loc1.depths);
newdepths.addAll(loc2.depths);
anomalies.set(i, new CovInterval(loc1.getContig(), loc1.getStart(), loc2.getEnd(), loc1.svtype, newdepths));
anomalies.remove(i + 1);
} else {
i++;
}
}
if (!found_anomaly_here) {
pw.println(">>> " + locatable.toNiceString() + " length:" + StringUtils.niceInt(locatable.getLengthOnReference()));
found_anomaly_here = true;
}
if (screen_width > 0) {
pw.print("#");
pw.print(String.format("%-15s", sample));
pw.print("[");
for (i = 0; i < screen_width; i++) {
double t = 0;
double n = 0;
final int x1 = (int) (((i + 0) / (double) screen_width) * depth.length);
final int x2 = (int) (((i + 1) / (double) screen_width) * depth.length);
for (int x3 = x1; x3 <= x2 && x3 < depth.length; ++x3) {
t += depth[x3];
n++;
}
double normDepth = t / n;
if (median > 0)
normDepth /= median;
// scale so a normal diploid depth maps to the middle of the histogram
normDepth /= 2.0;
final boolean is_anomaly = anomalies.stream().anyMatch(R -> CoordMath.overlaps(R.getStart(), R.getEnd(), locatable.getStart() + x1, locatable.getStart() + x2));
final AnsiUtils.AnsiColor color = is_anomaly ? AnsiColor.RED : null;
if (color != null)
pw.print(color.begin());
pw.print(AnsiUtils.getHistogram(normDepth));
if (color != null)
pw.print(color.end());
}
pw.print("]");
pw.println();
}
for (i = 0; i < anomalies.size(); i++) {
final CovInterval anomalie = anomalies.get(i);
pw.print(anomalie.getContig());
pw.print("\t");
pw.print(anomalie.getStart() - 1);
pw.print("\t");
pw.print(anomalie.getEnd());
pw.print("\t");
pw.print(anomalie.getLengthOnReference());
pw.print("\t");
pw.print(anomalie.svtype.name());
pw.print("\t");
pw.print(sample);
pw.print("\t");
pw.print(path);
pw.print("\t");
pw.print(i + 1);
pw.print("\t");
pw.print(anomalies.size());
pw.print("\t");
pw.print(locatable.toNiceString());
pw.print("\t");
pw.print((int) median);
pw.print("\t");
pw.print((int) anomalie.depths.stream().mapToDouble(X -> X.doubleValue()).average().orElse(0));
pw.print("\t");
pw.println();
}
}
}
}
if (found_anomaly_here) {
pw.println("<<< " + locatable.toNiceString() + " length:" + StringUtils.niceInt(locatable.getLengthOnReference()));
pw.println();
}
} // end while iter
pw.flush();
pw.close();
pw = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(pw);
}
}
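The anomaly scan above walks the depth array and grows maximal runs of a single SV type. The sketch below reproduces that two-level loop with a simple threshold classifier standing in for IndexCovUtils; the depth profile and cutoffs are invented:

import java.util.ArrayList;
import java.util.List;

public class DepthRunDemo {
    public static void main(final String[] args) {
        // hypothetical depth profile: a ~50% drop at positions 3..6
        final int[] depth = { 30, 31, 29, 14, 15, 16, 15, 30, 32 };
        final double median = 30.0;
        final int min_anomaly_length = 3;
        final List<String> runs = new ArrayList<>();
        int x0 = 0;
        while (x0 < depth.length) {
            final int xi = x0;
            int count = 0;
            String prevType = null;
            while (x0 < depth.length) {
                final double normDepth = depth[x0] / median;
                // stand-in classifier: <0.75 deletion, >1.5 duplication, else normal
                final String type = normDepth < 0.75 ? "DEL" : (normDepth > 1.5 ? "DUP" : null);
                x0++;
                if (type == null) break;
                if (prevType != null && !type.equals(prevType)) break;
                prevType = type;
                count++;
            }
            if (prevType != null && count >= min_anomaly_length) {
                runs.add(prevType + "[" + xi + ".." + (xi + count - 1) + "]");
            }
        }
        System.out.println(runs); // [DEL[3..6]]
    }
}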
Use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
The class Transcript, method getCdsInterval.
/**
* Return the genomic interval spanning between the start and stop codons, if both exist.
*/
public default Optional<Locatable> getCdsInterval() {
if (!hasCodonStartDefined() || !hasCodonStopDefined())
return Optional.empty();
int x1 = this.getEnd();
int x2 = this.getStart();
Locatable codon = getCodonStart().get();
x1 = Math.min(codon.getStart(), x1);
x2 = Math.max(codon.getEnd(), x2);
codon = getCodonStop().get();
x1 = Math.min(codon.getStart(), x1);
x2 = Math.max(codon.getEnd(), x2);
return Optional.of(new SimpleInterval(getContig(), x1, x2));
}
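A worked sketch of the same min/max computation on plain coordinates (all values hypothetical): the result spans from the smallest codon start to the largest codon end.

public class CdsSpanDemo {
    public static void main(final String[] args) {
        final int txStart = 100, txEnd = 500;               // transcript bounds
        final int codonStartBeg = 150, codonStartEnd = 152; // start codon
        final int codonStopBeg = 440, codonStopEnd = 442;   // stop codon
        int x1 = txEnd, x2 = txStart;
        x1 = Math.min(codonStartBeg, x1);
        x2 = Math.max(codonStartEnd, x2);
        x1 = Math.min(codonStopBeg, x1);
        x2 = Math.max(codonStopEnd, x2);
        System.out.println(x1 + "-" + x2); // 150-442
    }
}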