Search in sources :

Example 1 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class BamStats05 method doWork.

protected int doWork(final PrintWriter pw, final Map<String, List<SimpleInterval>> gene2interval, final String filename, final SamReader IN) throws Exception {
    try {
        LOG.info("Scanning " + filename);
        final SAMFileHeader header = IN.getFileHeader();
        final List<SAMReadGroupRecord> rgs = header.getReadGroups();
        if (rgs == null || rgs.isEmpty())
            throw new IOException("No read groups in " + filename);
        final Set<String> groupNames = this.groupBy.getPartitions(rgs);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
        final CoverageFactory coverageFactory = new CoverageFactory().setMappingQuality(this.mapq).setPartition(this.groupBy).setRecordFilter(R -> !filter.filterOut(R));
        for (final String partition : groupNames) {
            if (StringUtils.isBlank(partition))
                throw new IOException("Empty read group: " + groupBy.name() + " for " + filename);
            for (final String gene : gene2interval.keySet()) {
                final List<Integer> counts = new ArrayList<>();
                final List<SimpleInterval> intervals = gene2interval.get(gene);
                final String newContig = contigNameConverter.apply(intervals.get(0).getContig());
                if (StringUtil.isBlank(newContig)) {
                    throw new JvarkitException.ContigNotFoundInDictionary(intervals.get(0).getContig(), dict);
                }
                final CoverageFactory.SimpleCoverage coverage = coverageFactory.getSimpleCoverage(IN, intervals.stream().map(R -> R.renameContig(newContig)).collect(Collectors.toList()), partition);
                for (final SimpleInterval interval : intervals) {
                    for (int i = interval.getStart(); i <= interval.getEnd() && i <= coverage.getEnd(); i++) {
                        final int d = coverage.get(i - coverage.getStart());
                        counts.add(d);
                    }
                }
                Collections.sort(counts);
                pw.print(intervals.get(0).getContig() + "\t" + (coverage.getStart() - 1) + "\t" + coverage.getEnd() + "\t" + gene + "\t" + partition + "\t" + intervals.size() + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1));
                for (final int mc : this.min_coverages) {
                    final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
                    int count_no_coverage = 0;
                    for (int cov : counts) {
                        if (cov <= mc)
                            ++count_no_coverage;
                        discreteMedian.add(cov);
                    }
                    final OptionalDouble average = discreteMedian.getAverage();
                    final OptionalDouble median = discreteMedian.getMedian();
                    pw.print("\t" + (average.isPresent() ? String.format("%.2f", average.orElse(0.0)) : ".") + "\t" + (median.isPresent() ? String.format("%.2f", median.orElse(-1.0)) : ".") + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
                }
                pw.println();
            }
        // end gene
        }
        // end sample
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(IN);
    }
}
Also used : CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) OptionalDouble(java.util.OptionalDouble) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)

Example 2 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class SetFileTools method doStats.

/**
 * statistics for setFile
 */
private int doStats(final List<String> args) throws IOException {
    final Counter<String> chrom2count = new Counter<>();
    final DiscreteMedian<Integer> d_size = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_nitems = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_distance = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_item_size = new DiscreteMedian<>();
    for (final SAMSequenceRecord ssr : this.theDict.getSequences()) {
        chrom2count.initializeIfNotExists(noChr(ssr.getSequenceName()));
    }
    chrom2count.initializeIfNotExists("*multiple*");
    chrom2count.initializeIfNotExists("*empty*");
    try (CloseableIterator<SetFileRecord> iter = openSetFileIterator(args)) {
        while (iter.hasNext()) {
            final SetFileRecord rec = iter.next();
            final Set<String> chroms = rec.getChromosomes();
            switch(chroms.size()) {
                case 0:
                    chrom2count.incr("*empty*");
                    break;
                case 1:
                    chrom2count.incr(noChr(chroms.iterator().next()));
                    break;
                default:
                    chrom2count.incr("*multiple*");
                    break;
            }
            int len = rec.stream().mapToInt(B -> B.getLengthOnReference()).sum();
            d_size.add(len);
            d_nitems.add(rec.size());
            if (rec.size() > 0) {
                len = len / rec.size();
                d_item_size.add(len);
            }
            if (rec.size() > 1 && chroms.size() == 1) {
                int d = 0;
                final List<Locatable> L = sortAndMerge(rec);
                for (int i = 0; i + 1 < L.size(); i++) {
                    d += (rec.get(i + 1).getStart() - rec.get(i).getEnd());
                }
                d = d / (L.size() - 1);
                d_distance.add(d);
            }
        }
    }
    try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
        for (final String key : chrom2count.keySetDecreasing()) {
            pw.println("C\trecords-per-chrom\t" + key + "\t" + chrom2count.count(key));
        }
        pw.println("AS\taverage-size\t" + (d_size.isEmpty() ? "." : String.valueOf(d_size.getAverage().orElse(0.0))));
        pw.println("MS\tmedian-size\t" + (d_size.isEmpty() ? "." : String.valueOf(d_size.getMedian().orElse(0.0))));
        pw.println("AIS\taverage-item-size\t" + (d_item_size.isEmpty() ? "." : String.valueOf(d_item_size.getAverage().orElse(0.0))));
        pw.println("MIS\tmedian-item-size\t" + (d_item_size.isEmpty() ? "." : String.valueOf(d_item_size.getMedian().orElse(0.0))));
        pw.println("AN\taverage-nitems\t" + (d_nitems.isEmpty() ? "." : String.valueOf(d_nitems.getAverage().orElse(0.0))));
        pw.println("MN\tmedian-nitems\t" + (d_nitems.isEmpty() ? "." : String.valueOf(d_nitems.getMedian().orElse(0.0))));
        pw.println("AD\taverage-distance-between-items\t" + (d_distance.isEmpty() ? "." : String.valueOf(d_distance.getAverage().orElse(0.0))));
        pw.println("MD\tmedian-distance-between-items\t" + (d_distance.isEmpty() ? "." : String.valueOf(d_distance.getMedian().orElse(0.0))));
        pw.flush();
    }
    return 0;
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SetFileRecord(com.github.lindenb.jvarkit.setfile.SetFileRecord) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) AbstractCloseableIterator(com.github.lindenb.jvarkit.iterator.AbstractCloseableIterator) Collectors(java.util.stream.Collectors) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SetFileReaderFactory(com.github.lindenb.jvarkit.setfile.SetFileReaderFactory) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) LinkedList(java.util.LinkedList) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) Paths(java.nio.file.Paths) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Counter(com.github.lindenb.jvarkit.util.Counter) SetFileRecord(com.github.lindenb.jvarkit.setfile.SetFileRecord) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Locatable(htsjdk.samtools.util.Locatable) PrintWriter(java.io.PrintWriter)

Example 3 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class SamReadLengthDistribution method scan.

private void scan(final SamReader in, Path pathName) throws IOException {
    final String defName = (pathName == null ? "STDIN" : pathName.toString()) + "#" + this.partition.name();
    in.getFileHeader().getReadGroups().stream().map(RG -> this.partition.apply(RG)).map(S -> StringUtils.isBlank(S) ? defName : S).filter(S -> !this.sample2discreteMedian.containsKey(S)).forEach(S -> this.sample2discreteMedian.put(S, new DiscreteMedian<Integer>()));
    final CloseableIterator<SAMRecord> iter = openSamIterator(in);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadFailsVendorQualityCheckFlag())
            continue;
        if (rec.getDuplicateReadFlag())
            continue;
        if (rec.isSecondaryOrSupplementary())
            continue;
        if (!rec.getReadUnmappedFlag() && rec.getMappingQuality() < this.mapq)
            continue;
        final String sampleName = this.partition.getPartion(rec, defName);
        DiscreteMedian<Integer> counter = this.sample2discreteMedian.get(sampleName);
        if (counter == null) {
            counter = new DiscreteMedian<>();
            this.sample2discreteMedian.put(sampleName, counter);
        }
        final int len;
        switch(this.method) {
            case SEQ_LENGTH:
                len = rec.getReadLength();
                break;
            case CIGAR_REF_LENGTH:
                {
                    if (rec.getReadUnmappedFlag())
                        continue;
                    final Cigar c = rec.getCigar();
                    if (c == null || c.isEmpty())
                        continue;
                    len = c.getReferenceLength();
                    break;
                }
            case CIGAR_PADDED_REF_LENGTH:
                {
                    if (rec.getReadUnmappedFlag())
                        continue;
                    final Cigar c = rec.getCigar();
                    if (c == null || c.isEmpty())
                        continue;
                    len = c.getPaddedReferenceLength();
                    break;
                }
            case INSERT_LENGTH:
                {
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getMateUnmappedFlag())
                        continue;
                    if (!rec.getContig().equals(rec.getMateReferenceName()))
                        continue;
                    // ignore 2nd
                    if (!rec.getFirstOfPairFlag())
                        continue;
                    len = Math.abs(rec.getInferredInsertSize());
                    break;
                }
            default:
                throw new IllegalStateException("unsupported method " + this.method);
        }
        counter.add(len);
    }
    iter.close();
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IntervalFilter(htsjdk.samtools.filter.IntervalFilter) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian)

Example 4 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class CoverageMatrix method doWork.

@Override
public int doWork(final List<String> args) {
    VariantContextWriter w = null;
    try {
        final IndexCovUtils indexCovUtils = new IndexCovUtils(this.indexCovTreshold);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(CoverageMatrix.this.refPath).validationStringency(ValidationStringency.LENIENT);
        final List<Path> inputBams = IOUtils.unrollPaths(args);
        if (inputBams.size() < 3) {
            LOG.error("not enough input bam file defined.");
            return -1;
        }
        final Set<String> sampleSet = new TreeSet<>();
        final List<String> idx2samples = new ArrayList<String>(inputBams.size());
        for (final Path path : inputBams) {
            try (SamReader sr = samReaderFactory.open(path)) {
                final SAMFileHeader header = sr.getFileHeader();
                final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
                if (sampleSet.contains(sample)) {
                    LOG.error("duplicate sample " + sample);
                    return -1;
                }
                sampleSet.add(sample);
                idx2samples.add(sample);
            }
        }
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        w = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
        final VCFFormatHeaderLine fmtNormDepth = new VCFFormatHeaderLine("D", 1, VCFHeaderLineType.Float, "norm Depth");
        metaData.add(fmtNormDepth);
        final VCFFormatHeaderLine fmtStdDev = new VCFFormatHeaderLine("STDDEV", 1, VCFHeaderLineType.Float, "standard deviation");
        metaData.add(fmtStdDev);
        final VCFInfoHeaderLine infoStdDev = new VCFInfoHeaderLine(fmtStdDev.getID(), 1, VCFHeaderLineType.Float, "standard deviation");
        metaData.add(infoStdDev);
        final VCFInfoHeaderLine infoMedianD = new VCFInfoHeaderLine("MEDIAN", 1, VCFHeaderLineType.Float, "median depth");
        metaData.add(infoMedianD);
        final VCFInfoHeaderLine infoNSamples = new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "number of samples");
        metaData.add(infoNSamples);
        final VCFInfoHeaderLine infoSamples = new VCFInfoHeaderLine("SAMPLES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples");
        metaData.add(infoSamples);
        final VCFFilterHeaderLine filterAll = new VCFFilterHeaderLine("ALL_AFFECTED", "All Samples carry a variant");
        metaData.add(filterAll);
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.END_KEY);
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY);
        final VCFHeader vcfheader = new VCFHeader(metaData, idx2samples);
        vcfheader.setSequenceDictionary(dict);
        JVarkitVersion.getInstance().addMetaData(this, vcfheader);
        w.writeHeader(vcfheader);
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            if (!StringUtils.isBlank(restrictContig) && !restrictContig.equals(ssr.getSequenceName()))
                continue;
            final int[] depth = new int[ssr.getSequenceLength()];
            final BitSet blackListedPositions = new BitSet(depth.length);
            // fill black listed regions
            if (this.blackListedPath != null) {
                try (TabixReader tbr = new TabixReader(this.blackListedPath.toString())) {
                    final ContigNameConverter cvt = ContigNameConverter.fromContigSet(tbr.getChromosomes());
                    final String ctg = cvt.apply(ssr.getSequenceName());
                    if (!StringUtils.isBlank(ctg)) {
                        final BedLineCodec codec = new BedLineCodec();
                        final TabixReader.Iterator tbxr = tbr.query(ctg, 1, ssr.getSequenceLength());
                        for (; ; ) {
                            final String line = tbxr.next();
                            if (line == null)
                                break;
                            final BedLine bed = codec.decode(line);
                            if (bed == null)
                                continue;
                            int p1 = Math.max(bed.getStart(), 1);
                            while (p1 <= ssr.getSequenceLength() && p1 <= bed.getEnd()) {
                                blackListedPositions.set(p1 - 1);
                                ++p1;
                            }
                        }
                    }
                } catch (Throwable err) {
                    LOG.warn(err);
                }
            }
            final SortingCollection<CovItem> sorter = SortingCollection.newInstance(CovItem.class, new CovItemCodec(), (A, B) -> A.compare0(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            for (int bam_idx = 0; bam_idx < inputBams.size(); ++bam_idx) {
                final Path path = inputBams.get(bam_idx);
                LOG.info(ssr.getContig() + ":" + path + " " + bam_idx + "/" + inputBams.size());
                try (SamReader sr = samReaderFactory.open(path)) {
                    final SAMFileHeader header = sr.getFileHeader();
                    SequenceUtil.assertSequenceDictionariesEqual(dict, header.getSequenceDictionary());
                    Arrays.fill(depth, 0);
                    try (CloseableIterator<SAMRecord> siter = sr.queryOverlapping(ssr.getContig(), 1, ssr.getLengthOnReference())) {
                        while (siter.hasNext()) {
                            final SAMRecord rec = siter.next();
                            if (rec.getReadUnmappedFlag())
                                continue;
                            if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
                                continue;
                            int ref = rec.getStart();
                            final Cigar cigar = rec.getCigar();
                            if (cigar == null)
                                continue;
                            for (CigarElement ce : cigar) {
                                final CigarOperator op = ce.getOperator();
                                final int len = ce.getLength();
                                if (op.consumesReferenceBases()) {
                                    if (op.consumesReadBases()) {
                                        for (int i = 0; i < len; i++) {
                                            final int pos = ref + i;
                                            if (pos < 1)
                                                continue;
                                            if (pos > ssr.getLengthOnReference())
                                                break;
                                            depth[pos - 1]++;
                                        }
                                    }
                                    ref += len;
                                }
                            }
                        // loop cigar
                        }
                    // end samItere
                    }
                    // try
                    final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
                    int pos = 0;
                    while (pos < depth.length) {
                        if (!blackListedPositions.get(pos) && depth[pos] <= this.max_depth) {
                            discreteMedian.add(depth[pos]);
                        }
                        ++pos;
                    }
                    final double median = discreteMedian.getMedian().orElse(1.0);
                    LOG.info(idx2samples.get(bam_idx) + " :" + ssr.getSequenceName() + " median depth:" + median);
                    final DiscreteMedian<Integer> localMedian = new DiscreteMedian<>();
                    pos = 0;
                    while (pos < depth.length) {
                        if (blackListedPositions.get(pos)) /* non pas maxdepth */
                        {
                            ++pos;
                            continue;
                        }
                        int pos2 = pos;
                        localMedian.clear();
                        while (pos2 - pos < this.bin_size && pos2 < depth.length && !blackListedPositions.get(pos2)) {
                            // consider this.max_depth here ?
                            localMedian.add(depth[pos2]);
                            ++pos2;
                        }
                        if (pos2 - pos == this.bin_size) {
                            final double localMed = localMedian.getMedian().orElse(0.0);
                            final CovItem item = new CovItem();
                            item.pos = pos;
                            item.sample_idx = bam_idx;
                            item.depth = (float) (localMed / median);
                            item.stddev = (float) localMedian.getStandardDeviation().orElse(-1.0);
                            sorter.add(item);
                        }
                        pos = pos2;
                    }
                }
            // end loop over samples
            }
            // end loop over bams
            sorter.doneAdding();
            sorter.setDestructiveIteration(true);
            final CloseableIterator<CovItem> iter = sorter.iterator();
            final EqualRangeIterator<CovItem> iter2 = new EqualRangeIterator<>(iter, (A, B) -> Integer.compare(A.pos, B.pos));
            final Allele REF = Allele.create("N", true);
            final Allele DEL = Allele.create("<DEL>", false);
            final Allele DUP = Allele.create("<DUP>", false);
            while (iter2.hasNext()) {
                final List<CovItem> list = iter2.next();
                final CovItem first = list.get(0);
                final double avg_depth = list.stream().mapToDouble(F -> F.depth).average().orElse(0);
                final double sum = list.stream().mapToDouble(F -> F.depth).map(D -> Math.pow(D - avg_depth, 2.0)).sum();
                final double stdDev = Math.sqrt(sum / list.size());
                final OptionalDouble optMedianOfmedian = Percentile.median().evaluate(list.stream().mapToDouble(I -> I.depth));
                final double medianOfmedian = optMedianOfmedian.orElse(1.0);
                if (medianOfmedian <= 0)
                    continue;
                for (int i = 0; i < list.size(); i++) {
                    list.get(i).depth /= medianOfmedian;
                }
                if (list.stream().allMatch(F -> Float.isNaN(F.depth) || Float.isInfinite(F.depth)))
                    continue;
                final VariantContextBuilder vcb = new VariantContextBuilder();
                vcb.chr(ssr.getContig());
                vcb.start(first.pos + 1);
                vcb.stop(first.pos + this.bin_size);
                vcb.attribute(VCFConstants.END_KEY, first.pos + this.bin_size);
                vcb.attribute(infoStdDev.getID(), stdDev);
                vcb.attribute(infoMedianD.getID(), medianOfmedian);
                final Set<Allele> alleles = new HashSet<>();
                alleles.add(REF);
                final List<Genotype> genotypes = new ArrayList<>(list.size());
                final Set<String> affected = new TreeSet<>();
                for (int i = 0; i < list.size(); i++) {
                    final CovItem item = list.get(i);
                    final String sn = idx2samples.get(item.sample_idx);
                    final GenotypeBuilder gb;
                    switch(indexCovUtils.getType(item.depth)) {
                        case AMBIGOUS:
                            gb = new GenotypeBuilder(sn, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                            break;
                        case HET_DEL:
                            alleles.add(DEL);
                            gb = new GenotypeBuilder(sn, Arrays.asList(REF, DEL));
                            affected.add(sn);
                            break;
                        case HOM_DEL:
                            alleles.add(DEL);
                            gb = new GenotypeBuilder(sn, Arrays.asList(DEL, DEL));
                            affected.add(sn);
                            break;
                        case HET_DUP:
                            alleles.add(DUP);
                            gb = new GenotypeBuilder(sn, Arrays.asList(REF, DUP));
                            affected.add(sn);
                            break;
                        case HOM_DUP:
                            alleles.add(DUP);
                            gb = new GenotypeBuilder(sn, Arrays.asList(DUP, DUP));
                            affected.add(sn);
                            break;
                        case REF:
                            gb = new GenotypeBuilder(sn, Arrays.asList(REF, REF));
                            break;
                        default:
                            throw new IllegalStateException();
                    }
                    gb.attribute(fmtNormDepth.getID(), item.depth);
                    gb.attribute(fmtStdDev.getID(), item.stddev);
                    genotypes.add(gb.make());
                }
                if (affected.isEmpty())
                    continue;
                if (affected.size() == inputBams.size()) {
                    vcb.filter(filterAll.getID());
                } else {
                    vcb.passFilters();
                }
                vcb.attribute(infoSamples.getID(), new ArrayList<>(affected));
                vcb.attribute(infoNSamples.getID(), affected.size());
                vcb.genotypes(genotypes);
                vcb.alleles(alleles);
                w.add(vcb.make());
            }
            iter2.close();
            iter.close();
            sorter.cleanup();
            System.gc();
        }
        // end while iter
        w.close();
        w = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
    }
}
Also used : IndexCovUtils(com.github.lindenb.jvarkit.tools.structvar.indexcov.IndexCovUtils) WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) EOFException(java.io.EOFException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) TabixReader(htsjdk.tribble.readers.TabixReader) VCFConstants(htsjdk.variant.vcf.VCFConstants) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BitSet(java.util.BitSet) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) IndexCovUtils(com.github.lindenb.jvarkit.tools.structvar.indexcov.IndexCovUtils) TreeSet(java.util.TreeSet) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) BitSet(java.util.BitSet) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TabixReader(htsjdk.tribble.readers.TabixReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) OptionalDouble(java.util.OptionalDouble) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 5 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class DepthOfCoverage method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    if (this.auto_mask && this.faidx == null) {
        LOG.error("Cannot auto mask if REF is not defined");
        return -1;
    }
    if (this.maskBed != null && this.includeBed != null) {
        LOG.error("both --mask and --bed both defined");
        return -1;
    }
    ReferenceSequenceFile referenceSequenceFile = null;
    try {
        final Predicate<String> isRejectContigRegex;
        if (!StringUtils.isBlank(this.skipContigExpr)) {
            final Pattern pat = Pattern.compile(this.skipContigExpr);
            isRejectContigRegex = S -> pat.matcher(S).matches();
        } else {
            isRejectContigRegex = S -> false;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            srf.referenceSequence(this.faidx);
            srf.setUseAsyncIo(this.asyncIo);
            referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        }
        out = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        out.print("#BAM\tSample\tContig\tContig-Length\tMasked-Contig-Length\tCount\tDepth\tMedian\tMin\tMax\tMaxPos");
        for (RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
            if (r.getMinInclusive() == null)
                continue;
            out.print("\t");
            out.print(r.toString());
        }
        out.println();
        for (final Path path : IOUtils.unrollPaths(args)) {
            try (final SamReader sr = srf.open(path)) {
                if (!sr.hasIndex()) {
                    LOG.error("File " + path + " is not indexed.");
                    return -1;
                }
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Set<String> rejectContigSet = dict.getSequences().stream().map(SSR -> SSR.getSequenceName()).filter(isRejectContigRegex).collect(Collectors.toCollection(HashSet::new));
                rejectContigSet.addAll(dict.getSequences().stream().filter(SSR -> SSR.getSequenceLength() < this.skipContigLength).map(SSR -> SSR.getSequenceName()).collect(Collectors.toCollection(HashSet::new)));
                if (!header.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
                    LOG.error("file is not sorted on coordinate :" + header.getSortOrder() + " " + path);
                    return -1;
                }
                final QueryInterval[] intervals;
                if (this.useBamIndexFlag && this.includeBed != null) {
                    if (!sr.hasIndex()) {
                        LOG.error("Bam is not indexed. " + path);
                        return -1;
                    }
                    final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                    final List<QueryInterval> L = new ArrayList<>();
                    try (BedLineReader br = new BedLineReader(this.includeBed)) {
                        while (br.hasNext()) {
                            final BedLine bed = br.next();
                            final String ctg = contigNameConverter.apply(bed.getContig());
                            if (StringUtils.isBlank(ctg))
                                continue;
                            final int tid = dict.getSequenceIndex(ctg);
                            if (tid < 0)
                                continue;
                            L.add(new QueryInterval(tid, bed.getStart(), bed.getEnd()));
                        }
                    }
                    intervals = QueryInterval.optimizeIntervals(L.toArray(new QueryInterval[L.size()]));
                } else {
                    intervals = null;
                }
                Integer minCov = null;
                Integer maxCov = null;
                ContigPos maxCovPosition = null;
                long count_raw_bases = 0L;
                long count_bases = 0L;
                long sum_coverage = 0L;
                final DiscreteMedian<Integer> discreteMedian_wg = new DiscreteMedian<>();
                final Counter<RangeOfIntegers.Range> countMap_wg = new Counter<>();
                final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(path.toString());
                int[] coverage = null;
                String prevContig = null;
                BitSet mask = null;
                final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
                try (CloseableIterator<SAMRecord> iter = intervals == null ? sr.iterator() : sr.queryOverlapping(intervals)) {
                    for (; ; ) {
                        final SAMRecord rec = iter.hasNext() ? progress.apply(iter.next()) : null;
                        if (rec != null) {
                            if (!SAMRecordDefaultFilter.accept(rec, this.mapping_quality))
                                continue;
                            if (rejectContigSet.contains(rec.getContig()))
                                continue;
                        }
                        if (rec == null || !rec.getContig().equals(prevContig)) {
                            if (coverage != null) {
                                // DUMP
                                long count_bases_ctg = 0L;
                                long sum_coverage_ctg = 0L;
                                Integer minV_ctg = null;
                                Integer maxV_ctg = null;
                                ContigPos maxPos_ctg = null;
                                final DiscreteMedian<Integer> discreteMedian_ctg = new DiscreteMedian<>();
                                final Counter<RangeOfIntegers.Range> countMap_ctg = new Counter<>();
                                for (int i = 0; i < coverage.length; i++) {
                                    if (mask.get(i))
                                        continue;
                                    final int covi = coverage[i];
                                    if (covi > this.max_depth)
                                        continue;
                                    if (minV_ctg == null || minV_ctg.intValue() > covi)
                                        minV_ctg = covi;
                                    if (maxV_ctg == null || maxV_ctg.intValue() < covi) {
                                        maxV_ctg = covi;
                                        maxPos_ctg = new ContigPos(prevContig, i + 1);
                                    }
                                    countMap_ctg.incr(this.summaryCov.getRange(covi));
                                    count_bases_ctg++;
                                    sum_coverage_ctg += covi;
                                    discreteMedian_ctg.add(covi);
                                }
                                out.print(path);
                                out.print("\t");
                                out.print(sample);
                                out.print("\t");
                                out.print(prevContig);
                                out.print("\t");
                                out.print(coverage.length);
                                out.print("\t");
                                out.print(count_bases_ctg);
                                out.print("\t");
                                out.print(sum_coverage_ctg);
                                out.print("\t");
                                if (count_bases_ctg > 0) {
                                    out.printf("%.2f", sum_coverage_ctg / (double) count_bases_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                final OptionalDouble median = discreteMedian_ctg.getMedian();
                                if (median.isPresent()) {
                                    out.print(median.getAsDouble());
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (minV_ctg != null) {
                                    out.print(minV_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (maxV_ctg != null) {
                                    out.print(maxV_ctg);
                                    out.print("\t");
                                    out.print(maxPos_ctg);
                                } else {
                                    out.print("N/A\tN/A");
                                }
                                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                                    if (r.getMinInclusive() == null)
                                        continue;
                                    out.print("\t");
                                    out.print(countMap_ctg.count(r));
                                    if (!countMap_ctg.isEmpty()) {
                                        out.print(" ");
                                        out.printf("(%.2f%%)", (countMap_ctg.count(r) / (countMap_ctg.getTotal() * 1.0)) * 100.0);
                                    }
                                }
                                out.println();
                                if (minCov == null || (minV_ctg != null && minV_ctg.compareTo(minCov) < 0))
                                    minCov = minV_ctg;
                                if (maxCov == null || (maxV_ctg != null && maxV_ctg.compareTo(maxCov) > 0)) {
                                    maxCov = maxV_ctg;
                                    maxCovPosition = maxPos_ctg;
                                }
                                count_bases += count_bases_ctg;
                                sum_coverage += sum_coverage_ctg;
                                count_raw_bases += coverage.length;
                                discreteMedian_wg.add(discreteMedian_ctg);
                                countMap_wg.putAll(countMap_ctg);
                            }
                            coverage = null;
                            mask = null;
                            // /
                            System.gc();
                            if (rec == null)
                                break;
                            final SAMSequenceRecord ssr = Objects.requireNonNull(dict.getSequence(rec.getContig()));
                            coverage = new int[ssr.getSequenceLength()];
                            mask = new BitSet(ssr.getSequenceLength());
                            if (this.auto_mask && referenceSequenceFile != null) {
                                final byte[] refSeq = Objects.requireNonNull(referenceSequenceFile.getSequence(ssr.getSequenceName())).getBases();
                                for (int i = 0; i < refSeq.length; i++) {
                                    if (AcidNucleics.isATGC(refSeq[i]))
                                        continue;
                                    mask.set(i);
                                }
                            }
                            /* read mask */
                            if (this.maskBed != null) {
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.maskBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        for (int p1 = bed.getStart(); p1 <= bed.getEnd() && p1 <= coverage.length; ++p1) {
                                            mask.set(p1 - 1);
                                        }
                                    }
                                }
                            } else if (this.includeBed != null) {
                                final List<Locatable> list = new ArrayList<>();
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.includeBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        final String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        list.add(new SimpleInterval(ctg, bed.getStart(), bed.getEnd()));
                                    }
                                }
                                // sort on starts
                                Collections.sort(list, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
                                int p1 = 1;
                                while (p1 <= coverage.length) {
                                    while (!list.isEmpty() && list.get(0).getEnd() < p1) {
                                        list.remove(0);
                                    }
                                    if (!list.isEmpty() && list.get(0).getStart() <= p1 && p1 <= list.get(0).getEnd()) {
                                        ++p1;
                                        continue;
                                    }
                                    mask.set(p1 - 1);
                                    p1++;
                                }
                            }
                            prevContig = rec.getContig();
                        }
                        int max_end1 = coverage.length;
                        if (!this.disable_paired_overlap_flag && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && rec.getAlignmentStart() < rec.getMateAlignmentStart() && rec.getAlignmentEnd() > rec.getMateAlignmentStart()) {
                            max_end1 = rec.getMateAlignmentStart() - 1;
                        }
                        for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
                            final int pos1 = block.getReferenceStart();
                            final int len = block.getLength();
                            for (int i = 0; i < len; i++) {
                                if (pos1 + i - 1 >= 0 && pos1 + i <= max_end1) {
                                    coverage[pos1 + i - 1]++;
                                }
                            }
                        }
                    }
                /* end rec */
                }
                /* end iter */
                progress.close();
                out.print(path);
                out.print("\t");
                out.print(sample);
                out.print("\t");
                out.print(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
                out.print("\t");
                out.print(count_raw_bases);
                out.print("\t");
                out.print(count_bases);
                out.print("\t");
                out.print(sum_coverage);
                out.print("\t");
                if (count_bases > 0) {
                    out.printf("%.2f", sum_coverage / (double) count_bases);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                final OptionalDouble median = discreteMedian_wg.getMedian();
                if (median.isPresent()) {
                    out.print(median.getAsDouble());
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (minCov != null) {
                    out.print(minCov);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (maxCov != null) {
                    out.print(maxCov + "\t" + maxCovPosition);
                } else {
                    out.print("N/A\tN/A");
                }
                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                    if (r.getMinInclusive() == null)
                        continue;
                    out.print("\t");
                    out.print(countMap_wg.count(r));
                    if (!countMap_wg.isEmpty()) {
                        out.print(" ");
                        out.printf("(%.2f%%)", (countMap_wg.count(r) / (countMap_wg.getTotal() * 1.0)) * 100.0);
                    }
                }
                out.println();
            }
        }
        out.flush();
        out.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(referenceSequenceFile);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) OptionalDouble(java.util.OptionalDouble) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) AlignmentBlock(htsjdk.samtools.AlignmentBlock) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) SAMRecord(htsjdk.samtools.SAMRecord) Objects(java.util.Objects) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) QueryInterval(htsjdk.samtools.QueryInterval) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) BitSet(java.util.BitSet) Pattern(java.util.regex.Pattern) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) Path(java.nio.file.Path) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) BitSet(java.util.BitSet) OptionalDouble(java.util.OptionalDouble) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) SAMFileHeader(htsjdk.samtools.SAMFileHeader) AlignmentBlock(htsjdk.samtools.AlignmentBlock)

Aggregations

DiscreteMedian (com.github.lindenb.jvarkit.math.DiscreteMedian)7 Parameter (com.beust.jcommander.Parameter)6 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)6 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)6 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)6 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)6 Program (com.github.lindenb.jvarkit.util.jcommander.Program)6 Logger (com.github.lindenb.jvarkit.util.log.Logger)6 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)6 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)5 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)5 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 SAMRecord (htsjdk.samtools.SAMRecord)5 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 SamReader (htsjdk.samtools.SamReader)5 SamReaderFactory (htsjdk.samtools.SamReaderFactory)5 CloseableIterator (htsjdk.samtools.util.CloseableIterator)5 Path (java.nio.file.Path)5