Search in sources :

Example 1 with Percentile

use of com.github.lindenb.jvarkit.math.stats.Percentile in project jvarkit by lindenb.

the class Bam2Wig method run.

private void run(final PrintWriter pw, final CloseableIterator<SAMRecord> iter, final SAMSequenceDictionary dict, // may be null
final Interval interval) {
    final Aggregator aggregator;
    switch(this.whatDisplay) {
        case COVERAGE:
            aggregator = new CoverageAggregator();
            break;
        case CLIPPING:
            aggregator = new ClipAggregator();
            break;
        case INSERTION:
            aggregator = new InsertionAggregator();
            break;
        case DELETION:
            aggregator = new DeletionAggregator();
            break;
        case READ_GROUPS:
            aggregator = new NumberOfSamplesCoveredX(this.min_depth, this.partition);
            break;
        case CASE_CTRL:
            if (this.pedigreeFile == null) {
                throw new JvarkitException.UserError("undefined pedigree");
            }
            aggregator = new CaseControlAggregator(this.pedigreeFile);
            break;
        default:
            throw new IllegalStateException(this.whatDisplay.name());
    }
    final Percentile percentile = Percentile.of(this.percentilType);
    SAMSequenceRecord ssr = null;
    int[] array = null;
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(dict);
    if (this.custom_track) {
        pw.println(UCSC_HEADER.replace("track_type", this.bedGraph ? "bedGraph" : "wiggle_0"));
    }
    for (; ; ) {
        final SAMRecord rec;
        if (iter.hasNext()) {
            rec = progess.watch(iter.next());
            if (rec.getReadUnmappedFlag())
                continue;
            if (this.samRecordFilter.filterOut(rec))
                continue;
            if (interval != null && !interval.overlaps(rec))
                continue;
        } else {
            rec = null;
        }
        if (rec == null || (ssr != null && ssr.getSequenceIndex() != rec.getReferenceIndex())) {
            if (ssr != null) {
                aggregator.finish(array);
                // dump data
                int start0 = (interval == null ? 0 : interval.getStart());
                boolean header_printed = false;
                while (start0 < array.length) {
                    if (interval != null) {
                        // 
                        if (!interval.getContig().equals(ssr.getSequenceName()))
                            break;
                        if (start0 > interval.getEnd())
                            break;
                        if (start0 + window_span < interval.getStart()) {
                            start0 += win_shift;
                            continue;
                        }
                    }
                    if (!this.bedGraph && !header_printed) {
                        pw.println("fixedStep chrom=" + ssr.getSequenceName() + " start=" + (start0 + 1) + " step=" + this.win_shift + " span=" + this.window_span);
                        header_printed = true;
                    }
                    /* 
						 * http://genome.ucsc.edu/goldenPath/help/wiggle.html
						   Wiggle track data values can be integer or real, positive or negative values.
						   Chromosome positions are specified as 1-relative.
						   For a chromosome of length N, the first position is 1 and the last position is N. Only positions specified have data. Positions not specified do not have data and will not be graphed. 
						 */
                    final double percentile_value = percentile.evaluate(array, start0, Math.min(this.window_span, array.length - start0));
                    if (this.bedGraph) {
                        pw.print(ssr.getSequenceName());
                        pw.print('\t');
                        pw.print(start0);
                        pw.print('\t');
                        pw.print(start0 + this.window_span);
                        pw.print('\t');
                    }
                    pw.printf(this.printfFormat, percentile_value);
                    pw.print('\n');
                    if (pw.checkError())
                        break;
                    start0 += this.win_shift;
                }
                array = null;
                System.gc();
                ssr = null;
            }
            if (rec == null)
                break;
            if (pw.checkError())
                break;
        }
        if (ssr == null) {
            System.gc();
            ssr = dict.getSequence(rec.getReferenceIndex());
            Objects.requireNonNull(ssr);
            LOG.info("Allocating int[" + ssr.getSequenceLength() + "]");
            array = new int[ssr.getSequenceLength()];
            LOG.info("Allocating : Done.");
            Arrays.fill(array, 0);
        }
        aggregator.visit(array, rec);
    }
    progess.finish();
    iter.close();
    pw.flush();
}
Also used : Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMRecord(htsjdk.samtools.SAMRecord)

Aggregations

Percentile (com.github.lindenb.jvarkit.math.stats.Percentile)1 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1