Search in sources :

Example 16 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class BamCmpCoverage method readBedFile.

private void readBedFile(final Path bedFile) {
    if (this.intervals == null) {
        intervals = new IntervalTreeMap<Boolean>();
    }
    try (BedLineReader r = new BedLineReader(bedFile)) {
        while (r.hasNext()) {
            final BedLine bedLine = r.next();
            if (bedLine == null)
                continue;
            this.intervals.put(bedLine.toInterval(), true);
        }
    }
}
Also used : BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine)

Example 17 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class ReferenceToVCF method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.bedFile != null) {
        if (limitBed == null)
            limitBed = new IntervalTreeMap<Boolean>();
        try {
            try (final BedLineReader r = new BedLineReader(this.bedFile)) {
                while (r.hasNext()) {
                    final BedLine record = r.next();
                    limitBed.put(record.toInterval(), true);
                }
            }
        } catch (final Exception err) {
            LOG.error(err);
            return -1;
        }
    }
    final Random random = new Random(0L);
    VariantContextWriter out = null;
    try {
        final ReferenceSequenceFile fasta = ReferenceSequenceFileFactory.getReferenceSequenceFile(Paths.get(oneAndOnlyOneFile(args)));
        SAMSequenceDictionary dict = fasta.getSequenceDictionary();
        out = this.writingVariants.dictionary(dict).open(this.outputFile);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VCFHeader header = new VCFHeader();
        JVarkitVersion.getInstance().addMetaData(this, header);
        header.setSequenceDictionary(dict);
        out.writeHeader(header);
        final List<List<Allele>> combination = new ArrayList<List<Allele>>(4);
        // always keep REF as first allele please
        combination.add(Arrays.asList(Allele.create("A", true), Allele.create("C", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("C", true), Allele.create("A", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("G", true), Allele.create("A", false), Allele.create("C", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("T", true), Allele.create("A", false), Allele.create("C", false), Allele.create("G", false)));
        for (SAMSequenceRecord ssr : dict.getSequences()) {
            GenomicSequence genome = new GenomicSequence(fasta, ssr.getSequenceName());
            if (this.limitBed != null) {
                Interval interval = new Interval(ssr.getSequenceName(), 1, genome.length());
                if (!this.limitBed.containsOverlapping(interval))
                    continue;
            }
            for (int n = 0; n < genome.length(); ++n) {
                progress.watch(ssr.getSequenceIndex(), n);
                List<Allele> alleles = null;
                byte ref = (byte) genome.charAt(n);
                switch(ref) {
                    case 'a':
                    case 'A':
                        alleles = combination.get(0);
                        break;
                    case 'c':
                    case 'C':
                        alleles = combination.get(1);
                        break;
                    case 'g':
                    case 'G':
                        alleles = combination.get(2);
                        break;
                    case 't':
                    case 'T':
                        alleles = combination.get(3);
                        break;
                    default:
                        break;
                }
                if (alleles == null)
                    continue;
                if (this.limitBed != null) {
                    Interval interval = new Interval(ssr.getSequenceName(), n + 1, n + 1);
                    if (!this.limitBed.containsOverlapping(interval))
                        continue;
                }
                if (!disjoint_alts) {
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.stop(n + 1);
                    vcb.alleles(alleles);
                    out.add(vcb.make());
                } else {
                    for (// index 0 is always REF
                    int a = 1; // index 0 is always REF
                    a < 4; // index 0 is always REF
                    ++a) {
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(ssr.getSequenceName());
                        vcb.start(n + 1);
                        vcb.stop(n + 1);
                        // index 0 is always REF
                        vcb.alleles(Arrays.asList(alleles.get(0), alleles.get(a)));
                        out.add(vcb.make());
                    }
                }
                if (insertion_size > 0 && n + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REFERENCE
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(n + 1), true));
                    StringBuilder sb = new StringBuilder(insertion_size + 2);
                    sb.append(genome.charAt(n));
                    for (int n2 = 0; n2 < insertion_size; ++n2) {
                        switch(random.nextInt(4)) {
                            case 0:
                                sb.append('A');
                                break;
                            case 1:
                                sb.append('C');
                                break;
                            case 2:
                                sb.append('G');
                                break;
                            case 3:
                                sb.append('T');
                                break;
                        }
                    }
                    sb.append(genome.charAt(n + 1));
                    alleles.add(Allele.create(sb.toString(), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (deletion_size > 0 && n + deletion_size + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REF
                    final StringBuilder sb = new StringBuilder(deletion_size + 2);
                    sb.append(genome.charAt(n));
                    int lastpos = n + 1;
                    for (int n2 = 0; n2 < deletion_size; ++n2, lastpos++) {
                        sb.append(genome.charAt(lastpos));
                    }
                    sb.append(genome.charAt(lastpos));
                    alleles.add(Allele.create(sb.toString(), true));
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(lastpos), false));
                    final VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (out.checkError())
                    break;
            }
            if (out.checkError())
                break;
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Allele(htsjdk.variant.variantcontext.Allele) Random(java.util.Random) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval)

Example 18 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method doWork.

@Override
public int doWork(final List<String> args) {
    final Set<SimplePosition> mutations = new TreeSet<>();
    if (this.extend < 1) {
        LOG.error("-x should be >=1");
        return -1;
    }
    try {
        this.samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
        if (this.referenceFileFile != null) {
            this.indexedFastaSequenceFile = htsjdk.samtools.reference.ReferenceSequenceFileFactory.getReferenceSequenceFile(this.referenceFileFile);
            this.samReaderFactory.referenceSequence(this.referenceFileFile);
            this.fastaDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
        }
        this.positionStrs.stream().flatMap(S -> Arrays.stream(S.split("[ \t;]+"))).filter(S -> !StringUtils.isBlank(S)).map(S -> new SimplePosition(S)).forEach(X -> mutations.add(X));
        if (this.positionFile != null) {
            if (this.positionFile.toString().endsWith(".bed") || this.positionFile.toString().endsWith(".bed.gz")) {
                try (BedLineReader br = new BedLineReader(this.positionFile)) {
                    br.stream().filter(B -> B != null).forEach(bedLine -> {
                        for (int x = bedLine.getStart(); x <= bedLine.getEnd(); ++x) {
                            mutations.add(new SimplePosition(bedLine.getContig(), x));
                        }
                    });
                }
            } else {
                try (BufferedReader r2 = IOUtils.openPathForBufferedReading(this.positionFile)) {
                    r2.lines().filter(L -> !StringUtils.isBlank(L)).filter(L -> !L.startsWith("#")).forEach(L -> mutations.add(new SimplePosition(L)));
                }
            }
        }
        if (mutations.isEmpty()) {
            LOG.fatal("undefined position \'str\'");
            return -1;
        }
        LOG.info("number of mutations " + mutations.size());
        this.out = this.openPathOrStdoutAsPrintWriter(this.outputFile);
        out.print("#File");
        out.print('\t');
        out.print("CHROM");
        out.print('\t');
        out.print("POS");
        if (this.indexedFastaSequenceFile != null) {
            out.print('\t');
            out.print("REF");
        }
        out.print('\t');
        out.print(this.groupBy.name().toUpperCase());
        out.print('\t');
        out.print("DEPTH");
        for (final CigarOperator op : CigarOperator.values()) {
            out.print('\t');
            out.print(op.name());
        }
        for (char c : BASES_To_PRINT) {
            out.print('\t');
            out.print("Base(" + c + ")");
        }
        out.println();
        if (args.isEmpty()) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(stdin()))) {
                scan(r, mutations);
            }
        } else {
            for (final String filename : args) {
                try (BufferedReader r = IOUtils.openURIForBufferedReading(filename)) {
                    scan(r, mutations);
                }
            }
        }
        this.out.flush();
        return 0;
    } catch (final Throwable err) {
        LOG.severe(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.out);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) SAMRecord(htsjdk.samtools.SAMRecord) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SimplePosition(com.github.lindenb.jvarkit.samtools.util.SimplePosition) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) TreeMap(java.util.TreeMap) Paths(java.nio.file.Paths) FileExtensions(htsjdk.samtools.util.FileExtensions) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) InputStreamReader(java.io.InputStreamReader) TreeSet(java.util.TreeSet) BufferedReader(java.io.BufferedReader) SimplePosition(com.github.lindenb.jvarkit.samtools.util.SimplePosition) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) CigarOperator(htsjdk.samtools.CigarOperator)

Example 19 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class DepthOfCoverage method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    if (this.auto_mask && this.faidx == null) {
        LOG.error("Cannot auto mask if REF is not defined");
        return -1;
    }
    if (this.maskBed != null && this.includeBed != null) {
        LOG.error("both --mask and --bed both defined");
        return -1;
    }
    ReferenceSequenceFile referenceSequenceFile = null;
    try {
        final Predicate<String> isRejectContigRegex;
        if (!StringUtils.isBlank(this.skipContigExpr)) {
            final Pattern pat = Pattern.compile(this.skipContigExpr);
            isRejectContigRegex = S -> pat.matcher(S).matches();
        } else {
            isRejectContigRegex = S -> false;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            srf.referenceSequence(this.faidx);
            srf.setUseAsyncIo(this.asyncIo);
            referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        }
        out = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        out.print("#BAM\tSample\tContig\tContig-Length\tMasked-Contig-Length\tCount\tDepth\tMedian\tMin\tMax\tMaxPos");
        for (RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
            if (r.getMinInclusive() == null)
                continue;
            out.print("\t");
            out.print(r.toString());
        }
        out.println();
        for (final Path path : IOUtils.unrollPaths(args)) {
            try (final SamReader sr = srf.open(path)) {
                if (!sr.hasIndex()) {
                    LOG.error("File " + path + " is not indexed.");
                    return -1;
                }
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Set<String> rejectContigSet = dict.getSequences().stream().map(SSR -> SSR.getSequenceName()).filter(isRejectContigRegex).collect(Collectors.toCollection(HashSet::new));
                rejectContigSet.addAll(dict.getSequences().stream().filter(SSR -> SSR.getSequenceLength() < this.skipContigLength).map(SSR -> SSR.getSequenceName()).collect(Collectors.toCollection(HashSet::new)));
                if (!header.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
                    LOG.error("file is not sorted on coordinate :" + header.getSortOrder() + " " + path);
                    return -1;
                }
                final QueryInterval[] intervals;
                if (this.useBamIndexFlag && this.includeBed != null) {
                    if (!sr.hasIndex()) {
                        LOG.error("Bam is not indexed. " + path);
                        return -1;
                    }
                    final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                    final List<QueryInterval> L = new ArrayList<>();
                    try (BedLineReader br = new BedLineReader(this.includeBed)) {
                        while (br.hasNext()) {
                            final BedLine bed = br.next();
                            final String ctg = contigNameConverter.apply(bed.getContig());
                            if (StringUtils.isBlank(ctg))
                                continue;
                            final int tid = dict.getSequenceIndex(ctg);
                            if (tid < 0)
                                continue;
                            L.add(new QueryInterval(tid, bed.getStart(), bed.getEnd()));
                        }
                    }
                    intervals = QueryInterval.optimizeIntervals(L.toArray(new QueryInterval[L.size()]));
                } else {
                    intervals = null;
                }
                Integer minCov = null;
                Integer maxCov = null;
                ContigPos maxCovPosition = null;
                long count_raw_bases = 0L;
                long count_bases = 0L;
                long sum_coverage = 0L;
                final DiscreteMedian<Integer> discreteMedian_wg = new DiscreteMedian<>();
                final Counter<RangeOfIntegers.Range> countMap_wg = new Counter<>();
                final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(path.toString());
                int[] coverage = null;
                String prevContig = null;
                BitSet mask = null;
                final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
                try (CloseableIterator<SAMRecord> iter = intervals == null ? sr.iterator() : sr.queryOverlapping(intervals)) {
                    for (; ; ) {
                        final SAMRecord rec = iter.hasNext() ? progress.apply(iter.next()) : null;
                        if (rec != null) {
                            if (!SAMRecordDefaultFilter.accept(rec, this.mapping_quality))
                                continue;
                            if (rejectContigSet.contains(rec.getContig()))
                                continue;
                        }
                        if (rec == null || !rec.getContig().equals(prevContig)) {
                            if (coverage != null) {
                                // DUMP
                                long count_bases_ctg = 0L;
                                long sum_coverage_ctg = 0L;
                                Integer minV_ctg = null;
                                Integer maxV_ctg = null;
                                ContigPos maxPos_ctg = null;
                                final DiscreteMedian<Integer> discreteMedian_ctg = new DiscreteMedian<>();
                                final Counter<RangeOfIntegers.Range> countMap_ctg = new Counter<>();
                                for (int i = 0; i < coverage.length; i++) {
                                    if (mask.get(i))
                                        continue;
                                    final int covi = coverage[i];
                                    if (covi > this.max_depth)
                                        continue;
                                    if (minV_ctg == null || minV_ctg.intValue() > covi)
                                        minV_ctg = covi;
                                    if (maxV_ctg == null || maxV_ctg.intValue() < covi) {
                                        maxV_ctg = covi;
                                        maxPos_ctg = new ContigPos(prevContig, i + 1);
                                    }
                                    countMap_ctg.incr(this.summaryCov.getRange(covi));
                                    count_bases_ctg++;
                                    sum_coverage_ctg += covi;
                                    discreteMedian_ctg.add(covi);
                                }
                                out.print(path);
                                out.print("\t");
                                out.print(sample);
                                out.print("\t");
                                out.print(prevContig);
                                out.print("\t");
                                out.print(coverage.length);
                                out.print("\t");
                                out.print(count_bases_ctg);
                                out.print("\t");
                                out.print(sum_coverage_ctg);
                                out.print("\t");
                                if (count_bases_ctg > 0) {
                                    out.printf("%.2f", sum_coverage_ctg / (double) count_bases_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                final OptionalDouble median = discreteMedian_ctg.getMedian();
                                if (median.isPresent()) {
                                    out.print(median.getAsDouble());
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (minV_ctg != null) {
                                    out.print(minV_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (maxV_ctg != null) {
                                    out.print(maxV_ctg);
                                    out.print("\t");
                                    out.print(maxPos_ctg);
                                } else {
                                    out.print("N/A\tN/A");
                                }
                                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                                    if (r.getMinInclusive() == null)
                                        continue;
                                    out.print("\t");
                                    out.print(countMap_ctg.count(r));
                                    if (!countMap_ctg.isEmpty()) {
                                        out.print(" ");
                                        out.printf("(%.2f%%)", (countMap_ctg.count(r) / (countMap_ctg.getTotal() * 1.0)) * 100.0);
                                    }
                                }
                                out.println();
                                if (minCov == null || (minV_ctg != null && minV_ctg.compareTo(minCov) < 0))
                                    minCov = minV_ctg;
                                if (maxCov == null || (maxV_ctg != null && maxV_ctg.compareTo(maxCov) > 0)) {
                                    maxCov = maxV_ctg;
                                    maxCovPosition = maxPos_ctg;
                                }
                                count_bases += count_bases_ctg;
                                sum_coverage += sum_coverage_ctg;
                                count_raw_bases += coverage.length;
                                discreteMedian_wg.add(discreteMedian_ctg);
                                countMap_wg.putAll(countMap_ctg);
                            }
                            coverage = null;
                            mask = null;
                            // /
                            System.gc();
                            if (rec == null)
                                break;
                            final SAMSequenceRecord ssr = Objects.requireNonNull(dict.getSequence(rec.getContig()));
                            coverage = new int[ssr.getSequenceLength()];
                            mask = new BitSet(ssr.getSequenceLength());
                            if (this.auto_mask && referenceSequenceFile != null) {
                                final byte[] refSeq = Objects.requireNonNull(referenceSequenceFile.getSequence(ssr.getSequenceName())).getBases();
                                for (int i = 0; i < refSeq.length; i++) {
                                    if (AcidNucleics.isATGC(refSeq[i]))
                                        continue;
                                    mask.set(i);
                                }
                            }
                            /* read mask */
                            if (this.maskBed != null) {
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.maskBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        for (int p1 = bed.getStart(); p1 <= bed.getEnd() && p1 <= coverage.length; ++p1) {
                                            mask.set(p1 - 1);
                                        }
                                    }
                                }
                            } else if (this.includeBed != null) {
                                final List<Locatable> list = new ArrayList<>();
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.includeBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        final String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        list.add(new SimpleInterval(ctg, bed.getStart(), bed.getEnd()));
                                    }
                                }
                                // sort on starts
                                Collections.sort(list, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
                                int p1 = 1;
                                while (p1 <= coverage.length) {
                                    while (!list.isEmpty() && list.get(0).getEnd() < p1) {
                                        list.remove(0);
                                    }
                                    if (!list.isEmpty() && list.get(0).getStart() <= p1 && p1 <= list.get(0).getEnd()) {
                                        ++p1;
                                        continue;
                                    }
                                    mask.set(p1 - 1);
                                    p1++;
                                }
                            }
                            prevContig = rec.getContig();
                        }
                        int max_end1 = coverage.length;
                        if (!this.disable_paired_overlap_flag && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && rec.getAlignmentStart() < rec.getMateAlignmentStart() && rec.getAlignmentEnd() > rec.getMateAlignmentStart()) {
                            max_end1 = rec.getMateAlignmentStart() - 1;
                        }
                        for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
                            final int pos1 = block.getReferenceStart();
                            final int len = block.getLength();
                            for (int i = 0; i < len; i++) {
                                if (pos1 + i - 1 >= 0 && pos1 + i <= max_end1) {
                                    coverage[pos1 + i - 1]++;
                                }
                            }
                        }
                    }
                /* end rec */
                }
                /* end iter */
                progress.close();
                out.print(path);
                out.print("\t");
                out.print(sample);
                out.print("\t");
                out.print(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
                out.print("\t");
                out.print(count_raw_bases);
                out.print("\t");
                out.print(count_bases);
                out.print("\t");
                out.print(sum_coverage);
                out.print("\t");
                if (count_bases > 0) {
                    out.printf("%.2f", sum_coverage / (double) count_bases);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                final OptionalDouble median = discreteMedian_wg.getMedian();
                if (median.isPresent()) {
                    out.print(median.getAsDouble());
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (minCov != null) {
                    out.print(minCov);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (maxCov != null) {
                    out.print(maxCov + "\t" + maxCovPosition);
                } else {
                    out.print("N/A\tN/A");
                }
                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                    if (r.getMinInclusive() == null)
                        continue;
                    out.print("\t");
                    out.print(countMap_wg.count(r));
                    if (!countMap_wg.isEmpty()) {
                        out.print(" ");
                        out.printf("(%.2f%%)", (countMap_wg.count(r) / (countMap_wg.getTotal() * 1.0)) * 100.0);
                    }
                }
                out.println();
            }
        }
        out.flush();
        out.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(referenceSequenceFile);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) OptionalDouble(java.util.OptionalDouble) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) AlignmentBlock(htsjdk.samtools.AlignmentBlock) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) SAMRecord(htsjdk.samtools.SAMRecord) Objects(java.util.Objects) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) QueryInterval(htsjdk.samtools.QueryInterval) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) BitSet(java.util.BitSet) Pattern(java.util.regex.Pattern) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) Path(java.nio.file.Path) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) BitSet(java.util.BitSet) OptionalDouble(java.util.OptionalDouble) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) SAMFileHeader(htsjdk.samtools.SAMFileHeader) AlignmentBlock(htsjdk.samtools.AlignmentBlock)

Example 20 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class SetFileTools method fromBed.

private int fromBed(final List<String> args) throws IOException {
    final String input = oneFileOrNull(args);
    final Function<BedLine, String> bed2name = bed -> {
        if (bed.getColumnCount() < 4)
            throw new IllegalArgumentException("Expected 4 columns but got " + bed);
        return bed.get(3);
    };
    try (BufferedReader br = super.openBufferedReader(input)) {
        try (BedLineReader blr = new BedLineReader(br, input)) {
            blr.setValidationStringency(validationStringency);
            blr.setContigNameConverter(this.theCtgConverter);
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                final EqualIterator<BedLine> iter = new EqualIterator<BedLine>(blr.stream().iterator(), (A, B) -> bed2name.apply(A).compareTo(bed2name.apply(B)));
                while (iter.hasNext()) {
                    final List<BedLine> lines = iter.next();
                    final List<Locatable> L = sortAndMerge(lines);
                    pw.print(bed2name.apply(lines.get(0)));
                    for (int i = 0; i < L.size(); i++) {
                        pw.print(i == 0 ? "\t" : ",");
                        final Locatable rec = L.get(i);
                        pw.print(noChr(rec.getContig()));
                        pw.print(":");
                        pw.print(rec.getStart());
                        pw.print("-");
                        pw.print(rec.getEnd());
                    }
                    pw.println();
                }
                iter.close();
                pw.flush();
            }
        }
    }
    return 0;
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SetFileRecord(com.github.lindenb.jvarkit.setfile.SetFileRecord) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) AbstractCloseableIterator(com.github.lindenb.jvarkit.iterator.AbstractCloseableIterator) Collectors(java.util.stream.Collectors) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SetFileReaderFactory(com.github.lindenb.jvarkit.setfile.SetFileReaderFactory) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) LinkedList(java.util.LinkedList) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) Paths(java.nio.file.Paths) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) BufferedReader(java.io.BufferedReader) PrintWriter(java.io.PrintWriter) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

BedLineReader (com.github.lindenb.jvarkit.bed.BedLineReader)22 ArrayList (java.util.ArrayList)16 PrintWriter (java.io.PrintWriter)15 List (java.util.List)15 Parameter (com.beust.jcommander.Parameter)14 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)14 Program (com.github.lindenb.jvarkit.util.jcommander.Program)14 Logger (com.github.lindenb.jvarkit.util.log.Logger)14 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 Path (java.nio.file.Path)13 BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine)12 Set (java.util.Set)12 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)10 Collections (java.util.Collections)10 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)9 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)9 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 CloseableIterator (htsjdk.samtools.util.CloseableIterator)9 Locatable (htsjdk.samtools.util.Locatable)9