Search in sources :

Example 21 with CloseableIterator

Use of htsjdk.samtools.util.CloseableIterator in the project jvarkit by lindenb.

The method doVcfToVcf of the class VCFBedSetFilter:

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
    // Contigs seen in the VCF that could not be mapped to the BED source;
    // kept only to throttle and summarize the warnings at the end.
    final Set<String> contigs_not_found = new HashSet<>();
    final VCFHeader h2 = new VCFHeader(r.getHeader());
    // Optional FILTER declared in the output header. When null, variants that
    // would be filtered are silently dropped instead of being flagged.
    final VCFFilterHeaderLine filter;
    if (!StringUtil.isBlank(this.filterName)) {
        filter = new VCFFilterHeaderLine(this.filterName, "Variant " + (this.tabixBlackFile != null ? " overlapping any bed record of " : " that do not overlap any bed record of ") + tabixFile);
        h2.addMetaDataLine(filter);
    } else {
        filter = null;
    }
    JVarkitVersion.getInstance().addMetaData(this, h2);
    // Maps VCF contig names to the naming scheme of the BED source
    // (indexed BED file when present, otherwise the in-memory interval tree).
    final ContigNameConverter ctgNameConverter;
    if (this.bedReader != null) {
        ctgNameConverter = ContigNameConverter.fromContigSet(this.bedReader.getContigs());
    } else {
        ctgNameConverter = ContigNameConverter.fromIntervalTreeMap(this.intervalTreeMap);
    }
    // Dictionary is used only to clamp the extended end coordinate; may be null.
    final SAMSequenceDictionary dict = r.getHeader().getSequenceDictionary();
    w.writeHeader(h2);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        boolean set_filter = false;
        final String convert_contig = ctgNameConverter.apply(ctx.getContig());
        // Query interval = variant extended by extend_bases, clamped to >= 1 ...
        final int ctx_start = Math.max(1, ctx.getStart() - this.extend_bases);
        final int ctx_end;
        if (dict == null) {
            ctx_end = ctx.getEnd() + this.extend_bases;
        } else {
            // ... and, when a dictionary is available, clamped to the contig length.
            final SAMSequenceRecord ssr = dict.getSequence(ctx.getContig());
            if (ssr == null)
                throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
            ctx_end = Math.min(ssr.getSequenceLength(), ctx.getEnd() + this.extend_bases);
        }
        if (StringUtil.isBlank(convert_contig)) {
            // Contig unknown to the BED source: treated as "no overlap found".
            if (debug)
                LOG.warn("Cannot convert contig " + ctx.getContig());
            // Warn at most once per contig, and stop collecting after 100 contigs.
            if (contigs_not_found.size() < 100) {
                if (contigs_not_found.add(ctx.getContig())) {
                    LOG.warn("Cannot convert variant contig " + ctx.getContig() + " to bed file. (Contig is not in BED file)");
                }
            }
            set_filter = false;
        } else if (this.intervalTreeMap != null) {
            // In-memory mode: coarse overlap via the tree, exact test via testOverlap.
            if (this.intervalTreeMap.getOverlapping(new SimpleInterval(convert_contig, ctx_start, ctx_end)).stream().anyMatch(LOC -> testOverlap(ctx_start, ctx_end, LOC))) {
                if (debug)
                    LOG.warn("treemap.overlap=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                set_filter = true;
            }
        } else {
            // Indexed-BED mode. The query window is widened by one base on each
            // side; the exact overlap is re-checked by testOverlap below.
            // NOTE(review): presumably compensates for the iterator's coordinate
            // convention — confirm against IndexedBedReader.iterator().
            try (CloseableIterator<BedLine> iter = this.bedReader.iterator(convert_contig, ctx_start - 1, ctx_end + 1)) {
                while (iter.hasNext()) {
                    final BedLine bed = iter.next();
                    if (bed == null)
                        continue;
                    if (!testOverlap(ctx_start, ctx_end, bed))
                        continue;
                    if (debug)
                        LOG.warn("tabix=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                    set_filter = true;
                    break;
                }
            } catch (IOException err) {
                throw new RuntimeIOException(err);
            }
        }
        /* variant is in whitelist, we want to keep it */
        // Whitelist mode inverts the flag: a variant is filtered when it does
        // NOT overlap any BED record.
        if (this.tabixWhiteFile != null) {
            set_filter = !set_filter;
            if (debug)
                LOG.warn("inverse. Now Filter=" + set_filter + " for " + ctx.getContig() + ":" + ctx.getStart());
        }
        if (!set_filter) {
            // Variant passes: written through unchanged.
            if (debug)
                LOG.warn("no filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            w.add(ctx);
            continue;
        }
        if (filter != null) {
            // A FILTER name was supplied: flag the variant instead of dropping it.
            if (debug)
                LOG.warn("adding filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filter.getID());
            w.add(vcb.make());
        } else {
            // No FILTER name: the variant is discarded.
            if (debug)
                LOG.warn("Ignoring " + ctx.getContig() + ":" + ctx.getStart());
        }
    }
    if (!contigs_not_found.isEmpty()) {
        // NOTE(review): "..." is appended even when fewer than 100 contigs were
        // collected, i.e. when the list is actually complete.
        LOG.warn("The following contigs were not found: " + String.join(" ", contigs_not_found) + "...");
    }
    return 0;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) IndexedBedReader(com.github.lindenb.jvarkit.util.bio.bed.IndexedBedReader) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) 
RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 22 with CloseableIterator

Use of htsjdk.samtools.util.CloseableIterator in the project jvarkit by lindenb.

The method doWork of the class MantaMerger:

/**
 * Merges structural variants called per sample (e.g. by Manta) from several
 * single-sample VCFs into one multi-sample VCF.
 *
 * <p>Input is a list of VCF paths, optionally followed (tab-separated) by a
 * sample name; alternatively a single file ending in {@code .list} containing
 * such lines. All inputs must share the same sequence dictionary. For each
 * contig, calls judged equivalent by {@code svComparator} are merged into one
 * record carrying pseudo-genotypes: ref/alt when the sample had the call,
 * ref/ref otherwise.</p>
 *
 * @param args VCF paths, or a single path ending in ".list"
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    VariantContextWriter out = null;
    try {
        // sample name -> its input VCF; TreeMap keeps sample columns sorted.
        final Map<String, VcfInput> sample2inputs = new TreeMap<>();
        SAMSequenceDictionary dict = null;
        final List<String> lines;
        if (args.size() == 1 && args.get(0).endsWith(".list")) {
            // FIX(review): Files.lines() keeps the file open until the stream is
            // closed; the original leaked the handle.
            try (java.util.stream.Stream<String> st = Files.lines(Paths.get(args.get(0)))) {
                lines = st.filter(L -> !(StringUtils.isBlank(L) || L.startsWith("#"))).collect(Collectors.toList());
            }
        } else {
            lines = args;
        }
        for (final String line : lines) {
            final String[] tokens = CharSplitter.TAB.split(line);
            final VcfInput vcfInput = new VcfInput();
            vcfInput.vcfPath = Paths.get(tokens[0]);
            IOUtil.assertFileIsReadable(vcfInput.vcfPath);
            // All inputs must agree on one sequence dictionary.
            final SAMSequenceDictionary dict1 = SequenceDictionaryUtils.extractRequired(vcfInput.vcfPath);
            if (dict == null) {
                dict = dict1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict1)) {
                throw new JvarkitException.DictionariesAreNotTheSame(dict1, dict);
            }
            if (tokens.length < 2 || StringUtils.isBlank(tokens[1])) {
                // No explicit sample name: use the VCF's single sample, or fall
                // back to the path when the VCF is not single-sample.
                try (VCFReader r = VCFReaderFactory.makeDefault().open(vcfInput.vcfPath, false)) {
                    final List<String> snl = r.getHeader().getSampleNamesInOrder();
                    if (snl.size() == 1) {
                        vcfInput.sample = snl.get(0);
                    } else {
                        vcfInput.sample = vcfInput.vcfPath.toString();
                    }
                }
            } else {
                vcfInput.sample = tokens[1];
            }
            if (sample2inputs.containsKey(vcfInput.sample)) {
                LOG.error("duplicate sample " + vcfInput.sample);
                return -1;
            }
            sample2inputs.put(vcfInput.sample, vcfInput);
        }
        if (sample2inputs.isEmpty()) {
            LOG.error("no input found");
            return -1;
        }
        if (!StringUtils.isBlank(this.limitContig) && dict.getSequence(this.limitContig) == null) {
            LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(this.limitContig, dict));
            return -1;
        }
        // Optional exclusion BED: variants overlapping any interval are dropped.
        final Predicate<VariantContext> bedPredicate;
        if (this.excludeBedPath != null) {
            final BedLineCodec codec = new BedLineCodec();
            final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
            final IntervalTreeMap<Boolean> map = new IntervalTreeMap<>();
            try (BufferedReader br = IOUtils.openPathForBufferedReading(this.excludeBedPath)) {
                br.lines().filter(L -> !BedLine.isBedHeader(L)).map(L -> codec.decode(L)).filter(L -> L != null).filter(B -> !StringUtils.isBlank(converter.apply(B.getContig()))).map(B -> new Interval(converter.apply(B.getContig()), B.getStart(), B.getEnd())).forEach(R -> map.put(R, true));
            }
            bedPredicate = V -> !map.containsOverlapping(V);
        } else {
            bedPredicate = V -> true;
        }
        // ----- output header -------------------------------------------------
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
        metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
        final VCFInfoHeaderLine infoSvLen = new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation length");
        metaData.add(infoSvLen);
        final VCFInfoHeaderLine infoNSamples = new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples");
        metaData.add(infoNSamples);
        // FIX(review): description was the truncated string "amples".
        final VCFInfoHeaderLine infoSamples = new VCFInfoHeaderLine("SAMPLES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples");
        metaData.add(infoSamples);
        final VCFFilterHeaderLine filterSameNext = new VCFFilterHeaderLine("NEXT", "next variant in VCF is the same.");
        metaData.add(filterSameNext);
        // FIX(review): description was a copy/paste of the NEXT filter text.
        final VCFFilterHeaderLine filterSamePrev = new VCFFilterHeaderLine("PREV", "previous variant in VCF is the same.");
        metaData.add(filterSamePrev);
        final VCFHeader header = new VCFHeader(metaData, sample2inputs.keySet());
        header.setSequenceDictionary(dict);
        JVarkitVersion.getInstance().addMetaData(this, header);
        out = VCFUtils.createVariantContextWriterToPath(this.outputFile);
        out.writeHeader(header);
        final Decoy decoy = Decoy.getDefaultInstance();
        // ----- per-contig merge ----------------------------------------------
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            if (!StringUtils.isBlank(this.limitContig)) {
                if (!ssr.getSequenceName().equals(this.limitContig))
                    continue;
            }
            LOG.info("contig " + ssr.getSequenceName());
            if (decoy.isDecoy(ssr.getSequenceName()))
                continue;
            // Pass 1: collect one normalized "archetype" variant per SV key.
            final Map<SVKey, Set<String>> variants2samples = new HashMap<>();
            for (final VcfInput vcfinput : sample2inputs.values()) {
                // reset count for this contig
                vcfinput.contigCount = 0;
                try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true)) {
                    // Strip IDs, genotypes, filters and Manta-specific attributes
                    // so that equivalent calls from different samples compare equal.
                    vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength()).stream().filter(V -> discard_bnd == false || !V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND")).filter(bedPredicate).map(V -> new VariantContextBuilder(V).unfiltered().noID().noGenotypes().rmAttribute("EVENT").rmAttribute("HOMSEQ").rmAttribute("HOMLEN").rmAttribute("SVINSSEQ").rmAttribute("SVINSLEN").rmAttribute("MATEID").rmAttribute("LEFT_SVINSSEQ").rmAttribute("RIGHT_SVINSSEQ").rmAttribute("BND_DEPTH").rmAttribute("MATE_BND_DEPTH").rmAttribute("JUNCTION_QUAL").rmAttribute("CIGAR").make()).forEach(V -> {
                        final SVKey key1 = new SVKey(V);
                        if (!svComparator.test(V, V))
                            throw new RuntimeException("compare to self failed ! " + V);
                        variants2samples.put(key1, new HashSet<>());
                        vcfinput.contigCount++;
                    });
                }
            }
            if (variants2samples.isEmpty())
                continue;
            // build an interval tree for a faster access
            final IntervalTree<SVKey> intervalTree = new IntervalTree<>();
            for (final SVKey key : variants2samples.keySet()) {
                final SimpleInterval r = new SimpleInterval(key.archetype).extend(this.svComparator.getBndDistance() + 1);
                intervalTree.put(r.getStart(), r.getEnd(), key);
                // Paranoid check: the extended interval must find its own archetype.
                boolean found = false;
                final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
                while (nodeIter.hasNext()) {
                    final SVKey key1 = nodeIter.next().getValue();
                    if (this.svComparator.test(key1.archetype, key.archetype)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    out.close();
                    throw new RuntimeException("cannot find self " + key.archetype + " in " + r);
                }
            }
            // Pass 2: assign each sample's calls to every matching archetype.
            for (final VcfInput vcfinput : sample2inputs.values()) {
                // FIX(review): the query iterator is now closed by
                // try-with-resources; the original leaked it on exception.
                try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true);
                    CloseableIterator<VariantContext> iter = vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength())) {
                    while (iter.hasNext()) {
                        final VariantContext ctx = iter.next();
                        if (this.discard_bnd && ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
                            continue;
                        if (!bedPredicate.test(ctx))
                            continue;
                        final SimpleInterval r = new SimpleInterval(ctx).extend(this.svComparator.getBndDistance() + 1);
                        final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
                        while (nodeIter.hasNext()) {
                            final SVKey key1 = nodeIter.next().getValue();
                            if (!this.svComparator.test(key1.archetype, ctx))
                                continue;
                            final Set<String> samples = variants2samples.get(key1);
                            samples.add(vcfinput.sample);
                        }
                    }
                }
            }
            // Order keys by genomic position; drop keys no sample supports.
            final Comparator<VariantContext> sorter = new ContigDictComparator(dict).createLocatableComparator();
            final List<SVKey> orderedKeys = variants2samples.keySet().stream().filter(// no samples for this key ??!
            K -> !variants2samples.get(K).isEmpty()).sorted((A, B) -> sorter.compare(A.archetype, B.archetype)).collect(Collectors.toCollection(ArrayList::new));
            // Remove adjacent duplicates: same SV and same supporting samples.
            int i = 0;
            while (i + 1 < orderedKeys.size()) {
                final SVKey key1 = orderedKeys.get(i);
                final SVKey key2 = orderedKeys.get(i + 1);
                if (svComparator.test(key1.archetype, key2.archetype) && // same samples
                variants2samples.get(key1).equals(variants2samples.get(key2))) {
                    orderedKeys.remove(i + 1);
                } else {
                    i++;
                }
            }
            // Emit one merged record per remaining key.
            for (int key_index = 0; key_index < orderedKeys.size(); key_index++) {
                final SVKey key = orderedKeys.get(key_index);
                final Set<String> samples = variants2samples.get(key);
                final Allele refAllele = key.archetype.getReference();
                final Allele altAllele = Allele.create("<SV>", false);
                final Object svType = key.archetype.getAttribute(VCFConstants.SVTYPE, ".");
                final VariantContextBuilder vcb = new VariantContextBuilder();
                vcb.chr(key.archetype.getContig());
                vcb.start(key.archetype.getStart());
                vcb.stop(key.archetype.getEnd());
                vcb.log10PError(key.archetype.getLog10PError());
                vcb.alleles(Arrays.asList(refAllele, altAllele));
                vcb.attribute(VCFConstants.END_KEY, key.archetype.getEnd());
                vcb.attribute(VCFConstants.SVTYPE, svType);
                // SVLEN is negative for deletions, positive otherwise.
                vcb.attribute(infoSvLen.getID(), (svType.equals("DEL") ? -1 : 1) * key.archetype.getLengthOnReference());
                vcb.attribute(infoNSamples.getID(), samples.size());
                vcb.attribute(infoSamples.getID(), samples.stream().sorted().collect(Collectors.toList()));
                int ac = 0;
                // Pseudo-genotypes: het (ref/alt) if the sample carries the SV.
                final List<Genotype> genotypes = new ArrayList<>(sample2inputs.size());
                for (final String sn : sample2inputs.keySet()) {
                    final List<Allele> gta;
                    if (samples.contains(sn)) {
                        ac++;
                        gta = Arrays.asList(refAllele, altAllele);
                    } else {
                        gta = Arrays.asList(refAllele, refAllele);
                    }
                    genotypes.add(new GenotypeBuilder(sn, gta).make());
                }
                vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, ac);
                vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, sample2inputs.size() * 2);
                if (ac > 0) {
                    // FIX(review): precedence bug — the original computed
                    // (ac / size) * 2 instead of AF = AC / AN = ac / (2 * size).
                    vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, ac / (float) (sample2inputs.size() * 2));
                }
                if (key_index > 0 && svComparator.test(key.archetype, orderedKeys.get(key_index - 1).archetype)) {
                    vcb.filter(filterSamePrev.getID());
                }
                if (key_index + 1 < orderedKeys.size() && svComparator.test(key.archetype, orderedKeys.get(key_index + 1).archetype)) {
                    // FIX(review): removed stray System.err debug print left in
                    // the production path.
                    vcb.filter(filterSameNext.getID());
                }
                vcb.genotypes(genotypes);
                out.add(vcb.make());
            }
        }
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // Safety net: no-op when out was closed and nulled on the happy path.
        CloserUtil.close(out);
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) 
Decoy(com.github.lindenb.jvarkit.samtools.Decoy) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) Interval(htsjdk.samtools.util.Interval) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Iterator(java.util.Iterator) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) StructuralVariantComparator(com.github.lindenb.jvarkit.variant.sv.StructuralVariantComparator) IntervalTree(htsjdk.samtools.util.IntervalTree) TreeMap(java.util.TreeMap) Paths(java.nio.file.Paths) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IntervalTree(htsjdk.samtools.util.IntervalTree) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) Decoy(com.github.lindenb.jvarkit.samtools.Decoy) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval) Set(java.util.Set) HashSet(java.util.HashSet) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) 
ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) VCFReader(htsjdk.variant.vcf.VCFReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) TreeMap(java.util.TreeMap) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)

Example 23 with CloseableIterator

Use of htsjdk.samtools.util.CloseableIterator in the project jvarkit by lindenb.

The method doVcfToVcf of the class GridssPostProcessVcf:

@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
    SortingCollection<VariantContext> sorter1 = null;
    SortingCollection<VariantContext> sorter2 = null;
    PrintWriter debug = null;
    try {
        debug = this.debugFile == null ? new PrintWriter(new NullOuputStream()) : new PrintWriter(Files.newBufferedWriter(this.debugFile));
        final VCFHeader header = iterin.getHeader();
        LOG.info("reading input.");
        final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(EVENT_KEY, "").compareTo(B.getAttributeAsString(EVENT_KEY, ""));
        sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (iterin.hasNext()) {
            final VariantContext ctx = iterin.next();
            if (!ctx.hasAttribute(EVENT_KEY)) {
                LOG.warn("skipping variant without INFO/" + EVENT_KEY + " at " + toString(ctx));
                continue;
            }
            sorter1.add(ctx);
        }
        sorter1.doneAdding();
        sorter1.setDestructiveIteration(true);
        LOG.info("done adding");
        CloseableIterator<VariantContext> iter2 = sorter1.iterator();
        @SuppressWarnings("resource") final EqualRangeIterator<VariantContext> equal_range = new EqualRangeIterator<>(iter2, comparator);
        sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (equal_range.hasNext()) {
            final List<VariantContext> variants = equal_range.next();
            if (variants.size() == 1) {
                final VariantContext ctx = variants.get(0);
                final Allele alt = ctx.getAlleles().get(1);
                // INSERTION LIKE. chr22:1234 A/.AAAACAAGGAG
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) < length(alt)) {
                    final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx);
                    vcb1.attribute(VCFConstants.SVTYPE, "INS");
                    vcb1.attribute("SVLEN", length(alt) - length(ctx.getReference()));
                    sorter2.add(vcb1.make());
                } else // STRANGE ? no ? change
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) == 1 && length(alt) == 1) {
                    sorter2.add(ctx);
                } else {
                    sorter2.add(ctx);
                    debug.println("ALONE " + toString(ctx));
                }
            } else if (variants.size() != 2) {
                for (final VariantContext ctx : variants) {
                    debug.println("SIZE>2 " + toString(ctx));
                    sorter2.add(ctx);
                }
            } else {
                final VariantContext ctx1 = variants.get(0);
                final VariantContext ctx2 = variants.get(1);
                final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
                final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
                if (brk1 == null || brk2 == null) {
                    debug.println("expected two bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                // continue;
                }
                /* should be the same breakend */
                if (!ctx1.getContig().equals(brk2.getContig()) || !ctx2.getContig().equals(brk1.getContig()) || ctx1.getStart() != brk2.getStart() || ctx2.getStart() != brk1.getStart()) {
                    debug.println("expected concordant bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                // continue;
                }
                final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
                final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
                if (!ctx1.contigsMatch(ctx2)) {
                    vcb1.attribute(VCFConstants.SVTYPE, "CTX");
                    vcb2.attribute(VCFConstants.SVTYPE, "CTX");
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                if (ctx1.getStart() > ctx2.getStart()) {
                    debug.println("CTX1>CTX2?" + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                final BndType bndType1 = getBndType(brk1);
                final BndType bndType2 = getBndType(brk2);
                if (bndType1.equals(BndType.DNA_OPEN) && bndType2.equals(BndType.CLOSE_DNA)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DEL>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DEL");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx1.getEnd() - ctx2.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.CLOSE_DNA) && bndType2.equals(BndType.DNA_OPEN)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DUP>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DUP");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.OPEN_DNA) && bndType2.equals(BndType.OPEN_DNA)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<INV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "INV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.DNA_CLOSE) && bndType2.equals(BndType.DNA_CLOSE)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<SV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "SV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else {
                    debug.println("How to handle " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                }
            }
        }
        equal_range.close();
        iter2.close();
        sorter1.cleanup();
        sorter1 = null;
        sorter2.doneAdding();
        sorter2.setDestructiveIteration(true);
        iter2 = sorter2.iterator();
        final VCFHeader header2 = new VCFHeader(header);
        JVarkitVersion.getInstance().addMetaData(this, header2);
        final VCFInfoHeaderLine originalAlt = new VCFInfoHeaderLine("BND", 1, VCFHeaderLineType.String, "Original ALT allele.");
        header.addMetaDataLine(originalAlt);
        header.addMetaDataLine(new VCFInfoHeaderLine(originalAlt.getID(), 1, VCFHeaderLineType.Integer, "SV length"));
        header.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
        header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        out.writeHeader(header2);
        while (iter2.hasNext()) {
            out.add(iter2.next());
        }
        iter2.close();
        sorter2.cleanup();
        sorter2 = null;
        debug.flush();
        debug.close();
        debug = null;
        return 0;
    } catch (final Throwable err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
        if (sorter1 != null)
            sorter1.cleanup();
        if (sorter2 != null)
            sorter2.cleanup();
        if (debug != null)
            try {
                debug.close();
            } catch (final Throwable err2) {
            }
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) Breakend(com.github.lindenb.jvarkit.variant.variantcontext.Breakend) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) Breakend(com.github.lindenb.jvarkit.variant.variantcontext.Breakend) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) 
Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter)

Example 24 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class Bam2Wig, method doWork.

/**
 * Command-line entry point.
 * Opens one or more coordinate-sorted SAM/BAM inputs (stdin, a single file,
 * or a '.list' file of paths), optionally restricted to a genomic region,
 * then hands the resulting SAMRecord iterator to {@code run(...)} which
 * writes the output (presumably WIG, given the class name — confirm) to {@code pw}.
 *
 * @param args positional arguments: empty = SAM on stdin (no region allowed);
 *             exactly one non-'.list' argument = a single BAM file;
 *             otherwise a set of BAM paths unrolled from the arguments.
 * @return 0 on success, -1 on any error (errors are logged, never thrown).
 */
@Override
public int doWork(final List<String> args) {
    // validate the sliding-window parameters before opening anything
    if (this.win_shift <= 0) {
        LOG.error("window shift<=0");
        return -1;
    }
    if (this.window_span <= 0) {
        LOG.error("window size<=0");
        return -1;
    }
    final SimpleInterval interval;
    PrintWriter pw = null;
    CloseableIterator<SAMRecord> samRecordIterator = null;
    final List<SamReader> samReaders = new ArrayList<>();
    // per-reader iterators feeding the merging iterator, kept so the
    // 'finally' block can close them even if the merge setup fails mid-way
    final List<CloseableIterator<SAMRecord>> merginIterators = new ArrayList<>();
    try {
        // LENIENT: tolerate slightly malformed records rather than aborting
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(htsjdk.samtools.ValidationStringency.LENIENT);
        if (args.isEmpty()) {
            // case 1: SAM on stdin; random access is impossible, so a region is rejected
            if (!StringUtil.isBlank(region_str)) {
                LOG.error("cannot specify region for stdin");
                return -1;
            }
            interval = null;
            samReaders.add(srf.open(SamInputResource.of(stdin())));
            samRecordIterator = samReaders.get(0).iterator();
        } else if (args.size() == 1 && !args.get(0).endsWith(".list")) {
            // case 2: a single BAM file, optionally queried on a region
            samReaders.add(srf.open(SamInputResource.of(new File(args.get(0)))));
            if (StringUtil.isBlank(this.region_str)) {
                samRecordIterator = samReaders.get(0).iterator();
                interval = null;
            } else {
                // parse the region against this BAM's dictionary; throws a descriptive exception if it cannot be resolved
                interval = IntervalParserFactory.newInstance().dictionary(samReaders.get(0).getFileHeader().getSequenceDictionary()).enableWholeContig().make().apply(region_str).orElseThrow(IntervalParserFactory.exception(this.region_str));
                samRecordIterator = samReaders.get(0).query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            }
        } else {
            // case 3: several BAM files (or a '.list' of paths); records are merged on the fly
            final List<Path> samFiles = IOUtils.unrollPaths(args);
            if (samFiles.isEmpty()) {
                LOG.error("No Input SAM file");
                return -1;
            }
            // all inputs must share the same sequence dictionary to be mergeable
            final SAMSequenceDictionary dict0 = SequenceDictionaryUtils.extractRequired(samFiles.get(0));
            samFiles.stream().forEach(F -> {
                final SAMSequenceDictionary dicti = SequenceDictionaryUtils.extractRequired(F);
                if (!SequenceUtil.areSequenceDictionariesEqual(dicti, dict0)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(dict0, dicti);
                }
            });
            for (final Path bamFile : samFiles) {
                LOG.info("opening " + bamFile);
                samReaders.add(srf.open(bamFile));
            }
            final SamFileHeaderMerger mergedheader = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()), false);
            final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new HashMap<>();
            if (StringUtil.isBlank(this.region_str)) {
                // no region: iterate every record of every reader
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.iterator());
                }
                interval = null;
            } else {
                interval = IntervalParserFactory.newInstance().dictionary(dict0).enableWholeContig().make().apply(region_str).orElseThrow(IntervalParserFactory.exception(region_str));
                LOG.info("interval :" + interval);
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false));
                }
            }
            merginIterators.addAll(reader2iter.values());
            samRecordIterator = new MergingSamRecordIterator(mergedheader, reader2iter, true);
        }
        // downstream processing assumes coordinate order; reject unsorted input
        for (final SamReader sr : samReaders) {
            if (sr.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                LOG.error("one of your bam input is not sorted on coordinate");
                return -1;
            }
        }
        pw = openPathOrStdoutAsPrintWriter(this.outputFile);
        // reader 0's dictionary is representative: in the multi-file case all were checked equal above
        run(pw, samRecordIterator, samReaders.get(0).getFileHeader().getSequenceDictionary(), interval);
        samRecordIterator.close();
        samRecordIterator = null;
        CloserUtil.close(samReaders);
        samReaders.clear();
        pw.flush();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // close whatever may still be open after an early return or exception;
        // resources already closed/nulled on the success path are no-ops here
        CloserUtil.close(merginIterators);
        CloserUtil.close(samRecordIterator);
        CloserUtil.close(samReaders);
        CloserUtil.close(pw);
        pw = null;
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ArrayList(java.util.ArrayList) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Predicate(java.util.function.Predicate) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) 
Objects(java.util.Objects) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SamRecordFilterFactory(com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Path(java.nio.file.Path) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) PrintWriter(java.io.PrintWriter)

Example 25 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class BamAlleleBalance, method doWork.

/**
 * Command-line entry point.
 * For each biallelic, unfiltered SNP of the input VCF, counts the REF and ALT
 * bases carried by the reads of each BAM at that position, keeps heterozygous
 * sites with sufficient depth, bins the ALT-allele ratio into the configured
 * ranges, and prints a table with one column per sample and one row per range.
 *
 * @param args paths of the BAM files to scan (list files are unrolled).
 * @return 0 on success, -1 on error (errors are logged, never thrown).
 */
@Override
public int doWork(List<String> args) {
    try {
        final List<Path> bamPaths = IOUtils.unrollPaths(args);
        if (bamPaths.isEmpty()) {
            LOG.error("Bam list is empty");
            return -1;
        }
        // load the candidate sites: biallelic, non-filtered SNPs from the VCF
        final List<Snp> snps = new ArrayList<>(10_000);
        try (VCFReader vr = VCFReaderFactory.makeDefault().open(this.variantsPath, false)) {
            try (CloseableIterator<VariantContext> iter = vr.iterator()) {
                while (iter.hasNext()) {
                    final VariantContext ctx = iter.next();
                    if (!ctx.isBiallelic() || ctx.isFiltered() || !ctx.isSNP())
                        continue;
                    snps.add(new Snp(ctx));
                }
            }
        }
        if (snps.isEmpty()) {
            LOG.error("snp list is empty");
            return -1;
        }
        LOG.info(String.valueOf(snps.size()) + " snp(s)");
        // sample/partition name -> histogram of ALT-ratio ranges, accumulated over all BAMs
        final Map<String, Counter<RangeOfDoubles.Range>> sample2count = new HashMap<>(bamPaths.size());
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null)
            srf.referenceSequence(this.faidx);
        for (final Path bamPath : bamPaths) {
            try (SamReader samReader = srf.open(bamPath)) {
                // fallback partition name used when a read group lacks the grouping attribute
                final String defaultPartition = IOUtils.getFilenameWithoutCommonSuffixes(bamPath);
                final SAMFileHeader header = samReader.getFileHeader();
                final Set<String> samples = header.getReadGroups().stream().map(RG -> this.groupBy.apply(RG, defaultPartition)).collect(Collectors.toSet());
                samples.stream().forEach(SN -> sample2count.putIfAbsent(SN, new Counter<>()));
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                // maps the VCF's contig names to this BAM's naming scheme (e.g. "chr1" vs "1")
                final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
                for (final Snp snp : snps) {
                    final String bamContig = converter.apply(snp.contigSrc);
                    if (StringUtils.isBlank(bamContig))
                        continue;
                    // per-site, per-sample REF/ALT tallies, reset for every SNP
                    final Map<String, SiteInfo> sample2siteinfo = new HashMap<>(samples.size());
                    for (final String sn : samples) {
                        sample2siteinfo.put(sn, new SiteInfo());
                    }
                    try (CloseableIterator<SAMRecord> iter = samReader.queryOverlapping(bamContig, snp.pos1, snp.pos1)) {
                        while (iter.hasNext()) {
                            final SAMRecord rec = iter.next();
                            // discard low-quality / flagged reads below the MAPQ threshold
                            if (!SAMRecordDefaultFilter.accept(rec, this.mapq))
                                continue;
                            final SAMReadGroupRecord rg = rec.getReadGroup();
                            if (rg == null)
                                continue;
                            String sample = this.groupBy.apply(rg, defaultPartition);
                            if (StringUtils.isBlank(sample) || !sample2siteinfo.containsKey(sample))
                                continue;
                            final Cigar cigar = rec.getCigar();
                            if (cigar == null || cigar.isEmpty())
                                continue;
                            byte[] bases = rec.getReadBases();
                            if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
                                continue;
                            // 1-based position on the read aligned to the SNP;
                            // < 1 means no read base at that reference position (e.g. a deletion)
                            final int readpos1 = rec.getReadPositionAtReferencePosition(snp.pos1);
                            if (readpos1 < 1)
                                continue;
                            final int readpos0 = readpos1 - 1;
                            if (readpos0 < 0 || readpos0 >= bases.length)
                                continue;
                            final char base = (char) Character.toUpperCase(bases[readpos0]);
                            final SiteInfo si = sample2siteinfo.get(sample);
                            if (si == null)
                                continue;
                            // count only bases matching REF or ALT; any other base is ignored
                            if (base == snp.ref) {
                                si.countRef++;
                            } else if (base == snp.alt) {
                                si.countAlt++;
                            }
                        }
                        for (final String sample : sample2siteinfo.keySet()) {
                            final SiteInfo si = sample2siteinfo.get(sample);
                            final int depth = si.countAlt + si.countRef;
                            if (depth < this.min_depth)
                                continue;
                            // must be het
                            if (si.countAlt == 0 || si.countRef == 0)
                                continue;
                            // ALT fraction of the informative depth, binned into the configured ranges
                            final double ratio = si.countAlt / (double) depth;
                            final RangeOfDoubles.Range range = this.ratios.getRange(ratio);
                            sample2count.get(sample).incr(range);
                        }
                    }
                // end of iter
                }
            // end of loop over snps
            }
        }
        // print report
        try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
            // columns ordered by increasing total observation count per sample
            final List<String> sortedSamples = sample2count.keySet().stream().sorted((A, B) -> Long.compare(sample2count.get(A).getTotal(), sample2count.get(B).getTotal())).collect(Collectors.toList());
            pw.print("RANGE");
            for (final String sn : sortedSamples) {
                pw.print("\t");
                pw.print(sn);
            }
            pw.println();
            for (final RangeOfDoubles.Range r : this.ratios.getRanges()) {
                pw.print(r.toString());
                for (final String sn : sortedSamples) {
                    pw.print("\t");
                    pw.print(sample2count.get(sn).count(r));
                }
                pw.println();
            }
            pw.flush();
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ArrayList(java.util.ArrayList) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) RangeOfDoubles(com.github.lindenb.jvarkit.math.RangeOfDoubles) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReaderFactory(htsjdk.samtools.SamReaderFactory) HashMap(java.util.HashMap) RangeOfDoubles(com.github.lindenb.jvarkit.math.RangeOfDoubles) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) 
VCFReader(htsjdk.variant.vcf.VCFReader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

CloseableIterator (htsjdk.samtools.util.CloseableIterator)103 List (java.util.List)86 Logger (com.github.lindenb.jvarkit.util.log.Logger)85 Parameter (com.beust.jcommander.Parameter)82 Program (com.github.lindenb.jvarkit.util.jcommander.Program)78 ArrayList (java.util.ArrayList)73 Collectors (java.util.stream.Collectors)71 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)69 Path (java.nio.file.Path)69 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)66 CloserUtil (htsjdk.samtools.util.CloserUtil)64 Set (java.util.Set)64 VCFHeader (htsjdk.variant.vcf.VCFHeader)59 VariantContext (htsjdk.variant.variantcontext.VariantContext)54 IOException (java.io.IOException)53 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)51 SAMRecord (htsjdk.samtools.SAMRecord)51 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)51 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)50 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)49