Search in sources :

Example 41 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class MakeMiniBam method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archive = null;
    int id_generator = 0;
    final Set<String> outputFileNames = new HashSet<>();
    try {
        if (StringUtils.isBlank(this.filePrefix) || this.filePrefix.equals("now")) {
            final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMdd");
            this.filePrefix = simpleDateFormat.format(new Date()) + ".";
        }
        if (!this.filePrefix.endsWith(".")) {
            this.filePrefix += ".";
        }
        IOUtil.assertDirectoryIsWritable(tmpDir);
        final List<Path> bamFiles = IOUtils.unrollPaths(args);
        if (bamFiles.isEmpty()) {
            LOG.error("no bam file defined");
            return -1;
        }
        final List<Locatable> locatables = this.intervalListProvider.enableBreakEndInterval(!disable_sv_bnd).enableSinglePoint().stream().collect(Collectors.toList());
        if (locatables.isEmpty()) {
            LOG.error("no position defined");
            return -1;
        }
        final SAMFileWriterFactory swf = new SAMFileWriterFactory();
        swf.setCompressionLevel(9);
        swf.setCreateIndex(true);
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.referencePath != null)
            srf.referenceSequence(this.referencePath);
        archive = ArchiveFactory.open(this.outputFile);
        archive.setCompressionLevel(Deflater.NO_COMPRESSION);
        for (final Path bamFile : bamFiles) {
            LOG.info(bamFile.toString());
            final StopWatch stopWatch = new StopWatch();
            stopWatch.start();
            final SamReader sr = srf.open(bamFile);
            if (!sr.hasIndex()) {
                sr.close();
                LOG.error("file " + bamFile + " is not indexed.");
                return -1;
            }
            final SAMFileHeader header = sr.getFileHeader();
            if (!header.getSortOrder().equals(SortOrder.coordinate)) {
                sr.close();
                LOG.error("file " + bamFile + " is not sorted on coordinate.");
                return -1;
            }
            final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
            final Optional<String> dictLabel = SequenceDictionaryUtils.getBuildName(dict);
            final String labelSuffix = (dictLabel.isPresent() ? "." + dictLabel.get() : "") + (locatables.size() == 1 ? "." + locatables.get(0).getContig() + "_" + locatables.get(0).getStart() + (locatables.get(0).getStart() == locatables.get(0).getEnd() ? "" : "_" + locatables.get(0).getEnd()) : "");
            final ContigNameConverter ctgConvert = ContigNameConverter.fromOneDictionary(dict);
            final QueryInterval[] array = locatables.stream().flatMap(loc -> {
                if (this.bound_edge < 1 || loc.getLengthOnReference() <= this.bound_edge) {
                    return Collections.singletonList(loc).stream();
                }
                return Arrays.asList((Locatable) new SimpleInterval(loc.getContig(), loc.getStart(), loc.getStart()), (Locatable) new SimpleInterval(loc.getContig(), loc.getEnd(), loc.getEnd())).stream();
            }).map(LOC -> {
                final String contig = ctgConvert.apply(LOC.getContig());
                if (StringUtils.isBlank(contig)) {
                    LOG.warn("Cannot find " + LOC.getContig() + " in " + bamFile);
                    return null;
                }
                final SAMSequenceRecord ssr = dict.getSequence(contig);
                if (LOC.getStart() > ssr.getSequenceLength()) {
                    LOG.warn("pos " + LOC + " is greater than chromosome size " + ssr.getSequenceLength() + " in " + bamFile);
                    return null;
                }
                return new QueryInterval(ssr.getSequenceIndex(), Math.max(1, LOC.getStart() - extend), Math.min(ssr.getSequenceLength(), LOC.getEnd() + extend));
            }).filter(Q -> Q != null).collect(HtsCollectors.optimizedQueryIntervals());
            final Path tmpBam = Files.createTempFile(this.tmpDir, "tmp.", ".bam");
            final SAMFileHeader header2 = header.clone();
            final SAMProgramRecord prg = header2.createProgramRecord();
            prg.setProgramName(this.getProgramName());
            prg.setProgramVersion(this.getGitHash());
            prg.setCommandLine(this.getProgramCommandLine());
            JVarkitVersion.getInstance().addMetaData(this, header2);
            header2.addComment("MiniBam : Bam was " + bamFile);
            try (SAMFileWriter sfw = swf.makeBAMWriter(header2, true, tmpBam)) {
                if (array.length > 0) {
                    try (CloseableIterator<SAMRecord> ssr = sr.query(array, false)) {
                        while (ssr.hasNext()) {
                            final SAMRecord rec = ssr.next();
                            if (this.samRecordFilter.filterOut(rec))
                                continue;
                            rec.setAttribute(SAMTag.PG.name(), prg.getId());
                            sfw.addAlignment(rec);
                        }
                    }
                }
            }
            final Path tmpBai = SamFiles.findIndex(tmpBam);
            if (!Files.exists(tmpBai)) {
                LOG.error("Cannot find tmp bai Index for " + bamFile + " " + tmpBam);
                return -1;
            }
            final String sampleName1 = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(null);
            final String sampleName;
            if (!StringUtils.isBlank(sampleName1)) {
                sampleName = sampleName1;
            } else if (this.allow_no_sample) {
                sampleName = IOUtils.getFilenameWithoutSuffix(bamFile, 1);
                LOG.warn("No Read group in " + bamFile + " using filename : " + sampleName);
            } else {
                throw new IllegalArgumentException("No Sample found in " + bamFile + ". Use --no-samples option ?");
            }
            String filename = this.filePrefix + sampleName + labelSuffix;
            while (outputFileNames.contains(filename)) {
                filename = this.filePrefix + sampleName + "." + (id_generator++) + labelSuffix;
            }
            outputFileNames.add(filename);
            archive.copyTo(tmpBam, filename + FileExtensions.BAM);
            archive.copyTo(tmpBai, filename + IOUtils.getFileSuffix(tmpBai));
            stopWatch.stop();
            LOG.info("Added " + StringUtils.niceFileSize(Files.size(tmpBam)) + "(bam) and " + StringUtils.niceFileSize(Files.size(tmpBai)) + " (Bai). " + StringUtils.niceDuration(stopWatch.getElapsedTime()));
            Files.deleteIfExists(tmpBam);
            Files.deleteIfExists(tmpBai);
        }
        if (!StringUtils.isBlank(this.userComment)) {
            try (final PrintWriter pw = archive.openWriter(this.filePrefix + (this.filePrefix.endsWith(".") ? "" : ".") + "README.md")) {
                pw.append(this.userComment);
                pw.println();
                pw.println("## BAMS");
                pw.println();
                for (final Path bamFile : bamFiles) pw.println("  * " + bamFile);
                pw.println();
                pw.println("## Date");
                pw.println();
                pw.println(new SimpleDateFormat("yyyyMMdd").format(new Date()));
                pw.flush();
            }
        }
        archive.close();
        archive = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(archive);
    }
}
Also used : Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Date(java.util.Date) SamFiles(htsjdk.samtools.SamFiles) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Deflater(java.util.zip.Deflater) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) Optional(java.util.Optional) SamReaderFactory(htsjdk.samtools.SamReaderFactory) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) SimpleDateFormat(java.text.SimpleDateFormat) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) HashSet(java.util.HashSet) SAMTag(htsjdk.samtools.SAMTag) StopWatch(htsjdk.samtools.util.StopWatch) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Locatable(htsjdk.samtools.util.Locatable) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) QueryInterval(htsjdk.samtools.QueryInterval) SamRecordFilterFactory(com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Date(java.util.Date) StopWatch(htsjdk.samtools.util.StopWatch) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleDateFormat(java.text.SimpleDateFormat) Locatable(htsjdk.samtools.util.Locatable)

Example 42 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class KnimeVariantHelper method parseVariantIntervalFilters.

public Predicate<VariantContext> parseVariantIntervalFilters(final String... array) {
    final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().make();
    Predicate<VariantContext> filter = V -> false;
    for (final String str : array) {
        final SimpleInterval interval = parser.apply(str).orElseThrow(IntervalParserFactory.exception(str));
        filter = filter.or(V -> V.overlaps(interval));
    }
    return filter;
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ParsingUtils(htsjdk.tribble.util.ParsingUtils) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) IndexFactory(htsjdk.tribble.index.IndexFactory) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) VcfToTable(com.github.lindenb.jvarkit.tools.vcf2table.VcfToTable) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Optional(java.util.Optional) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) VcfPeekVcf(com.github.lindenb.jvarkit.tools.vcfvcf.VcfPeekVcf) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) TabixIndex(htsjdk.tribble.index.tabix.TabixIndex) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) GenotypeLikelihoods(htsjdk.variant.variantcontext.GenotypeLikelihoods) VCFIterator(htsjdk.variant.vcf.VCFIterator) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringTokenizer(java.util.StringTokenizer) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) StreamSupport(java.util.stream.StreamSupport) VCFCodec(htsjdk.variant.vcf.VCFCodec) Index(htsjdk.tribble.index.Index) VCFConstants(htsjdk.variant.vcf.VCFConstants) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) File(java.io.File) IndexedBedReader(com.github.lindenb.jvarkit.util.bio.bed.IndexedBedReader) Paths(java.nio.file.Paths) IndexType(htsjdk.tribble.index.IndexFactory.IndexType) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 43 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class Straw method readHeader.

// reads the header, storing the positions of the normalization vectors and returning the master pointer
private long readHeader(final LittleEndianInputStream fin, final String intervalStr1, final String intervalStr2, final QueryInterval[] queryIntervals) throws IOException {
    final long master = readHeader(fin);
    Function<String, Optional<SimpleInterval>> intervalParser = IntervalParserFactory.newInstance().dictionary(this.dictionary).make();
    final SimpleInterval interval1 = intervalParser.apply(intervalStr1).orElseThrow(IntervalParserFactory.exception(intervalStr1));
    final SimpleInterval interval2 = intervalParser.apply(intervalStr2).orElseThrow(IntervalParserFactory.exception(intervalStr2));
    queryIntervals[0] = new QueryInterval(this.dictionary.getSequenceIndex(interval1.getContig()), interval1.getStart(), interval1.getEnd());
    queryIntervals[1] = new QueryInterval(this.dictionary.getSequenceIndex(interval2.getContig()), interval2.getStart(), interval2.getEnd());
    return master;
}
Also used : Optional(java.util.Optional) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 44 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfStrechToSvg method run.

private void run(final ArchiveFactory archive, final BedLine bed, final VCFHeader header, final VCFReader in, final PrintWriter manifest) {
    LOG.info("processing " + bed);
    final Set<String> limitSamples;
    if (StringUtils.isBlank(this.sampleStr)) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
    } else if (this.sampleStr.startsWith("^")) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
        limitSamples.removeAll(Arrays.stream(this.sampleStr.substring(1).split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet()));
    } else {
        limitSamples = Arrays.stream(this.sampleStr.split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    try (CloseableIterator<VariantContext> iter = in.query(bed)) {
        final List<VariantSet> L = iter.stream().filter(V -> acceptVariant(V)).map(V -> new VariantSet(V)).collect(Collectors.toCollection(ArrayList::new));
        if (L.isEmpty()) {
            LOG.warn("No valid variant found for \"" + bed + "\"");
            return;
        }
        int i = 0;
        while (i + 1 < L.size()) {
            if (L.get(i).withinDistanceOf(L.get(i + 1), this.withinDistance)) {
                L.get(i).variants.addAll(L.get(i + 1).variants);
                L.remove(i + 1);
            } else {
                i++;
            }
        }
        final int margin_left = 50;
        final int margin_right = 10;
        final double drawingAreaWidth = image_width_pixel - (margin_left + margin_right);
        final int intervalLength = L.stream().mapToInt(V -> V.getLengthOnReference()).sum();
        double x = 0;
        for (i = 0; i < L.size(); i++) {
            L.get(i).x = x;
            x += (L.get(i).getLengthOnReference() / (double) intervalLength) * drawingAreaWidth;
        }
        for (i = 0; i < L.size(); i++) {
            L.get(i).width = (i + 1 < L.size() ? L.get(i + 1).x : drawingAreaWidth) - L.get(i).x;
        }
        try {
            final DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
            final DocumentBuilder dom = db.newDocumentBuilder();
            this.document = dom.newDocument();
            final Element svgRoot = element("svg");
            this.document.appendChild(svgRoot);
            final String mainTitleStr = SequenceDictionaryUtils.getBuildName(dict).orElse("") + " " + new SimpleInterval(bed).toNiceString() + " length:" + StringUtils.niceInt(bed.getLengthOnReference());
            /* SVG title */
            {
                final Element title = element("title");
                svgRoot.appendChild(title);
                title.appendChild(text(mainTitleStr));
            }
            /* SVG style */
            {
                final String gtopacity = this.dynamicParams.getOrDefault("gt.opacity", "0.7");
                final Element style = element("style");
                svgRoot.appendChild(style);
                style.appendChild(text(".maintitle {text-anchor:middle;fill:blue} " + ".vc {stroke-width:0.5px;} " + ".transcript {stroke:black;stroke-width:1px;opacity:1;}" + ".exon {stroke:black;stroke-width:0.5px;fill:blue;opacity:1;}" + ".sample {fill:blue;font-size:7px;} " + ".samplelabel {stroke:gray;stroke-width:0.5px;font-size:" + this.dynamicParams.getOrDefault("sample.fontsize", "7") + "px;}\n" + ".coverage { fill:gray; stroke:yellow;opacity:0.2;} " + ".frame { fill:none; stroke: darkgray;} " + ".area0 {fill:white;}\n" + ".area1 {fill:floralwhite;}\n" + "circle.HOM_REF {fill:green;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HET {fill:blue;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HOM_VAR {fill:red;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "a {cursor: pointer;}\n"));
            }
            /* desc */
            {
                final Element descr = element("desc");
                svgRoot.appendChild(descr);
                descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
            }
            // main title
            {
                final Element gtitle = element("text", mainTitleStr);
                gtitle.setAttribute("class", "maintitle");
                gtitle.setAttribute("x", format(this.image_width_pixel / 2.0));
                gtitle.setAttribute("y", "15");
                svgRoot.appendChild(wrapLoc(gtitle, bed));
            }
            int margin_top = 50;
            double y = margin_top;
            final double min_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r1", "1"));
            final double max_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r2", "7"));
            final Element main_g = element("g");
            svgRoot.appendChild(main_g);
            /**
             * plot genes
             */
            if (!this.all_genes.isEmpty()) {
                final double transcript_height = 5;
                final double exon_height = (transcript_height - 1);
                final double save_y = y;
                final Element g_genes = element("g");
                g_genes.setAttribute("transform", "translate(" + margin_left + ",0)");
                main_g.appendChild(g_genes);
                /* loop over each vset */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    // find transcript in this vset
                    final List<Transcript> transcripts = this.all_genes.getOverlapping(vset).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(vset)).collect(Collectors.toList());
                    if (transcripts.isEmpty())
                        continue;
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_genes.appendChild(g_vset);
                    // y in this vset
                    double y2 = save_y;
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    /* loop over transcripts */
                    for (final Transcript tr : transcripts) {
                        final Element g_tr = element("g");
                        g_tr.setAttribute("transform", "translate(0," + format(y2) + ")");
                        g_vset.appendChild(g_tr);
                        final Element line = element("line");
                        line.setAttribute("class", "transcript");
                        line.setAttribute("x1", format(Math.max(0, base2pixel.applyAsDouble(tr.getStart()))));
                        line.setAttribute("y1", format(transcript_height / 2.0));
                        line.setAttribute("x2", format(Math.min(vset.width, base2pixel.applyAsDouble(tr.getEnd()))));
                        line.setAttribute("y2", format(transcript_height / 2.0));
                        line.appendChild(element("title", tr.getId()));
                        g_tr.appendChild(wrapLoc(line, tr));
                        /* loop over exons */
                        for (final Exon exon : tr.getExons()) {
                            if (!exon.overlaps(vset))
                                continue;
                            final Element exRect = element("rect");
                            exRect.setAttribute("class", "exon");
                            final double x_start = Math.max(0, base2pixel.applyAsDouble(exon.getStart()));
                            exRect.setAttribute("x", format(x_start));
                            exRect.setAttribute("y", format(transcript_height / 2.0 - exon_height / 2.0));
                            final double x_end = Math.min(vset.width, base2pixel.applyAsDouble(exon.getEnd()));
                            exRect.setAttribute("width", format(x_end - x_start));
                            exRect.setAttribute("height", format(exon_height));
                            exRect.appendChild(element("title", exon.getName()));
                            g_tr.appendChild(wrapLoc(exRect, exon));
                        }
                        y2 += transcript_height + 0.5;
                    }
                    y = Math.max(y, y2);
                }
                y++;
            }
            final double sample_height = Double.parseDouble(this.dynamicParams.getOrDefault("sample.height", "25"));
            final double sample_height2 = sample_height - (max_circle_radius * 2.0);
            int space_between_samples = 2;
            int got_n_samples = 0;
            for (final String sn : header.getSampleNamesInOrder()) {
                if (!limitSamples.contains(sn))
                    continue;
                boolean got_this_sample = false;
                final Element g_sample = element("g");
                g_sample.setAttribute("transform", "translate(" + margin_left + "," + format(y) + ")");
                /* get coverage */
                final int maxCoverage;
                if (this.sample2bam.containsKey(sn)) {
                    final CoverageFactory coverageFactory = new CoverageFactory();
                    try (SamReader sr = this.openSamReader(this.sample2bam.get(sn))) {
                        /* loop over each variant set */
                        for (final VariantSet vset : L) {
                            vset.coverage = coverageFactory.getSimpleCoverage(sr, vset, sn);
                        }
                    }
                    maxCoverage = L.stream().flatMapToInt(V -> V.coverage.stream()).max().orElse(0);
                } else {
                    maxCoverage = 0;
                    for (final VariantSet vset : L) {
                        vset.coverage = null;
                    }
                }
                /* loop over each variant set */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_sample.appendChild(g_vset);
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    // plot set length
                    final Element rect = element("rect");
                    rect.setAttribute("class", "area" + (i % 2));
                    rect.setAttribute("x", "0");
                    rect.setAttribute("y", "0");
                    rect.setAttribute("width", format(vset.width));
                    rect.setAttribute("height", format(sample_height));
                    if (!remove_tooltip)
                        rect.appendChild(element("title", vset.toString()));
                    g_vset.appendChild(rect);
                    // plot coverage
                    if (maxCoverage > 0 && this.sample2bam.containsKey(sn)) {
                        final double[] scaled = vset.coverage.scaleAverage((int) vset.width);
                        final StringBuilder sb = new StringBuilder();
                        sb.append("0," + sample_height);
                        for (int t = 0; t < scaled.length; t++) {
                            if (t > 1 && t + 1 < scaled.length && format(scaled[t - 1]).equals(format(scaled[t + 1])) && format(scaled[t - 1]).equals(format(scaled[t])))
                                continue;
                            sb.append(" ").append(t).append(",");
                            sb.append(format(sample_height * (1.0 - scaled[t] / maxCoverage)));
                        }
                        sb.append(" " + format(vset.width) + "," + sample_height);
                        final Element polyline = element("polyline");
                        polyline.setAttribute("class", "coverage");
                        polyline.setAttribute("points", sb.toString());
                        g_vset.appendChild(polyline);
                        vset.coverage = null;
                    }
                    // plot vertical line if colorTag defined
                    if (!StringUtils.isBlank(this.colorTag)) {
                        for (final VariantContext vc : vset.variants) {
                            if (!vc.hasAttribute(this.colorTag))
                                continue;
                            final String cssColor = vc.getAttributeAsString(this.colorTag, "");
                            if (StringUtils.isBlank(cssColor))
                                continue;
                            final double x0 = base2pixel.applyAsDouble(vc.getStart());
                            final Element line = element("line");
                            line.setAttribute("class", "vc");
                            line.setAttribute("style", "stroke:" + cssColor);
                            line.setAttribute("x1", format(x0));
                            line.setAttribute("y1", "0");
                            line.setAttribute("x2", format(x0));
                            line.setAttribute("y2", format(sample_height));
                            g_vset.appendChild(line);
                        }
                    }
                    // print all variants in this vcfset for this sample
                    for (final VariantContext vc : vset.variants) {
                        final Genotype gt = vc.getGenotype(sn);
                        if (gt.isNoCall())
                            continue;
                        if (hide_hom_ref && gt.isHomRef())
                            continue;
                        if (gt.hasGQ() && gt.getGQ() < this.minGQ)
                            continue;
                        final OptionalDouble alt_ratio = getAltRatio(gt);
                        if (!alt_ratio.isPresent())
                            continue;
                        final OptionalDouble af = getAF(vc);
                        final double circle_radius = min_circle_radius + (max_circle_radius - min_circle_radius) * (1.0 - af.orElse(1.0));
                        // HOMREF=0;  HET =0.5; HOMVAR = 1;
                        final double gtx = base2pixel.applyAsDouble(vc.getStart());
                        final double gty = sample_height - (sample_height2 * alt_ratio.getAsDouble() + (sample_height - sample_height2) / 2.0);
                        final Element circle = element("circle");
                        circle.setAttribute("class", gt.getType().name());
                        circle.setAttribute("cx", format(gtx));
                        circle.setAttribute("cy", format(gty));
                        circle.setAttribute("r", format(circle_radius));
                        if (!remove_tooltip)
                            circle.appendChild(element("title", vc.getStart() + " " + (vc.hasID() ? vc.getID() : "") + " " + vc.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining("/")) + " " + gt.getType().name() + " AF=" + format(af.orElse(-1))));
                        g_vset.appendChild(wrapLoc(circle, vc));
                        got_this_sample = true;
                    }
                }
                final Element frame_sample = element("rect");
                frame_sample.setAttribute("class", "frame");
                frame_sample.setAttribute("x", "0");
                frame_sample.setAttribute("y", "0");
                frame_sample.setAttribute("width", format(drawingAreaWidth));
                frame_sample.setAttribute("height", format(sample_height));
                g_sample.appendChild(frame_sample);
                final Element label = element("text", sn + (maxCoverage == 0 ? "" : " Max Cov. " + maxCoverage));
                label.setAttribute("class", "samplelabel");
                label.setAttribute("x", "0");
                label.setAttribute("y", "0");
                // label.setAttribute("transform", "translate("+format(-10)+","+0+") rotate(90) ");
                label.setAttribute("transform", "translate(12,12)");
                if (!remove_tooltip)
                    label.appendChild(element("title", sn));
                g_sample.appendChild(label);
                if (got_this_sample) {
                    got_n_samples++;
                    main_g.appendChild(g_sample);
                    y += sample_height + space_between_samples;
                } else {
                    LOG.warn("no valid data for sample " + sn + " in " + bed);
                }
            }
            // remove extra sample space
            y -= space_between_samples;
            svgRoot.setAttribute("width", format(this.image_width_pixel + 1));
            svgRoot.setAttribute("height", format(y + 1));
            if (got_n_samples == 0) {
                LOG.info("no sample/genotype found for " + bed);
                return;
            }
            // save
            final Transformer tr = TransformerFactory.newInstance().newTransformer();
            final String filename = bed.getContig() + "_" + bed.getStart() + "_" + bed.getEnd() + ".svg" + (this.compressed_svg ? ".gz" : "");
            LOG.info("writing " + filename);
            if (this.compressed_svg) {
                try (final OutputStream pw = archive.openOuputStream(filename)) {
                    try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                        tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                        gzout.finish();
                        gzout.flush();
                    }
                    pw.flush();
                }
            } else {
                try (final PrintWriter pw = archive.openWriter(filename)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(pw));
                    pw.flush();
                }
            }
            manifest.print(bed.getContig());
            manifest.print("\t");
            manifest.print(bed.getStart() - 1);
            manifest.print("\t");
            manifest.print(bed.getEnd());
            manifest.print("\t");
            manifest.print(filename);
            manifest.println();
        } catch (final Throwable err) {
            throw new RuntimeException(err);
        } finally {
            this.document = null;
        }
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) GZIPOutputStream(java.util.zip.GZIPOutputStream) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StreamResult(javax.xml.transform.stream.StreamResult) Genotype(htsjdk.variant.variantcontext.Genotype) OptionalDouble(java.util.OptionalDouble) DocumentBuilder(javax.xml.parsers.DocumentBuilder)

Example 45 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class CnvSlidingWindow method doWork.

@Override
public int doWork(final List<String> args) {
    final List<Sample> samples = new ArrayList<>();
    try {
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
        final DistanceParser distParser = new DistanceParser();
        final int[] windows_array = Arrays.stream(CharSplitter.SEMICOLON.split(windowDefs)).filter(S -> !StringUtils.isBlank(S)).mapToInt(N -> distParser.applyAsInt(N)).toArray();
        if (windows_array.length == 0) {
            LOG.error("No window defined.");
            return -1;
        }
        if (windows_array.length % 2 != 0) {
            LOG.error("odd number of windows ? " + this.windowDefs);
            return -1;
        }
        final List<Path> inputBams = IOUtils.unrollPaths(args);
        if (inputBams.isEmpty()) {
            LOG.error("input bam file missing.");
            return -1;
        }
        final Set<String> sampleNames = new TreeSet<>();
        for (final Path samFile : inputBams) {
            final Sample sample = new Sample(samFile);
            if (sampleNames.contains(sample.name)) {
                LOG.error("duplicate sample " + sample.name);
                sample.close();
                return -1;
            }
            sampleNames.add(sample.name);
            samples.add(sample);
            SequenceUtil.assertSequenceDictionariesEqual(dict, sample.dict);
        }
        final List<Locatable> contigs = dict.getSequences().stream().filter(SR -> !SR.getSequenceName().matches(this.excludeRegex)).collect(Collectors.toCollection(ArrayList::new));
        if (this.excludeBedPath != null) {
            final BedLineCodec bedLineCodec = new BedLineCodec();
            final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(dict);
            try (BufferedReader br = IOUtils.openPathForBufferedReading(this.excludeBedPath)) {
                final List<SimpleInterval> exclude = br.lines().filter(L -> !BedLine.isBedHeader(L)).map(L -> bedLineCodec.decode(L)).filter(B -> B != null && !StringUtils.isBlank(ctgConverter.apply(B.getContig()))).map(B -> new SimpleInterval(ctgConverter.apply(B.getContig()), B.getStart(), B.getEnd())).collect(Collectors.toList());
                boolean done = false;
                while (!done) {
                    done = true;
                    int i = 0;
                    while (i < contigs.size()) {
                        final Locatable contig = contigs.get(i);
                        final Locatable overlapper = exclude.stream().filter(EX -> EX.overlaps(contig)).findAny().orElse(null);
                        if (overlapper != null) {
                            contigs.remove(i);
                            contigs.addAll(split(contig, overlapper));
                            done = false;
                        } else {
                            i++;
                        }
                    }
                }
            }
        }
        contigs.sort(new ContigDictComparator(dict).createLocatableComparator());
        final Allele ref_allele = Allele.create("N", true);
        final Allele dup_allele = Allele.create("<DUP>", false);
        final Allele del_allele = Allele.create("<DEL>", false);
        final Function<Integer, List<Allele>> cnv2allele = CNV -> {
            switch(CNV) {
                case 0:
                    return Arrays.asList(ref_allele, ref_allele);
                case 1:
                    return Arrays.asList(ref_allele, dup_allele);
                case 2:
                    return Arrays.asList(dup_allele, dup_allele);
                case -1:
                    return Arrays.asList(ref_allele, del_allele);
                case -2:
                    return Arrays.asList(del_allele, del_allele);
                default:
                    throw new IllegalArgumentException("cnv:" + CNV);
            }
        };
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY);
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.END_KEY);
        final VCFFormatHeaderLine fmtLeftCov = new VCFFormatHeaderLine("LC", 1, VCFHeaderLineType.Float, "Left median coverage.");
        final VCFFormatHeaderLine fmtMidCov = new VCFFormatHeaderLine("MC", 1, VCFHeaderLineType.Float, "Middle median coverage.");
        final VCFFormatHeaderLine fmtRightCov = new VCFFormatHeaderLine("RC", 1, VCFHeaderLineType.Float, "right median coverage.");
        final VCFFormatHeaderLine fmtLeftMedian = new VCFFormatHeaderLine("LM", 1, VCFHeaderLineType.Float, "Left normalized median coverage.");
        final VCFFormatHeaderLine fmtMidMedian = new VCFFormatHeaderLine("MM", 1, VCFHeaderLineType.Float, "Middle normalized median coverage.");
        final VCFFormatHeaderLine fmtRightMedian = new VCFFormatHeaderLine("RM", 1, VCFHeaderLineType.Float, "right normalized median coverage.");
        metaData.add(fmtLeftCov);
        metaData.add(fmtMidCov);
        metaData.add(fmtRightCov);
        metaData.add(fmtLeftMedian);
        metaData.add(fmtMidMedian);
        metaData.add(fmtRightMedian);
        final VCFHeader header = new VCFHeader(metaData, sampleNames);
        header.setSequenceDictionary(dict);
        JVarkitVersion.getInstance().addMetaData(this, header);
        final VariantContextWriter vcw = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
        vcw.writeHeader(header);
        for (final Locatable contig : contigs) {
            System.gc();
            final short[] array = new short[contig.getLengthOnReference()];
            final SortingCollection<Gt> sorter = SortingCollection.newInstance(Gt.class, new GtCodec(), (A, B) -> A.compare1(B), this.sorting.getMaxRecordsInRam(), this.sorting.getTmpPaths());
            for (int bam_index = 0; bam_index < samples.size(); bam_index++) {
                final Sample sampleBam = samples.get(bam_index);
                Arrays.fill(array, (short) 0);
                try (SAMRecordIterator iter = sampleBam.samReader.queryOverlapping(contig.getContig(), contig.getStart(), contig.getEnd())) {
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (rec.getDuplicateReadFlag())
                            continue;
                        if (rec.isSecondaryOrSupplementary())
                            continue;
                        if (rec.getReadFailsVendorQualityCheckFlag())
                            continue;
                        final Cigar cigar = rec.getCigar();
                        if (cigar == null || cigar.isEmpty())
                            continue;
                        int refPos = rec.getStart();
                        for (final CigarElement ce : cigar) {
                            final CigarOperator op = ce.getOperator();
                            if (op.consumesReferenceBases()) {
                                if (op.consumesReadBases()) {
                                    for (int i = 0; i < ce.getLength(); i++) {
                                        final int idx = refPos - contig.getStart() + i;
                                        if (idx < 0)
                                            continue;
                                        if (idx >= array.length)
                                            break;
                                        if (array[idx] == Short.MAX_VALUE)
                                            continue;
                                        array[idx]++;
                                    }
                                }
                                refPos += ce.getLength();
                            }
                        }
                    }
                }
                for (int widx = 0; widx < windows_array.length; widx += 2) {
                    final int window_size = windows_array[widx + 0];
                    final int extend = (int) Math.ceil(window_size * this.extend);
                    if (extend <= 0)
                        continue;
                    if (window_size > contig.getLengthOnReference())
                        continue;
                    final int window_shift = windows_array[widx + 1];
                    LOG.info(contig + " " + window_size + "+-" + extend + ";" + window_shift + " " + sampleBam.name);
                    final Coverage leftcov = new Coverage(extend);
                    final Coverage rightcov = new Coverage(extend);
                    final Coverage leftrightcov = new Coverage(extend + extend);
                    final Coverage midcov = new Coverage(window_size);
                    for (int pos1 = contig.getStart(); pos1 + window_size + extend + extend < contig.getEnd(); pos1 += window_shift) {
                        leftcov.reset();
                        rightcov.reset();
                        leftrightcov.reset();
                        midcov.reset();
                        for (int x = 0; x < extend; x++) {
                            final int idx = pos1 - contig.getStart() + x;
                            leftcov.add(array[idx]);
                            leftrightcov.add(array[idx]);
                        }
                        final double leftMedian = leftcov.median();
                        if (leftMedian < this.min_depth)
                            continue;
                        for (int x = 0; x < extend; x++) {
                            final int idx = pos1 - contig.getStart() + extend + window_size + x;
                            rightcov.add(array[idx]);
                            leftrightcov.add(array[idx]);
                        }
                        final double rightMedian = rightcov.median();
                        if (rightMedian < this.min_depth)
                            continue;
                        for (int x = 0; x < window_size; x++) {
                            final int idx = pos1 - contig.getStart() + extend + x;
                            midcov.add(array[idx]);
                        }
                        final double median = leftrightcov.median();
                        if (rightcov.median() < this.min_depth)
                            continue;
                        for (int x = 0; x < midcov.count; x++) {
                            midcov.array[x] /= median;
                        }
                        for (int x = 0; x < leftcov.count; x++) {
                            leftcov.array[x] /= median;
                        }
                        for (int x = 0; x < rightcov.count; x++) {
                            rightcov.array[x] /= median;
                        }
                        final double norm_depth = midcov.median();
                        final int cnv = getCNVIndex(norm_depth);
                        if (cnv != CNV_UNDEFINED && cnv != 1 && getCNVIndex(leftcov.median()) == 1 && getCNVIndex(rightcov.median()) == 1) {
                            final Gt gt = new Gt();
                            gt.start = pos1 + extend;
                            gt.end = gt.start + window_size;
                            gt.sample_idx = bam_index;
                            gt.cnv = cnv;
                            gt.leftMedian = (float) leftMedian;
                            gt.midMedian = (float) median;
                            gt.rightMedian = (float) rightMedian;
                            gt.leftCov = (float) leftcov.median();
                            gt.midCov = (float) norm_depth;
                            gt.rightCov = (float) rightcov.median();
                            sorter.add(gt);
                        }
                    }
                }
            }
            sorter.setDestructiveIteration(true);
            try (CloseableIterator<Gt> iter = sorter.iterator()) {
                final EqualRangeIterator<Gt> eq = new EqualRangeIterator<>(iter, (A, B) -> A.compare0(B));
                while (eq.hasNext()) {
                    final List<Gt> row = eq.next();
                    if (row.isEmpty())
                        continue;
                    final Gt first = row.get(0);
                    final Set<Allele> alleles = new HashSet<>();
                    row.stream().flatMap(GT -> cnv2allele.apply(GT.cnv).stream()).forEach(CNV -> alleles.add(CNV));
                    alleles.add(ref_allele);
                    if (alleles.size() < 2)
                        continue;
                    final VariantContextBuilder vcb = new VariantContextBuilder(null, contig.getContig(), first.start, first.end, alleles);
                    vcb.attribute(VCFConstants.END_KEY, first.end);
                    final List<Genotype> genotypes = new ArrayList<>(samples.size());
                    for (final Gt gt : row) {
                        final GenotypeBuilder gb = new GenotypeBuilder(samples.get(gt.sample_idx).name, cnv2allele.apply(gt.cnv));
                        gb.attribute(fmtLeftMedian.getID(), (double) gt.leftMedian);
                        gb.attribute(fmtMidMedian.getID(), (double) gt.midMedian);
                        gb.attribute(fmtRightMedian.getID(), (double) gt.rightMedian);
                        gb.attribute(fmtLeftCov.getID(), (double) gt.leftCov);
                        gb.attribute(fmtMidCov.getID(), (double) gt.midCov);
                        gb.attribute(fmtRightCov.getID(), (double) gt.rightCov);
                        genotypes.add(gb.make());
                    }
                    vcb.genotypes(genotypes);
                    vcw.add(vcb.make());
                }
                eq.close();
            }
            sorter.cleanup();
        }
        vcw.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        samples.forEach(S -> CloserUtil.close(S));
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFConstants(htsjdk.variant.vcf.VCFConstants) Locatable(htsjdk.samtools.util.Locatable) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Closeable(java.io.Closeable) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) List(java.util.List) ArrayList(java.util.ArrayList) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SAMRecord(htsjdk.samtools.SAMRecord) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Path(java.nio.file.Path) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32