Search in sources :

Example 81 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VCFShuffle method doWork.

@Override
public int doWork(final List<String> args) {
    if (seed == -1L)
        seed = System.currentTimeMillis();
    SortingCollection<RLine> shuffled = null;
    VariantContextWriter out = null;
    BufferedReader lr = null;
    try {
        lr = super.openBufferedReader(oneFileOrNull(args));
        out = super.openVariantContextWriter(this.outputFile);
        final Random random = new Random(this.seed);
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(lr);
        final VCFHeader header = cah.header;
        super.addMetaData(header);
        out.writeHeader(header);
        LOG.info("shuffling");
        shuffled = SortingCollection.newInstance(RLine.class, new RLineCodec(), (o1, o2) -> {
            final int i = o1.rand < o2.rand ? -1 : o1.rand > o2.rand ? 1 : 0;
            if (i != 0)
                return i;
            return o1.line.compareTo(o2.line);
        }, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        shuffled.setDestructiveIteration(true);
        String line;
        while ((line = lr.readLine()) != null) {
            final RLine rLine = new RLine();
            rLine.rand = random.nextLong();
            rLine.line = line;
            shuffled.add(rLine);
        }
        shuffled.doneAdding();
        LOG.info("done shuffling");
        final CloseableIterator<RLine> iter = shuffled.iterator();
        while (iter.hasNext()) {
            final VariantContext ctx = cah.codec.decode(iter.next().line);
            out.add(ctx);
            if (out.checkError())
                break;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (shuffled != null)
            shuffled.cleanup();
        CloserUtil.close(shuffled);
        CloserUtil.close(lr);
        CloserUtil.close(out);
    }
}
Also used : DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) Random(java.util.Random) File(java.io.File) ParametersDelegate(com.beust.jcommander.ParametersDelegate) List(java.util.List) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) Random(java.util.Random) BufferedReader(java.io.BufferedReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 82 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfSimulator method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.indexedFastaSequenceFile == null) {
        LOG.error("Reference is undefined");
        return -1;
    }
    if (!args.isEmpty()) {
        LOG.error("too many arguments");
        return -1;
    }
    if (this.randomSeed == -1L) {
        this.random = new Random(System.currentTimeMillis());
    } else {
        this.random = new Random(this.randomSeed);
    }
    if (this.numSamples < 0) {
        this.numSamples = 1 + this.random.nextInt(10);
    }
    while (this.samples.size() < numSamples) this.samples.add("SAMPLE" + (1 + this.samples.size()));
    VariantContextWriter writer = null;
    PrintStream pw = null;
    try {
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
        VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
        final VCFHeader header = new VCFHeader(metaData, this.samples);
        header.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        calc.setHeader(header);
        pw = super.openFileOrStdoutAsPrintStream(this.outputFile);
        writer = VCFUtils.createVariantContextWriterToOutputStream(pw);
        writer.writeHeader(header);
        long countVariantsSoFar = 0;
        for (; ; ) {
            if (pw.checkError())
                break;
            if (this.numberOfVariants >= 0 && countVariantsSoFar >= this.numberOfVariants)
                break;
            for (final SAMSequenceRecord ssr : this.indexedFastaSequenceFile.getSequenceDictionary().getSequences()) {
                if (pw.checkError())
                    break;
                final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
                for (int pos = 1; pos <= ssr.getSequenceLength(); ++pos) {
                    if (pw.checkError())
                        break;
                    char REF = Character.toUpperCase(genomic.charAt(pos - 1));
                    if (REF == 'N')
                        continue;
                    char ALT = 'N';
                    switch(REF) {
                        case 'A':
                            ALT = "TGC".charAt(random.nextInt(3));
                            break;
                        case 'T':
                            ALT = "AGC".charAt(random.nextInt(3));
                            break;
                        case 'G':
                            ALT = "TAC".charAt(random.nextInt(3));
                            break;
                        case 'C':
                            ALT = "TGA".charAt(random.nextInt(3));
                            break;
                        default:
                            ALT = 'N';
                    }
                    if (ALT == 'N')
                        continue;
                    final Allele refAllele = Allele.create((byte) REF, true);
                    Allele altAllele = Allele.create((byte) ALT, false);
                    final VariantContextBuilder cb = new VariantContextBuilder();
                    cb.chr(genomic.getChrom());
                    cb.start(pos);
                    cb.stop(pos);
                    List<Genotype> genotypes = new ArrayList<Genotype>(samples.size());
                    for (String sample : samples) {
                        final Allele a1 = (random.nextBoolean() ? refAllele : altAllele);
                        final Allele a2 = (random.nextBoolean() ? refAllele : altAllele);
                        GenotypeBuilder gb = new GenotypeBuilder(sample, Arrays.asList(a1, a2));
                        if (random.nextBoolean()) {
                            gb = new GenotypeBuilder(sample, Arrays.asList(a1, a2));
                            gb.DP(1 + random.nextInt(50));
                        }
                        genotypes.add(gb.make());
                    }
                    cb.genotypes(genotypes);
                    cb.alleles(Arrays.asList(refAllele, altAllele));
                    writer.add(calc.apply(cb.make()));
                    countVariantsSoFar++;
                }
            }
        }
        writer.close();
        writer = null;
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(pw);
        CloserUtil.close(writer);
    }
}
Also used : PrintStream(java.io.PrintStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) Random(java.util.Random) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 83 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfToSvg method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.outputFile != null && !outputFile.getName().contains(SEGMENT)) {
        LOG.error("output file must contain the word " + SEGMENT + " :" + this.outputFile);
        return -1;
    }
    TabixKnownGeneFileReader tabix = null;
    VcfIterator r = null;
    OutputStream outputStream = null;
    XMLStreamWriter w = null;
    PrintWriter manifestW = null;
    try {
        LOG.info("opening knownGene ");
        tabix = new TabixKnownGeneFileReader(knownGeneUri);
        if (manifestFile != null && this.outputFile != null) {
            manifestW = new PrintWriter(manifestFile);
        } else {
            manifestW = new PrintWriter(new NullOuputStream());
        }
        final Set<String> chromosomes = tabix.getChromosomes();
        final XMLOutputFactory xof = XMLOutputFactory.newInstance();
        r = super.openVcfIterator(super.oneFileOrNull(args));
        final VCFHeader header = r.getHeader();
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            String tabixContig = ctx.getContig();
            if (!chromosomes.contains(tabixContig)) {
                if (tabixContig.startsWith("chr")) {
                    tabixContig = tabixContig.substring(3);
                } else if (!tabixContig.startsWith("chr")) {
                    tabixContig = "chr" + tabixContig;
                }
                if (!chromosomes.contains(tabixContig)) {
                    while (r.hasNext()) {
                        final VariantContext ctx2 = r.peek();
                        if (!ctx2.getContig().equals(ctx.getContig()))
                            break;
                        r.next();
                    }
                    LOG.error("No chromosome " + ctx.getContig() + " in " + knownGeneUri + ". Check the chromosome nomenclature.");
                    continue;
                }
            }
            final List<VariantContext> variants = new ArrayList<>();
            final List<KnownGene> genes = new ArrayList<>();
            variants.add(ctx);
            int chromStart = ctx.getStart() - 1;
            int chromEnd = ctx.getEnd();
            /* walk over know gene, loop until there is no overapping transcript 
			 * over that region */
            for (; ; ) {
                genes.clear();
                /* the max chromEnd, let's see if we can get a bigger */
                int newStart = chromStart;
                int newEnd = chromEnd;
                final Iterator<KnownGene> kgr = tabix.iterator(tabixContig, chromStart, chromEnd);
                while (kgr.hasNext()) {
                    final KnownGene g = kgr.next();
                    if (this.removeNonCoding && g.isNonCoding())
                        continue;
                    genes.add(g);
                    newStart = Math.min(g.getTxStart(), newStart);
                    newEnd = Math.max(g.getTxEnd(), newEnd);
                }
                if (newStart >= chromStart && newEnd <= chromEnd) {
                    break;
                }
                chromStart = newStart;
                chromEnd = newEnd;
            }
            // intergenic, no gene over that variant
            if (genes.isEmpty())
                continue;
            // fill the variant for that region
            while (r.hasNext()) {
                final VariantContext ctx2 = r.peek();
                if (!ctx2.getContig().equals(ctx.getContig()))
                    break;
                if (ctx2.getStart() > chromEnd)
                    break;
                variants.add(r.next());
            }
            if (this.variantsInExonOnly) {
                variants.removeIf(V -> {
                    for (final KnownGene gene : genes) {
                        for (final KnownGene.Exon exon : gene.getExons()) {
                            if (V.getEnd() < exon.getStart() || V.getStart() >= exon.getEnd()) {
                            // rien
                            } else {
                                return false;
                            }
                        }
                    }
                    return true;
                });
            }
            if (this.variantFILTEREDOpacity <= 0) {
                variants.removeIf(V -> V.isFiltered());
            }
            if (this.variantIndelOpacity <= 0) {
                variants.removeIf(V -> V.isIndel());
            }
            if (variants.isEmpty())
                continue;
            LOG.info("Variants (" + variants.size() + ") Transcripts (" + genes.size() + ") " + tabixContig + ":" + chromStart + "-" + chromEnd);
            if (outputFile != null) {
                File fname = new File(outputFile.getParentFile(), outputFile.getName().replaceAll("__SEGMENT__", ctx.getContig() + "_" + chromStart + "_" + chromEnd));
                LOG.info("saving as " + fname);
                outputStream = IOUtils.openFileForWriting(fname);
                w = xof.createXMLStreamWriter(outputStream);
                manifestW.println(ctx.getContig() + "\t" + chromStart + "\t" + chromEnd + "\t" + genes.stream().map(G -> G.getName()).collect(Collectors.joining(",")) + "\t" + genes.size() + "\t" + variants.size() + "\t" + fname);
            } else {
                w = xof.createXMLStreamWriter(stdout());
            }
            double featureHeight = 10;
            double TRANSCRIPT_HEIGHT = featureHeight;
            final int all_genotypes_width = variants.size() * this.genotype_width;
            if (trimToVariants) {
                chromStart = variants.stream().map(V -> V.getStart() - 1).min((A, B) -> A.compareTo(B)).get();
                chromEnd = variants.stream().map(V -> V.getEnd() + 1).max((A, B) -> A.compareTo(B)).get();
            }
            final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
            final Interval interval = new Interval(ctx.getContig(), chromStart, chromEnd);
            final int interline_weight = 6;
            final int margin_top = 10;
            final int margin_bottom = 10;
            final int margin_right = 100;
            final int margin_left = 100;
            w.writeStartDocument("UTF-8", "1.0");
            w.writeStartElement("svg");
            w.writeDefaultNamespace(SVG.NS);
            w.writeNamespace("xlink", XLINK.NS);
            w.writeAttribute("version", "1.1");
            w.writeAttribute("width", String.valueOf(margin_right + margin_right + drawinAreaWidth));
            w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + genes.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + header.getSampleNamesInOrder().size() * this.genotype_width));
            title(w, ctx.getContig() + ":" + chromStart + "-" + chromEnd);
            w.writeStartElement("desc");
            w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
            w.writeEndElement();
            // defs
            w.writeStartElement("defs");
            // genotypes
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HOM_REF);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:lime;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.NO_CALL);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:silver;stroke;gray;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HOM_VAR);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:crimson;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.MIXED);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:pink;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.UNAVAILABLE);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:gray;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HET);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:lime;stroke;black;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(genotype_width));
            w.writeAttribute("height", String.valueOf(genotype_width));
            w.writeEmptyElement("polygon");
            w.writeAttribute("style", "fill:crimson;stroke;black;");
            w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
            w.writeEndElement();
            // strand
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandF");
            w.writeAttribute("points", "-5,-5 0,0 -5,5");
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandR");
            w.writeAttribute("points", "5,-5 0,0 5,5");
            // gradients
            w.writeStartElement("linearGradient");
            w.writeAttribute("id", "grad01");
            w.writeAttribute("x1", "50%");
            w.writeAttribute("x2", "50%");
            w.writeAttribute("y1", "0%");
            w.writeAttribute("y2", "100%");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "0%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "50%");
            w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "100%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEndElement();
            // defs
            w.writeEndElement();
            w.writeStartElement("style");
            w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
            // style
            w.writeEndElement();
            final Function<Integer, Integer> trim = new Function<Integer, Integer>() {

                @Override
                public Integer apply(final Integer t) {
                    return Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
                }
            };
            final Function<Integer, Double> baseToPixel = new Function<Integer, Double>() {

                @Override
                public Double apply(final Integer t) {
                    return margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.length());
                }
            };
            final Function<Integer, Double> variantIndexToPixel = new Function<Integer, Double>() {

                @Override
                public Double apply(final Integer idx) {
                    final double variant_width = drawinAreaWidth / (double) variants.size();
                    final double midx = variant_width * idx + variant_width / 2.0;
                    return margin_left + midx - genotype_width / 2.0;
                }
            };
            final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
            /**
             * title
             */
            double y = 0;
            w.writeStartElement("text");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", String.valueOf(featureHeight));
            w.writeCharacters(interval.toString());
            w.writeEndElement();
            y += featureHeight;
            for (final KnownGene g : genes) {
                int cdsHeigh = 5;
                double exonHeight = TRANSCRIPT_HEIGHT - 5;
                double midY = TRANSCRIPT_HEIGHT / 2;
                w.writeStartElement("g");
                w.writeAttribute("transform", "translate(0," + y + ")");
                title(w, g.getName());
                w.writeStartElement("text");
                w.writeAttribute("x", String.valueOf(margin_left - 10));
                w.writeAttribute("y", String.valueOf(featureHeight));
                w.writeAttribute("style", "text-anchor:end;");
                w.writeCharacters(g.getName());
                w.writeEndElement();
                /* transcript line */
                w.writeEmptyElement("line");
                w.writeAttribute("class", "kgtr");
                w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
                w.writeAttribute("y1", String.valueOf(midY));
                w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
                w.writeAttribute("y2", String.valueOf(midY));
                /* strand symbols */
                for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
                    double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.length();
                    if (pos0 + 1 < g.getTxStart())
                        continue;
                    if (pos0 > g.getTxEnd())
                        break;
                    w.writeEmptyElement("use");
                    w.writeAttribute("class", "kgstrand");
                    w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
                    w.writeAttribute("x", String.valueOf(margin_left + pixX));
                    w.writeAttribute("y", String.valueOf(midY));
                }
                /* exons */
                for (KnownGene.Exon exon : g.getExons()) {
                    if (exon.getStart() + 1 >= interval.getEnd())
                        continue;
                    if (exon.getEnd() <= interval.getStart())
                        continue;
                    w.writeStartElement("rect");
                    w.writeAttribute("class", "kgexon");
                    w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
                    w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
                    w.writeAttribute("height", String.valueOf(exonHeight));
                    title(w, exon.getName());
                    w.writeEndElement();
                }
                /* coding line */
                if (!g.isNonCoding()) {
                    w.writeEmptyElement("rect");
                    w.writeAttribute("class", "kgcds");
                    w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(g.getCdsStart()))));
                    w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(g.getCdsEnd())) - baseToPixel.apply((trim.apply((g.getCdsStart()))))));
                    w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
                }
                // String label=String.format("%15s", g.getName());
                // w.writeEmptyElement("path");
                // double fontHeight=Math.min(10,0.8*TRANSCRIPT_HEIGHT);
                // w.writeAttribute("d",this.hershey.svgPath(label,-insets.left,midY-fontHeight/2,insets.left*0.9,fontHeight));
                w.writeEndElement();
                w.writeCharacters("\n");
                y += featureHeight;
            }
            /* draw lines to variants */
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                double x1 = baseToPixel.apply(vc.getStart());
                double x2 = baseToPixel.apply(vc.getEnd());
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("polygon");
                w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
                w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + variantIndexToPixel.apply(vidx) + "," + y2 + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2);
                title(w, variantTitle.apply(vc));
                w.writeEndElement();
            }
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("text");
                w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("class", "postick");
                w.writeCharacters(variantTitle.apply(vc));
                w.writeEndElement();
                w.writeCharacters("\n");
            }
            y += featureHeight * interline_weight;
            w.writeStartElement("g");
            for (final String sample : header.getSampleNamesInOrder()) {
                for (int vidx = 0; vidx < variants.size(); ++vidx) {
                    final VariantContext vc = variants.get(vidx);
                    final Genotype g = vc.getGenotype(sample);
                    double opacity = 1.0;
                    if (vc.isIndel())
                        opacity *= this.variantIndelOpacity;
                    if (vc.isFiltered())
                        opacity *= this.variantFILTEREDOpacity;
                    if (opacity > 1)
                        opacity = 1;
                    if (opacity <= 0)
                        continue;
                    if (opacity < 1) {
                        w.writeStartElement("g");
                        w.writeAttribute("style", "opacity:" + opacity + ";");
                    }
                    w.writeEmptyElement("use");
                    w.writeAttribute("x", "" + variantIndexToPixel.apply(vidx));
                    w.writeAttribute("y", String.valueOf(y));
                    w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
                    if (opacity < 1) {
                        w.writeEndElement();
                    }
                }
                w.writeCharacters("\n");
                w.writeStartElement("text");
                w.writeAttribute("x", String.valueOf(margin_left - 10));
                w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
                w.writeAttribute("style", "text-anchor:end;");
                w.writeCharacters(sample);
                w.writeEndElement();
                y += this.genotype_width;
            }
            w.writeCharacters("\n");
            w.writeEndDocument();
            w.writeCharacters("\n");
            w.flush();
            w.close();
            if (outputFile != null) {
                outputStream.flush();
                outputStream.close();
                outputStream = null;
            }
            if (stop_at_first) {
                LOG.info("Stop after first SVG document");
                break;
            }
        }
        r.close();
        manifestW.flush();
        manifestW.close();
        manifestW = null;
        return 0;
    } catch (Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(tabix);
        CloserUtil.close(outputStream);
        CloserUtil.close(manifestW);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) XLINK(com.github.lindenb.jvarkit.util.ns.XLINK) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) Function(java.util.function.Function) SVG(com.github.lindenb.jvarkit.util.svg.SVG) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) CloserUtil(htsjdk.samtools.util.CloserUtil) OutputStream(java.io.OutputStream) PrintWriter(java.io.PrintWriter) Iterator(java.util.Iterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) Set(java.util.Set) Collectors(java.util.stream.Collectors) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) TabixKnownGeneFileReader(com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader) File(java.io.File) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) VariantContext(htsjdk.variant.variantcontext.VariantContext) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Function(java.util.function.Function) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) Genotype(htsjdk.variant.variantcontext.Genotype) TabixKnownGeneFileReader(com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 84 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfOptimizePedForSkat method exec.

private void exec(final long generation, final VCFHeader header, final List<VariantContext> variants, final List<Pedigree.Person> samples, final SkatFactory.SkatExecutor skatExecutor) {
    final Solution solution = new Solution();
    solution.generation = generation;
    String origin = "random";
    if (generation != 0) {
        int nRemove = 1 + this.random.nextInt(this.nSamplesRemove);
        if (generation == 1 && this.bootstrapSamples != null) {
            origin = "bootstrap";
            for (final String sample : this.bootstrapSamples.split("[; ,]")) {
                if (StringUtil.isBlank(sample))
                    continue;
                if (!samples.stream().anyMatch(S -> S.getId().equals(sample))) {
                    throw new JvarkitException.UserError("Sample " + sample + " not found in effective pedigree.");
                }
                LOG.info("bootstraping with " + sample);
                solution.sampleSet.add(sample);
            }
        } else if (generation % 5 == 0 && !this.bestSolutions.isEmpty()) {
            int sol_index = this.random.nextInt(Math.min(this.max_results_output, this.bestSolutions.size()));
            final List<String> list = new ArrayList<>(this.bestSolutions.get(sol_index).sampleSet);
            if (list.size() > 1 && this.random.nextBoolean()) {
                origin = "best-minus-random";
                list.remove(this.random.nextInt(list.size()));
            } else if (list.size() < nRemove) {
                origin = "best-plus-random";
                list.add(samples.get(this.random.nextInt(samples.size())).getId());
            }
            solution.sampleSet.addAll(list);
        } else if (generation % 7 == 0 && this.bestSolutions.size() > 2) {
            final Set<String> set = new HashSet<>(this.bestSolutions.get(0).sampleSet);
            set.addAll(this.bestSolutions.get(1).sampleSet);
            final List<String> bestsamples = new ArrayList<>(set);
            Collections.shuffle(bestsamples, this.random);
            while (bestsamples.size() > nRemove) {
                bestsamples.remove(0);
            }
            solution.sampleSet.addAll(bestsamples);
            origin = "best0-plus-best1";
        } else {
            while (nRemove > 0) {
                final String sampleId;
                if (generation % 3 == 0L && nRemove % 2 == 0 && this.bestSolutions.size() > 0 && !this.bestSolutions.get(0).sampleSet.isEmpty()) {
                    final List<String> bestsamples = new ArrayList<>(this.bestSolutions.get(0).sampleSet);
                    sampleId = bestsamples.get(this.random.nextInt(bestsamples.size()));
                    origin = "random-plus-best0";
                } else {
                    sampleId = samples.get(this.random.nextInt(samples.size())).getId();
                }
                solution.sampleSet.add(sampleId);
                nRemove--;
            }
        }
    } else {
        origin = "original";
    }
    if (generation > 0 && solution.sampleSet.isEmpty())
        return;
    while (solution.sampleSet.size() > this.nSamplesRemove) {
        LOG.warn("Hum... to many for " + origin);
        final List<String> L = new ArrayList<>(solution.sampleSet);
        while (L.size() > 0 && L.size() > this.nSamplesRemove) L.remove(this.random.nextInt(L.size()));
        solution.sampleSet.clear();
        solution.sampleSet.addAll(L);
    }
    if (this.bestSolutions.contains(solution))
        return;
    solution.origin = origin;
    final List<Pedigree.Person> ped2 = new ArrayList<>(samples);
    ped2.removeIf(I -> solution.sampleSet.contains(I.getId()));
    if (ped2.isEmpty())
        return;
    if (!ped2.stream().anyMatch(P -> P.isAffected()))
        return;
    if (!ped2.stream().anyMatch(P -> P.isUnaffected()))
        return;
    // test MAF et Fisher
    final VCFHeader header2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
    final VcfBurdenFisherH.CtxWriterFactory fisherhFactory = new VcfBurdenFisherH.CtxWriterFactory();
    fisherhFactory.setCaseControlExtractor((H) -> new HashSet<>(ped2));
    fisherhFactory.setIgnoreFiltered(true);
    final VcfBurdenMAF.CtxWriterFactory mafFactory = new VcfBurdenMAF.CtxWriterFactory();
    mafFactory.setCaseControlExtractor((H) -> new HashSet<>(ped2));
    mafFactory.setIgnoreFiltered(true);
    final VariantCollector collector = new VariantCollector();
    VariantContextWriter vcw = mafFactory.open(collector);
    vcw = fisherhFactory.open(vcw);
    vcw.writeHeader(header2);
    for (final VariantContext vc : variants) {
        vcw.add(vc);
    }
    vcw.close();
    CloserUtil.close(fisherhFactory);
    CloserUtil.close(mafFactory);
    // 
    solution.result = skatExecutor.execute(collector.variants, ped2);
    if (solution.result.isError())
        return;
    if (this.bestSolutions.isEmpty() || solution.compareTo(this.bestSolutions.get(this.bestSolutions.size() - 1)) < 0) {
        this.bestSolutions.add(solution);
        if (this.firstScore == null) {
            this.firstScore = solution.result.getPValue();
        }
        Collections.sort(this.bestSolutions);
        final int BUFFER_RESULT = 1000;
        while (this.bestSolutions.size() > BUFFER_RESULT) {
            this.bestSolutions.remove(this.bestSolutions.size() - 1);
        }
        if (this.bestSolutions.indexOf(solution) < this.max_results_output) {
            final StringBuilder sb = new StringBuilder();
            sb.append(">>> ").append(generation).append("\n");
            this.bestSolutions.stream().limit(this.max_results_output).forEach(S -> sb.append(S).append("\n"));
            sb.append("<<< ").append(generation).append("\n");
            if (outputFile == null) {
                stdout().println(sb.toString());
            } else {
                stderr().println(sb.toString());
                IOUtils.cat(sb.toString(), this.outputFile, false);
            }
        }
    }
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Random(java.util.Random) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) HashSet(java.util.HashSet)

Example 85 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfMultiToOneInfo method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader srcHeader = in.getHeader();
    final VCFInfoHeaderLine srcInfo = srcHeader.getInfoHeaderLine(this.infoTag);
    if (srcInfo == null) {
        LOG.error("Cannot find INFO FIELD '" + this.infoTag + "'");
        return -1;
    }
    switch(srcInfo.getCountType()) {
        case INTEGER:
            break;
        case UNBOUNDED:
            break;
        default:
            {
                LOG.error("CountType is not supported '" + srcInfo.getCountType() + "'");
                return -1;
            }
    }
    switch(srcInfo.getType()) {
        case Flag:
            {
                LOG.error("Type is not supported '" + srcInfo.getType() + "'");
                return -1;
            }
        default:
            break;
    }
    final VCFHeader destHeader = new VCFHeader(srcHeader);
    super.addMetaData(destHeader);
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(srcHeader);
    out.writeHeader(destHeader);
    while (in.hasNext()) {
        final VariantContext ctx = progess.watch(in.next());
        final List<Object> L = ctx.getAttributeAsList(srcInfo.getID());
        if (L.isEmpty() || L.size() == 1) {
            out.add(ctx);
            continue;
        }
        for (final Object o : L) {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(srcInfo.getID(), o);
            out.add(vcb.make());
        }
    }
    progess.finish();
    LOG.info("done");
    return RETURN_OK;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34