Search in sources :

Example 11 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class VcfToPostscript method doWork.

@Override
public int doWork(List<String> args) {
    VcfIterator iter = null;
    BufferedReader r = null;
    try {
        iter = super.openVcfIterator(oneFileOrNull(args));
        this.outw = super.openFileOrStdoutAsPrintStream(this.outputFile);
        final SAMSequenceDictionary dict = iter.getHeader().getSequenceDictionary();
        LOG.info("Reading " + this.ucscKnownGene);
        r = IOUtils.openURIForBufferedReading(this.ucscKnownGene);
        int nKG = 0;
        Pattern tab = Pattern.compile("[\t]");
        String line;
        while ((line = r.readLine()) != null) {
            String[] tokens = tab.split(line);
            KnownGene g = new KnownGene(tokens);
            if (dict != null && dict.getSequence(g.getContig()) == null)
                continue;
            List<KnownGene> kg = this.chrom2knownGenes.get(g.getContig());
            if (kg == null) {
                kg = new ArrayList<KnownGene>();
                this.chrom2knownGenes.put(g.getContig(), kg);
            }
            kg.add(g);
            ++nKG;
        }
        r.close();
        LOG.info("Done Reading knownGenes. N=" + nKG);
        final double ticksH = (fHeight / 2.0f) * 0.6f;
        final double ticksx = 20;
        this.outw.print("%!PS\n" + "%%Creator: Pierre Lindenbaum PhD plindenbaum@yahoo.fr http://plindenbaum.blogspot.com\n" + "%%Title: " + getClass().getSimpleName() + "\n" + "%%CreationDate: " + new Date() + "\n" + "%%EndComments\n" + "%%BoundingBox: 0 0 " + (this.pageDef.width + margin.left + margin.right) + " " + (this.pageDef.height + margin.top + margin.bottom) + "\n" + "/Courier findfont 9 scalefont setfont\n" + "/circle { 10 0 360 arc} bind def\n" + "/ticksF {\n" + (-ticksH) + " " + (ticksH) + " rmoveto\n" + ticksH + " " + (-ticksH) + " rlineto\n" + (-ticksH) + " " + (-ticksH) + " rlineto\n" + ticksH + " " + ticksH + " rmoveto\n" + "} bind def\n" + "/ticksR {\n" + (ticksH) + " " + (ticksH) + " rmoveto\n" + (-ticksH) + " " + (-ticksH) + " rlineto\n" + (ticksH) + " " + (-ticksH) + " rlineto\n" + (-ticksH) + " " + (ticksH) + " rmoveto\n" + "} bind def\n" + "/forticksF {2 dict begin\n" + "/x2 exch def\n" + "/x1 exch def\n" + "0 1 x2 x1 sub " + ticksx + " div {\n" + "ticksF " + ticksx + " 0 rmoveto\n" + "}for\n" + "} bind def\n" + "/forticksR {2 dict begin\n" + "/x2 exch def\n" + "/x1 exch def\n" + "0 1 x2 x1 sub " + ticksx + " div {\n" + " ticksR  " + ticksx + " 0 rmoveto\n" + "}for\n" + "} bind def\n" + "/box\n" + "{\n" + "4 dict begin\n" + "/height exch def\n" + "/width exch def\n" + "/y exch def\n" + "/x exch def\n" + "x y moveto\n" + "width 0 rlineto\n" + "0 height rlineto\n" + "width -1 mul 0 rlineto\n" + "0 height -1 mul rlineto\n" + "end\n" + "} bind def\n" + "/gradient\n" + "{\n" + "4 dict begin\n" + "/height exch def\n" + "/width exch def\n" + "/y exch def\n" + "/x exch def\n" + "/i 0 def\n" + "height 2 div /i exch def\n" + "\n" + "0 1 height 2 div {\n" + "	1 i height 2.0 div div sub setgray\n" + "	newpath\n" + "	x  \n" + "	y height 2 div i sub  add\n" + "	width\n" + "	i 2 mul\n" + "	box\n" + "	closepath\n" + "	fill\n" + "	i 1 sub /i exch def\n" + "	}for\n" + "newpath\n" + "0 setgray\n" + "0.4 setlinewidth\n" + "x y width height box\n" + "closepath\n" + "stroke\n" + "end\n" + "} bind def\n");
        run(iter);
        iter.close();
        this.outw.print("\n%%Trailer\n%%EOF\n");
        this.outw.flush();
        this.outw.close();
        this.outw = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(this.outw);
    }
}
Also used : Pattern(java.util.regex.Pattern) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Date(java.util.Date) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) BufferedReader(java.io.BufferedReader) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Example 12 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class VcfToPostscript method print.

private void print() {
    if (this.positions.isEmpty()) {
        return;
    }
    ++count_pages_printed;
    final KnownGene first = genes.get(0);
    final double fHeight = 20;
    final Dimension localPage = new Dimension((int) (this.pageDef.width), (int) (margin.top + margin.bottom + (this.genes.size() + 1) * fHeight + this.sample2positions.size() * fHeight));
    this.outw.println("\n%%Page: " + count_pages_printed + " " + count_pages_printed);
    this.outw.println("gsave");
    this.outw.println(1.0 + " " + (localPage.height <= this.pageDef.height ? 1.0 : 1.0 / (localPage.getHeight() / (float) this.pageDef.getHeight())) + " scale");
    // this.outw.println( "%%BoundingBox: 0 0 "  + page.width +  " "  + page.height  );
    float midy = (float) (fHeight / 2.0f);
    double cdsHeight = fHeight * 0.4;
    double exonHeight = fHeight * 0.9;
    this.outw.println("2 " + (localPage.height - 10) + " moveto (" + first.getChromosome() + ":" + this.chromStart + "-" + this.chromEnd + ") show");
    this.outw.println("1 0 0 setrgbcolor");
    this.outw.println("0.3 setlinewidth");
    for (Integer r : this.positions) {
        this.outw.print("newpath " + (float) toPixel(r) + " 0 moveto 0 " + localPage.height + " rlineto stroke\n");
        this.outw.print((float) toPixel(r) + " " + (localPage.height - 5) + " moveto -90 rotate (" + (r) + ") show 90 rotate\n");
    }
    for (int i = 0; i < this.genes.size(); ++i) {
        final KnownGene g = this.genes.get(i);
        this.outw.println("gsave");
        this.outw.println("0 " + (localPage.height - margin.top - (fHeight * i)) + " translate");
        double x1 = toPixel(g.getTxStart());
        double x2 = toPixel(g.getTxEnd());
        this.outw.print("0 0 0 setrgbcolor\n");
        NEWPATH();
        MOVETO(x1, midy);
        LINETO(x2, midy);
        STROKE();
        // draw ticks
        this.outw.print("0.2 setlinewidth\n");
        this.outw.print("newpath\n");
        this.outw.print(x1 + " " + midy + " moveto\n");
        this.outw.print(x1 + " " + x2 + (g.isPositiveStrand() ? " forticksF" : " forticksR") + "\n");
        this.outw.print("closepath stroke\n");
        this.outw.print("0.5 setlinewidth\n");
        // draw txStart/txEnd
        this.outw.print("0.1 0.1 0.5 setrgbcolor\n" + "newpath\n" + +toPixel(g.getCdsStart()) + " " + +(midy - cdsHeight / 2.0) + " " + (toPixel(g.getCdsEnd()) - toPixel(g.getCdsStart())) + " " + cdsHeight + " box closepath fill\n");
        // draw each exon
        for (int j = 0; j < g.getExonCount(); ++j) {
            this.outw.print(toPixel(g.getExon(j).getStart()) + " " + (midy - exonHeight / 2.0) + " " + (float) (toPixel(g.getExon(j).getEnd()) - toPixel(g.getExon(j).getStart())) + " " + exonHeight + " gradient\n");
        }
        // draw name
        this.outw.print("0 0 0 setrgbcolor\n");
        this.outw.print("10 " + midy + " moveto (" + g.getName() + ") show\n");
        this.outw.println("grestore");
    }
    // samples
    {
        double y = localPage.height - margin.top - (fHeight * (this.genes.size() + 1));
        for (String sample : this.sample2positions.keySet()) {
            this.outw.print("0.2 setlinewidth\n");
            this.outw.print("0 0 0 setrgbcolor\n");
            this.outw.print("10 " + (y - midy + 5) + " moveto (" + sample + ") show\n");
            this.outw.print("newpath " + margin.left + " " + y + " moveto\n" + pageDef.width + " " + 0 + " rlineto stroke\n");
            for (Integer r2 : this.sample2positions.get(sample)) {
                this.outw.print("0.8 setlinewidth\n");
                this.outw.print("newpath " + toPixel(r2) + " " + y + " circle closepath stroke\n");
            }
            y -= fHeight;
        }
    }
    this.outw.println("grestore");
    this.outw.print("showpage\n");
}
Also used : KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) Dimension(java.awt.Dimension)

Example 13 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class FindNewSpliceSites method findJunction.

private boolean findJunction(Collection<KnownGene> genes, int start1, int end1) {
    for (final KnownGene g : genes) {
        for (int k = 0; k + 1 < g.getExonCount(); ++k) {
            final Exon ex0 = g.getExon(k);
            final Exon ex1 = g.getExon(k + 1);
            if (is_close_to(ex0.getEnd() + 1, start1) && is_close_to(ex1.getStart() + 1, end1)) {
                return true;
            }
        }
    }
    return false;
}
Also used : Exon(com.github.lindenb.jvarkit.util.ucsc.KnownGene.Exon) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Example 14 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class FindNewSpliceSites method scanRead.

private void scanRead(final SAMRecord rec, final SAMSequenceDictionary dict) {
    final Cigar cigar = rec.getCigar();
    // if(cigar==null || !rec.getCigarString().contains("N")) return; //aleady checked
    final Interval interval = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd());
    final List<KnownGene> genes = new ArrayList<>();
    for (final List<KnownGene> list : this.knownGenesMap.getOverlapping(interval)) {
        genes.addAll(list);
    }
    if (genes.isEmpty()) {
        return;
    }
    int refPos1 = rec.getAlignmentStart();
    for (int cIdx = 0; cIdx < cigar.numCigarElements(); ++cIdx) {
        CigarElement ce = cigar.getCigarElement(cIdx);
        switch(ce.getOperator()) {
            case S:
                break;
            case I:
                break;
            case N:
                {
                    if (cIdx + 1 < cigar.numCigarElements() && isMatch(cigar.getCigarElement(cIdx + 1)) && !findJunction(genes, refPos1 - 1, refPos1 + ce.getLength())) {
                        // unknown junction
                        this.sfw.addAlignment(rec);
                        return;
                    }
                    refPos1 += ce.getLength();
                    break;
                }
            case D:
            case X:
            case EQ:
            case M:
                {
                    refPos1 += ce.getLength();
                    break;
                }
            // ignore
            case H:
            // ignore
            case P:
                break;
            default:
                throw new RuntimeException("operator not handled. ops.");
        }
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) ArrayList(java.util.ArrayList) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) CigarElement(htsjdk.samtools.CigarElement) Interval(htsjdk.samtools.util.Interval)

Example 15 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class FindNewSpliceSites method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.knownGeneUri == null || this.knownGeneUri.trim().isEmpty()) {
        LOG.error("known Gene file undefined");
        return -1;
    }
    SamReader sfr = null;
    try {
        final Pattern tab = Pattern.compile("[\t]");
        {
            LOG.info("Opening " + this.knownGeneUri);
            LineIterator r = IOUtils.openURIForLineIterator(this.knownGeneUri);
            while (r.hasNext()) {
                final KnownGene g = new KnownGene(tab.split(r.next()));
                // need spliced one
                if (g.getExonCount() == 1)
                    continue;
                final Interval interval = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd());
                List<KnownGene> L = this.knownGenesMap.get(interval);
                if (L == null) {
                    L = new ArrayList<>();
                    this.knownGenesMap.put(interval, L);
                }
                L.add(g);
            }
            LOG.info("Done reading: " + this.knownGeneUri);
        }
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader().clone();
        SAMProgramRecord p = header.createProgramRecord();
        p.setCommandLine(getProgramCommandLine());
        p.setProgramVersion(getVersion());
        p.setProgramName(getProgramName());
        this.sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
        header = sfr.getFileHeader().clone();
        p = header.createProgramRecord();
        p.setCommandLine(getProgramCommandLine());
        p.setProgramVersion(getVersion());
        p.setProgramName(getProgramName());
        this.weird = this.writingBamArgs.createSAMFileWriterFactory().makeSAMWriter(header, true, new NullOuputStream());
        scan(sfr);
        sfr.close();
        LOG.info("Done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(this.sfw);
        CloserUtil.close(this.weird);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) Pattern(java.util.regex.Pattern) ArrayList(java.util.ArrayList) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LineIterator(htsjdk.tribble.readers.LineIterator) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Aggregations

KnownGene (com.github.lindenb.jvarkit.util.ucsc.KnownGene)22 Interval (htsjdk.samtools.util.Interval)11 ArrayList (java.util.ArrayList)9 BufferedReader (java.io.BufferedReader)8 Pattern (java.util.regex.Pattern)8 IOException (java.io.IOException)7 VariantContext (htsjdk.variant.variantcontext.VariantContext)6 VCFHeader (htsjdk.variant.vcf.VCFHeader)5 File (java.io.File)5 HashSet (java.util.HashSet)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 List (java.util.List)4 Parameter (com.beust.jcommander.Parameter)3 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)3 GeneticCode (com.github.lindenb.jvarkit.util.bio.GeneticCode)3 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)3 Logger (com.github.lindenb.jvarkit.util.log.Logger)3 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)3 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)3