Use of com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader in project jvarkit by lindenb.
From the class VcfToSvg, method doWork.
/**
 * Reads a VCF and writes one SVG document per cluster of overlapping knownGene transcripts
 * that contains at least one retained variant.
 *
 * <p>For each variant that starts a new region, the knownGene tabix file is queried repeatedly,
 * widening the region until no returned transcript extends past it. All following variants on the
 * same contig starting at or before the region end are then collected and drawn: transcripts on top,
 * a link from each variant's genomic span to its column, and one row of genotype squares per sample.
 *
 * <p>When {@code outputFile} is set, each document goes to its own file (the {@code __SEGMENT__}
 * token of the file name is replaced by {@code contig_start_end}) and one line per document is
 * appended to the manifest; otherwise the SVG is written to stdout.
 *
 * @param args command-line arguments; at most one input VCF, stdin is used when none is given
 * @return 0 on success, -1 when the output file name lacks the segment token or when any
 *         {@link Throwable} is raised while processing
 */
@Override
public int doWork(final List<String> args) {
    if (this.outputFile != null && !outputFile.getName().contains(SEGMENT)) {
        LOG.error("output file must contain the word " + SEGMENT + " :" + this.outputFile);
        return -1;
    }
    TabixKnownGeneFileReader tabix = null;
    VcfIterator r = null;
    OutputStream outputStream = null;
    XMLStreamWriter w = null;
    PrintWriter manifestW = null;
    try {
        LOG.info("opening knownGene ");
        tabix = new TabixKnownGeneFileReader(knownGeneUri);
        // the manifest is only meaningful when each SVG goes to its own file
        if (manifestFile != null && this.outputFile != null) {
            manifestW = new PrintWriter(manifestFile);
        } else {
            manifestW = new PrintWriter(new NullOuputStream());
        }
        final Set<String> chromosomes = tabix.getChromosomes();
        final XMLOutputFactory xof = XMLOutputFactory.newInstance();
        r = super.openVcfIterator(super.oneFileOrNull(args));
        final VCFHeader header = r.getHeader();
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            String tabixContig = ctx.getContig();
            if (!chromosomes.contains(tabixContig)) {
                // try the alternate nomenclature: drop or add the "chr" prefix
                if (tabixContig.startsWith("chr")) {
                    tabixContig = tabixContig.substring(3);
                } else {
                    tabixContig = "chr" + tabixContig;
                }
                if (!chromosomes.contains(tabixContig)) {
                    // unknown contig: consume every remaining variant of that contig
                    while (r.hasNext()) {
                        final VariantContext ctx2 = r.peek();
                        if (!ctx2.getContig().equals(ctx.getContig())) {
                            break;
                        }
                        r.next();
                    }
                    LOG.error("No chromosome " + ctx.getContig() + " in " + knownGeneUri + ". Check the chromosome nomenclature.");
                    continue;
                }
            }
            final List<VariantContext> variants = new ArrayList<>();
            final List<KnownGene> genes = new ArrayList<>();
            variants.add(ctx);
            int chromStart = ctx.getStart() - 1;
            int chromEnd = ctx.getEnd();
            /* walk over knownGene, loop until there is no overlapping transcript
             * extending beyond that region */
            for (;;) {
                genes.clear();
                /* the widest bounds seen so far, let's see if we can get bigger ones */
                int newStart = chromStart;
                int newEnd = chromEnd;
                final Iterator<KnownGene> kgr = tabix.iterator(tabixContig, chromStart, chromEnd);
                while (kgr.hasNext()) {
                    final KnownGene g = kgr.next();
                    if (this.removeNonCoding && g.isNonCoding()) {
                        continue;
                    }
                    genes.add(g);
                    newStart = Math.min(g.getTxStart(), newStart);
                    newEnd = Math.max(g.getTxEnd(), newEnd);
                }
                // stable: no transcript widened the region
                if (newStart >= chromStart && newEnd <= chromEnd) {
                    break;
                }
                chromStart = newStart;
                chromEnd = newEnd;
            }
            // intergenic, no gene over that variant
            if (genes.isEmpty()) {
                continue;
            }
            // collect the following variants falling in that region
            while (r.hasNext()) {
                final VariantContext ctx2 = r.peek();
                if (!ctx2.getContig().equals(ctx.getContig())) {
                    break;
                }
                if (ctx2.getStart() > chromEnd) {
                    break;
                }
                variants.add(r.next());
            }
            if (this.variantsInExonOnly) {
                // drop a variant unless it overlaps at least one exon of one transcript
                variants.removeIf(V -> {
                    for (final KnownGene gene : genes) {
                        for (final KnownGene.Exon exon : gene.getExons()) {
                            final boolean disjoint = V.getEnd() < exon.getStart() || V.getStart() >= exon.getEnd();
                            if (!disjoint) {
                                return false;
                            }
                        }
                    }
                    return true;
                });
            }
            // a non-positive opacity means the category would be invisible: remove it
            if (this.variantFILTEREDOpacity <= 0) {
                variants.removeIf(V -> V.isFiltered());
            }
            if (this.variantIndelOpacity <= 0) {
                variants.removeIf(V -> V.isIndel());
            }
            if (variants.isEmpty()) {
                continue;
            }
            LOG.info("Variants (" + variants.size() + ") Transcripts (" + genes.size() + ") " + tabixContig + ":" + chromStart + "-" + chromEnd);
            if (outputFile != null) {
                // literal replace(): the replacement holds a contig name and must not be
                // interpreted as a regex replacement pattern ('$', '\')
                final String segmentName = ctx.getContig() + "_" + chromStart + "_" + chromEnd;
                final File fname = new File(outputFile.getParentFile(), outputFile.getName().replace("__SEGMENT__", segmentName));
                LOG.info("saving as " + fname);
                outputStream = IOUtils.openFileForWriting(fname);
                w = xof.createXMLStreamWriter(outputStream);
                manifestW.println(ctx.getContig() + "\t" + chromStart + "\t" + chromEnd + "\t" + genes.stream().map(G -> G.getName()).collect(Collectors.joining(",")) + "\t" + genes.size() + "\t" + variants.size() + "\t" + fname);
            } else {
                w = xof.createXMLStreamWriter(stdout());
            }
            double featureHeight = 10;
            double TRANSCRIPT_HEIGHT = featureHeight;
            final int all_genotypes_width = variants.size() * this.genotype_width;
            if (trimToVariants) {
                // shrink the drawn interval to the span of the retained variants
                chromStart = variants.stream().map(V -> V.getStart() - 1).min((A, B) -> A.compareTo(B)).get();
                chromEnd = variants.stream().map(V -> V.getEnd() + 1).max((A, B) -> A.compareTo(B)).get();
            }
            final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
            final Interval interval = new Interval(ctx.getContig(), chromStart, chromEnd);
            final int interline_weight = 6;
            final int margin_top = 10;
            final int margin_bottom = 10;
            final int margin_right = 100;
            final int margin_left = 100;
            w.writeStartDocument("UTF-8", "1.0");
            w.writeStartElement("svg");
            w.writeDefaultNamespace(SVG.NS);
            w.writeNamespace("xlink", XLINK.NS);
            w.writeAttribute("version", "1.1");
            // total width = left margin + drawing area + right margin
            w.writeAttribute("width", String.valueOf(margin_left + margin_right + drawinAreaWidth));
            w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + genes.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + header.getSampleNamesInOrder().size() * this.genotype_width));
            title(w, ctx.getContig() + ":" + chromStart + "-" + chromEnd);
            w.writeStartElement("desc");
            w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
            w.writeEndElement();
            // defs
            w.writeStartElement("defs");
            // genotypes drawn as one plain square: <g id="g_TYPE"><rect .../></g>
            final GenotypeType[] squareTypes = {
                GenotypeType.HOM_REF, GenotypeType.NO_CALL, GenotypeType.HOM_VAR, GenotypeType.MIXED, GenotypeType.UNAVAILABLE
            };
            final String[] squareStyles = {
                "fill:lime;stroke:none;", "fill:silver;stroke:gray;", "fill:crimson;stroke:none;", "fill:pink;stroke:none;", "fill:gray;stroke:none;"
            };
            for (int i = 0; i < squareTypes.length; ++i) {
                w.writeStartElement("g");
                w.writeAttribute("id", "g_" + squareTypes[i]);
                w.writeEmptyElement("rect");
                w.writeAttribute("style", squareStyles[i]);
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("width", String.valueOf(this.genotype_width));
                w.writeAttribute("height", String.valueOf(this.genotype_width));
                w.writeEndElement();
            }
            // heterozygous: a lime square half covered by a crimson triangle
            w.writeStartElement("g");
            w.writeAttribute("id", "g_" + GenotypeType.HET);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:lime;stroke:black;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(genotype_width));
            w.writeAttribute("height", String.valueOf(genotype_width));
            w.writeEmptyElement("polygon");
            w.writeAttribute("style", "fill:crimson;stroke:black;");
            w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
            w.writeEndElement();
            // strand
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandF");
            w.writeAttribute("points", "-5,-5 0,0 -5,5");
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandR");
            w.writeAttribute("points", "5,-5 0,0 5,5");
            // gradients
            w.writeStartElement("linearGradient");
            w.writeAttribute("id", "grad01");
            w.writeAttribute("x1", "50%");
            w.writeAttribute("x2", "50%");
            w.writeAttribute("y1", "0%");
            w.writeAttribute("y2", "100%");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "0%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "50%");
            w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "100%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEndElement();
            // end of defs
            w.writeEndElement();
            w.writeStartElement("style");
            w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
            // end of style
            w.writeEndElement();
            // clamps a genomic position into the drawn interval
            final Function<Integer, Integer> trim = t -> Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
            // genomic position -> x pixel in the drawing area
            final Function<Integer, Double> baseToPixel = t -> margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.length());
            // variant index -> x pixel of the left edge of its genotype column
            final Function<Integer, Double> variantIndexToPixel = idx -> {
                final double variant_width = drawinAreaWidth / (double) variants.size();
                final double midx = variant_width * idx + variant_width / 2.0;
                return margin_left + midx - genotype_width / 2.0;
            };
            final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
            /* title */
            double y = 0;
            w.writeStartElement("text");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", String.valueOf(featureHeight));
            w.writeCharacters(interval.toString());
            w.writeEndElement();
            y += featureHeight;
            /* one row per transcript */
            for (final KnownGene g : genes) {
                int cdsHeigh = 5;
                double exonHeight = TRANSCRIPT_HEIGHT - 5;
                double midY = TRANSCRIPT_HEIGHT / 2;
                w.writeStartElement("g");
                w.writeAttribute("transform", "translate(0," + y + ")");
                title(w, g.getName());
                w.writeStartElement("text");
                w.writeAttribute("x", String.valueOf(margin_left - 10));
                w.writeAttribute("y", String.valueOf(featureHeight));
                w.writeAttribute("style", "text-anchor:end;");
                w.writeCharacters(g.getName());
                w.writeEndElement();
                /* transcript line */
                w.writeEmptyElement("line");
                w.writeAttribute("class", "kgtr");
                w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
                w.writeAttribute("y1", String.valueOf(midY));
                w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
                w.writeAttribute("y2", String.valueOf(midY));
                /* strand symbols, one every 30 pixels inside the transcript */
                for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
                    double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.length();
                    if (pos0 + 1 < g.getTxStart()) {
                        continue;
                    }
                    if (pos0 > g.getTxEnd()) {
                        break;
                    }
                    w.writeEmptyElement("use");
                    w.writeAttribute("class", "kgstrand");
                    w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
                    w.writeAttribute("x", String.valueOf(margin_left + pixX));
                    w.writeAttribute("y", String.valueOf(midY));
                }
                /* exons overlapping the drawn interval */
                for (KnownGene.Exon exon : g.getExons()) {
                    if (exon.getStart() + 1 >= interval.getEnd()) {
                        continue;
                    }
                    if (exon.getEnd() <= interval.getStart()) {
                        continue;
                    }
                    w.writeStartElement("rect");
                    w.writeAttribute("class", "kgexon");
                    w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
                    w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
                    w.writeAttribute("height", String.valueOf(exonHeight));
                    title(w, exon.getName());
                    w.writeEndElement();
                }
                /* coding line */
                if (!g.isNonCoding()) {
                    w.writeEmptyElement("rect");
                    w.writeAttribute("class", "kgcds");
                    w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(g.getCdsStart()))));
                    w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(g.getCdsEnd())) - baseToPixel.apply((trim.apply((g.getCdsStart()))))));
                    w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
                }
                w.writeEndElement();
                w.writeCharacters("\n");
                y += featureHeight;
            }
            /* draw links from each variant's genomic span to its genotype column */
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                double x1 = baseToPixel.apply(vc.getStart());
                double x2 = baseToPixel.apply(vc.getEnd());
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("polygon");
                w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
                // vertices go around the outline (top-left, top-right, bottom-right, bottom-left)
                // so the shape is a trapezoid and never self-intersects
                w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2 + " " + variantIndexToPixel.apply(vidx) + "," + y2);
                title(w, variantTitle.apply(vc));
                w.writeEndElement();
            }
            /* rotated position labels above each genotype column */
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("text");
                w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("class", "postick");
                w.writeCharacters(variantTitle.apply(vc));
                w.writeEndElement();
                w.writeCharacters("\n");
            }
            y += featureHeight * interline_weight;
            /* genotypes: one row per sample, one square per variant */
            w.writeStartElement("g");
            for (final String sample : header.getSampleNamesInOrder()) {
                for (int vidx = 0; vidx < variants.size(); ++vidx) {
                    final VariantContext vc = variants.get(vidx);
                    final Genotype g = vc.getGenotype(sample);
                    double opacity = 1.0;
                    if (vc.isIndel()) {
                        opacity *= this.variantIndelOpacity;
                    }
                    if (vc.isFiltered()) {
                        opacity *= this.variantFILTEREDOpacity;
                    }
                    if (opacity > 1) {
                        opacity = 1;
                    }
                    if (opacity <= 0) {
                        continue;
                    }
                    // only wrap in a group when the square is partially transparent
                    if (opacity < 1) {
                        w.writeStartElement("g");
                        w.writeAttribute("style", "opacity:" + opacity + ";");
                    }
                    w.writeEmptyElement("use");
                    w.writeAttribute("x", "" + variantIndexToPixel.apply(vidx));
                    w.writeAttribute("y", String.valueOf(y));
                    w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
                    if (opacity < 1) {
                        w.writeEndElement();
                    }
                }
                w.writeCharacters("\n");
                w.writeStartElement("text");
                w.writeAttribute("x", String.valueOf(margin_left - 10));
                w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
                w.writeAttribute("style", "text-anchor:end;");
                w.writeCharacters(sample);
                w.writeEndElement();
                y += this.genotype_width;
            }
            w.writeCharacters("\n");
            // writeEndDocument() closes the still-open genotype group and the root svg element
            w.writeEndDocument();
            w.writeCharacters("\n");
            w.flush();
            w.close();
            if (outputFile != null) {
                // closing the XMLStreamWriter does not close the underlying file stream
                outputStream.flush();
                outputStream.close();
                outputStream = null;
            }
            if (stop_at_first) {
                LOG.info("Stop after first SVG document");
                break;
            }
        }
        r.close();
        manifestW.flush();
        manifestW.close();
        manifestW = null;
        return 0;
    } catch (Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(tabix);
        CloserUtil.close(outputStream);
        CloserUtil.close(manifestW);
    }
}
Use of com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader in project jvarkit by lindenb.
From the class TView, method initialize.
/**
 * Opens the resources configured on this viewer: the indexed reference (when a reference file is
 * set), one SAM reader per input resource, one indexed VCF reader per variant file, and the
 * knownGene tabix reader (when a knownGene URI is set). A missing read filter defaults to
 * {@code SamFilterParser.ACCEPT_ALL}.
 *
 * @return always 0
 * @throws IOException if one of the underlying files cannot be opened
 */
public int initialize() throws IOException {
    if (this.referenceFile != null) {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFile);
    }
    if (this.samRecordFilter == null) {
        this.samRecordFilter = SamFilterParser.ACCEPT_ALL;
    }
    final SamReaderFactory srf = SamReaderFactory.makeDefault()
            .referenceSequence(this.referenceFile)
            .validationStringency(ValidationStringency.LENIENT);
    for (final SamInputResource sir : this.samInputResources) {
        this.samReaders.add(srf.open(sir));
    }
    for (final File vcfFile : IOUtils.unrollFile(this.variantFiles)) {
        final VcfSource vcfSource = new VcfSource();
        LOG.debug("OPEN " + vcfFile);
        vcfSource.vcfFile = vcfFile;
        // 'true' requires an index for the VCF
        vcfSource.vcfFileReader = new VCFFileReader(vcfFile, true);
        this.vcfReaders.add(vcfSource);
    }
    // Test the configured URI, not the reader being created: guarding on the reader field
    // meant it was never opened when unset.
    // NOTE(review): assumes knownGeneUri is null when the option is not given - confirm.
    if (this.knownGeneUri != null) {
        this.tabixKnownGene = new TabixKnownGeneFileReader(this.knownGeneUri);
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader in project jvarkit by lindenb.
From the class VcfDoest, method doWork.
/**
 * Validates the knownGene URI, opens the knownGene tabix reader, then hands the input lines
 * (a file when one argument is given, stdin otherwise) and the output writer to {@code run}.
 *
 * @param args command-line arguments; at most one input file
 * @return {@code RETURN_OK} on success, -1 when the knownGene URI is undefined, does not end
 *         with ".gz", or when an exception is raised while processing
 */
@Override
public int doWork(final List<String> args) {
    if (this.knownGeneURI == null || this.knownGeneURI.trim().isEmpty()) {
        LOG.error("undefined option knownGeneURI");
        return -1;
    }
    if (!this.knownGeneURI.endsWith(".gz")) {
        LOG.error(this.knownGeneURI + " doesn't end with '.gz'");
        return -1;
    }
    LOG.info("reading " + this.knownGeneURI);
    LineIterator li = null;
    PrintWriter pw = null;
    try {
        LOG.info("loading tabix knownGene :" + this.knownGeneURI);
        this.knownGenesTabix = new TabixKnownGeneFileReader(this.knownGeneURI);
        final String inputFile = oneFileOrNull(args);
        pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        if (inputFile == null) {
            li = IOUtils.openStreamForLineIterator(stdin());
        } else {
            li = IOUtils.openFileForLineIterator(new File(inputFile));
        }
        run(li, pw);
        // close eagerly on success and null the locals so 'finally' does not close them twice
        CloserUtil.close(li);
        li = null;
        pw.flush();
        pw.close();
        pw = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(li);
        CloserUtil.close(pw);
        CloserUtil.close(this.knownGenesTabix);
    }
}
Aggregations