use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.
the class VcfToSvg method doWork.
@Override
public int doWork(final List<String> args) {
VCFReader r = null;
OutputStream outputStream = null;
XMLStreamWriter w = null;
PrintWriter manifestW = null;
ArchiveFactory archiveFactory = null;
try {
r = VCFReaderFactory.makeDefault().open(Paths.get(oneAndOnlyOneFile(args)), true);
final VCFHeader header = r.getHeader();
final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
intervalListProvider.dictionary(dict);
/* read gtf if any */
final IntervalTreeMap<Gene> geneMap = new IntervalTreeMap<>();
if (this.gtfPath != null) {
try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
gtfReader.getAllGenes().stream().filter(G -> !this.removeNonCoding || G.getTranscripts().stream().anyMatch(T -> T.isCoding())).forEach(G -> geneMap.put(new Interval(G), G));
}
}
archiveFactory = ArchiveFactory.open(this.outputPath);
if (manifestFile != null) {
manifestW = IOUtils.openPathForPrintWriter(this.manifestFile);
} else {
manifestW = new PrintWriter(new NullOuputStream());
}
final Pedigree pedigree;
if (this.pedPath == null) {
pedigree = PedigreeParser.empty();
} else {
pedigree = new PedigreeParser().parse(this.pedPath);
}
final Path tmpSvg = Files.createTempFile("vcf.", ".svg");
final XMLOutputFactory xof = XMLOutputFactory.newInstance();
for (final Locatable interval : intervalListProvider.dictionary(dict).stream().collect(Collectors.toList())) {
final List<VariantContext> variants = r.query(interval).stream().filter(V -> this.variantFILTEREDOpacity > 0 || !V.isFiltered()).filter(V -> this.variantIndelOpacity > 0 || !V.isIndel()).collect(Collectors.toCollection(ArrayList::new));
if (variants.isEmpty())
continue;
final List<Transcript> transcripts = geneMap.getOverlapping(interval).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> !this.removeNonCoding || T.isCoding()).collect(Collectors.toList());
variants.removeIf(V -> this.gtfPath != null && this.variantsInExonOnly && transcripts.stream().flatMap(T -> T.getExons().stream()).noneMatch(EX -> EX.overlaps(V)));
if (variants.isEmpty())
continue;
final String geneId = transcripts.stream().map(T -> T.getGene().getId()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
final String geneName = transcripts.stream().map(T -> T.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
outputStream = IOUtils.openPathForWriting(tmpSvg);
w = xof.createXMLStreamWriter(outputStream);
double featureHeight = 10;
double TRANSCRIPT_HEIGHT = featureHeight;
final int all_genotypes_width = variants.size() * this.genotype_width;
final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
final int interline_weight = 6;
final int margin_top = 10;
final int margin_bottom = 10;
final int margin_right = 100;
final int margin_left = 100;
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("svg");
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
w.writeAttribute("version", "1.1");
w.writeAttribute("width", String.valueOf(margin_right + margin_right + drawinAreaWidth));
w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + transcripts.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + samples.size() * this.genotype_width));
title(w, interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
w.writeStartElement("desc");
w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
w.writeEndElement();
// defs
w.writeStartElement("defs");
// genotypes
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_REF);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.NO_CALL);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:silver;stroke;gray;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_VAR);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:crimson;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.MIXED);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:pink;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.UNAVAILABLE);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:gray;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HET);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;black;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(genotype_width));
w.writeAttribute("height", String.valueOf(genotype_width));
w.writeEmptyElement("polygon");
w.writeAttribute("style", "fill:crimson;stroke;black;");
w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
w.writeEndElement();
// strand
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandF");
w.writeAttribute("points", "-5,-5 0,0 -5,5");
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandR");
w.writeAttribute("points", "5,-5 0,0 5,5");
// gradients
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEndElement();
// defs
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
// style
w.writeEndElement();
final IntFunction<Integer> trim = t -> Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
final IntFunction<Double> baseToPixel = t -> margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.getLengthOnReference());
final IntFunction<Double> variantIndexToPixel = idx -> {
final double variant_width = drawinAreaWidth / (double) variants.size();
final double midx = variant_width * idx + variant_width / 2.0;
return margin_left + midx - genotype_width / 2.0;
};
final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
/**
* title
*/
double y = 0;
w.writeStartElement("text");
w.writeAttribute("x", "0");
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeCharacters(interval.toString());
w.writeEndElement();
y += featureHeight;
for (final Transcript g : transcripts) {
int cdsHeigh = 5;
double exonHeight = TRANSCRIPT_HEIGHT - 5;
double midY = TRANSCRIPT_HEIGHT / 2;
w.writeStartElement("g");
w.writeAttribute("transform", "translate(0," + y + ")");
title(w, g.getId());
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(g.getId());
w.writeEndElement();
/* transcript line */
w.writeEmptyElement("line");
w.writeAttribute("class", "kgtr");
w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
w.writeAttribute("y1", String.valueOf(midY));
w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
w.writeAttribute("y2", String.valueOf(midY));
/* strand symbols */
for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.getLengthOnReference();
if (pos0 + 1 < g.getTxStart())
continue;
if (pos0 > g.getTxEnd())
break;
w.writeEmptyElement("use");
w.writeAttribute("class", "kgstrand");
w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
w.writeAttribute("x", String.valueOf(margin_left + pixX));
w.writeAttribute("y", String.valueOf(midY));
}
/* exons */
for (final Exon exon : g.getExons()) {
if (exon.getStart() + 1 >= interval.getEnd())
continue;
if (exon.getEnd() <= interval.getStart())
continue;
w.writeStartElement("rect");
w.writeAttribute("class", "kgexon");
w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
w.writeAttribute("height", String.valueOf(exonHeight));
title(w, exon.getName());
w.writeEndElement();
}
/* coding line */
if (!g.isNonCoding() && g.hasCodonStartDefined() && g.hasCodonStopDefined()) {
final double codonx1 = baseToPixel.apply(trim.apply(g.getLeftmostCodon().get().getStart()));
final double codonx2 = baseToPixel.apply(trim.apply(g.getRightmostCodon().get().getEnd()));
w.writeEmptyElement("rect");
w.writeAttribute("class", "kgcds");
w.writeAttribute("x", String.valueOf(codonx1));
w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
w.writeAttribute("width", String.valueOf(baseToPixel.apply((int) (codonx2 - codonx1))));
w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
}
// String label=String.format("%15s", g.getName());
// w.writeEmptyElement("path");
// double fontHeight=Math.min(10,0.8*TRANSCRIPT_HEIGHT);
// w.writeAttribute("d",this.hershey.svgPath(label,-insets.left,midY-fontHeight/2,insets.left*0.9,fontHeight));
w.writeEndElement();
w.writeCharacters("\n");
y += featureHeight;
}
/* draw lines to variants */
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
double x1 = baseToPixel.apply(vc.getStart());
double x2 = baseToPixel.apply(vc.getEnd());
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("polygon");
w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + variantIndexToPixel.apply(vidx) + "," + y2 + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2);
title(w, variantTitle.apply(vc));
w.writeEndElement();
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("text");
w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("class", "postick");
w.writeCharacters(variantTitle.apply(vc));
w.writeEndElement();
w.writeCharacters("\n");
}
y += featureHeight * interline_weight;
w.writeStartElement("g");
/* step 0: affected, 1: unaffected, 2: others */
for (int step = 0; step < 3; ++step) {
for (final String sample : samples) {
final Sample individual = pedigree.getSampleById(sample);
if (step == 0 && (individual == null || !individual.isAffected()))
continue;
if (step == 1 && (individual == null || !individual.isUnaffected()))
continue;
if (step == 2 && individual != null && individual.isStatusSet())
continue;
// sample
w.writeStartElement("g");
switch(step) {
case 0:
w.writeAttribute("style", "hue-rotate(195deg);");
break;
case 1:
w.writeAttribute("style", "hue-rotate(45deg);");
break;
default:
break;
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final Genotype g = vc.getGenotype(sample);
double opacity = 1.0;
if (vc.isIndel())
opacity *= this.variantIndelOpacity;
if (vc.isFiltered())
opacity *= this.variantFILTEREDOpacity;
if (opacity > 1)
opacity = 1;
if (opacity <= 0)
continue;
if (opacity < 1) {
w.writeStartElement("g");
w.writeAttribute("style", "opacity:" + opacity + ";");
}
w.writeEmptyElement("use");
w.writeAttribute("x", String.valueOf(variantIndexToPixel.apply(vidx)));
w.writeAttribute("y", String.valueOf(y));
w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
if (opacity < 1) {
w.writeEndElement();
}
}
w.writeCharacters("\n");
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(sample);
// text
w.writeEndElement();
// g for sample
w.writeEndElement();
y += this.genotype_width;
}
}
w.writeCharacters("\n");
w.writeEndDocument();
w.writeCharacters("\n");
w.flush();
w.close();
final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig().replaceAll("[/\\:]", "_") + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(geneName) ? "" : "." + geneName.replaceAll("[/\\:]", "")) + (StringUtils.isBlank(geneId) ? "" : "." + geneId.replaceAll("[/\\:]", "")) + ".svg";
OutputStream os = archiveFactory.openOuputStream(filename);
IOUtils.copyTo(tmpSvg, os);
os.flush();
os.close();
Files.delete(tmpSvg);
manifestW.print(interval.getContig());
manifestW.print('\t');
manifestW.print(interval.getStart() - 1);
manifestW.print('\t');
manifestW.print(interval.getEnd());
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print((archiveFactory.isTarOrZipArchive() ? "" : this.outputPath.toString() + File.separator) + filename);
manifestW.print('\t');
manifestW.println(variants.size());
}
r.close();
manifestW.flush();
manifestW.close();
manifestW = null;
archiveFactory.close();
archiveFactory = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(archiveFactory);
CloserUtil.close(r);
CloserUtil.close(outputStream);
CloserUtil.close(manifestW);
}
}
use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.
the class Biostar81455 method doWork.
@Override
public int doWork(final List<String> args) {
BufferedReader r = null;
String line;
PrintStream out = null;
final CharSplitter tab = CharSplitter.TAB;
try {
try (final GtfReader gtfReader = new GtfReader(this.gtfPath)) {
gtfReader.getAllGenes().stream().forEach(G -> this.geneMap.put(new Interval(G), G));
}
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
}
final ContigNameConverter contigNameConverter = ContigNameConverter.fromIntervalTreeMap(this.geneMap);
try {
r = super.openBufferedReader(oneFileOrNull(args));
out = openPathOrStdoutAsPrintStream(this.outputFile);
while ((line = r.readLine()) != null) {
if (line.startsWith("#")) {
out.println(line);
continue;
}
boolean found = false;
final String[] tokens = tab.split(line);
final int pos1 = Integer.parseInt(tokens[1]) + (this.one_based ? 0 : 1);
final String convertCtg = contigNameConverter.apply(tokens[0]);
if (convertCtg == null) {
LOG.error("CANNOT FIND contig " + tokens[0]);
out.println("##UNKNOWN CONTIG " + line);
continue;
}
final SimplePosition position = new SimplePosition(convertCtg, pos1);
final List<Transcript> transcripts = this.geneMap.getOverlapping(position).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(position)).collect(Collectors.toList());
if (transcripts.isEmpty()) {
LOG.info("no gene found in chromosome " + tokens[0] + " (check chrom prefix?)");
} else {
for (final Transcript kg : transcripts) {
Exon bestExon = null;
for (final Exon exon : kg.getExons()) {
if (bestExon == null || Math.abs(distance(position.getPosition(), exon)) < Math.abs(distance(position.getPosition(), bestExon))) {
bestExon = exon;
}
}
if (bestExon != null) {
out.print(line);
out.print("\t");
out.print(bestExon.getTranscript().getId());
out.print("\t");
out.print(kg.getStart() - 1);
out.print("\t");
out.print(kg.getEnd());
out.print("\t");
out.print(kg.getStrand());
out.print("\t");
out.print(bestExon.getName());
out.print("\t");
out.print(bestExon.getStart() - 1);
out.print("\t");
out.print(bestExon.getEnd());
out.print("\t");
out.print(distance(position.getPosition(), bestExon));
out.println();
found = true;
}
}
}
if (!found) {
out.println(line + "\tNULL");
}
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(out);
}
}
use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.
the class BackLocate method backLocate.
private void backLocate(final PrintStream out, final Transcript transcript, final String geneName, final AminoAcid aa1, final AminoAcid aa2, int peptidePos1, final String extraUserData) throws IOException {
final Set<String> messages = new LinkedHashSet<>();
final GeneticCode geneticCode = getGeneticCodeByChromosome(transcript.getContig());
final RNASequenceFactory rnaSequenceFactory = new RNASequenceFactory();
if (this.genomicContig == null || !this.genomicContig.getChrom().equals(transcript.getContig())) {
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceGenome);
final SAMSequenceRecord ssr = dict.getSequence(transcript.getContig());
if (ssr == null) {
LOG.warn(JvarkitException.ContigNotFoundInDictionary.getMessage(transcript.getContig(), dict));
return;
}
this.genomicContig = new GenomicSequence(this.referenceGenome, transcript.getContig());
}
rnaSequenceFactory.setContigToGenomicSequence(C -> this.genomicContig);
final RNASequence wildRNA = rnaSequenceFactory.getCodingRNA(transcript);
final PeptideSequence<RNASequence> wildProt = PeptideSequence.of(wildRNA, geneticCode);
final int peptideIndex0 = peptidePos1 - 1;
if (peptideIndex0 >= wildProt.length()) {
out.println("##index out of range for :" + transcript.getId() + " petide length=" + wildProt.length());
return;
}
if (wildProt.charAt(peptideIndex0) != Character.toUpperCase(aa1.getOneLetterCode())) {
messages.add("REF aminod acid [" + peptidePos1 + "] is not the same (" + wildProt.charAt(peptideIndex0) + "/" + aa1 + ")");
}
final int[] indexesInRNA = new int[] { 0 + peptideIndex0 * 3, 1 + peptideIndex0 * 3, 2 + peptideIndex0 * 3 };
final String wildCodon = "" + wildRNA.charAt(indexesInRNA[0]) + wildRNA.charAt(indexesInRNA[1]) + wildRNA.charAt(indexesInRNA[2]);
/* 2015 : adding possible mut codons */
final Set<String> possibleAltCodons = new HashSet<>();
final char[] bases = new char[] { 'A', 'C', 'G', 'T' };
for (int codon_pos = 0; codon_pos < 3; ++codon_pos) {
final StringBuilder sb = new StringBuilder(wildCodon);
for (char mutBase : bases) {
sb.setCharAt(codon_pos, mutBase);
if (geneticCode.translate(sb.charAt(0), sb.charAt(1), sb.charAt(2)) == Character.toUpperCase(aa2.getOneLetterCode())) {
possibleAltCodons.add(sb.toString());
}
}
}
for (int indexInRna : indexesInRNA) {
out.print(geneName);
out.print('\t');
out.print(aa1.getThreeLettersCode());
out.print('\t');
out.print(peptidePos1);
out.print('\t');
out.print(aa2.getThreeLettersCode());
out.print('\t');
out.print(transcript.getGene().getGeneName());
out.print('\t');
out.print(transcript.getId());
out.print('\t');
out.print(transcript.getStrand());
out.print('\t');
out.print(wildProt.charAt(peptideIndex0));
out.print('\t');
out.print(indexInRna);
out.print('\t');
out.print(wildCodon);
out.print('\t');
if (possibleAltCodons.isEmpty()) {
out.print('.');
} else {
out.print(String.join("|", possibleAltCodons));
}
out.print('\t');
out.print(wildRNA.charAt(indexInRna));
out.print('\t');
out.print(transcript.getContig());
out.print('\t');
out.print(wildRNA.convertRnaIndex0ToGenomic0(indexInRna));
out.print('\t');
String exonName = null;
for (final Exon exon : transcript.getExons()) {
int genome = wildRNA.convertRnaIndex0ToGenomic0(indexInRna);
if (exon.contains(genome)) {
exonName = exon.getName();
break;
}
}
out.print(exonName);
if (this.printSequences) {
String s = wildRNA.toString();
out.print('\t');
out.print(s.substring(0, indexInRna) + "[" + s.charAt(indexInRna) + "]" + (indexInRna + 1 < s.length() ? s.substring(indexInRna + 1) : ""));
s = wildProt.toString();
out.print('\t');
out.print(s.substring(0, peptideIndex0) + "[" + aa1 + "/" + aa2 + "/" + wildProt.charAt(peptideIndex0) + "]" + (peptideIndex0 + 1 < s.length() ? s.substring(peptideIndex0 + 1) : ""));
}
out.print('\t');
out.print(messages.isEmpty() ? "." : String.join(";", messages));
out.print('\t');
out.print(extraUserData);
out.println();
}
}
use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.
the class GtfUpstreamOrf method doWork.
@Override
public int doWork(final List<String> args) {
GtfReader gtfReader = null;
PrintWriter pw = null;
try {
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
this.refCtgNameConverter = ContigNameConverter.fromOneDictionary(refDict);
final ContigDictComparator ctgDictComparator = new ContigDictComparator(refDict);
final String input = oneFileOrNull(args);
gtfReader = input == null ? new GtfReader(stdin()) : new GtfReader(input);
gtfReader.setContigNameConverter(this.refCtgNameConverter);
final List<Gene> genes = gtfReader.getAllGenes().stream().filter(G -> G.hasStrand()).sorted((A, B) -> {
int i = ctgDictComparator.compare(A.getContig(), B.getContig());
if (i != 0)
return i;
i = Integer.compare(A.getStart(), B.getStart());
if (i != 0)
return i;
return Integer.compare(A.getEnd(), B.getEnd());
}).collect(Collectors.toList());
gtfReader.close();
gtfReader = null;
pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
for (final KozakSequence.Strength f : KozakSequence.Strength.values()) {
pw.println("#kozak." + f.name() + "=" + kozakStrengthToScore(f));
}
final String gtfSource = getProgramName().toLowerCase();
for (final SAMSequenceRecord ssr : refDict.getSequences()) {
pw.println("##contig " + ssr.getSequenceName() + ": length:" + ssr.getSequenceLength());
}
pw.println("#" + gtfSource + ":" + JVarkitVersion.getInstance().toString());
if (!StringUtils.isBlank(input)) {
pw.println("#gtf:" + input);
}
final ProgressFactory.Watcher<Gene> progress = ProgressFactory.newInstance().dictionary(refDict).logger(LOG).build();
for (final Gene gene : genes) {
progress.apply(gene);
/* new reference sequence */
if (this.genomicSequence == null || !this.genomicSequence.getChrom().equals(gene.getContig())) {
this.genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, gene.getContig());
}
final List<RNASequence> rnas = gene.getTranscripts().stream().filter(T -> T.isCoding()).filter(T -> T.hasStrand()).filter(T -> T.hasCodonStartDefined()).filter(T -> T.getTranscriptUTR5().isPresent()).map(T -> new RNASequence(T)).collect(Collectors.toList());
if (rnas.isEmpty())
continue;
final Set<OpenReadingFrame> orfs = rnas.stream().flatMap(R -> R.getUpstreamOpenReadingFrames().stream()).collect(Collectors.toSet());
if (orfs.isEmpty())
continue;
boolean gene_printed = false;
for (final OpenReadingFrame uORF : orfs) {
/* is there any other RNA containing this uORF ?*/
if (rnas.stream().filter(other -> !other.getTranscript().getId().equals(uORF.mRNA.getTranscript().getId())).anyMatch(other -> {
// other must have atg in frame and ATG before the observed one
final int other_atg0 = other.getATG0InRNA();
if (uORF.in_rna_atg0 % 3 != other_atg0 % 3)
return false;
return uORF.in_rna_atg0 >= other_atg0;
}))
continue;
final String transcript_id = uORF.getTranscript().getId() + ".uorf" + (1 + uORF.in_rna_atg0);
if (!gene_printed) {
gene_printed = true;
pw.print(gene.getContig());
pw.print("\t");
// source
pw.print(gtfSource);
pw.print("\t");
pw.print("gene");
pw.print("\t");
// start
pw.print(gene.getStart());
pw.print("\t");
// end
pw.print(gene.getEnd());
pw.print("\t");
// score
pw.print(".");
pw.print("\t");
// strand
pw.print(gene.getStrand());
pw.print("\t");
// phase
pw.print(".");
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.println();
}
// TRANSCRIPT
final UTR utr_5_prime = uORF.getTranscript().getTranscriptUTR5().get();
pw.print(gene.getContig());
pw.print("\t");
pw.print(gtfSource);
pw.print("\t");
pw.print("transcript");
pw.print("\t");
if (gene.isPositiveStrand()) {
pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_atg0] + 1);
pw.print("\t");
// pw.print(transcriptSequence.mrnaIndex0ToBase[uORF.in_rna_stop0]+1);
if (uORF.in_rna_stop0 == NPOS) {
pw.print(uORF.mRNA.getTranscript().getTxEnd());
} else {
pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_stop0] + 1);
}
} else {
if (uORF.in_rna_stop0 == NPOS) {
pw.print(uORF.mRNA.getTranscript().getTxStart());
} else {
pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_stop0] + 1);
}
pw.print("\t");
pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_atg0] + 1);
}
pw.print("\t");
// score
pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
pw.print("\t");
// strand
pw.print(gene.getStrand());
pw.print("\t");
// phase
pw.print("0");
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.print(keyvalue("transcript_id", transcript_id));
pw.print(keyvalue("transcript_biotype", "uORF"));
pw.print(keyvalue("kozak-seq", uORF.kozak.getString()));
pw.print(keyvalue("kozak-strength", uORF.kozak.getStrength()));
pw.print(keyvalue("translation", uORF.peptide));
pw.print(keyvalue("uORF-atg-in-frame-with-transcript-atg", uORF.uorf_atg_in_frame));
pw.print(keyvalue("utr", utr_5_prime.toString() + " " + utr_5_prime.getStart() + "-" + utr_5_prime.getEnd()));
pw.println();
// Exon
for (final Exon exon : uORF.getTranscript().getExons()) {
pw.print(exon.getContig());
pw.print("\t");
pw.print(gtfSource);
pw.print("\t");
pw.print("exon");
pw.print("\t");
pw.print(exon.getStart());
pw.print("\t");
pw.print(exon.getEnd());
pw.print("\t");
// score
pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
pw.print("\t");
// strand
pw.print(exon.getStrand());
pw.print("\t");
// phase
pw.print(0);
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.print(keyvalue("transcript_id", transcript_id));
pw.println();
}
final List<Interval> startBlocks = uORF.mRNA.getCodonBlocks(uORF.in_rna_atg0, uORF.in_rna_atg0 + 1, uORF.in_rna_atg0 + 2);
final List<Interval> stopBlocks = uORF.in_rna_stop0 != NPOS ? uORF.mRNA.getCodonBlocks(uORF.in_rna_stop0, uORF.in_rna_stop0 + 1, uORF.in_rna_stop0 + 2) : Collections.emptyList();
// CDS
if (!stopBlocks.isEmpty()) {
final int cdsStart = startBlocks.stream().mapToInt(B -> B.getStart()).min().orElseThrow(IllegalStateException::new);
final int cdsEnd = stopBlocks.stream().mapToInt(B -> B.getEnd()).max().orElseThrow(IllegalStateException::new);
for (final Exon exon : uORF.getTranscript().getExons()) {
if (exon.getEnd() < cdsStart)
continue;
if (exon.getStart() > cdsEnd)
break;
pw.print(exon.getContig());
pw.print("\t");
pw.print(gtfSource);
pw.print("\t");
pw.print("CDS");
pw.print("\t");
pw.print(Math.max(cdsStart, exon.getStart()));
pw.print("\t");
pw.print(Math.min(cdsEnd, exon.getEnd()));
pw.print("\t");
// score
pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
pw.print("\t");
// strand
pw.print(exon.getStrand());
pw.print("\t");
// phase
pw.print(uORF.getFrameAt(Math.max(cdsStart, exon.getStart())));
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.print(keyvalue("transcript_id", transcript_id));
pw.println();
}
}
// CODON START
for (final Interval startc : startBlocks) {
PARANOID.assertLe(startc.getStart(), startc.getEnd());
pw.print(startc.getContig());
pw.print("\t");
pw.print(gtfSource);
pw.print("\t");
pw.print("start_codon");
pw.print("\t");
pw.print(startc.getStart());
pw.print("\t");
pw.print(startc.getEnd());
pw.print("\t");
// score
pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
pw.print("\t");
// strand
pw.print(gene.getStrand());
pw.print("\t");
// phase
pw.print(uORF.getFrameAt(startc.getStart()));
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.print(keyvalue("transcript_id", transcript_id));
pw.print(keyvalue("distance-mrna-atg", uORF.mRNA.getATG0InRNA() - uORF.in_rna_atg0));
pw.print(keyvalue("pos0-in-mrna", uORF.in_rna_atg0));
pw.print(keyvalue("spliced", startBlocks.size() > 1));
pw.println();
}
// CODON END
for (final Interval stopc : stopBlocks) /* might be empty */
{
PARANOID.assertLe(stopc.getStart(), stopc.getEnd());
pw.print(stopc.getContig());
pw.print("\t");
pw.print(gtfSource);
pw.print("\t");
pw.print("stop_codon");
pw.print("\t");
pw.print(stopc.getStart());
pw.print("\t");
pw.print(stopc.getEnd());
pw.print("\t");
// score
pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
pw.print("\t");
// strand
pw.print(gene.getStrand());
pw.print("\t");
// phase
pw.print(uORF.getFrameAt(stopc.getStart()));
pw.print("\t");
pw.print(keyvalue("gene_id", gene.getId()));
pw.print(keyvalue("transcript_id", transcript_id));
pw.print(keyvalue("spliced", stopBlocks.size() > 1));
pw.println();
}
}
}
progress.close();
pw.flush();
pw.close();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
}
}
use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.
the class VcfToSvg method doWork.
@Override
public int doWork(final List<String> args) {
VCFReader r = null;
OutputStream outputStream = null;
XMLStreamWriter w = null;
PrintWriter manifestW = null;
ArchiveFactory archiveFactory = null;
try {
r = VCFReaderFactory.makeDefault().open(Paths.get(oneAndOnlyOneFile(args)), true);
final VCFHeader header = r.getHeader();
final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
intervalListProvider.dictionary(dict);
/* read gtf if any */
final IntervalTreeMap<Gene> geneMap = new IntervalTreeMap<>();
if (this.gtfPath != null) {
try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
gtfReader.getAllGenes().stream().filter(G -> !this.removeNonCoding || G.getTranscripts().stream().anyMatch(T -> T.isCoding())).forEach(G -> geneMap.put(new Interval(G), G));
}
}
archiveFactory = ArchiveFactory.open(this.outputPath);
if (manifestFile != null) {
manifestW = IOUtils.openPathForPrintWriter(this.manifestFile);
} else {
manifestW = new PrintWriter(new NullOuputStream());
}
final Pedigree pedigree;
if (this.pedPath == null) {
pedigree = PedigreeParser.empty();
} else {
pedigree = new PedigreeParser().parse(this.pedPath);
}
final Path tmpSvg = Files.createTempFile("vcf.", ".svg");
final XMLOutputFactory xof = XMLOutputFactory.newInstance();
for (final Locatable interval : intervalListProvider.dictionary(dict).stream().collect(Collectors.toList())) {
final List<VariantContext> variants = r.query(interval).stream().filter(V -> this.variantFILTEREDOpacity > 0 || !V.isFiltered()).filter(V -> this.variantIndelOpacity > 0 || !V.isIndel()).collect(Collectors.toCollection(ArrayList::new));
if (variants.isEmpty())
continue;
final List<Transcript> transcripts = geneMap.getOverlapping(interval).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> !this.removeNonCoding || T.isCoding()).collect(Collectors.toList());
variants.removeIf(V -> this.gtfPath != null && this.variantsInExonOnly && transcripts.stream().flatMap(T -> T.getExons().stream()).noneMatch(EX -> EX.overlaps(V)));
if (variants.isEmpty())
continue;
final String geneId = transcripts.stream().map(T -> T.getGene().getId()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
final String geneName = transcripts.stream().map(T -> T.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
outputStream = IOUtils.openPathForWriting(tmpSvg);
w = xof.createXMLStreamWriter(outputStream);
double featureHeight = 10;
double TRANSCRIPT_HEIGHT = featureHeight;
final int all_genotypes_width = variants.size() * this.genotype_width;
final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
final int interline_weight = 6;
final int margin_top = 10;
final int margin_bottom = 10;
final int margin_right = 100;
final int margin_left = 100;
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("svg");
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
w.writeAttribute("version", "1.1");
w.writeAttribute("width", String.valueOf(margin_right + margin_right + drawinAreaWidth));
w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + transcripts.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + samples.size() * this.genotype_width));
title(w, interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
w.writeStartElement("desc");
w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
w.writeEndElement();
// defs
w.writeStartElement("defs");
// genotypes
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_REF);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.NO_CALL);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:silver;stroke;gray;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_VAR);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:crimson;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.MIXED);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:pink;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.UNAVAILABLE);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:gray;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HET);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;black;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(genotype_width));
w.writeAttribute("height", String.valueOf(genotype_width));
w.writeEmptyElement("polygon");
w.writeAttribute("style", "fill:crimson;stroke;black;");
w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
w.writeEndElement();
// strand
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandF");
w.writeAttribute("points", "-5,-5 0,0 -5,5");
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandR");
w.writeAttribute("points", "5,-5 0,0 5,5");
// gradients
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEndElement();
// defs
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
// style
w.writeEndElement();
final IntFunction<Integer> trim = t -> Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
final IntFunction<Double> baseToPixel = t -> margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.getLengthOnReference());
final IntFunction<Double> variantIndexToPixel = idx -> {
final double variant_width = drawinAreaWidth / (double) variants.size();
final double midx = variant_width * idx + variant_width / 2.0;
return margin_left + midx - genotype_width / 2.0;
};
final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
/**
* title
*/
double y = 0;
w.writeStartElement("text");
w.writeAttribute("x", "0");
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeCharacters(interval.toString());
w.writeEndElement();
y += featureHeight;
for (final Transcript g : transcripts) {
int cdsHeigh = 5;
double exonHeight = TRANSCRIPT_HEIGHT - 5;
double midY = TRANSCRIPT_HEIGHT / 2;
w.writeStartElement("g");
w.writeAttribute("transform", "translate(0," + y + ")");
title(w, g.getId());
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(g.getId());
w.writeEndElement();
/* transcript line */
w.writeEmptyElement("line");
w.writeAttribute("class", "kgtr");
w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
w.writeAttribute("y1", String.valueOf(midY));
w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
w.writeAttribute("y2", String.valueOf(midY));
/* strand symbols */
for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.getLengthOnReference();
if (pos0 + 1 < g.getTxStart())
continue;
if (pos0 > g.getTxEnd())
break;
w.writeEmptyElement("use");
w.writeAttribute("class", "kgstrand");
w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
w.writeAttribute("x", String.valueOf(margin_left + pixX));
w.writeAttribute("y", String.valueOf(midY));
}
/* exons */
for (final Exon exon : g.getExons()) {
if (exon.getStart() + 1 >= interval.getEnd())
continue;
if (exon.getEnd() <= interval.getStart())
continue;
w.writeStartElement("rect");
w.writeAttribute("class", "kgexon");
w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
w.writeAttribute("height", String.valueOf(exonHeight));
title(w, exon.getName());
w.writeEndElement();
}
/* coding line */
if (!g.isNonCoding() && g.hasCodonStartDefined() && g.hasCodonStopDefined()) {
final double codonx1 = baseToPixel.apply(trim.apply(g.getLeftmostCodon().get().getStart()));
final double codonx2 = baseToPixel.apply(trim.apply(g.getRightmostCodon().get().getEnd()));
w.writeEmptyElement("rect");
w.writeAttribute("class", "kgcds");
w.writeAttribute("x", String.valueOf(codonx1));
w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
w.writeAttribute("width", String.valueOf(baseToPixel.apply((int) (codonx2 - codonx1))));
w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
}
// String label=String.format("%15s", g.getName());
// w.writeEmptyElement("path");
// double fontHeight=Math.min(10,0.8*TRANSCRIPT_HEIGHT);
// w.writeAttribute("d",this.hershey.svgPath(label,-insets.left,midY-fontHeight/2,insets.left*0.9,fontHeight));
w.writeEndElement();
w.writeCharacters("\n");
y += featureHeight;
}
/* draw lines to variants */
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
double x1 = baseToPixel.apply(vc.getStart());
double x2 = baseToPixel.apply(vc.getEnd());
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("polygon");
w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + variantIndexToPixel.apply(vidx) + "," + y2 + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2);
title(w, variantTitle.apply(vc));
w.writeEndElement();
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("text");
w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("class", "postick");
w.writeCharacters(variantTitle.apply(vc));
w.writeEndElement();
w.writeCharacters("\n");
}
y += featureHeight * interline_weight;
w.writeStartElement("g");
/* step 0: affected, 1: unaffected, 2: others */
for (int step = 0; step < 3; ++step) {
for (final String sample : samples) {
final Sample individual = pedigree.getSampleById(sample);
if (step == 0 && (individual == null || !individual.isAffected()))
continue;
if (step == 1 && (individual == null || !individual.isUnaffected()))
continue;
if (step == 2 && individual != null && individual.isStatusSet())
continue;
// sample
w.writeStartElement("g");
switch(step) {
case 0:
w.writeAttribute("style", "hue-rotate(195deg);");
break;
case 1:
w.writeAttribute("style", "hue-rotate(45deg);");
break;
default:
break;
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final Genotype g = vc.getGenotype(sample);
double opacity = 1.0;
if (vc.isIndel())
opacity *= this.variantIndelOpacity;
if (vc.isFiltered())
opacity *= this.variantFILTEREDOpacity;
if (opacity > 1)
opacity = 1;
if (opacity <= 0)
continue;
if (opacity < 1) {
w.writeStartElement("g");
w.writeAttribute("style", "opacity:" + opacity + ";");
}
w.writeEmptyElement("use");
w.writeAttribute("x", String.valueOf(variantIndexToPixel.apply(vidx)));
w.writeAttribute("y", String.valueOf(y));
w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
if (opacity < 1) {
w.writeEndElement();
}
}
w.writeCharacters("\n");
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(sample);
// text
w.writeEndElement();
// g for sample
w.writeEndElement();
y += this.genotype_width;
}
}
w.writeCharacters("\n");
w.writeEndDocument();
w.writeCharacters("\n");
w.flush();
w.close();
final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig().replaceAll("[/\\:]", "_") + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(geneName) ? "" : "." + geneName.replaceAll("[/\\:]", "")) + (StringUtils.isBlank(geneId) ? "" : "." + geneId.replaceAll("[/\\:]", "")) + ".svg";
OutputStream os = archiveFactory.openOuputStream(filename);
IOUtils.copyTo(tmpSvg, os);
os.flush();
os.close();
Files.delete(tmpSvg);
manifestW.print(interval.getContig());
manifestW.print('\t');
manifestW.print(interval.getStart() - 1);
manifestW.print('\t');
manifestW.print(interval.getEnd());
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print((archiveFactory.isTarOrZipArchive() ? "" : this.outputPath.toString() + File.separator) + filename);
manifestW.print('\t');
manifestW.println(variants.size());
}
r.close();
manifestW.flush();
manifestW.close();
manifestW = null;
archiveFactory.close();
archiveFactory = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(archiveFactory);
CloserUtil.close(r);
CloserUtil.close(outputStream);
CloserUtil.close(manifestW);
}
}
Aggregations