use of com.github.lindenb.jvarkit.util.samtools.ContigDictComparator in project jvarkit by lindenb.
the class ReferenceToHtml method doWork.
@Override
public int doWork(final List<String> args) {
try {
try (VCFReader reader = VCFReaderFactory.makeDefault().open(Paths.get(oneAndOnlyOneFile(args)), true)) {
final VCFHeader header = reader.getHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final Optional<String> buildName = SequenceDictionaryUtils.getBuildName(dict);
try (ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx)) {
SequenceUtil.assertSequenceDictionariesEqual(SequenceDictionaryUtils.extractRequired(reference), dict);
final List<Locatable> locs = IntervalListProvider.from(this.regionsStr).dictionary(dict).stream().sorted(new ContigDictComparator(dict).createLocatableComparator()).collect(Collectors.toList());
if (locs.isEmpty()) {
LOG.error("no region was defined");
return -1;
}
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
try (ArchiveFactory archive = ArchiveFactory.open(archiveOutput)) {
try (PrintWriter pw = archive.openWriter(this.prefix + "script.js")) {
pw.println(StaticCodeExtractor.forClass(this.getClass()).extract("SCRIPT").get());
pw.flush();
}
try (PrintWriter pw = archive.openWriter(this.prefix + "style.css")) {
pw.println(StaticCodeExtractor.forClass(this.getClass()).extract("CSS").get());
pw.flush();
}
final OutputStream index_os = archive.openOuputStream(this.prefix + "index.html");
final XMLStreamWriter index_html = xof.createXMLStreamWriter(index_os, "UTF-8");
index_html.writeStartDocument("UTF-8", "1.0");
index_html.writeStartElement("html");
index_html.writeStartElement("head");
writeMeta(index_html);
index_html.writeStartElement("title");
index_html.writeCharacters(this.getProgramName());
// title
index_html.writeEndElement();
index_html.writeStartElement("link");
index_html.writeAttribute("rel", "stylesheet");
index_html.writeAttribute("href", this.prefix + "style.css");
index_html.writeCharacters("");
// link
index_html.writeEndElement();
// head
index_html.writeEndElement();
index_html.writeStartElement("body");
index_html.writeStartElement("ul");
for (final Locatable loc : locs) {
final List<VariantContext> variants;
try (CloseableIterator<VariantContext> iter = reader.query(loc)) {
variants = iter.stream().collect(Collectors.toList());
}
StringWriter sw = new StringWriter();
sw.append("var g = ");
JsonWriter jsw = new JsonWriter(sw);
jsw.beginObject();
jsw.name("contig").value(loc.getContig());
jsw.name("start").value(loc.getStart());
jsw.name("end").value(loc.getEnd());
jsw.name("length").value(loc.getLengthOnReference());
jsw.name("sequence").value(reference.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getEnd()).getBaseString());
jsw.name("variants");
jsw.beginArray();
for (VariantContext ctx : variants) {
jsw.beginObject();
jsw.name("start").value(ctx.getStart());
jsw.name("end").value(ctx.getEnd());
jsw.name("id").value(ctx.hasID() ? ctx.getID() : "");
jsw.name("type").value(ctx.getType().name());
jsw.name("ref").value(ctx.getReference().getDisplayString());
jsw.name("alts");
jsw.beginArray();
for (Allele alt : ctx.getAlternateAlleles()) {
jsw.value(alt.getDisplayString());
}
jsw.endArray();
jsw.endObject();
}
jsw.endArray();
jsw.endObject();
jsw.flush();
jsw.close();
sw.append(";");
final String filename = this.prefix + loc.getContig() + "_" + loc.getStart() + "_" + loc.getEnd() + ".html";
final String title = (buildName.isPresent() ? buildName.get() + " " : "") + new SimpleInterval(loc).toNiceString();
OutputStream os = archive.openOuputStream(filename);
XMLStreamWriter out = xof.createXMLStreamWriter(os, "UTF-8");
out.writeStartDocument("UTF-8", "1.0");
out.writeStartElement("html");
out.writeStartElement("head");
writeMeta(out);
out.writeStartElement("script");
out.writeCharacters(sw.toString());
// script
out.writeEndElement();
out.writeStartElement("script");
out.writeAttribute("src", this.prefix + "script.js");
out.writeCharacters("");
// script
out.writeEndElement();
out.writeStartElement("link");
out.writeAttribute("rel", "stylesheet");
out.writeAttribute("href", this.prefix + "style.css");
out.writeCharacters("");
// link
out.writeEndElement();
out.writeStartElement("title");
out.writeCharacters(title + " N=" + StringUtils.niceInt(variants.size()));
// title
out.writeEndElement();
// head
out.writeEndElement();
out.writeStartElement("body");
out.writeStartElement("h1");
out.writeCharacters(title + " N=" + StringUtils.niceInt(variants.size()));
out.writeEndElement();
out.writeStartElement("div");
checkBox(out, "showPos", "Line Number");
checkBox(out, "showSpace", "Spaces");
checkBox(out, "showVar", "Show Variants");
out.writeCharacters(" ");
out.writeEmptyElement("input");
out.writeAttribute("id", "primer");
out.writeAttribute("type", "text");
out.writeAttribute("placeholder", "Primer Sequence");
out.writeStartElement("button");
out.writeAttribute("id", "search");
out.writeCharacters("Search...");
out.writeEndElement();
// div
out.writeEndElement();
out.writeStartElement("div");
out.writeAttribute("class", "sequence");
out.writeAttribute("id", "main");
// div
out.writeEndElement();
// body
out.writeEndElement();
// html
out.writeEndElement();
os.flush();
os.close();
index_html.writeStartElement("li");
index_html.writeStartElement("a");
index_html.writeAttribute("href", filename);
index_html.writeCharacters(title + " N=" + StringUtils.niceInt(variants.size()));
// a
index_html.writeEndElement();
// li
index_html.writeEndElement();
}
// ul
index_html.writeEndElement();
// body
index_html.writeEndElement();
// html
index_html.writeEndElement();
index_html.writeEndDocument();
index_html.close();
index_os.flush();
index_os.close();
}
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
use of com.github.lindenb.jvarkit.util.samtools.ContigDictComparator in project jvarkit by lindenb.
the class GtfRetroCopy method doWork.
@Override
public int doWork(final List<String> args) {
VCFReader vcfFileReader = null;
VariantContextWriter vcw0 = null;
try {
/* load the reference genome */
/* create a contig name converter from the REF */
final Set<String> knownGeneIds;
if (this.knownPath != null) {
try (BufferedReader br = IOUtils.openPathForBufferedReading(this.knownPath)) {
knownGeneIds = br.lines().filter(L -> !StringUtils.isBlank(L)).map(S -> S.trim()).filter(S -> !(S.equals("-") || S.equals(".") || S.startsWith("#"))).collect(Collectors.toSet());
}
} else {
knownGeneIds = Collections.emptySet();
}
// open the sam file
final String input = oneAndOnlyOneFile(args);
vcfFileReader = VCFReaderFactory.makeDefault().open(Paths.get(input), true);
final VCFHeader header = vcfFileReader.getHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
final Comparator<String> contigCmp = dict == null ? (A, B) -> A.compareTo(B) : new ContigDictComparator(dict);
final Comparator<Gene> geneCmp = (A, B) -> {
int i = contigCmp.compare(A.getContig(), B.getContig());
if (i != 0)
return i;
i = Integer.compare(A.getStart(), B.getStart());
if (i != 0)
return i;
return Integer.compare(A.getEnd(), B.getEnd());
};
final GtfReader gtfReader = new GtfReader(this.gtfPath);
if (dict != null && !dict.isEmpty()) {
this.writingVcf.dictionary(dict);
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
}
final List<Gene> genes = gtfReader.getAllGenes().stream().filter(G -> G.getTranscripts().stream().count() > 0L).filter(G -> G.getTranscripts().stream().anyMatch(T -> T.getIntronCount() >= this.min_intron_count)).sorted(geneCmp).collect(Collectors.toList());
gtfReader.close();
/**
* build vcf header
*/
final Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_QUALITY_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
metaData.add(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation Length"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_BOUNDS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Introns boundaries"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_SIZES, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Introns sizes"));
metaData.add(new VCFFilterHeaderLine(ATT_NOT_ALL_INTRONS, "Not all introns were found retrocopied"));
for (final String att : ENSEMBL_TRANSCRIPT_ATTS) {
metaData.add(new VCFInfoHeaderLine(att, 1, VCFHeaderLineType.String, "Value for the attribute '" + att + "' in the gtf"));
}
// metaData.add(new VCFFormatHeaderLine(ATT_COUNT_SUPPORTING_READS, 2,VCFHeaderLineType.Integer,"Count supporting reads [intron-left/intron-right]"));
// metaData.add(new VCFInfoHeaderLine(ATT_RETRO_DESC, VCFHeaderLineCount.UNBOUNDED,VCFHeaderLineType.String,
// "Retrocopy attributes: transcript-id|strand|exon-left|exon-left-bases|exon-right-bases|exon-right"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns for the Transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns found retrocopied for the transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_FRACTION, 1, VCFHeaderLineType.Float, "Fraction of introns found retrocopied for the transcript"));
metaData.add(new VCFFilterHeaderLine(ATT_NOT_ALL_INTRONS, "Not all introns were found retrocopied"));
metaData.add(new VCFFilterHeaderLine(ATT_KNOWN, "Known RetroGenes. " + (this.knownPath == null ? "" : " Source: " + this.knownPath)));
final VCFHeader header2 = new VCFHeader(header);
metaData.stream().forEach(H -> header2.addMetaDataLine(H));
JVarkitVersion.getInstance().addMetaData(this, header2);
final Allele ref = Allele.create((byte) 'N', true);
final Allele alt = Allele.create("<RETROCOPY>", false);
/* open vcf for writing*/
vcw0 = this.writingVcf.open(this.outputFile);
vcw0.writeHeader(header2);
final ProgressFactory.Watcher<Gene> progress = ProgressFactory.newInstance().logger(LOG).dictionary(dict).build();
for (final Gene gene : genes) {
progress.apply(gene);
final List<VariantContext> variants = new ArrayList<>();
final CloseableIterator<VariantContext> iter2 = vcfFileReader.query(gene.getContig(), gene.getStart(), gene.getEnd());
while (iter2.hasNext()) {
final VariantContext ctx = iter2.next();
// SNV
if (ctx.getStart() == ctx.getEnd())
continue;
StructuralVariantType svType = ctx.getStructuralVariantType();
if (StructuralVariantType.BND.equals(svType))
continue;
if (StructuralVariantType.INS.equals(svType))
continue;
variants.add(ctx);
}
iter2.close();
if (variants.isEmpty())
continue;
for (final Transcript transcript : gene.getTranscripts()) {
if (!transcript.hasIntron())
continue;
if (transcript.getIntronCount() < this.min_intron_count)
continue;
final Counter<String> samples = new Counter<>();
for (final Intron intron : transcript.getIntrons()) {
for (final VariantContext ctx : variants) {
if (!isWithinDistance(intron.getStart(), ctx.getStart()))
continue;
if (!isWithinDistance(intron.getEnd(), ctx.getEnd()))
continue;
if (ctx.hasGenotypes()) {
for (final Genotype g : ctx.getGenotypes()) {
if (g.isNoCall() || g.isHomRef())
continue;
samples.incr(g.getSampleName());
}
} else {
samples.incr("*");
}
}
// end iter2
}
// end intron
final long max_count = samples.stream().mapToLong(E -> E.getValue()).max().orElse(0L);
if (max_count == 0)
continue;
if (this.only_all_introns && max_count != transcript.getIntronCount())
continue;
// ok good candidate
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(transcript.getContig());
vcb.start(transcript.getStart());
vcb.stop(transcript.getEnd());
switch(this.idKey) {
case gene_name:
final String gn = transcript.getGene().getGeneName();
vcb.id(StringUtils.isBlank(gn) ? transcript.getId() : gn);
break;
case gene_id:
vcb.id(transcript.getGene().getId());
break;
case transcript_id:
vcb.id(transcript.getId());
break;
default:
throw new IllegalStateException();
}
final List<Allele> alleles = Arrays.asList(ref, alt);
// vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY,2);
// vcb.attribute(VCFConstants.ALLELE_COUNT_KEY,1);
// vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY,0.5);
vcb.attribute(VCFConstants.SVTYPE, "DEL");
vcb.attribute(VCFConstants.END_KEY, transcript.getEnd());
vcb.attribute("SVLEN", transcript.getLengthOnReference());
vcb.attribute(ATT_INTRONS_BOUNDS, transcript.getIntrons().stream().map(S -> "" + S.getStart() + "-" + S.getEnd()).collect(Collectors.toList()));
vcb.attribute(ATT_INTRONS_SIZES, transcript.getIntrons().stream().mapToInt(S -> S.getLengthOnReference()).toArray());
for (final String att : ENSEMBL_TRANSCRIPT_ATTS) {
final String v = transcript.getProperties().get(att);
if (StringUtils.isBlank(v))
continue;
vcb.attribute(att, v);
}
vcb.alleles(alleles);
boolean pass_filter = true;
// introns sequences
vcb.attribute(ATT_INTRONS_CANDIDATE_COUNT, max_count);
vcb.attribute(ATT_INTRONS_COUNT, transcript.getIntronCount());
vcb.attribute(ATT_INTRONS_CANDIDATE_FRACTION, max_count / (float) transcript.getIntronCount());
if (transcript.getIntronCount() != max_count) {
vcb.filter(ATT_NOT_ALL_INTRONS);
pass_filter = false;
}
if (knownGeneIds.contains(transcript.getGene().getId())) {
vcb.filter(ATT_KNOWN);
pass_filter = false;
}
if (header.hasGenotypingData()) {
final List<Genotype> genotypes = new ArrayList<>();
for (final String sn : header.getSampleNamesInOrder()) {
final List<Allele> gtalleles;
if (samples.count(sn) == 0L) {
gtalleles = Arrays.asList(ref, ref);
} else {
gtalleles = Arrays.asList(ref, alt);
}
final GenotypeBuilder gb = new GenotypeBuilder(sn, gtalleles);
genotypes.add(gb.make());
}
vcb.genotypes(genotypes);
}
if (pass_filter)
vcb.passFilters();
vcw0.add(vcb.make());
}
}
progress.close();
vcw0.close();
vcfFileReader.close();
vcfFileReader = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfFileReader);
CloserUtil.close(vcw0);
}
}
use of com.github.lindenb.jvarkit.util.samtools.ContigDictComparator in project jvarkit by lindenb.
the class WesCnvSvg method doWork.
@Override
public int doWork(final List<String> args) {
XMLStreamWriter w = null;
BufferedReader r = null;
OutputStream fout = null;
VCFReader vcfReader = null;
try {
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
final IntervalExtender extender = IntervalExtender.of(refDict, this.extendWhat);
if (extender.isShriking()) {
LOG.error("extend factor <1.0");
return -1;
}
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(refDict);
final ContigDictComparator contigDictCompare = new ContigDictComparator(refDict);
final List<CaptureInterval> userIntervals = this.intervalListProvider.stream().map(loc -> contigNameConverter.convertToSimpleInterval(loc).<RuntimeException>orElseThrow(() -> new RuntimeException(new JvarkitException.ContigNotFoundInDictionary(loc.getContig(), refDict)))).map(extender).collect(HtsCollectors.mergeIntervals()).map(L -> new CaptureInterval(L)).sorted(contigDictCompare.createLocatableComparator()).collect(Collectors.toCollection(ArrayList::new));
if (userIntervals.isEmpty()) {
LOG.error("no interval or bed defined");
return -1;
}
this.countBasesToBeDisplayed = userIntervals.stream().mapToInt(R -> R.getLengthOnReference()).sum();
if (this.countBasesToBeDisplayed < 1) {
LOG.error("Nothing to display. BED count==0");
return -1;
} else {
double x1 = 0;
for (int i = 0; i < userIntervals.size(); ++i) {
final CaptureInterval ci = userIntervals.get(i);
ci.pixelx = x1;
x1 += ci.getPixelWidth();
}
}
/* distinct ordered contigs */
final List<String> distinctContigs = userIntervals.stream().map(R -> R.getContig()).collect(Collectors.toSet()).stream().sorted(contigDictCompare).collect(Collectors.toList());
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidx);
for (final Path bamFile : IOUtils.unrollPaths(args)) {
final BamInput bi = new BamInput();
bi.bamPath = bamFile;
try (SamReader samReader = srf.open(bamFile)) {
final SAMSequenceDictionary samDict = SequenceDictionaryUtils.extractRequired(samReader.getFileHeader());
if (!SequenceUtil.areSequenceDictionariesEqual(refDict, samDict)) {
throw new JvarkitException.DictionariesAreNotTheSame(refDict, samDict);
}
bi.sample = samReader.getFileHeader().getReadGroups().stream().map(V -> V.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bamFile));
final CoverageFactory covFactory = new CoverageFactory().setMappingQuality(this.minMappingQuality);
for (final String contig : distinctContigs) {
final List<CaptureInterval> contig_intervals = userIntervals.stream().filter(R -> R.getContig().equals(contig)).collect(Collectors.toList());
final CoverageFactory.SimpleCoverage coverage = covFactory.getSimpleCoverage(samReader, contig_intervals, null);
for (CaptureInterval rgn : contig_intervals) {
final double[] array = coverage.getSubCoverage(rgn).scale(this.scaleType, (int) rgn.getPixelWidth());
bi.coverages.add(array);
}
}
}
this.bamInputs.add(bi);
}
if (this.bamInputs.isEmpty()) {
LOG.error("no bam input");
return -1;
}
if (this.vcfFile != null) {
vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
final SAMSequenceDictionary vcfDict = vcfReader.getHeader().getSequenceDictionary();
if (vcfDict != null)
SequenceUtil.assertSequenceDictionariesEqual(refDict, vcfDict);
}
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
if (this.outputFile == null) {
w = xof.createXMLStreamWriter(stdout(), "UTF-8");
} else {
fout = Files.newOutputStream(this.outputFile);
w = xof.createXMLStreamWriter(fout, "UTF-8");
}
final Function<List<Point2D.Double>, String> points2str = (L) -> L.stream().map(S -> format(S.getX()) + "," + format(S.getY())).collect(Collectors.joining(" "));
final Consumer<List<Point2D.Double>> simplifyPoints = (L) -> {
for (int z = 0; z + 1 < L.size(); ++z) {
if (L.get(z).getY() == L.get(z + 1).getY()) {
L.get(z).x = L.get(z + 1).x;
L.remove(z + 1);
}
}
};
w.writeStartDocument("UTF-8", "1.0");
final Dimension dim = new Dimension(this.drawinAreaWidth, 0);
final int bed_header_height = 20;
dim.height += bed_header_height;
dim.height += (int) this.bamInputs.stream().mapToDouble(B -> B.getPixelHeight()).sum();
LOG.debug("drawing area: " + dim);
w.writeStartElement("svg");
w.writeAttribute("width", String.valueOf(dim.width));
w.writeAttribute("height", String.valueOf(dim.height));
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
// https://stackoverflow.com/questions/15717970
w.writeStartElement("style");
if (this.cssFile != null) {
w.writeCharacters(IOUtil.slurp(this.cssFile));
} else {
w.writeCharacters("g.maing {stroke:black;stroke-width:0.5px;fill:whitesmoke;font-size:10pt;}\n" + "text.sampleLabel {stroke:none;stroke-width:0.5px;fill:blue;}" + "text.captureLabel {stroke:none;stroke-width:0.5px;fill:slategrey;text-anchor:middle;}" + "polygon.area {stroke:darkgray;stroke-width:0.5px;fill:url('#grad01');}" + "line.linedp {stroke:darkcyan;stroke-width:0.3px;opacity:0.4;}" + "text.linedp {fill-opacity:0.6;font-size:7px;stroke:none;stroke-width:0.5px;fill:darkcyan;}" + "rect.sampleFrame { fill:none;stroke:slategray;stroke-width:0.3px;}" + "rect.clickRgn {fill:none;stroke:none;pointer-events:all;}" + "polyline.gc {stroke:lightcoral;stroke-width:0.3px;fill:none;}" + "polyline.clipping {stroke:orange;stroke-width:0.8px;fill:none;}" + "circle.ar {fill:orange;stroke:none;}" + "circle.aa {fill:red;stroke:none;}" + "circle.rr {fill:green;stroke:none;}");
}
// style
w.writeEndElement();
w.writeStartElement("title");
w.writeCharacters(this.domSvgTitle);
w.writeEndElement();
w.writeStartElement("defs");
// alleles
final double genotype_radius = Double.parseDouble(this.dynaParams.getOrDefault("gt.radius", "1.5"));
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "rr");
w.writeAttribute("class", "rr");
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "ar");
w.writeAttribute("class", "ar");
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "aa");
w.writeAttribute("class", "aa");
// gradient
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:lightgray;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:gray;stop-opacity:1;");
w.writeEndElement();
// gc percent
for (final CaptureInterval ci : userIntervals) {
final GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ci.getContig());
final int gc_percent_width = (int) ci.getPixelWidth();
final List<Point2D.Double> points = new ArrayList<>(gc_percent_width);
for (int x = 0; x < gc_percent_width; ++x) {
int pos1 = ci.getStart() + (int) (((x + 0) / ci.getPixelWidth()) * ci.getLengthOnReference());
int pos2 = ci.getStart() + (int) (((x + 1) / ci.getPixelWidth()) * ci.getLengthOnReference());
double gc_percent = getGcPercent(genomicSequence, pos1, pos2);
double y = this.sampleTrackHeight - this.sampleTrackHeight * gc_percent;
points.add(new Point2D.Double(x, y));
}
simplifyPoints.accept(points);
w.writeStartElement("polyline");
w.writeAttribute("class", "gc");
w.writeAttribute("id", "z" + ci.getId());
w.writeAttribute("points", points2str.apply(points));
w.writeStartElement("title");
w.writeCharacters("GC %");
w.writeEndElement();
w.writeEndElement();
}
// defs
w.writeEndElement();
w.writeStartElement("script");
final StringBuilder openBrowserFunction = new StringBuilder("function openGenomeBrowser(contig,chromStart,chromEnd) {\n");
if (!this.hyperlinkType.isEmpty()) {
openBrowserFunction.append("var url=\"" + this.hyperlinkType.getPattern() + "\".replace(/__CHROM__/g,contig).replace(/__START__/g,chromStart).replace(/__END__/g,chromEnd);\n");
openBrowserFunction.append("window.open(url,'_blank');\n");
} else {
// nothing
}
openBrowserFunction.append("}\n");
w.writeCData(openBrowserFunction.toString() + "function clicked(evt,contig,chromStart,chromEnd){\n" + " var e = evt.target;\n" + " var dim = e.getBoundingClientRect();\n" + " var x = 1.0 * evt.clientX - dim.left;\n" + " var cLen = 1.0* (chromEnd - chromStart); if(cLen<1) cLen=1.0;\n" + " var pos1 = chromStart + parseInt(((x+0)/dim.width)*cLen);\n" + " var pos2 = chromStart + parseInt(((x+1)/dim.width)*cLen);\n" + " openGenomeBrowser(contig,pos1,pos2);\n" + "}\n");
// script
w.writeEndElement();
w.writeStartElement("g");
w.writeAttribute("class", "maing");
int y = 0;
w.writeStartElement("g");
w.writeComment("interval background");
for (final CaptureInterval ci : userIntervals) {
w.writeStartElement("text");
w.writeAttribute("class", "captureLabel");
w.writeAttribute("textLength", String.valueOf(ci.getPixelWidth() * 0.8));
w.writeAttribute("lengthAdjust", "spacing");
w.writeAttribute("x", String.valueOf(ci.getPixelX1() + ci.getPixelWidth() / 2.0));
w.writeAttribute("y", String.valueOf(bed_header_height - 2));
w.writeCharacters(ci.getName());
w.writeStartElement("title");
w.writeCharacters(ci.toNiceString());
// title
w.writeEndElement();
// text
w.writeEndElement();
w.writeStartElement("rect");
w.writeAttribute("style", "stroke:black;fill:none;");
w.writeAttribute("x", String.valueOf(ci.getPixelX1()));
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("height", String.valueOf(dim.height));
w.writeStartElement("title");
w.writeCharacters(ci.getName());
w.writeEndElement();
w.writeEndElement();
}
// interval background
w.writeEndElement();
y += bed_header_height;
for (final BamInput bi : this.bamInputs) {
w.writeComment(bi.bamPath.toString());
w.writeStartElement("g");
w.writeAttribute("transform", "translate(0," + y + ")");
if (normalize_on_median_flag) {
final double medianDepth = Math.max(1.0, Percentile.median().evaluate(bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).toArray()).orElse(1.0));
LOG.info("median" + medianDepth);
for (final double[] coverage_array : bi.coverages) {
for (int px = 0; px < coverage_array.length; px++) {
coverage_array[px] /= medianDepth;
}
}
}
final double maxDepth = bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).max().orElse(1.0);
for (int ridx = 0; ridx < userIntervals.size(); ridx++) {
final CaptureInterval ci = userIntervals.get(ridx);
final String clickedAttribute = "clicked(evt,\"" + ci.getContig() + "\"," + ci.getStart() + "," + ci.getEnd() + ")";
final double[] coverage_array = bi.coverages.get(ridx);
final double leftx = ci.getPixelX1();
w.writeStartElement("g");
w.writeAttribute("transform", "translate(" + leftx + ",0)");
final int segment_width = (int) ci.getPixelWidth();
// coverage
{
final List<Point2D.Double> points = new ArrayList<>(segment_width);
points.add(new Point2D.Double(0, bi.getPixelHeight()));
for (int px = 0; px < coverage_array.length; px++) {
final double y_avg_cov = coverage_array[px];
final double new_y = bi.getPixelHeight() - (y_avg_cov / maxDepth) * bi.getPixelHeight();
points.add(new Point2D.Double(px, new_y));
}
points.add(new Point2D.Double(ci.getPixelWidth(), bi.getPixelHeight()));
simplifyPoints.accept(points);
// close
points.add(new Point2D.Double(leftx, bi.getPixelHeight()));
w.writeEmptyElement("polygon");
w.writeAttribute("class", "area");
w.writeAttribute("title", ci.toNiceString());
// w.writeAttribute("onclick", clickedAttribute);
w.writeAttribute("points", points2str.apply(points));
}
// w.writeEndElement();//g
int depthshift = 1;
for (; ; ) {
final int numdiv = (int) (maxDepth / depthshift);
if (numdiv <= 10)
break;
depthshift *= 10;
}
int depth = depthshift;
while (depth < maxDepth) {
double new_y = bi.getPixelHeight() - (depth / maxDepth) * bi.getPixelHeight();
w.writeStartElement("text");
w.writeAttribute("class", "linedp");
w.writeAttribute("x", "1");
w.writeAttribute("y", String.valueOf(new_y + 1));
w.writeCharacters(String.valueOf(depth));
// text
w.writeEndElement();
w.writeStartElement("line");
w.writeAttribute("class", "linedp");
w.writeAttribute("stroke-dasharray", "4");
w.writeAttribute("x1", "0");
w.writeAttribute("x2", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("y1", String.valueOf(new_y));
w.writeAttribute("y2", String.valueOf(new_y));
w.writeStartElement("title");
w.writeCharacters(String.valueOf(depth));
w.writeEndElement();
// line
w.writeEndElement();
depth += depthshift;
}
// polyline
w.writeEmptyElement("use");
w.writeAttribute("xlink", XLINK.NS, "href", "#z" + ci.getId());
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
// click
w.writeStartElement("rect");
w.writeAttribute("class", "clickRgn");
w.writeAttribute("onclick", clickedAttribute);
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
w.writeEndElement();
// genotype
if (vcfReader != null) {
try (CloseableIterator<VariantContext> iter = vcfReader.query(ci)) {
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
final Genotype gt = ctx.getGenotype(bi.sample);
if (gt == null)
break;
String allele_id = null;
switch(gt.getType()) {
case HET:
allele_id = "ar";
break;
case HOM_REF:
allele_id = "rr";
break;
case HOM_VAR:
allele_id = "aa";
break;
default:
allele_id = null;
break;
}
if (allele_id != null) {
w.writeEmptyElement("use");
w.writeAttribute("xlink", XLINK.NS, "href", "#" + allele_id);
w.writeAttribute("x", format(((ctx.getStart() - ci.getStart()) / (double) ci.getLengthOnReference()) * ci.getPixelWidth()));
w.writeAttribute("y", format(bi.getPixelHeight() - 2 * genotype_radius));
}
}
}
}
// g
w.writeEndElement();
}
// frame for this sample
w.writeStartElement("rect");
w.writeAttribute("class", "sampleFrame");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(dim.width));
w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
// rect
w.writeEndElement();
w.writeStartElement("text");
w.writeAttribute("class", "sampleLabel");
w.writeAttribute("x", "5");
w.writeAttribute("y", "12");
w.writeStartElement("title");
w.writeCharacters(bi.bamPath.toString());
w.writeEndElement();
w.writeCharacters(bi.sample);
// text
w.writeEndElement();
// g
w.writeEndElement();
y += bi.getPixelHeight();
}
w.writeStartElement("g");
w.writeComment("interval lines");
for (int n = 0; n <= userIntervals.size(); n++) {
w.writeEmptyElement("line");
String x1 = n < userIntervals.size() ? String.valueOf(userIntervals.get(n).getPixelX1()) : String.valueOf(dim.width);
w.writeAttribute("x1", x1);
w.writeAttribute("y1", "0");
w.writeAttribute("x2", x1);
w.writeAttribute("y2", String.valueOf(dim.height));
}
// interval lines
w.writeEndElement();
// g
w.writeEndElement();
// svg
w.writeEndElement();
w.writeEndDocument();
w.flush();
w.close();
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfReader);
CloserUtil.close(w);
CloserUtil.close(fout);
CloserUtil.close(r);
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.bamInputs);
}
}
Aggregations