use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class MantaMerger method doWork.
@Override
public int doWork(final List<String> args) {
VariantContextWriter out = null;
try {
final Map<String, VcfInput> sample2inputs = new TreeMap<>();
SAMSequenceDictionary dict = null;
final List<String> lines;
if (args.size() == 1 && args.get(0).endsWith(".list")) {
lines = Files.lines(Paths.get(args.get(0))).filter(L -> !(StringUtils.isBlank(L) || L.startsWith("#"))).collect(Collectors.toList());
} else {
lines = args;
}
for (final String line : lines) {
final String[] tokens = CharSplitter.TAB.split(line);
final VcfInput vcfInput = new VcfInput();
vcfInput.vcfPath = Paths.get(tokens[0]);
IOUtil.assertFileIsReadable(vcfInput.vcfPath);
final SAMSequenceDictionary dict1 = SequenceDictionaryUtils.extractRequired(vcfInput.vcfPath);
if (dict == null) {
dict = dict1;
} else if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict1)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict1, dict);
}
if (tokens.length < 2 || StringUtils.isBlank(tokens[1])) {
try (VCFReader r = VCFReaderFactory.makeDefault().open(vcfInput.vcfPath, false)) {
List<String> snl = r.getHeader().getSampleNamesInOrder();
if (snl.size() == 1) {
vcfInput.sample = snl.get(0);
} else {
vcfInput.sample = vcfInput.vcfPath.toString();
}
}
} else {
vcfInput.sample = tokens[1];
}
if (sample2inputs.containsKey(vcfInput.sample)) {
LOG.error("duplicate sample " + vcfInput.sample);
return -1;
}
sample2inputs.put(vcfInput.sample, vcfInput);
}
if (sample2inputs.isEmpty()) {
LOG.error("no input found");
return -1;
}
if (!StringUtils.isBlank(this.limitContig) && dict.getSequence(this.limitContig) == null) {
LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(this.limitContig, dict));
return -1;
}
final Predicate<VariantContext> bedPredicate;
if (this.excludeBedPath != null) {
final BedLineCodec codec = new BedLineCodec();
final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
final IntervalTreeMap<Boolean> map = new IntervalTreeMap<>();
try (BufferedReader br = IOUtils.openPathForBufferedReading(this.excludeBedPath)) {
br.lines().filter(L -> !BedLine.isBedHeader(L)).map(L -> codec.decode(L)).filter(L -> L != null).filter(B -> !StringUtils.isBlank(converter.apply(B.getContig()))).map(B -> new Interval(converter.apply(B.getContig()), B.getStart(), B.getEnd())).forEach(R -> map.put(R, true));
}
bedPredicate = V -> !map.containsOverlapping(V);
} else {
bedPredicate = V -> true;
}
final Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
final VCFInfoHeaderLine infoSvLen = new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation length");
metaData.add(infoSvLen);
final VCFInfoHeaderLine infoNSamples = new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples");
metaData.add(infoNSamples);
final VCFInfoHeaderLine infoSamples = new VCFInfoHeaderLine("SAMPLES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "amples");
metaData.add(infoSamples);
final VCFFilterHeaderLine filterSameNext = new VCFFilterHeaderLine("NEXT", "next variant in VCF is the same.");
metaData.add(filterSameNext);
final VCFFilterHeaderLine filterSamePrev = new VCFFilterHeaderLine("PREV", "next variant in VCF is the same.");
metaData.add(filterSamePrev);
final VCFHeader header = new VCFHeader(metaData, sample2inputs.keySet());
header.setSequenceDictionary(dict);
JVarkitVersion.getInstance().addMetaData(this, header);
out = VCFUtils.createVariantContextWriterToPath(this.outputFile);
out.writeHeader(header);
final Decoy decoy = Decoy.getDefaultInstance();
for (final SAMSequenceRecord ssr : dict.getSequences()) {
if (!StringUtils.isBlank(this.limitContig)) {
if (!ssr.getSequenceName().equals(this.limitContig))
continue;
}
LOG.info("contig " + ssr.getSequenceName());
if (decoy.isDecoy(ssr.getSequenceName()))
continue;
final Map<SVKey, Set<String>> variants2samples = new HashMap<>();
for (final VcfInput vcfinput : sample2inputs.values()) {
// reset count for this contig
vcfinput.contigCount = 0;
try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true)) {
vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength()).stream().filter(V -> discard_bnd == false || !V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND")).filter(bedPredicate).map(V -> new VariantContextBuilder(V).unfiltered().noID().noGenotypes().rmAttribute("EVENT").rmAttribute("HOMSEQ").rmAttribute("HOMLEN").rmAttribute("SVINSSEQ").rmAttribute("SVINSLEN").rmAttribute("MATEID").rmAttribute("LEFT_SVINSSEQ").rmAttribute("RIGHT_SVINSSEQ").rmAttribute("BND_DEPTH").rmAttribute("MATE_BND_DEPTH").rmAttribute("JUNCTION_QUAL").rmAttribute("CIGAR").make()).forEach(V -> {
final SVKey key1 = new SVKey(V);
if (!svComparator.test(V, V))
throw new RuntimeException("compare to self failed ! " + V);
variants2samples.put(key1, new HashSet<>());
vcfinput.contigCount++;
});
}
}
if (variants2samples.isEmpty())
continue;
// build an interval tree for a faster access
final IntervalTree<SVKey> intervalTree = new IntervalTree<>();
for (final SVKey key : variants2samples.keySet()) {
final SimpleInterval r = new SimpleInterval(key.archetype).extend(this.svComparator.getBndDistance() + 1);
intervalTree.put(r.getStart(), r.getEnd(), key);
// paranoidcheck interval is ok to find current archetype
boolean found = false;
final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
while (nodeIter.hasNext()) {
final SVKey key1 = nodeIter.next().getValue();
if (this.svComparator.test(key1.archetype, key.archetype)) {
found = true;
break;
}
}
if (!found) {
out.close();
throw new RuntimeException("cannot find self " + key.archetype + " in " + r);
}
}
for (final VcfInput vcfinput : sample2inputs.values()) {
try (VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfinput.vcfPath, true)) {
final CloseableIterator<VariantContext> iter = vcfFileReader.query(ssr.getSequenceName(), 1, ssr.getSequenceLength());
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (this.discard_bnd && ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
continue;
if (!bedPredicate.test(ctx))
continue;
final SimpleInterval r = new SimpleInterval(ctx).extend(this.svComparator.getBndDistance() + 1);
final Iterator<IntervalTree.Node<SVKey>> nodeIter = intervalTree.overlappers(r.getStart(), r.getEnd());
while (nodeIter.hasNext()) {
final SVKey key1 = nodeIter.next().getValue();
if (!this.svComparator.test(key1.archetype, ctx))
continue;
final Set<String> samples = variants2samples.get(key1);
samples.add(vcfinput.sample);
}
}
iter.close();
}
}
final Comparator<VariantContext> sorter = new ContigDictComparator(dict).createLocatableComparator();
final List<SVKey> orderedKeys = variants2samples.keySet().stream().filter(// no samples for this key ??!
K -> !variants2samples.get(K).isEmpty()).sorted((A, B) -> sorter.compare(A.archetype, B.archetype)).collect(Collectors.toCollection(ArrayList::new));
// remove duplicates
int i = 0;
while (i + 1 < orderedKeys.size()) {
final SVKey key1 = orderedKeys.get(i);
final SVKey key2 = orderedKeys.get(i + 1);
if (svComparator.test(key1.archetype, key2.archetype) && // same samples
variants2samples.get(key1).equals(variants2samples.get(key2))) {
orderedKeys.remove(i + 1);
} else {
i++;
}
}
for (int key_index = 0; key_index < orderedKeys.size(); key_index++) {
final SVKey key = orderedKeys.get(key_index);
final Set<String> samples = variants2samples.get(key);
final Allele refAllele = key.archetype.getReference();
final Allele altAllele = Allele.create("<SV>", false);
final Object svType = key.archetype.getAttribute(VCFConstants.SVTYPE, ".");
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(key.archetype.getContig());
vcb.start(key.archetype.getStart());
vcb.stop(key.archetype.getEnd());
vcb.log10PError(key.archetype.getLog10PError());
vcb.alleles(Arrays.asList(refAllele, altAllele));
vcb.attribute(VCFConstants.END_KEY, key.archetype.getEnd());
vcb.attribute(VCFConstants.SVTYPE, svType);
vcb.attribute(infoSvLen.getID(), (svType.equals("DEL") ? -1 : 1) * key.archetype.getLengthOnReference());
vcb.attribute(infoNSamples.getID(), samples.size());
vcb.attribute(infoSamples.getID(), samples.stream().sorted().collect(Collectors.toList()));
int ac = 0;
final List<Genotype> genotypes = new ArrayList<>(sample2inputs.size());
for (final String sn : sample2inputs.keySet()) {
List<Allele> gta;
if (samples.contains(sn)) {
ac++;
gta = Arrays.asList(refAllele, altAllele);
} else {
gta = Arrays.asList(refAllele, refAllele);
}
genotypes.add(new GenotypeBuilder(sn, gta).make());
}
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, ac);
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, sample2inputs.size() * 2);
if (ac > 0) {
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, ac / (float) sample2inputs.size() * 2);
}
if (key_index > 0 && svComparator.test(key.archetype, orderedKeys.get(key_index - 1).archetype)) {
vcb.filter(filterSamePrev.getID());
}
if (key_index + 1 < orderedKeys.size() && svComparator.test(key.archetype, orderedKeys.get(key_index + 1).archetype)) {
System.err.println("SAME\n" + key.archetype + "\n" + orderedKeys.get(key_index + 1).archetype);
vcb.filter(filterSameNext.getID());
}
vcb.genotypes(genotypes);
out.add(vcb.make());
}
}
out.close();
out = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CoverageServer method saveComment.
private void saveComment(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
final SimpleInterval region = parseInterval(request.getParameter("interval"));
final String comment = request.getParameter("comment");
if (this.commentPath == null || region == null || StringUtils.isBlank(comment)) {
LOG.info("region " + region + "." + request.getParameter("interval"));
LOG.info("comment " + comment + ".");
response.reset();
response.sendError(HttpStatus.BAD_REQUEST_400, "bad request");
response.flushBuffer();
return;
}
try (BufferedWriter pw = Files.newBufferedWriter(this.commentPath, StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
pw.append(region.getContig());
pw.append("\t");
pw.append(String.valueOf(region.getStart() - 1));
pw.append("\t");
pw.append(String.valueOf(region.getEnd()));
pw.append("\t");
pw.append(StringUtils.normalizeSpaces(comment));
pw.append("\t");
pw.append(StringUtils.ifBlank(this.named_intervals.stream().filter(I -> new SimpleInterval(I).equals(region)).map(R -> R.getName()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.joining("; ")), "."));
pw.append("\t");
pw.append(StringUtils.now());
pw.append('\n');
pw.flush();
response.setContentType("text/plain");
PrintWriter w = response.getWriter();
w.append("OK. Comment saved.");
w.flush();
return;
} catch (final IOException err) {
LOG.error(err);
response.reset();
response.setContentType("text/plain");
PrintWriter w = response.getWriter();
w.append("Error. Cannot save comment (" + err.getMessage() + ")");
w.flush();
return;
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CoverageServer method printPage.
/**
* print HTML page
*/
private void printPage(final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException {
String message = "";
final String charset = StringUtils.ifBlank(request.getCharacterEncoding(), "UTF-8");
response.setContentType("text/html; charset=" + charset.toLowerCase());
response.setCharacterEncoding(charset);
PrintWriter pw = response.getWriter();
SimpleInterval interval = null;
int columns_count = this.images_per_row;
if (!StringUtils.isBlank(request.getParameter("columns"))) {
columns_count = Math.max(1, StringUtils.parseInt(request.getParameter("columns")).orElse(this.images_per_row));
}
final boolean normalize = request.getParameter("normalize") != null;
if (!StringUtils.isBlank(request.getParameter("custominterval"))) {
interval = parseInterval(request.getParameter("custominterval"));
if (interval == null) {
message += " Cannot parse user interval '" + request.getParameter("custominterval") + "'.";
}
}
if (interval == null && !StringUtils.isBlank(request.getParameter("interval"))) {
interval = parseInterval(request.getParameter("interval"));
if (interval == null) {
message += " Cannot parse interval. using default: " + interval + ".";
}
}
if (interval == null && !this.named_intervals.isEmpty()) {
/* first non reviewed */
interval = this.named_intervals.stream().filter(R -> !R.reviewed).findFirst().map(R -> new SimpleInterval(R)).orElse(null);
/* all reviewed ? */
if (interval == null)
interval = new SimpleInterval(this.named_intervals.get(0));
}
/* still no interval ?*/
if (interval == null) {
final SAMSequenceRecord ssr = this.dictionary.getSequence(0);
interval = new SimpleInterval(ssr.getSequenceName(), 1, Math.min(ssr.getSequenceLength(), 100));
}
if (!StringUtils.isBlank(request.getParameter("move"))) {
final String value = request.getParameter("move");
double factor;
switch(value.length()) {
case 0:
factor = 0;
break;
case 1:
factor = 0.1;
break;
case 2:
factor = 0.475;
break;
default:
factor = 0.95;
break;
}
int length0 = interval.getLengthOnReference();
int length1 = (int) (length0 * factor);
int shift = (value.startsWith(">") ? 1 : -1) * length1;
int start = interval.getStart() + shift;
int end = interval.getEnd() + shift;
if (start < 1)
start = 1;
final SAMSequenceRecord ssr = this.dictionary.getSequence(interval.getContig());
if (ssr != null && end >= ssr.getSequenceLength()) {
end = ssr.getSequenceLength();
}
if (ssr != null && start >= ssr.getSequenceLength()) {
start = ssr.getSequenceLength();
}
interval = new SimpleInterval(interval.getContig(), start, end);
}
/* ZOOM buttons */
for (int side = 0; side < 2; ++side) {
final String param = "zoom" + (side == 0 ? "in" : "out");
String value = request.getParameter(param);
if (StringUtils.isBlank(value))
continue;
double factor = Double.parseDouble(value);
if (side == 0)
factor = 1.0 / factor;
int length0 = interval.getLengthOnReference();
int length1 = (int) (length0 * factor);
if (length1 < 1)
length1 = 1;
if (length1 > this.max_window_size)
length1 = this.max_window_size;
int mid = interval.getStart() + length0 / 2;
int start = mid - length1 / 2;
if (start < 1)
start = 1;
int end = mid + length1 / 2;
final SAMSequenceRecord ssr = this.dictionary.getSequence(interval.getContig());
if (ssr != null && end >= ssr.getSequenceLength()) {
end = ssr.getSequenceLength();
}
interval = new SimpleInterval(interval.getContig(), start, end);
break;
}
final String title = interval.toNiceString() + " (" + StringUtils.niceInt(interval.getLengthOnReference()) + " bp.)";
try {
final XMLStreamWriter w = XMLOutputFactory.newFactory().createXMLStreamWriter(pw);
w.writeStartElement("html");
w.writeStartElement("head");
w.writeEmptyElement("meta");
w.writeAttribute("charset", charset);
w.writeStartElement("title");
w.writeCharacters(title);
// title
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("body {background-color:#f0f0f0;color:#070707;font-size:18px;}" + "h1 {text-align:center;color:#070707;}" + ".grid-container {display: grid; grid-template-columns: " + IntStream.range(0, Math.max(1, columns_count)).mapToObj(X -> "auto").collect(Collectors.joining(" ")) + ";grid-gap: 10px; padding: 10px;}" + ".span1 {border:1px dotted blue;}" + ".lbl {font-weight: bold;}" + ".bampath {font-family:monospace;font-size:12px; font-style: normal;color:gray;}" + ".headerform {background-color:lavender;text-align:center;font-size:14px;}" + ".comment {background-color:khaki;text-align:center;font-size:14px;}" + ".highlight {background-color:#DB7093;}" + ".parents {font-size:75%;color:gray;}" + ".children {font-size:75%;color:gray;}" + ".allsamples {font-size:125%;}" + ".message {color:red;}" + ".affected {background-color:#e6cccc;}" + ".gtf {background-color:moccasin;text-align:center;}" + ".known {background-color:wheat;text-align:center;}");
// title
w.writeEndElement();
w.writeStartElement("script");
{
w.writeCharacters("");
w.flush();
/* messy with < and > characters + xmlstream */
pw.write("function highlight(name) {" + "var e = document.getElementById(name+\"_div\"); if(e==null) return;" + "e.classList.add(\"highlight\");" + "setTimeout(function () {e.classList.remove(\"highlight\");},2000);" + "}" + "function sendComment() {" + "var commentstr=document.getElementById(\"comment\").value;" + "console.log(\"comment is\"+commentstr);" + "if(commentstr.trim().length==0) return;" + "var xhttp = new XMLHttpRequest();" + "xhttp.onreadystatechange = function() {if (this.readyState == 4) if(this.status == 200) { alert(this.responseText);} else alert(\"Cannot send Comment.\")};" + "xhttp.open(\"GET\",\"/comment?interval=" + StringUtils.escapeHttp(interval.toString()) + "&comment=\"+encodeURI(commentstr), true);" + "xhttp.send();" + "}" + "function loadImage(idx) {" + "if(idx>=" + this.bamInput.size() + ") return;" + "var img = document.getElementById(\"bamid\"+idx);" + "img.addEventListener('load',(event) => {img.width=" + image_width + ";img.height=" + image_height + ";loadImage(idx+1);});" + "img.setAttribute(\"src\",\"/getimage?id=\"+idx+\"&interval=" + StringUtils.escapeHttp(interval.toString()) + (normalize ? "&normalize=1" : "") + "\");" + "img.setAttribute(\"alt\",\"bam idx\"+idx);" + "}" + "function init() {" + "var span=document.getElementById(\"spaninterval\");" + "if(span!=null) span.addEventListener('click',(evt)=>{document.getElementById(\"custominterval\").value = span.textContent; });" + "var sel=document.getElementById(\"selinterval\");" + "if(sel!=null) sel.addEventListener('change',(evt)=>{document.getElementById(\"custominterval\").value = evt.target.value; });" + "var cbox=document.getElementById(\"norminput\");" + "if(cbox!=null) cbox.addEventListener('change',(evt)=>{cbox.form.submit(); });" + "var comment=document.getElementById(\"commentbtn\");" + "if(comment!=null) comment.addEventListener('click',(evt)=>{ console.log(\"send comment\");sendComment(); });" + "var shortcuts=document.getElementById(\"shortcuts\");" + "if(shortcuts!=null) shortcuts.addEventListener('change',(evt)=>{document.getElementById(\"comment\").value += evt.target.value; });" + "loadImage(0);" + "}" + "window.addEventListener('load', (event) => {init();});");
pw.flush();
}
// script
w.writeEndElement();
// head
w.writeEndElement();
w.writeStartElement("body");
w.writeStartElement("h1");
w.writeCharacters(SequenceDictionaryUtils.getBuildName(this.dictionary).orElse("") + " ");
final String outlinkurl = hyperlink.apply(interval);
if (StringUtils.isBlank(outlinkurl)) {
w.writeCharacters(title);
} else {
w.writeStartElement("a");
w.writeAttribute("href", outlinkurl);
w.writeAttribute("target", "_blank");
w.writeAttribute("title", title);
w.writeCharacters(title);
w.writeEndElement();
}
// h1
w.writeEndElement();
if (!StringUtils.isBlank(message)) {
w.writeStartElement("h2");
w.writeAttribute("class", "message");
w.writeCharacters(message);
// h2
w.writeEndElement();
}
w.writeEmptyElement("a");
w.writeAttribute("name", "top");
w.writeComment("BEGIN FORM");
w.writeStartElement("div");
w.writeAttribute("class", "headerform");
w.writeStartElement("form");
w.writeAttribute("method", "GET");
w.writeAttribute("action", "/page");
w.writeEmptyElement("input");
w.writeAttribute("name", "interval");
w.writeAttribute("value", interval.toString());
w.writeAttribute("type", "hidden");
/* number of images per row */
if (!StringUtils.isBlank(request.getParameter("columns"))) {
w.writeEmptyElement("input");
w.writeAttribute("name", "columns");
w.writeAttribute("value", String.valueOf(columns_count));
w.writeAttribute("type", "hidden");
}
/* write select box with predefined interval */
if (!this.named_intervals.isEmpty()) {
w.writeStartElement("select");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "selinterval");
w.writeEmptyElement("option");
for (int side = 0; side < 2; ++side) {
for (final ReviewedInterval r : this.named_intervals) {
if (side == 0 && r.reviewed)
continue;
if (side == 1 && !r.reviewed)
continue;
final SimpleInterval simple = new SimpleInterval(r);
w.writeStartElement("option");
w.writeAttribute("value", simple.toString());
if (simple.equals(interval)) {
r.reviewed = true;
w.writeAttribute("selected", "true");
}
if (r.reviewed) {
w.writeEntityRef("#x2713");
w.writeCharacters(" ");
}
w.writeCharacters(simple.toNiceString() + (StringUtils.isBlank(r.getName()) ? "" : " [" + r.getName() + "]"));
w.writeEndElement();
w.writeCharacters("\n");
}
}
// select
w.writeEndElement();
}
w.writeEmptyElement("br");
//
/* move */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("move");
w.writeEndElement();
for (final String mv : new String[] { "<<<", "<<", "<", ">", ">>", ">>>" }) {
w.writeEmptyElement("input");
w.writeAttribute("class", "btn");
w.writeAttribute("type", "submit");
w.writeAttribute("name", "move");
w.writeAttribute("value", mv);
}
/* zoom in */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("zoom in");
w.writeEndElement();
for (final String zoom : new String[] { "1.5", "3", "10" }) {
w.writeEmptyElement("input");
w.writeAttribute("class", "btn");
w.writeAttribute("type", "submit");
w.writeAttribute("name", "zoomin");
w.writeAttribute("value", zoom);
}
/* zoom in */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("zoom out");
w.writeEndElement();
for (final String zoom : new String[] { "1.5", "3", "10", "100" }) {
w.writeEmptyElement("input");
w.writeAttribute("type", "submit");
w.writeAttribute("class", "btn");
w.writeAttribute("name", "zoomout");
w.writeAttribute("value", zoom);
}
/* checkbox normalize */
w.writeEmptyElement("input");
w.writeAttribute("id", "norminput");
w.writeAttribute("name", "normalize");
w.writeAttribute("type", "checkbox");
if (normalize)
w.writeAttribute("checked", "true");
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeAttribute("for", "norminput");
w.writeCharacters("normalize");
w.writeEndElement();
w.writeEmptyElement("br");
w.writeStartElement("span");
w.writeAttribute("class", "span1");
w.writeAttribute("id", "spaninterval");
w.writeCharacters(interval.toNiceString());
// span
w.writeEndElement();
w.writeEmptyElement("input");
w.writeAttribute("name", "custominterval");
w.writeAttribute("id", "custominterval");
w.writeAttribute("type", "text");
w.writeAttribute("value", "");
w.writeStartElement("button");
w.writeAttribute("class", "btn");
w.writeAttribute("name", "parseinterval");
w.writeCharacters("GO");
// button
w.writeEndElement();
// form
w.writeEndElement();
// div
w.writeEndElement();
if (this.commentPath != null) {
w.writeStartElement("div");
w.writeAttribute("class", "comment");
w.writeStartElement("label");
w.writeAttribute("for", "comment");
w.writeCharacters("Comment:");
w.writeEndElement();
w.writeEmptyElement("input");
w.writeAttribute("id", "comment");
w.writeAttribute("type", "text");
w.writeAttribute("placeholder", "Comment about " + interval.toNiceString());
w.writeStartElement("button");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "commentbtn");
w.writeCharacters("Send comment");
// button
w.writeEndElement();
w.writeCharacters(" ");
w.writeStartElement("select");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "shortcuts");
w.writeEmptyElement("option");
for (final String opt : new String[] { "OK", "BAD", "Noisy", "False positive", "Deletion", "Later" }) {
w.writeStartElement("option");
w.writeAttribute("value", " " + opt + ".");
w.writeCharacters(opt);
w.writeEndElement();
}
// select
w.writeEndElement();
// div
w.writeEndElement();
}
w.writeComment("END FORM");
if (this.gtfFile != null) {
w.writeStartElement("div");
w.writeAttribute("class", "gtf");
getGenes(interval).filter(G -> G.getType().equals("gene")).forEach(gtfLine -> {
try {
String key = null;
if (gtfLine.getAttributes().containsKey("gene_id")) {
key = gtfLine.getAttribute("gene_id");
}
if (gtfLine.getAttributes().containsKey("gene_name")) {
key = gtfLine.getAttribute("gene_name");
}
if (StringUtils.isBlank(key))
return;
final String url = "https://www.ncbi.nlm.nih.gov/gene/?term=" + StringUtils.escapeHttp(key);
w.writeStartElement("a");
w.writeAttribute("href", url);
w.writeAttribute("target", "_blank");
w.writeAttribute("title", key);
w.writeCharacters(key);
w.writeEndElement();
w.writeCharacters(" ");
} catch (final XMLStreamException err) {
}
});
// div
w.writeEndElement();
}
if (this.knownCnvFile != null) {
final Interval final_interval = new Interval(interval);
final ToDoubleFunction<Interval> getOvelap = R -> {
if (!R.overlaps(final_interval))
return 0;
final double L1 = R.getIntersectionLength(final_interval);
final double r1 = L1 / R.getLengthOnReference();
final double r2 = L1 / final_interval.getLengthOnReference();
return Double.min(r1, r2);
};
w.writeStartElement("div");
w.writeAttribute("class", "known");
w.writeStartElement("label");
w.writeCharacters("Known: ");
w.writeEndElement();
getKnownCnv(interval).sorted((A, B) -> Double.compare(getOvelap.applyAsDouble(B), (getOvelap.applyAsDouble(A)))).limit(100).forEach(R -> {
try {
w.writeStartElement("span");
w.writeCharacters(new SimpleInterval(R).toNiceString());
if (!StringUtils.isBlank(R.getName())) {
w.writeCharacters(" ");
w.writeCharacters(R.getName());
}
w.writeEndElement();
w.writeCharacters("; ");
// span
w.writeEndElement();
} catch (final Throwable err) {
LOG.warn(err);
}
});
// div
w.writeEndElement();
}
/* write anchors to samples */
w.writeStartElement("div");
w.writeAttribute("class", "allsamples");
w.writeCharacters("Samples: ");
for (final BamInput bi : this.bamInput.stream().sorted((A, B) -> A.sample.compareTo(B.sample)).collect(Collectors.toList())) {
w.writeStartElement("a");
w.writeAttribute("title", bi.sample);
w.writeAttribute("href", "#" + bi.sample);
w.writeAttribute("onclick", "highlight('" + bi.sample + "');");
w.writeCharacters("[" + bi.sample);
if (pedigree != null) {
final Sample sn = this.pedigree.getSampleById(bi.sample);
if (sn != null && sn.isAffected()) {
w.writeCharacters(" ");
w.writeEntityRef("#128577");
}
}
w.writeCharacters("] ");
w.writeEndElement();
}
// div
w.writeEndElement();
w.writeCharacters("\n");
w.writeStartElement("div");
w.writeAttribute("class", "grid-container");
for (int i = 0; i < this.bamInput.size(); i++) {
final BamInput bamInput = this.bamInput.get(i);
final Path bam = bamInput.bamPath;
final Sample sample = this.pedigree == null ? null : this.pedigree.getSampleById(bamInput.sample);
w.writeStartElement("div");
w.writeAttribute("id", bamInput.sample + "_div");
w.writeAttribute("class", "sample" + (sample != null && sample.isAffected() ? " affected" : ""));
w.writeEmptyElement("a");
w.writeAttribute("name", bamInput.sample);
w.writeStartElement("h3");
w.writeCharacters(bamInput.sample);
w.writeCharacters(" ");
w.writeStartElement("a");
w.writeAttribute("href", "#top");
w.writeAttribute("title", "top");
w.writeCharacters("[^]");
w.writeEndElement();
{
if (sample != null) {
writeSample(w, sample);
w.writeStartElement("span");
w.writeAttribute("class", "parents");
for (int p = 0; p < 2; ++p) {
if (p == 0 && !sample.hasFather())
continue;
if (p == 1 && !sample.hasMother())
continue;
final Sample parent = (p == 0 ? sample.getFather() : sample.getMother());
boolean has_bam = this.bamInput.stream().anyMatch(B -> B.sample.equals(parent.getId()));
w.writeCharacters(" ");
w.writeCharacters(p == 0 ? "Father " : "Mother ");
if (has_bam) {
w.writeStartElement("a");
w.writeAttribute("href", "#" + parent.getId());
w.writeAttribute("title", parent.getId());
w.writeAttribute("onclick", "highlight('" + parent.getId() + "');");
w.writeCharacters("[" + parent.getId() + "].");
w.writeEndElement();
} else {
w.writeCharacters(parent.getId());
}
writeSample(w, parent);
}
w.writeEndElement();
final Set<Sample> children = sample.getChildren();
if (!children.isEmpty()) {
w.writeStartElement("span");
w.writeAttribute("class", "children");
w.writeCharacters(" Has Children :");
for (final Sample child : children) {
boolean has_bam = this.bamInput.stream().anyMatch(B -> B.sample.equals(child.getId()));
w.writeCharacters(" ");
if (has_bam) {
w.writeStartElement("a");
w.writeAttribute("href", "#" + child.getId());
w.writeAttribute("title", child.getId());
w.writeAttribute("onclick", "highlight('" + child.getId() + "');");
w.writeCharacters("[" + child.getId() + "].");
w.writeEndElement();
} else {
w.writeCharacters(child.getId());
}
writeSample(w, child);
}
w.writeEndElement();
}
// end has children
}
}
w.writeEndElement();
w.writeStartElement("div");
w.writeAttribute("class", "bampath");
w.writeCharacters(bam.toString());
w.writeEndElement();
w.writeEmptyElement("img");
w.writeAttribute("id", "bamid" + i);
w.writeAttribute("src", "https://upload.wikimedia.org/wikipedia/commons/d/de/Ajax-loader.gif");
w.writeAttribute("width", "32");
w.writeAttribute("height", "32");
// div
w.writeEndElement();
w.writeCharacters("\n");
}
w.writeEndElement();
w.writeEmptyElement("hr");
w.writeStartElement("div");
w.writeCharacters("Author: Pierre Lindenbaum. ");
w.writeCharacters(JVarkitVersion.getInstance().getLabel());
w.writeEndElement();
// body
w.writeEndElement();
// html
w.writeEndElement();
w.flush();
w.close();
} catch (XMLStreamException err) {
LOG.warn(err);
throw new IOException(err);
} finally {
pw.close();
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CoverageServer method printImage.
private void printImage(final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException {
int bam_id;
try {
bam_id = Integer.parseInt(StringUtils.ifBlank(request.getParameter("id"), "-1"));
} catch (Exception err) {
bam_id = -1;
}
final SimpleInterval midRegion = parseInterval(request.getParameter("interval"));
if (midRegion == null || bam_id < 0 || bam_id >= this.bamInput.size()) {
response.reset();
response.sendError(HttpStatus.BAD_REQUEST_400, "id:" + bam_id);
response.flushBuffer();
return;
}
final int extend = (int) (midRegion.getLengthOnReference() * this.extend_factor);
int xstart = Math.max(midRegion.getStart() - extend, 0);
int xend = midRegion.getEnd() + extend;
final SAMSequenceRecord ssr = this.dictionary.getSequence(midRegion.getContig());
if (ssr != null) {
xend = Math.min(xend, ssr.getSequenceLength());
}
final SimpleInterval region = new SimpleInterval(midRegion.getContig(), xstart, xend);
if (region.getLengthOnReference() > this.max_window_size) {
response.reset();
response.sendError(HttpStatus.BAD_REQUEST_400, "contig:" + midRegion);
response.flushBuffer();
return;
}
final Counter<Arc> sashimiArcs = new Counter<>();
final BamInput bam = this.bamInput.get(bam_id);
if (region.length() <= this.small_region_size) {
printRaster(bam, midRegion, region, request, response);
return;
}
final boolean normalize = request.getParameter("normalize") != null;
final SamReaderFactory srf = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidxRef);
try (SamReader sr = srf.open(bam.bamPath)) {
final int[] int_coverage = new int[region.getLengthOnReference()];
Arrays.fill(int_coverage, 0);
try (CloseableIterator<SAMRecord> iter = sr.query(region.getContig(), region.getStart(), region.getEnd(), false)) {
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (!acceptRead(rec))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
int ref = rec.getAlignmentStart();
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
if (op.consumesReferenceBases()) {
if (this.enable_sashimi && op.equals(CigarOperator.N)) {
sashimiArcs.incr(new Arc(ref, ref + ce.getLength()));
}
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength(); ++x) {
int pos = ref + x;
if (pos < region.getStart())
continue;
if (pos > region.getEnd())
break;
int_coverage[pos - region.getStart()]++;
}
}
ref += ce.getLength();
}
}
}
}
/* smooth coverage */
if (int_coverage.length > image_width) {
final int[] copy = Arrays.copyOf(int_coverage, int_coverage.length);
final int len = Math.max(1, int_coverage.length / 100);
for (int i = 0; i < int_coverage.length; i++) {
int j = Math.max(0, i - len);
double sum = 0;
int count = 0;
while (j < i + len && j < copy.length) {
sum += copy[j];
j++;
count++;
}
int_coverage[i] = (int) (sum / count);
}
}
final double[] norm_coverage = new double[int_coverage.length];
final double median;
/* normalize on median */
if (normalize) {
final Coverage leftrightcov = new Coverage(extend * 2);
for (int x = region.getStart(); x < midRegion.getStart(); x++) {
final int idx = x - region.getStart();
leftrightcov.add(int_coverage[idx]);
}
for (int x = midRegion.getEnd() + 1; x <= region.getEnd(); x++) {
final int idx = x - region.getStart();
leftrightcov.add(int_coverage[idx]);
}
median = Math.max(1.0, leftrightcov.median());
// LOG.info("median is "+median+" "+leftrightcov.median());
for (int x = 0; x < int_coverage.length; ++x) {
norm_coverage[x] = int_coverage[x] / median;
}
} else /* no normalisation */
{
/* won't be used */
median = Double.NaN;
for (int x = 0; x < int_coverage.length; ++x) {
norm_coverage[x] = int_coverage[x];
}
}
final double real_max_cov = DoubleStream.of(norm_coverage).max().orElse(1.0);
final double max_cov = Math.max((normalize ? 2 : 10), real_max_cov);
final double pixelperbase = image_width / (double) norm_coverage.length;
final IntFunction<Double> pos2pixel = POS -> ((POS - region.getStart()) / (double) region.getLengthOnReference()) * image_width;
final BufferedImage img = new BufferedImage(image_width, image_height, BufferedImage.TYPE_INT_RGB);
final Graphics2D g = img.createGraphics();
g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
g.setColor(Color.WHITE);
g.fillRect(0, 0, image_width + 1, image_height + 1);
for (int x = 0; x < norm_coverage.length; ++x) {
final double height = image_height * (norm_coverage[x] / max_cov);
if (normalize)
g.setColor(Color.DARK_GRAY);
else if (max_cov < 10)
g.setColor(Color.RED);
else if (max_cov < 20)
g.setColor(Color.BLUE);
else
g.setColor(Color.DARK_GRAY);
g.fill(new Rectangle2D.Double(x * pixelperbase, image_height - height, pixelperbase, height));
}
g.setColor(Color.DARK_GRAY);
g.drawString("max-cov:" + IntStream.of(int_coverage).max().orElse(0) + (normalize ? " normalized on median (" + median + ")" : "") + " sample:" + bam.sample + " " + region.toNiceString(), 10, 10);
/* ticks for vertical axis */
g.setColor(Color.MAGENTA);
for (int i = 1; i < 10; i++) {
double cov = max_cov / 10.0 * i;
if (!normalize)
cov = Math.ceil(cov);
final double y = image_height - image_height / 10.0 * i;
if (!normalize && i > 0 && (int) cov == Math.ceil(max_cov / 10.0 * (i - 1)))
continue;
g.drawLine(0, (int) y, 5, (int) y);
g.drawString(normalize ? String.format("%.2f", cov) : String.valueOf((int) cov), 7, (int) y);
}
/* vertical line for original view */
g.setColor(Color.PINK);
double vertical = ((midRegion.getStart() - region.getStart()) / (double) region.getLengthOnReference()) * image_width;
g.draw(new Line2D.Double(vertical, 0, vertical, image_height));
vertical = ((midRegion.getEnd() - region.getStart()) / (double) region.getLengthOnReference()) * image_width;
g.draw(new Line2D.Double(vertical, 0, vertical, image_height));
if (normalize) {
/* horizontal line for median 0.5 / 1 / 1.5 */
for (int t = 1; t < 4; ++t) {
g.setColor(t == 2 ? Color.ORANGE : Color.PINK);
final double mediany = image_height - ((0.5 * t) / max_cov) * image_height;
g.draw(new Line2D.Double(0, mediany, image_width, mediany));
}
}
if (this.enable_sashimi && !sashimiArcs.isEmpty()) {
final double max_count = sashimiArcs.getMaxCount().orElse(1L);
g.setColor(Color.GREEN);
for (final Arc arc : sashimiArcs.keySet()) {
final double x1 = pos2pixel.apply(arc.start);
final double x2 = pos2pixel.apply(arc.end);
final double distance = x2 - x1;
final GeneralPath curve = new GeneralPath();
curve.moveTo(x1, image_height);
curve.curveTo(x1, image_height, x1 + distance / 2.0, image_height - Math.min(distance, image_height * 0.75), x2, image_height);
final double weight = (sashimiArcs.count(arc) / max_count) * 5;
final Stroke oldStroke = g.getStroke();
final Composite oldComposite = g.getComposite();
g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.5f));
g.setStroke(new BasicStroke((float) weight, BasicStroke.CAP_ROUND, BasicStroke.JOIN_ROUND));
g.draw(curve);
g.setStroke(oldStroke);
g.setComposite(oldComposite);
}
}
writeGenes(g, region);
writeKnownCnv(g, region);
g.setColor(Color.GRAY);
g.drawRect(0, 0, img.getWidth(), img.getHeight());
writeImage(img, bam, region, response);
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CoverageServer method parseInterval.
private SimpleInterval parseInterval(final String s) {
if (StringUtils.isBlank(s))
return null;
/* search in GTF file */
if (this.gtfFile != null && !s.contains(":")) {
final String geneName = s.trim();
TabixReader tbr = null;
try {
final ContigNameConverter cvt = ContigNameConverter.fromOneDictionary(this.dictionary);
tbr = new TabixReader(this.gtfFile.toString());
final GTFCodec codec = new GTFCodec();
String line;
while ((line = tbr.readLine()) != null) {
if (StringUtils.isBlank(line) || line.startsWith("#"))
continue;
final String[] tokens = CharSplitter.TAB.split(line);
if (tokens.length < 9)
continue;
if (!(tokens[2].equals("gene") || tokens[2].equals("transcript")))
continue;
if (StringUtils.indexOfIgnoreCase(tokens[8], geneName) == -1)
continue;
final GTFLine gtfLine = codec.decode(line);
if (gtfLine == null)
continue;
if (tokens[2].equals("gene")) {
if (!(geneName.equals(gtfLine.getAttribute("gene_id")) || geneName.equals(gtfLine.getAttribute("gene_name"))))
continue;
} else if (tokens[2].equals("transcript")) {
if (!(geneName.equals(gtfLine.getAttribute("transcript_id"))))
continue;
}
final String ctg = cvt.apply(gtfLine.getContig());
if (StringUtils.isBlank(ctg))
continue;
tbr.close();
tbr = null;
return new SimpleInterval(ctg, gtfLine.getStart(), gtfLine.getEnd());
}
} catch (final Throwable err) {
return null;
} finally {
if (tbr != null)
tbr.close();
}
}
return IntervalParserFactory.newInstance(this.dictionary).enableSinglePoint().make().apply(s.trim()).orElse(null);
}
Aggregations