use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.
the class VcfFisherCombinatorics method doWork.
@Override
public int doWork(final List<String> args) {
try {
final Pedigree pedigree = new PedigreeParser().parse(this.pedigreeFile);
final IntervalTreeMap<List<GeneInfo>> geneInfoMap = new IntervalTreeMap<>();
final Predicate<Genotype> isGtCnv = G -> G.isHet() || G.isHomVar();
try (VCFIterator vcfIter = super.openVCFIterator(super.oneFileOrNull(args))) {
final VCFHeader header = vcfIter.getHeader();
final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(SequenceDictionaryUtils.extractRequired(header));
if (!header.hasGenotypingData()) {
LOG.error("No Genotype data in " + header);
return -1;
}
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final List<Integer> casesIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isAffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("cases N=" + casesIdx.size());
if (casesIdx.isEmpty()) {
LOG.error("No affected/cases sample in the input VCF");
return -1;
}
final List<Integer> controlsIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isUnaffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("controls N=" + controlsIdx.size());
if (controlsIdx.isEmpty()) {
LOG.error("No unaffected/controls sample in the input VCF");
return -1;
}
final Predicate<VariantContext> acceptCtx = V -> {
if (discard_bnd && V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
return false;
if (discard_filtered && V.isFiltered())
return false;
if (max_af < 1.0) {
final Iterator<Integer> it = Stream.concat(controlsIdx.stream(), casesIdx.stream()).iterator();
int ac = 0;
int an = 0;
while (it.hasNext()) {
switch(V.getGenotype(it.next()).getType()) {
case HOM_VAR:
ac += 2;
an += 2;
break;
case HOM_REF:
ac += 0;
an += 2;
break;
case HET:
ac += 1;
an += 2;
break;
default:
break;
}
}
if (an == 0)
return false;
if (ac / (double) an > max_af)
return false;
}
return true;
};
try (BedLineReader br = new BedLineReader(this.bedPath)) {
while (br.hasNext()) {
final BedLine bed = br.next();
final String ctg = ctgConverter.apply(bed.getContig());
if (StringUtil.isBlank(ctg))
continue;
final GeneInfo geneInfo = new GeneInfo();
geneInfo.casesflags = new BitSet(casesIdx.size());
geneInfo.controlsflags = new BitSet(controlsIdx.size());
geneInfo.contig = ctg;
geneInfo.name = bed.getOrDefault(3, "undefined");
geneInfo.parts.add(new SimpleInterval(bed).renameContig(ctg));
final Interval key = new Interval(geneInfo);
List<GeneInfo> L = geneInfoMap.get(key);
if (L == null) {
L = new ArrayList<>();
geneInfoMap.put(key, L);
}
L.add(geneInfo);
}
}
if (geneInfoMap.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("reading variants...");
while (vcfIter.hasNext()) {
final VariantContext ctx = vcfIter.next();
if (!acceptCtx.test(ctx))
continue;
for (List<GeneInfo> gil : geneInfoMap.getOverlapping(ctx)) {
for (GeneInfo gi : gil) {
for (int y = 0; y < casesIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(casesIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.casesflags.set(y);
}
for (int y = 0; y < controlsIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(controlsIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.controlsflags.set(y);
}
}
}
}
/* remove Genes without variant , count sample per gene*/
for (List<GeneInfo> gil : geneInfoMap.values()) {
gil.removeIf(GI -> GI.casesflags.nextSetBit(0) == -1 && GI.controlsflags.nextSetBit(0) == -1);
}
// end remove
/* load bed file */
final List<GeneInfo> geneList = geneInfoMap.values().stream().filter(L -> !L.isEmpty()).flatMap(L -> L.stream()).sorted().collect(Collectors.toList());
if (geneList.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("N Genes=" + geneList.size());
Solution best = null;
for (int i = 2; i < Math.min(this.max_num_genes, geneList.size()); i++) {
LOG.info("sarting loop from " + i);
best = recursion(geneList, casesIdx, controlsIdx, new ArrayList<>(), i, best);
}
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
pw.println(best);
pw.flush();
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
}
}
use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.
the class CoverageServer method printPage.
/**
* print HTML page
*/
private void printPage(final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException {
String message = "";
final String charset = StringUtils.ifBlank(request.getCharacterEncoding(), "UTF-8");
response.setContentType("text/html; charset=" + charset.toLowerCase());
response.setCharacterEncoding(charset);
PrintWriter pw = response.getWriter();
SimpleInterval interval = null;
int columns_count = this.images_per_row;
if (!StringUtils.isBlank(request.getParameter("columns"))) {
columns_count = Math.max(1, StringUtils.parseInt(request.getParameter("columns")).orElse(this.images_per_row));
}
final boolean normalize = request.getParameter("normalize") != null;
if (!StringUtils.isBlank(request.getParameter("custominterval"))) {
interval = parseInterval(request.getParameter("custominterval"));
if (interval == null) {
message += " Cannot parse user interval '" + request.getParameter("custominterval") + "'.";
}
}
if (interval == null && !StringUtils.isBlank(request.getParameter("interval"))) {
interval = parseInterval(request.getParameter("interval"));
if (interval == null) {
message += " Cannot parse interval. using default: " + interval + ".";
}
}
if (interval == null && !this.named_intervals.isEmpty()) {
/* first non reviewed */
interval = this.named_intervals.stream().filter(R -> !R.reviewed).findFirst().map(R -> new SimpleInterval(R)).orElse(null);
/* all reviewed ? */
if (interval == null)
interval = new SimpleInterval(this.named_intervals.get(0));
}
/* still no interval ?*/
if (interval == null) {
final SAMSequenceRecord ssr = this.dictionary.getSequence(0);
interval = new SimpleInterval(ssr.getSequenceName(), 1, Math.min(ssr.getSequenceLength(), 100));
}
if (!StringUtils.isBlank(request.getParameter("move"))) {
final String value = request.getParameter("move");
double factor;
switch(value.length()) {
case 0:
factor = 0;
break;
case 1:
factor = 0.1;
break;
case 2:
factor = 0.475;
break;
default:
factor = 0.95;
break;
}
int length0 = interval.getLengthOnReference();
int length1 = (int) (length0 * factor);
int shift = (value.startsWith(">") ? 1 : -1) * length1;
int start = interval.getStart() + shift;
int end = interval.getEnd() + shift;
if (start < 1)
start = 1;
final SAMSequenceRecord ssr = this.dictionary.getSequence(interval.getContig());
if (ssr != null && end >= ssr.getSequenceLength()) {
end = ssr.getSequenceLength();
}
if (ssr != null && start >= ssr.getSequenceLength()) {
start = ssr.getSequenceLength();
}
interval = new SimpleInterval(interval.getContig(), start, end);
}
/* ZOOM buttons */
for (int side = 0; side < 2; ++side) {
final String param = "zoom" + (side == 0 ? "in" : "out");
String value = request.getParameter(param);
if (StringUtils.isBlank(value))
continue;
double factor = Double.parseDouble(value);
if (side == 0)
factor = 1.0 / factor;
int length0 = interval.getLengthOnReference();
int length1 = (int) (length0 * factor);
if (length1 < 1)
length1 = 1;
if (length1 > this.max_window_size)
length1 = this.max_window_size;
int mid = interval.getStart() + length0 / 2;
int start = mid - length1 / 2;
if (start < 1)
start = 1;
int end = mid + length1 / 2;
final SAMSequenceRecord ssr = this.dictionary.getSequence(interval.getContig());
if (ssr != null && end >= ssr.getSequenceLength()) {
end = ssr.getSequenceLength();
}
interval = new SimpleInterval(interval.getContig(), start, end);
break;
}
final String title = interval.toNiceString() + " (" + StringUtils.niceInt(interval.getLengthOnReference()) + " bp.)";
try {
final XMLStreamWriter w = XMLOutputFactory.newFactory().createXMLStreamWriter(pw);
w.writeStartElement("html");
w.writeStartElement("head");
w.writeEmptyElement("meta");
w.writeAttribute("charset", charset);
w.writeStartElement("title");
w.writeCharacters(title);
// title
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("body {background-color:#f0f0f0;color:#070707;font-size:18px;}" + "h1 {text-align:center;color:#070707;}" + ".grid-container {display: grid; grid-template-columns: " + IntStream.range(0, Math.max(1, columns_count)).mapToObj(X -> "auto").collect(Collectors.joining(" ")) + ";grid-gap: 10px; padding: 10px;}" + ".span1 {border:1px dotted blue;}" + ".lbl {font-weight: bold;}" + ".bampath {font-family:monospace;font-size:12px; font-style: normal;color:gray;}" + ".headerform {background-color:lavender;text-align:center;font-size:14px;}" + ".comment {background-color:khaki;text-align:center;font-size:14px;}" + ".highlight {background-color:#DB7093;}" + ".parents {font-size:75%;color:gray;}" + ".children {font-size:75%;color:gray;}" + ".allsamples {font-size:125%;}" + ".message {color:red;}" + ".affected {background-color:#e6cccc;}" + ".gtf {background-color:moccasin;text-align:center;}" + ".known {background-color:wheat;text-align:center;}");
// title
w.writeEndElement();
w.writeStartElement("script");
{
w.writeCharacters("");
w.flush();
/* messy with < and > characters + xmlstream */
pw.write("function highlight(name) {" + "var e = document.getElementById(name+\"_div\"); if(e==null) return;" + "e.classList.add(\"highlight\");" + "setTimeout(function () {e.classList.remove(\"highlight\");},2000);" + "}" + "function sendComment() {" + "var commentstr=document.getElementById(\"comment\").value;" + "console.log(\"comment is\"+commentstr);" + "if(commentstr.trim().length==0) return;" + "var xhttp = new XMLHttpRequest();" + "xhttp.onreadystatechange = function() {if (this.readyState == 4) if(this.status == 200) { alert(this.responseText);} else alert(\"Cannot send Comment.\")};" + "xhttp.open(\"GET\",\"/comment?interval=" + StringUtils.escapeHttp(interval.toString()) + "&comment=\"+encodeURI(commentstr), true);" + "xhttp.send();" + "}" + "function loadImage(idx) {" + "if(idx>=" + this.bamInput.size() + ") return;" + "var img = document.getElementById(\"bamid\"+idx);" + "img.addEventListener('load',(event) => {img.width=" + image_width + ";img.height=" + image_height + ";loadImage(idx+1);});" + "img.setAttribute(\"src\",\"/getimage?id=\"+idx+\"&interval=" + StringUtils.escapeHttp(interval.toString()) + (normalize ? "&normalize=1" : "") + "\");" + "img.setAttribute(\"alt\",\"bam idx\"+idx);" + "}" + "function init() {" + "var span=document.getElementById(\"spaninterval\");" + "if(span!=null) span.addEventListener('click',(evt)=>{document.getElementById(\"custominterval\").value = span.textContent; });" + "var sel=document.getElementById(\"selinterval\");" + "if(sel!=null) sel.addEventListener('change',(evt)=>{document.getElementById(\"custominterval\").value = evt.target.value; });" + "var cbox=document.getElementById(\"norminput\");" + "if(cbox!=null) cbox.addEventListener('change',(evt)=>{cbox.form.submit(); });" + "var comment=document.getElementById(\"commentbtn\");" + "if(comment!=null) comment.addEventListener('click',(evt)=>{ console.log(\"send comment\");sendComment(); });" + "var shortcuts=document.getElementById(\"shortcuts\");" + "if(shortcuts!=null) shortcuts.addEventListener('change',(evt)=>{document.getElementById(\"comment\").value += evt.target.value; });" + "loadImage(0);" + "}" + "window.addEventListener('load', (event) => {init();});");
pw.flush();
}
// script
w.writeEndElement();
// head
w.writeEndElement();
w.writeStartElement("body");
w.writeStartElement("h1");
w.writeCharacters(SequenceDictionaryUtils.getBuildName(this.dictionary).orElse("") + " ");
final String outlinkurl = hyperlink.apply(interval);
if (StringUtils.isBlank(outlinkurl)) {
w.writeCharacters(title);
} else {
w.writeStartElement("a");
w.writeAttribute("href", outlinkurl);
w.writeAttribute("target", "_blank");
w.writeAttribute("title", title);
w.writeCharacters(title);
w.writeEndElement();
}
// h1
w.writeEndElement();
if (!StringUtils.isBlank(message)) {
w.writeStartElement("h2");
w.writeAttribute("class", "message");
w.writeCharacters(message);
// h2
w.writeEndElement();
}
w.writeEmptyElement("a");
w.writeAttribute("name", "top");
w.writeComment("BEGIN FORM");
w.writeStartElement("div");
w.writeAttribute("class", "headerform");
w.writeStartElement("form");
w.writeAttribute("method", "GET");
w.writeAttribute("action", "/page");
w.writeEmptyElement("input");
w.writeAttribute("name", "interval");
w.writeAttribute("value", interval.toString());
w.writeAttribute("type", "hidden");
/* number of images per row */
if (!StringUtils.isBlank(request.getParameter("columns"))) {
w.writeEmptyElement("input");
w.writeAttribute("name", "columns");
w.writeAttribute("value", String.valueOf(columns_count));
w.writeAttribute("type", "hidden");
}
/* write select box with predefined interval */
if (!this.named_intervals.isEmpty()) {
w.writeStartElement("select");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "selinterval");
w.writeEmptyElement("option");
for (int side = 0; side < 2; ++side) {
for (final ReviewedInterval r : this.named_intervals) {
if (side == 0 && r.reviewed)
continue;
if (side == 1 && !r.reviewed)
continue;
final SimpleInterval simple = new SimpleInterval(r);
w.writeStartElement("option");
w.writeAttribute("value", simple.toString());
if (simple.equals(interval)) {
r.reviewed = true;
w.writeAttribute("selected", "true");
}
if (r.reviewed) {
w.writeEntityRef("#x2713");
w.writeCharacters(" ");
}
w.writeCharacters(simple.toNiceString() + (StringUtils.isBlank(r.getName()) ? "" : " [" + r.getName() + "]"));
w.writeEndElement();
w.writeCharacters("\n");
}
}
// select
w.writeEndElement();
}
w.writeEmptyElement("br");
//
/* move */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("move");
w.writeEndElement();
for (final String mv : new String[] { "<<<", "<<", "<", ">", ">>", ">>>" }) {
w.writeEmptyElement("input");
w.writeAttribute("class", "btn");
w.writeAttribute("type", "submit");
w.writeAttribute("name", "move");
w.writeAttribute("value", mv);
}
/* zoom in */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("zoom in");
w.writeEndElement();
for (final String zoom : new String[] { "1.5", "3", "10" }) {
w.writeEmptyElement("input");
w.writeAttribute("class", "btn");
w.writeAttribute("type", "submit");
w.writeAttribute("name", "zoomin");
w.writeAttribute("value", zoom);
}
/* zoom in */
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeCharacters("zoom out");
w.writeEndElement();
for (final String zoom : new String[] { "1.5", "3", "10", "100" }) {
w.writeEmptyElement("input");
w.writeAttribute("type", "submit");
w.writeAttribute("class", "btn");
w.writeAttribute("name", "zoomout");
w.writeAttribute("value", zoom);
}
/* checkbox normalize */
w.writeEmptyElement("input");
w.writeAttribute("id", "norminput");
w.writeAttribute("name", "normalize");
w.writeAttribute("type", "checkbox");
if (normalize)
w.writeAttribute("checked", "true");
w.writeStartElement("label");
w.writeAttribute("class", "lbl");
w.writeAttribute("for", "norminput");
w.writeCharacters("normalize");
w.writeEndElement();
w.writeEmptyElement("br");
w.writeStartElement("span");
w.writeAttribute("class", "span1");
w.writeAttribute("id", "spaninterval");
w.writeCharacters(interval.toNiceString());
// span
w.writeEndElement();
w.writeEmptyElement("input");
w.writeAttribute("name", "custominterval");
w.writeAttribute("id", "custominterval");
w.writeAttribute("type", "text");
w.writeAttribute("value", "");
w.writeStartElement("button");
w.writeAttribute("class", "btn");
w.writeAttribute("name", "parseinterval");
w.writeCharacters("GO");
// button
w.writeEndElement();
// form
w.writeEndElement();
// div
w.writeEndElement();
if (this.commentPath != null) {
w.writeStartElement("div");
w.writeAttribute("class", "comment");
w.writeStartElement("label");
w.writeAttribute("for", "comment");
w.writeCharacters("Comment:");
w.writeEndElement();
w.writeEmptyElement("input");
w.writeAttribute("id", "comment");
w.writeAttribute("type", "text");
w.writeAttribute("placeholder", "Comment about " + interval.toNiceString());
w.writeStartElement("button");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "commentbtn");
w.writeCharacters("Send comment");
// button
w.writeEndElement();
w.writeCharacters(" ");
w.writeStartElement("select");
w.writeAttribute("class", "btn");
w.writeAttribute("id", "shortcuts");
w.writeEmptyElement("option");
for (final String opt : new String[] { "OK", "BAD", "Noisy", "False positive", "Deletion", "Later" }) {
w.writeStartElement("option");
w.writeAttribute("value", " " + opt + ".");
w.writeCharacters(opt);
w.writeEndElement();
}
// select
w.writeEndElement();
// div
w.writeEndElement();
}
w.writeComment("END FORM");
if (this.gtfFile != null) {
w.writeStartElement("div");
w.writeAttribute("class", "gtf");
getGenes(interval).filter(G -> G.getType().equals("gene")).forEach(gtfLine -> {
try {
String key = null;
if (gtfLine.getAttributes().containsKey("gene_id")) {
key = gtfLine.getAttribute("gene_id");
}
if (gtfLine.getAttributes().containsKey("gene_name")) {
key = gtfLine.getAttribute("gene_name");
}
if (StringUtils.isBlank(key))
return;
final String url = "https://www.ncbi.nlm.nih.gov/gene/?term=" + StringUtils.escapeHttp(key);
w.writeStartElement("a");
w.writeAttribute("href", url);
w.writeAttribute("target", "_blank");
w.writeAttribute("title", key);
w.writeCharacters(key);
w.writeEndElement();
w.writeCharacters(" ");
} catch (final XMLStreamException err) {
}
});
// div
w.writeEndElement();
}
if (this.knownCnvFile != null) {
final Interval final_interval = new Interval(interval);
final ToDoubleFunction<Interval> getOvelap = R -> {
if (!R.overlaps(final_interval))
return 0;
final double L1 = R.getIntersectionLength(final_interval);
final double r1 = L1 / R.getLengthOnReference();
final double r2 = L1 / final_interval.getLengthOnReference();
return Double.min(r1, r2);
};
w.writeStartElement("div");
w.writeAttribute("class", "known");
w.writeStartElement("label");
w.writeCharacters("Known: ");
w.writeEndElement();
getKnownCnv(interval).sorted((A, B) -> Double.compare(getOvelap.applyAsDouble(B), (getOvelap.applyAsDouble(A)))).limit(100).forEach(R -> {
try {
w.writeStartElement("span");
w.writeCharacters(new SimpleInterval(R).toNiceString());
if (!StringUtils.isBlank(R.getName())) {
w.writeCharacters(" ");
w.writeCharacters(R.getName());
}
w.writeEndElement();
w.writeCharacters("; ");
// span
w.writeEndElement();
} catch (final Throwable err) {
LOG.warn(err);
}
});
// div
w.writeEndElement();
}
/* write anchors to samples */
w.writeStartElement("div");
w.writeAttribute("class", "allsamples");
w.writeCharacters("Samples: ");
for (final BamInput bi : this.bamInput.stream().sorted((A, B) -> A.sample.compareTo(B.sample)).collect(Collectors.toList())) {
w.writeStartElement("a");
w.writeAttribute("title", bi.sample);
w.writeAttribute("href", "#" + bi.sample);
w.writeAttribute("onclick", "highlight('" + bi.sample + "');");
w.writeCharacters("[" + bi.sample);
if (pedigree != null) {
final Sample sn = this.pedigree.getSampleById(bi.sample);
if (sn != null && sn.isAffected()) {
w.writeCharacters(" ");
w.writeEntityRef("#128577");
}
}
w.writeCharacters("] ");
w.writeEndElement();
}
// div
w.writeEndElement();
w.writeCharacters("\n");
w.writeStartElement("div");
w.writeAttribute("class", "grid-container");
for (int i = 0; i < this.bamInput.size(); i++) {
final BamInput bamInput = this.bamInput.get(i);
final Path bam = bamInput.bamPath;
final Sample sample = this.pedigree == null ? null : this.pedigree.getSampleById(bamInput.sample);
w.writeStartElement("div");
w.writeAttribute("id", bamInput.sample + "_div");
w.writeAttribute("class", "sample" + (sample != null && sample.isAffected() ? " affected" : ""));
w.writeEmptyElement("a");
w.writeAttribute("name", bamInput.sample);
w.writeStartElement("h3");
w.writeCharacters(bamInput.sample);
w.writeCharacters(" ");
w.writeStartElement("a");
w.writeAttribute("href", "#top");
w.writeAttribute("title", "top");
w.writeCharacters("[^]");
w.writeEndElement();
{
if (sample != null) {
writeSample(w, sample);
w.writeStartElement("span");
w.writeAttribute("class", "parents");
for (int p = 0; p < 2; ++p) {
if (p == 0 && !sample.hasFather())
continue;
if (p == 1 && !sample.hasMother())
continue;
final Sample parent = (p == 0 ? sample.getFather() : sample.getMother());
boolean has_bam = this.bamInput.stream().anyMatch(B -> B.sample.equals(parent.getId()));
w.writeCharacters(" ");
w.writeCharacters(p == 0 ? "Father " : "Mother ");
if (has_bam) {
w.writeStartElement("a");
w.writeAttribute("href", "#" + parent.getId());
w.writeAttribute("title", parent.getId());
w.writeAttribute("onclick", "highlight('" + parent.getId() + "');");
w.writeCharacters("[" + parent.getId() + "].");
w.writeEndElement();
} else {
w.writeCharacters(parent.getId());
}
writeSample(w, parent);
}
w.writeEndElement();
final Set<Sample> children = sample.getChildren();
if (!children.isEmpty()) {
w.writeStartElement("span");
w.writeAttribute("class", "children");
w.writeCharacters(" Has Children :");
for (final Sample child : children) {
boolean has_bam = this.bamInput.stream().anyMatch(B -> B.sample.equals(child.getId()));
w.writeCharacters(" ");
if (has_bam) {
w.writeStartElement("a");
w.writeAttribute("href", "#" + child.getId());
w.writeAttribute("title", child.getId());
w.writeAttribute("onclick", "highlight('" + child.getId() + "');");
w.writeCharacters("[" + child.getId() + "].");
w.writeEndElement();
} else {
w.writeCharacters(child.getId());
}
writeSample(w, child);
}
w.writeEndElement();
}
// end has children
}
}
w.writeEndElement();
w.writeStartElement("div");
w.writeAttribute("class", "bampath");
w.writeCharacters(bam.toString());
w.writeEndElement();
w.writeEmptyElement("img");
w.writeAttribute("id", "bamid" + i);
w.writeAttribute("src", "https://upload.wikimedia.org/wikipedia/commons/d/de/Ajax-loader.gif");
w.writeAttribute("width", "32");
w.writeAttribute("height", "32");
// div
w.writeEndElement();
w.writeCharacters("\n");
}
w.writeEndElement();
w.writeEmptyElement("hr");
w.writeStartElement("div");
w.writeCharacters("Author: Pierre Lindenbaum. ");
w.writeCharacters(JVarkitVersion.getInstance().getLabel());
w.writeEndElement();
// body
w.writeEndElement();
// html
w.writeEndElement();
w.flush();
w.close();
} catch (XMLStreamException err) {
LOG.warn(err);
throw new IOException(err);
} finally {
pw.close();
}
}
use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.
the class VcfFilterNotInPedigree method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
final int IGNORE_SINGLETON = -1;
final Predicate<Genotype> acceptFilteredGenotype = G -> G != null && (this.ignoreFilteredGT == false || !G.isFiltered());
final Pedigree pedigree;
final VCFHeader header = iterin.getHeader();
try {
pedigree = new PedigreeParser().parse(this.pedigreeFile);
} catch (final IOException err) {
LOG.error("Cannot read pedigree in file: " + this.pedigreeFile, err);
return -1;
}
if (pedigree.isEmpty()) {
throw new JvarkitException.PedigreeError("No pedigree found in header. use VcfInjectPedigree to add it");
}
pedigree.checkUniqIds();
final Set<String> samplesNames = new HashSet<>(header.getSampleNamesInOrder());
final Set<Sample> individuals = new HashSet<>(pedigree.getSamples());
final Iterator<Sample> iter = individuals.iterator();
while (iter.hasNext()) {
final Sample person = iter.next();
if (!(samplesNames.contains(person.getId()))) {
LOG.warn("Ignoring " + person + " because not in VCF header.");
iter.remove();
}
}
final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(this.filterName, "Will be set for variant where the only genotypes non-homref are NOT in the pedigree ");
final VCFFilterHeaderLine singletonFilter = new VCFFilterHeaderLine(this.singletonfilterName, "The ALT allele is found in less or equals than " + this.singleton + " individuals in the cases/controls");
final VCFHeader h2 = new VCFHeader(header);
JVarkitVersion.getInstance().addMetaData(this, h2);
if (this.singleton != IGNORE_SINGLETON) {
h2.addMetaDataLine(singletonFilter);
}
out.writeHeader(h2);
while (iterin.hasNext()) {
final VariantContext ctx = iterin.next();
final boolean in_pedigree = individuals.stream().map(P -> ctx.getGenotype(P.getId())).filter(acceptFilteredGenotype).anyMatch(g -> (!(g == null || !g.isCalled() || !g.isAvailable() || g.isNoCall() || g.isHomRef())));
if (!in_pedigree) {
if (this.dicardVariant)
continue;
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.filter(filter.getID());
out.add(vcb.make());
} else {
boolean is_singleton;
if (this.singleton != IGNORE_SINGLETON) {
is_singleton = true;
for (final Allele alt : ctx.getAlternateAlleles()) {
if (individuals.stream().map(P -> ctx.getGenotype(P.getId())).filter(acceptFilteredGenotype).filter(g -> g != null && g.isCalled() && g.getAlleles().contains(alt)).count() > this.singleton) {
is_singleton = false;
break;
}
}
} else {
is_singleton = false;
}
if (is_singleton) {
if (this.dicardVariant)
continue;
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.filter(singletonFilter.getID());
out.add(vcb.make());
} else {
out.add(ctx);
}
}
}
return 0;
}
use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.
the class VcfToSvg method doWork.
@Override
public int doWork(final List<String> args) {
VCFReader r = null;
OutputStream outputStream = null;
XMLStreamWriter w = null;
PrintWriter manifestW = null;
ArchiveFactory archiveFactory = null;
try {
r = VCFReaderFactory.makeDefault().open(Paths.get(oneAndOnlyOneFile(args)), true);
final VCFHeader header = r.getHeader();
final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
intervalListProvider.dictionary(dict);
/* read gtf if any */
final IntervalTreeMap<Gene> geneMap = new IntervalTreeMap<>();
if (this.gtfPath != null) {
try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
gtfReader.getAllGenes().stream().filter(G -> !this.removeNonCoding || G.getTranscripts().stream().anyMatch(T -> T.isCoding())).forEach(G -> geneMap.put(new Interval(G), G));
}
}
archiveFactory = ArchiveFactory.open(this.outputPath);
if (manifestFile != null) {
manifestW = IOUtils.openPathForPrintWriter(this.manifestFile);
} else {
manifestW = new PrintWriter(new NullOuputStream());
}
final Pedigree pedigree;
if (this.pedPath == null) {
pedigree = PedigreeParser.empty();
} else {
pedigree = new PedigreeParser().parse(this.pedPath);
}
final Path tmpSvg = Files.createTempFile("vcf.", ".svg");
final XMLOutputFactory xof = XMLOutputFactory.newInstance();
for (final Locatable interval : intervalListProvider.dictionary(dict).stream().collect(Collectors.toList())) {
final List<VariantContext> variants = r.query(interval).stream().filter(V -> this.variantFILTEREDOpacity > 0 || !V.isFiltered()).filter(V -> this.variantIndelOpacity > 0 || !V.isIndel()).collect(Collectors.toCollection(ArrayList::new));
if (variants.isEmpty())
continue;
final List<Transcript> transcripts = geneMap.getOverlapping(interval).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> !this.removeNonCoding || T.isCoding()).collect(Collectors.toList());
variants.removeIf(V -> this.gtfPath != null && this.variantsInExonOnly && transcripts.stream().flatMap(T -> T.getExons().stream()).noneMatch(EX -> EX.overlaps(V)));
if (variants.isEmpty())
continue;
final String geneId = transcripts.stream().map(T -> T.getGene().getId()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
final String geneName = transcripts.stream().map(T -> T.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
outputStream = IOUtils.openPathForWriting(tmpSvg);
w = xof.createXMLStreamWriter(outputStream);
double featureHeight = 10;
double TRANSCRIPT_HEIGHT = featureHeight;
final int all_genotypes_width = variants.size() * this.genotype_width;
final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
final int interline_weight = 6;
final int margin_top = 10;
final int margin_bottom = 10;
final int margin_right = 100;
final int margin_left = 100;
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("svg");
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
w.writeAttribute("version", "1.1");
w.writeAttribute("width", String.valueOf(margin_right + margin_right + drawinAreaWidth));
w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + transcripts.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + samples.size() * this.genotype_width));
title(w, interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
w.writeStartElement("desc");
w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
w.writeEndElement();
// defs
w.writeStartElement("defs");
// genotypes
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_REF);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.NO_CALL);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:silver;stroke;gray;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HOM_VAR);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:crimson;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.MIXED);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:pink;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.UNAVAILABLE);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:gray;stroke;none;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(this.genotype_width));
w.writeAttribute("height", String.valueOf(this.genotype_width));
w.writeEndElement();
w.writeStartElement("g");
//
w.writeAttribute("id", "g_" + GenotypeType.HET);
w.writeEmptyElement("rect");
w.writeAttribute("style", "fill:lime;stroke;black;");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(genotype_width));
w.writeAttribute("height", String.valueOf(genotype_width));
w.writeEmptyElement("polygon");
w.writeAttribute("style", "fill:crimson;stroke;black;");
w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
w.writeEndElement();
// strand
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandF");
w.writeAttribute("points", "-5,-5 0,0 -5,5");
w.writeEmptyElement("polyline");
w.writeAttribute("id", "strandR");
w.writeAttribute("points", "5,-5 0,0 5,5");
// gradients
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEndElement();
// defs
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
// style
w.writeEndElement();
final IntFunction<Integer> trim = t -> Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
final IntFunction<Double> baseToPixel = t -> margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.getLengthOnReference());
final IntFunction<Double> variantIndexToPixel = idx -> {
final double variant_width = drawinAreaWidth / (double) variants.size();
final double midx = variant_width * idx + variant_width / 2.0;
return margin_left + midx - genotype_width / 2.0;
};
final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
/**
* title
*/
double y = 0;
w.writeStartElement("text");
w.writeAttribute("x", "0");
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeCharacters(interval.toString());
w.writeEndElement();
y += featureHeight;
for (final Transcript g : transcripts) {
int cdsHeigh = 5;
double exonHeight = TRANSCRIPT_HEIGHT - 5;
double midY = TRANSCRIPT_HEIGHT / 2;
w.writeStartElement("g");
w.writeAttribute("transform", "translate(0," + y + ")");
title(w, g.getId());
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(featureHeight));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(g.getId());
w.writeEndElement();
/* transcript line */
w.writeEmptyElement("line");
w.writeAttribute("class", "kgtr");
w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
w.writeAttribute("y1", String.valueOf(midY));
w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
w.writeAttribute("y2", String.valueOf(midY));
/* strand symbols */
for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.getLengthOnReference();
if (pos0 + 1 < g.getTxStart())
continue;
if (pos0 > g.getTxEnd())
break;
w.writeEmptyElement("use");
w.writeAttribute("class", "kgstrand");
w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
w.writeAttribute("x", String.valueOf(margin_left + pixX));
w.writeAttribute("y", String.valueOf(midY));
}
/* exons */
for (final Exon exon : g.getExons()) {
if (exon.getStart() + 1 >= interval.getEnd())
continue;
if (exon.getEnd() <= interval.getStart())
continue;
w.writeStartElement("rect");
w.writeAttribute("class", "kgexon");
w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
w.writeAttribute("height", String.valueOf(exonHeight));
title(w, exon.getName());
w.writeEndElement();
}
/* coding line */
if (!g.isNonCoding() && g.hasCodonStartDefined() && g.hasCodonStopDefined()) {
final double codonx1 = baseToPixel.apply(trim.apply(g.getLeftmostCodon().get().getStart()));
final double codonx2 = baseToPixel.apply(trim.apply(g.getRightmostCodon().get().getEnd()));
w.writeEmptyElement("rect");
w.writeAttribute("class", "kgcds");
w.writeAttribute("x", String.valueOf(codonx1));
w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
w.writeAttribute("width", String.valueOf(baseToPixel.apply((int) (codonx2 - codonx1))));
w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
}
// String label=String.format("%15s", g.getName());
// w.writeEmptyElement("path");
// double fontHeight=Math.min(10,0.8*TRANSCRIPT_HEIGHT);
// w.writeAttribute("d",this.hershey.svgPath(label,-insets.left,midY-fontHeight/2,insets.left*0.9,fontHeight));
w.writeEndElement();
w.writeCharacters("\n");
y += featureHeight;
}
/* draw lines to variants */
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
double x1 = baseToPixel.apply(vc.getStart());
double x2 = baseToPixel.apply(vc.getEnd());
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("polygon");
w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + variantIndexToPixel.apply(vidx) + "," + y2 + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2);
title(w, variantTitle.apply(vc));
w.writeEndElement();
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final double y2 = y + featureHeight * interline_weight;
w.writeStartElement("text");
w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("class", "postick");
w.writeCharacters(variantTitle.apply(vc));
w.writeEndElement();
w.writeCharacters("\n");
}
y += featureHeight * interline_weight;
w.writeStartElement("g");
/* step 0: affected, 1: unaffected, 2: others */
for (int step = 0; step < 3; ++step) {
for (final String sample : samples) {
final Sample individual = pedigree.getSampleById(sample);
if (step == 0 && (individual == null || !individual.isAffected()))
continue;
if (step == 1 && (individual == null || !individual.isUnaffected()))
continue;
if (step == 2 && individual != null && individual.isStatusSet())
continue;
// sample
w.writeStartElement("g");
switch(step) {
case 0:
w.writeAttribute("style", "hue-rotate(195deg);");
break;
case 1:
w.writeAttribute("style", "hue-rotate(45deg);");
break;
default:
break;
}
for (int vidx = 0; vidx < variants.size(); ++vidx) {
final VariantContext vc = variants.get(vidx);
final Genotype g = vc.getGenotype(sample);
double opacity = 1.0;
if (vc.isIndel())
opacity *= this.variantIndelOpacity;
if (vc.isFiltered())
opacity *= this.variantFILTEREDOpacity;
if (opacity > 1)
opacity = 1;
if (opacity <= 0)
continue;
if (opacity < 1) {
w.writeStartElement("g");
w.writeAttribute("style", "opacity:" + opacity + ";");
}
w.writeEmptyElement("use");
w.writeAttribute("x", String.valueOf(variantIndexToPixel.apply(vidx)));
w.writeAttribute("y", String.valueOf(y));
w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
if (opacity < 1) {
w.writeEndElement();
}
}
w.writeCharacters("\n");
w.writeStartElement("text");
w.writeAttribute("x", String.valueOf(margin_left - 10));
w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
w.writeAttribute("style", "text-anchor:end;");
w.writeCharacters(sample);
// text
w.writeEndElement();
// g for sample
w.writeEndElement();
y += this.genotype_width;
}
}
w.writeCharacters("\n");
w.writeEndDocument();
w.writeCharacters("\n");
w.flush();
w.close();
final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig().replaceAll("[/\\:]", "_") + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(geneName) ? "" : "." + geneName.replaceAll("[/\\:]", "")) + (StringUtils.isBlank(geneId) ? "" : "." + geneId.replaceAll("[/\\:]", "")) + ".svg";
OutputStream os = archiveFactory.openOuputStream(filename);
IOUtils.copyTo(tmpSvg, os);
os.flush();
os.close();
Files.delete(tmpSvg);
manifestW.print(interval.getContig());
manifestW.print('\t');
manifestW.print(interval.getStart() - 1);
manifestW.print('\t');
manifestW.print(interval.getEnd());
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print(transcripts.stream().map(G -> G.getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
manifestW.print('\t');
manifestW.print((archiveFactory.isTarOrZipArchive() ? "" : this.outputPath.toString() + File.separator) + filename);
manifestW.print('\t');
manifestW.println(variants.size());
}
r.close();
manifestW.flush();
manifestW.close();
manifestW = null;
archiveFactory.close();
archiveFactory = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(archiveFactory);
CloserUtil.close(r);
CloserUtil.close(outputStream);
CloserUtil.close(manifestW);
}
}
use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.
the class VCFTrios method doVcfToVcf.
@Override
public int doVcfToVcf(final String inputName, VCFIterator r, final VariantContextWriter w) {
long count_incompats = 0L;
final Set<String> sampleNotFoundInVcf = new HashSet<>();
Pedigree pedigree = null;
final List<TrioTriple> trios = new ArrayList<>();
try {
final DeNovoDetector detector = new DeNovoDetector();
detector.setConvertingNoCallToHomRef(this.nocall_to_homref);
final VCFHeader header = r.getHeader();
final PedigreeParser pedParser = new PedigreeParser();
pedigree = pedParser.parse(this.pedigreeFile);
final VCFHeader h2 = new VCFHeader(header);
final Set<VCFHeaderLine> meta = new HashSet<>();
meta.add(new VCFInfoHeaderLine(this.attributeName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples with mendelian incompatibilities." + (this.pedigreeFile == null ? "" : " Pedigree File was : " + this.pedigreeFile)));
meta.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY, true));
if (!StringUtil.isBlank(this.filterAnyIncompat)) {
meta.add(new VCFFilterHeaderLine(this.filterAnyIncompat, "Variant contains at least one mendelian incompatibilities"));
}
if (!StringUtil.isBlank(this.filterNoIncompat)) {
meta.add(new VCFFilterHeaderLine(this.filterNoIncompat, "Variant does not contain any mendelian incompatibilities"));
}
meta.stream().forEach(H -> h2.addMetaDataLine(H));
JVarkitVersion.getInstance().addMetaData(this, h2);
for (final Trio pedTrio : pedigree.getTrios()) {
final TrioTriple trio = new TrioTriple();
final Sample child = pedTrio.getChild();
trio.child_id = header.getSampleNameToOffset().getOrDefault(child.getId(), -1);
if (trio.child_id < 0)
continue;
if (pedTrio.hasFather()) {
final Sample parent = pedTrio.getFather();
trio.father_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
}
if (pedTrio.hasMother()) {
final Sample parent = pedTrio.getMother();
trio.mother_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
}
if (trio.father_id == -1 && trio.mother_id == -1) {
continue;
}
trios.add(trio);
}
LOG.info("trios(s) in pedigree: " + trios.size());
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = progress.apply(r.next());
final Set<String> incompatibilities = new HashSet<String>();
for (final TrioTriple trio : trios) {
final Genotype gChild = ctx.getGenotype(trio.child_id);
if (gChild == null)
throw new IllegalStateException();
final Genotype gFather = trio.father_id < 0 ? null : ctx.getGenotype(trio.father_id);
final Genotype gMother = trio.mother_id < 0 ? null : ctx.getGenotype(trio.mother_id);
final DeNovoDetector.DeNovoMutation mut = detector.test(ctx, gFather, gMother, gChild);
if (mut != null) {
incompatibilities.add(gChild.getSampleName());
}
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (!incompatibilities.isEmpty()) {
// set filter for samples that are not a mendelian violation
if (!StringUtil.isBlank(this.genotypeFilterNameNoIncompat)) {
vcb.genotypes(ctx.getGenotypes().stream().map(G -> incompatibilities.contains(G.getSampleName()) ? G : new GenotypeBuilder(G).filters(this.genotypeFilterNameNoIncompat).make()).collect(Collectors.toList()));
}
++count_incompats;
// set INFO attribute
vcb.attribute(attributeName, incompatibilities.toArray());
// set FILTER
if (!StringUtil.isBlank(this.filterAnyIncompat)) {
vcb.filter(this.filterAnyIncompat);
} else if (!ctx.isFiltered()) {
vcb.passFilters();
}
} else // No denovo
{
// dicard variant
if (this.discard_variants_without_mendelian_incompat) {
continue;
}
// set filters
if (!StringUtil.isBlank(this.filterNoIncompat)) {
vcb.filter(this.filterNoIncompat);
} else if (!ctx.isFiltered()) {
vcb.passFilters();
}
}
w.add(vcb.make());
}
progress.close();
LOG.info("incompatibilitie(s) N=" + count_incompats);
if (!sampleNotFoundInVcf.isEmpty()) {
LOG.info("SAMPLE(S) not found: " + String.join(" / ", sampleNotFoundInVcf));
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
Aggregations