Use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term in project jvarkit by lindenb.
Shown in the class VCFCompare, method doWork.
/**
 * Compares two VCF inputs and writes an HTML-like report through an
 * {@link XMLStreamWriter}.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>each of the two inputs is opened, its header is decoded and the
 *       VEP / SnpEff / ANN prediction parsers are built from that header;</li>
 *   <li>every remaining line is pushed, tagged with the index of its file,
 *       into a {@code SortingCollection} ordered by {@code LineAndFileComparator};</li>
 *   <li>the sorted records are walked; consecutive records comparing equal
 *       are grouped in one row and handed to every {@code Venn0} counter;</li>
 *   <li>the sample lists and every counter are written to {@code outputFile}
 *       (or stdout, as decided by {@code openFileOrStdoutAsPrintWriter}).</li>
 * </ol>
 *
 * @param args the two VCF locations to compare; exactly two are required
 * @return {@code 0} on success, {@code -1} if the argument count is wrong
 *         or any exception is raised while processing
 */
@Override
public int doWork(final List<String> args) {
    if (args.isEmpty()) {
        LOG.error("VCFs missing.");
        return -1;
    }
    if (args.size() != 2) {
        // reported through the logger, like every other diagnostic of this method
        LOG.error("Illegal number of arguments. Expected two VCFs");
        return -1;
    }
    PrintWriter pw = null;
    XMLStreamWriter w = null;
    InputStream in = null;
    SortingCollection<LineAndFile> variants = null;
    // declared here so that the finally block can release it on failure
    CloseableIterator<LineAndFile> iter = null;
    try {
        final LineAndFileComparator varcmp = new LineAndFileComparator();
        variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), varcmp, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        variants.setDestructiveIteration(true);

        /* read both inputs: header first, then every line goes to the sorting collection */
        for (int i = 0; i < 2; ++i) {
            this.inputs[i] = new Input();
            this.inputs[i].codec = VCFUtils.createDefaultVCFCodec();
            this.inputs[i].filename = args.get(i);
            LOG.info("Opening " + this.inputs[i].filename);
            in = IOUtils.openURIForReading(this.inputs[i].filename);
            final LineReader lr = new SynchronousLineReader(in);
            final LineIterator li = new LineIteratorImpl(lr);
            this.inputs[i].header = (VCFHeader) this.inputs[i].codec.readActualHeader(li);
            this.inputs[i].vepPredictionParser = new VepPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].snpEffPredictionParser = new SnpEffPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].annPredictionParser = new AnnPredictionParserFactory(this.inputs[i].header).get();
            while (li.hasNext()) {
                final LineAndFile laf = new LineAndFile();
                laf.fileIdx = i;
                laf.line = li.next();
                variants.add(laf);
            }
            LOG.info("Done Reading " + this.inputs[i].filename);
            CloserUtil.close(li);
            CloserUtil.close(lr);
            CloserUtil.close(in);
            // already released: prevents a second close() from the finally block
            in = null;
        }
        variants.doneAdding();
        LOG.info("Done Adding");

        /* samples present in both headers */
        final Set<String> commonSamples = new TreeSet<String>(this.inputs[0].header.getSampleNamesInOrder());
        commonSamples.retainAll(this.inputs[1].header.getSampleNamesInOrder());

        /* build the list of counters that will visit every group of variants */
        final List<Venn0> venn1List = new ArrayList<VCFCompare.Venn0>();
        venn1List.add(new Venn1("ALL"));
        venn1List.add(new Venn1("having ID") {
            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasID() ? null : ctx;
            }
        });
        venn1List.add(new Venn1("QUAL greater 30") {
            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasLog10PError() || ctx.getPhredScaledQual() < 30.0 ? null : ctx;
            }
        });
        for (VariantContext.Type t : VariantContext.Type.values()) {
            venn1List.add(new VennType(t));
        }
        // three counters (vep, SnpEff, ANN) per sequence-ontology term
        for (SequenceOntologyTree.Term term : SequenceOntologyTree.getInstance().getTerms()) {
            venn1List.add(new VennPred("vep", term) {
                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (VepPredictionParser.VepPrediction pred : VCFCompare.this.inputs[file_id].vepPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("SnpEff", term) {
                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (SnpEffPredictionParser.SnpEffPrediction pred : VCFCompare.this.inputs[file_id].snpEffPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("ANN", term) {
                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (AnnPredictionParser.AnnPrediction pred : VCFCompare.this.inputs[file_id].annPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
        }
        for (String s : commonSamples) {
            venn1List.add(new VennGType(s));
        }

        /* START : digest results ====================== */
        final Counter<String> diff = new Counter<String>();
        final List<LineAndFile> row = new ArrayList<LineAndFile>();
        iter = variants.iterator();
        for (; ; ) {
            // rec stays null once the iterator is exhausted: this flushes the last row
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            // end of data, or rec does not compare equal to the current row: flush the row
            if (rec == null || (!row.isEmpty() && varcmp.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    diff.incr("count.variations");
                    // one slot per input file
                    final VariantContext[] contexes_init = new VariantContext[] { null, null };
                    for (LineAndFile var : row) {
                        if (contexes_init[var.fileIdx] != null) {
                            // only the first record of a given file is kept for this row
                            LOG.error("Duplicate context in " + inputs[var.fileIdx].filename + " : " + var.line);
                            continue;
                        }
                        contexes_init[var.fileIdx] = var.getContext();
                    }
                    for (Venn0 venn : venn1List) {
                        venn.visit(contexes_init);
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        // already released: prevents a second close() from the finally block
        iter = null;
        /* END : digest results ====================== */

        pw = super.openFileOrStdoutAsPrintWriter(outputFile);
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        w = xmlfactory.createXMLStreamWriter(pw);
        w.writeStartElement("html");
        w.writeStartElement("body");
        /* specific samples */
        w.writeStartElement("div");
        w.writeStartElement("dl");
        // i=0 and i=1: samples found only in that input; i=2: samples shared by both
        for (int i = 0; i < 3; ++i) {
            String title;
            Set<String> samples;
            switch (i) {
                case 0:
                case 1:
                    title = "Sample(s) for " + this.inputs[i].filename + ".";
                    samples = new TreeSet<String>(this.inputs[i].header.getSampleNamesInOrder());
                    samples.removeAll(commonSamples);
                    break;
                default:
                    title = "Common Sample(s).";
                    samples = new TreeSet<String>(commonSamples);
                    break;
            }
            w.writeStartElement("dt");
            w.writeCharacters(title);
            w.writeEndElement();
            w.writeStartElement("dd");
            w.writeStartElement("ol");
            for (String s : samples) {
                w.writeStartElement("li");
                w.writeCharacters(s);
                w.writeEndElement();
            }
            // ol
            w.writeEndElement();
            // dd
            w.writeEndElement();
        }
        // dl
        w.writeEndElement();
        // div
        w.writeEndElement();
        for (Venn0 v : venn1List) {
            v.write(w);
        }
        // body
        w.writeEndElement();
        // html
        w.writeEndElement();
        w.writeEndDocument();
        w.close();
        w = null;
        pw.flush();
        pw.close();
        pw = null;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(pw);
        // the iterator and the input stream are non-null here only when an exception interrupted the work
        CloserUtil.close(iter);
        CloserUtil.close(in);
        if (variants != null)
            variants.cleanup();
    }
    return 0;
}
Aggregations