use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree in project jvarkit by lindenb.
the class VCFPredictions method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, VariantContextWriter w) {
ReferenceContig genomicSequence = null;
try {
LOG.info("opening REF:" + this.referenceGenomeSource);
this.referenceGenome = new ReferenceGenomeFactory().open(this.referenceGenomeSource);
loadKnownGenesFromUri();
final VCFHeader header = (VCFHeader) r.getHeader();
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(this.referenceGenome.getDictionary());
contigNameConverter.setOnNotFound(OnNotFound.SKIP);
final VCFHeader h2 = new VCFHeader(header);
addMetaData(h2);
switch(this.outputSyntax) {
case Vep:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
break;
}
case SnpEff:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
break;
}
default:
{
final StringBuilder format = new StringBuilder();
for (FORMAT1 f : FORMAT1.values()) {
if (format.length() > 0)
format.append("|");
format.append(f.name());
}
h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
break;
}
}
w.writeHeader(h2);
final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
final SequenceOntologyTree.Term so_intron = soTree.getTermByAcn("SO:0001627");
final SequenceOntologyTree.Term so_exon = soTree.getTermByAcn("SO:0001791");
final SequenceOntologyTree.Term so_splice_donor = soTree.getTermByAcn("SO:0001575");
final SequenceOntologyTree.Term so_splice_acceptor = soTree.getTermByAcn("SO:0001574");
final SequenceOntologyTree.Term so_5_prime_UTR_variant = soTree.getTermByAcn("SO:0001623");
final SequenceOntologyTree.Term so_3_prime_UTR_variant = soTree.getTermByAcn("SO:0001624");
final SequenceOntologyTree.Term so_splicing_variant = soTree.getTermByAcn("SO:0001568");
final SequenceOntologyTree.Term so_stop_lost = soTree.getTermByAcn("SO:0001578");
final SequenceOntologyTree.Term so_stop_gained = soTree.getTermByAcn("SO:0001587");
final SequenceOntologyTree.Term so_coding_synonymous = soTree.getTermByAcn("SO:0001819");
final SequenceOntologyTree.Term so_coding_non_synonymous = soTree.getTermByAcn("SO:0001583");
final SequenceOntologyTree.Term so_intergenic = soTree.getTermByAcn("SO:0001628");
final SequenceOntologyTree.Term so_nc_transcript_variant = soTree.getTermByAcn("SO:0001619");
final SequenceOntologyTree.Term so_non_coding_exon_variant = soTree.getTermByAcn("SO:0001792");
final SequenceOntologyTree.Term _2KB_upstream_variant = soTree.getTermByAcn("SO:0001636");
final SequenceOntologyTree.Term _5KB_upstream_variant = soTree.getTermByAcn("SO:0001635");
final SequenceOntologyTree.Term _5KB_downstream_variant = soTree.getTermByAcn("SO:0001633");
final SequenceOntologyTree.Term _500bp_downstream_variant = soTree.getTermByAcn("SO:0001634");
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
while (r.hasNext()) {
final VariantContext ctx = progress.watch(r.next());
final String normalizedContig = contigNameConverter.apply(ctx.getContig());
final List<KnownGene> genes = new ArrayList<>();
if (!StringUtil.isBlank(normalizedContig)) {
for (final List<KnownGene> l2 : this.knownGenes.getOverlapping(new Interval(normalizedContig, ctx.getStart(), // 1-based
ctx.getEnd()))) {
genes.addAll(l2);
}
}
final List<Annotation> ctx_annotations = new ArrayList<Annotation>();
if (genes == null || genes.isEmpty()) {
// intergenic
Annotation a = new Annotation();
a.seqont.add(so_intergenic);
ctx_annotations.add(a);
} else {
if (genomicSequence == null || !genomicSequence.hasName(normalizedContig)) {
LOG.info("getting genomic Sequence for " + normalizedContig);
genomicSequence = this.referenceGenome.getContig(normalizedContig);
if (genomicSequence == null)
throw new JvarkitException.ContigNotFoundInDictionary(normalizedContig, this.referenceGenome.getDictionary());
}
for (final KnownGene gene : genes) {
final GeneticCode geneticCode = GeneticCode.getStandard();
for (final Allele alt2 : ctx.getAlternateAlleles()) {
if (alt2.isNoCall())
continue;
if (alt2.isSymbolic()) {
LOG.warn("symbolic allele are not handled... " + alt2.getDisplayString());
continue;
}
if (alt2.isReference())
continue;
final Annotation annotations = new Annotation();
annotations.kg = gene;
annotations.alt2 = alt2;
if (gene.isNonCoding()) {
annotations.seqont.add(so_nc_transcript_variant);
continue;
}
ctx_annotations.add(annotations);
StringBuilder wildRNA = null;
ProteinCharSequence wildProt = null;
ProteinCharSequence mutProt = null;
MutedSequence mutRNA = null;
int position_in_cds = -1;
final int position = ctx.getStart() - 1;
if (!String.valueOf(genomicSequence.charAt(position)).equalsIgnoreCase(ctx.getReference().getBaseString())) {
if (isSimpleBase(ctx.getReference())) {
LOG.warn("Warning REF!=GENOMIC SEQ!!! at " + position + "/" + ctx.getReference());
}
continue;
}
if (gene.isPositiveStrand()) {
if (position < gene.getTxStart() - 2000) {
annotations.seqont.add(_5KB_upstream_variant);
} else if (position < gene.getTxStart()) {
annotations.seqont.add(_2KB_upstream_variant);
} else if (position >= gene.getTxEnd() + 500) {
annotations.seqont.add(_5KB_downstream_variant);
} else if (position >= gene.getTxEnd()) {
annotations.seqont.add(_500bp_downstream_variant);
} else if (position < gene.getCdsStart()) {
// UTR5
annotations.seqont.add(so_5_prime_UTR_variant);
} else if (gene.getCdsEnd() <= position) {
annotations.seqont.add(so_3_prime_UTR_variant);
} else {
int exon_index = 0;
while (exon_index < gene.getExonCount()) {
final KnownGene.Exon exon = gene.getExon(exon_index);
for (int i = exon.getStart(); i < exon.getEnd(); ++i) {
if (i == position) {
annotations.exon_name = exon.getName();
if (exon.isNonCoding()) {
annotations.seqont.add(so_non_coding_exon_variant);
}
}
if (i < gene.getTxStart())
continue;
if (i < gene.getCdsStart())
continue;
if (i >= gene.getCdsEnd())
break;
if (wildRNA == null) {
wildRNA = new StringBuilder();
mutRNA = new MutedSequence(wildRNA);
}
if (i == position) {
annotations.seqont.add(so_exon);
annotations.exon_name = exon.getName();
position_in_cds = wildRNA.length();
annotations.position_cds = position_in_cds;
// in splicing ?
if (exon.isSplicing(position)) {
if (exon.isSplicingAcceptor(position)) {
// SPLICING_ACCEPTOR
annotations.seqont.add(so_splice_acceptor);
} else if (exon.isSplicingDonor(position)) {
// SPLICING_DONOR
annotations.seqont.add(so_splice_donor);
} else // ??
{
annotations.seqont.add(so_splicing_variant);
}
}
}
wildRNA.append(genomicSequence.charAt(i));
if (i == position && isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
mutRNA.put(position_in_cds, alt2.getBaseString().charAt(0));
}
if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
wildProt = new ProteinCharSequence(geneticCode, wildRNA);
mutProt = new ProteinCharSequence(geneticCode, mutRNA);
}
}
final KnownGene.Intron intron = exon.getNextIntron();
if (intron != null && intron.contains(position)) {
annotations.intron_name = intron.getName();
annotations.seqont.add(so_intron);
if (intron.isSplicing(position)) {
if (intron.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (intron.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ???
{
annotations.seqont.add(so_splicing_variant);
}
}
}
++exon_index;
}
}
} else // reverse orientation
{
if (position >= gene.getTxEnd() + 2000) {
annotations.seqont.add(_5KB_upstream_variant);
} else if (position >= gene.getTxEnd()) {
annotations.seqont.add(_2KB_upstream_variant);
} else if (position < gene.getTxStart() - 500) {
annotations.seqont.add(_5KB_downstream_variant);
} else if (position < gene.getTxStart()) {
annotations.seqont.add(_500bp_downstream_variant);
} else if (position < gene.getCdsStart()) {
annotations.seqont.add(so_3_prime_UTR_variant);
} else if (gene.getCdsEnd() <= position) {
annotations.seqont.add(so_5_prime_UTR_variant);
} else {
int exon_index = gene.getExonCount() - 1;
while (exon_index >= 0) {
final KnownGene.Exon exon = gene.getExon(exon_index);
for (int i = exon.getEnd() - 1; i >= exon.getStart(); --i) {
if (i == position) {
annotations.exon_name = exon.getName();
if (exon.isNonCoding()) {
annotations.seqont.add(so_non_coding_exon_variant);
}
}
if (i >= gene.getCdsEnd())
continue;
if (i < gene.getCdsStart())
break;
if (wildRNA == null) {
wildRNA = new StringBuilder();
mutRNA = new MutedSequence(wildRNA);
}
if (i == position) {
annotations.seqont.add(so_exon);
position_in_cds = wildRNA.length();
annotations.position_cds = position_in_cds;
// in splicing ?
if (exon.isSplicing(position)) {
if (exon.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (exon.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ?
{
annotations.seqont.add(so_splicing_variant);
}
}
if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
mutRNA.put(position_in_cds, AcidNucleics.complement(alt2.getBaseString().charAt(0)));
}
}
wildRNA.append(AcidNucleics.complement(genomicSequence.charAt(i)));
if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
wildProt = new ProteinCharSequence(geneticCode, wildRNA);
mutProt = new ProteinCharSequence(geneticCode, mutRNA);
}
}
final KnownGene.Intron intron = exon.getPrevIntron();
if (intron != null && intron.contains(position)) {
annotations.intron_name = intron.getName();
annotations.seqont.add(so_intron);
if (intron.isSplicing(position)) {
if (intron.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (intron.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ?
{
annotations.seqont.add(so_splicing_variant);
}
}
}
--exon_index;
}
}
}
if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference()) && wildProt != null && mutProt != null && position_in_cds >= 0) {
final int pos_aa = position_in_cds / 3;
final int mod = position_in_cds % 3;
annotations.wildCodon = ("" + wildRNA.charAt(position_in_cds - mod + 0) + wildRNA.charAt(position_in_cds - mod + 1) + wildRNA.charAt(position_in_cds - mod + 2));
annotations.mutCodon = ("" + mutRNA.charAt(position_in_cds - mod + 0) + mutRNA.charAt(position_in_cds - mod + 1) + mutRNA.charAt(position_in_cds - mod + 2));
annotations.position_protein = (pos_aa + 1);
annotations.wildAA = String.valueOf(wildProt.charAt(pos_aa));
annotations.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
annotations.seqont.remove(so_exon);
if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
annotations.seqont.add(so_stop_lost);
} else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
annotations.seqont.add(so_stop_gained);
} else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
annotations.seqont.add(so_coding_synonymous);
} else {
annotations.seqont.add(so_coding_non_synonymous);
}
}
}
}
}
final Set<String> info = new HashSet<String>(ctx_annotations.size());
for (final Annotation a : ctx_annotations) {
info.add(a.toString());
}
final VariantContextBuilder vb = new VariantContextBuilder(ctx);
final String thetag;
switch(this.outputSyntax) {
case Vep:
thetag = "CSQ";
break;
case SnpEff:
thetag = "ANN";
break;
default:
thetag = TAG;
break;
}
vb.attribute(thetag, info.toArray());
w.add(vb.make());
}
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.referenceGenome);
}
}
use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree in project jvarkit by lindenb.
the class VcfFilterSequenceOntology method readSequenceOntologyTree.
/**
* return SequenceOntologyTree
*
* @param owluri if null, use getInstance
* @return a SequenceOntologyTree
* @throws IOException
*/
private static SequenceOntologyTree readSequenceOntologyTree(final String owluri) throws IOException {
if (!StringUtil.isBlank(owluri)) {
LOG.info("loading 'SO' tree from " + owluri);
SequenceOntologyTree sequenceOntologyTree = SequenceOntologyTree.fromUri(owluri.trim());
LOG.info("Done loading SO.");
return sequenceOntologyTree;
} else {
return SequenceOntologyTree.getInstance();
}
}
use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree in project jvarkit by lindenb.
the class VcfFilterSequenceOntology method doWork.
@Override
public int doWork(final List<String> args) {
try {
if (this.showList) {
final SequenceOntologyTree tree = readSequenceOntologyTree(this.component.owluri);
final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
for (final SequenceOntologyTree.Term t : tree.getTerms()) {
pw.println(t.getAcn() + "\t" + t.getLabel());
}
pw.flush();
pw.close();
return 0;
}
if (this.component.initialize() != 0)
return -1;
return doVcfToVcf(args, this.outputFile);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.component);
}
}
use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree in project jvarkit by lindenb.
the class VcfBurden method doWork2.
private int doWork2(List<String> args) {
ZipOutputStream zout = null;
FileOutputStream fout = null;
VcfIterator in = null;
try {
if (args.isEmpty()) {
LOG.info("reading from stdin.");
in = VCFUtils.createVcfIteratorStdin();
} else if (args.size() == 1) {
String filename = args.get(0);
LOG.info("reading from " + filename);
in = VCFUtils.createVcfIterator(filename);
} else {
LOG.error("Illegal number of arguments.");
return -1;
}
if (outputFile == null) {
LOG.error("undefined output");
return -1;
} else if (!outputFile.getName().endsWith(".zip")) {
LOG.error("output " + outputFile + " should end with .zip");
return -1;
} else {
fout = new FileOutputStream(outputFile);
zout = new ZipOutputStream(fout);
}
final List<String> samples = in.getHeader().getSampleNamesInOrder();
final VCFHeader header = in.getHeader();
String prev_chrom = null;
final VepPredictionParser vepPredParser = new VepPredictionParserFactory().header(header).get();
final Map<GeneTranscript, List<VariantAndCsq>> gene2variants = new HashMap<>();
final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
final Set<SequenceOntologyTree.Term> acn = new HashSet<>();
/* mail solena *SO en remplacement des SO actuels (VEP HIGH + MODERATE) - pas la peine de faire retourner les analyses mais servira pour les futures analyses burden */
String[] acn_list = new String[] { "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889", "SO:0001821", "SO:0001822", "SO:0001583", "SO:0001818" /*
"SO:0001589", "SO:0001587", "SO:0001582", "SO:0001583",
"SO:0001575", "SO:0001578", "SO:0001574", "SO:0001889",
"SO:0001821", "SO:0001822", "SO:0001893"*/
};
if (this.highdamage) {
acn_list = new String[] { "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889" };
}
for (final String acns : acn_list) {
final SequenceOntologyTree.Term tacn = soTree.getTermByAcn(acns);
if (tacn == null) {
in.close();
throw new NullPointerException("tacn == null pour " + acns);
}
acn.addAll(tacn.getAllDescendants());
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
for (; ; ) {
VariantContext ctx1 = null;
if (in.hasNext()) {
ctx1 = progress.watch(in.next());
if (ctx1.getAlternateAlleles().size() != 1) {
// info("count(ALT)!=1 in "+ctx1.getChr()+":"+ctx1.getStart());
continue;
}
if (ctx1.getAlternateAlleles().get(0).equals(Allele.SPAN_DEL)) {
continue;
}
}
if (ctx1 == null || !ctx1.getContig().equals(prev_chrom)) {
LOG.info("DUMP to zip n=" + gene2variants.size());
final Set<String> geneNames = new HashSet<>();
for (GeneTranscript gene_transcript : gene2variants.keySet()) {
geneNames.add(gene_transcript.geneName);
final Set<VariantAndCsq> uniq = new TreeSet<>(this.variantAndCsqComparator);
uniq.addAll(gene2variants.get(gene_transcript));
dumpVariants(zout, prev_chrom, gene_transcript.geneName + "_" + gene_transcript.transcriptName, samples, new ArrayList<VariantAndCsq>(uniq));
LOG.info("dumped" + gene_transcript.geneName);
}
LOG.info("loop over geneName");
for (final String geneName : geneNames) {
final SortedSet<VariantAndCsq> lis_nm = new TreeSet<>(this.variantAndCsqComparator);
final SortedSet<VariantAndCsq> lis_all = new TreeSet<>(this.variantAndCsqComparator);
final SortedSet<VariantAndCsq> lis_refseq = new TreeSet<>(this.variantAndCsqComparator);
final SortedSet<VariantAndCsq> lis_enst = new TreeSet<>(this.variantAndCsqComparator);
LOG.info("loop over gene2variants");
for (final GeneTranscript gene_transcript : gene2variants.keySet()) {
if (!geneName.equals(gene_transcript.geneName))
continue;
lis_all.addAll(gene2variants.get(gene_transcript));
if (gene_transcript.transcriptName.startsWith("NM_")) {
lis_nm.addAll(gene2variants.get(gene_transcript));
}
if (!gene_transcript.transcriptName.startsWith("ENST")) {
lis_refseq.addAll(gene2variants.get(gene_transcript));
}
if (gene_transcript.transcriptName.startsWith("ENST")) {
lis_enst.addAll(gene2variants.get(gene_transcript));
}
}
LOG.info("dump_ALL_TRANSCRIPTS");
dumpVariants(zout, prev_chrom, geneName + "_ALL_TRANSCRIPTS", samples, new ArrayList<VariantAndCsq>(lis_all));
LOG.info("dump_ALL_NM");
dumpVariants(zout, prev_chrom, geneName + "_ALL_NM", samples, new ArrayList<VariantAndCsq>(lis_nm));
LOG.info("dump_ALL_REFSEQ");
dumpVariants(zout, prev_chrom, geneName + "_ALL_REFSEQ", samples, new ArrayList<VariantAndCsq>(lis_refseq));
LOG.info("dump_ALL_ENST");
dumpVariants(zout, prev_chrom, geneName + "_ALL_ENST", samples, new ArrayList<VariantAndCsq>(lis_enst));
}
if (ctx1 == null)
break;
LOG.info("gene2variants");
gene2variants.clear();
LOG.info("System.gc();");
System.gc();
prev_chrom = ctx1.getContig();
LOG.info("prev_chrom=" + prev_chrom);
}
final Set<GeneTranscript> seen_names = new HashSet<>();
for (final VepPredictionParser.VepPrediction pred : vepPredParser.getPredictions(ctx1)) {
String geneName = pred.getSymbol();
if (geneName == null || geneName.trim().isEmpty())
continue;
if (this._gene2seen != null) {
if (!this._gene2seen.containsKey(geneName))
continue;
}
final String transcriptName = pred.getFeature();
if (transcriptName == null || transcriptName.trim().isEmpty()) {
LOG.info("No transcript in " + ctx1);
continue;
}
final GeneTranscript geneTranscript = new GeneTranscript(geneName, transcriptName);
if (seen_names.contains(geneTranscript))
continue;
boolean ok = false;
for (SequenceOntologyTree.Term so : pred.getSOTerms()) {
if (acn.contains(so)) {
ok = true;
}
}
if (!printSOTerms && !ok)
continue;
List<VariantAndCsq> L = gene2variants.get(geneTranscript);
if (L == null) {
L = new ArrayList<>();
gene2variants.put(geneTranscript, L);
}
Float vqslod = null;
if (this.printVQSLOD && ctx1.hasAttribute("VQSLOD")) {
vqslod = (float) ctx1.getAttributeAsDouble("VQSLOD", -9999999.0);
}
L.add(new VariantAndCsq(ctx1, pred.getSOTerms(), pred.getPositionInCDS(), vqslod));
seen_names.add(geneTranscript);
if (this._gene2seen != null) {
this._gene2seen.put(geneTranscript.geneName, Boolean.TRUE);
}
}
}
if (this._gene2seen != null) {
final List<VariantAndCsq> emptylist = Collections.emptyList();
for (final String gene : this._gene2seen.keySet()) {
if (this._gene2seen.get(gene).equals(Boolean.TRUE))
continue;
LOG.warning("Gene not found : " + gene);
dumpVariants(zout, "UNDEFINED", gene + "_000000000000000.txt", samples, emptylist);
}
}
progress.finish();
zout.finish();
fout.flush();
zout.flush();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(in);
CloserUtil.close(zout);
CloserUtil.close(fout);
}
}
use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree in project jvarkit by lindenb.
the class TestNg01 method testVcfFilterSo.
@Test
public void testVcfFilterSo() throws IOException {
File output = new File(TEST_RESULTS_DIR, "jeter.filrerso.vcf");
final AnnPredictionParser parser = new AnnPredictionParserFactory().createDefaultParser();
final SequenceOntologyTree tree = SequenceOntologyTree.getInstance();
String acn = "SO:0001583";
final SequenceOntologyTree.Term term = tree.getTermByAcn(acn);
final Set<SequenceOntologyTree.Term> terms = term.getAllDescendants();
Assert.assertNotNull(term);
Assert.assertTrue(terms.size() > 1);
Assert.assertTrue(terms.contains(term));
Assert.assertEquals(0, new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, VCF01 }));
streamVcf(output).forEach(V -> {
// System.err.println(V.getAttribute("ANN")+" vs "+ terms);
Assert.assertTrue(parser.getPredictions(V).stream().flatMap(P -> P.getSOTerms().stream()).anyMatch(T -> terms.contains(T)));
});
Assert.assertEquals(0, new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, "--rmatt", "--invert", VCF01 }));
streamVcf(output).forEach(V -> {
Assert.assertFalse(parser.getPredictions(V).stream().flatMap(P -> P.getSOTerms().stream()).anyMatch(T -> terms.contains(T)));
});
Assert.assertEquals(0, new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, "--rmatt", VCF01 }));
Assert.assertTrue(streamVcf(output).findAny().isPresent());
Assert.assertTrue(output.delete());
}
Aggregations