Examples with GOParser - com.github.lindenb.jvarkit.go.GOParser

Example 1 with GOParser

use of com.github.lindenb.jvarkit.go.GOParser in project jvarkit by lindenb.

The doWork method of the class GoGeneReporter.

/**
 * Loads a tab-delimited table, collects the GO terms annotated to the genes found in one
 * of its columns, and reports, for every term of the ontology, the genes linked to it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>read every line of the input resolved by {@code oneFileOrNull(args)} and split it on
 *       TAB; a row with fewer than {@code geneColumnName1} columns raises a
 *       {@code JvarkitException.TokenErrors};</li>
 *   <li>gather the gene names from column {@code geneColumnName1} (1-based: the code reads
 *       index {@code geneColumnName1 - 1}), skipping the first row when
 *       {@code first_line_is_header} is set;</li>
 *   <li>parse the ontology from {@code goURI} and, when {@code limitTermStr} is not blank,
 *       resolve each of its tokens to a term by accession, then by name;</li>
 *   <li>scan the GOA records from {@code goaUri}, ignoring records qualified with
 *       {@code "NOT"}, records for genes not in the table and records whose GO id is unknown
 *       to the ontology;</li>
 *   <li>for each ontology term, report the genes having at least one annotated term for
 *       which {@code isDescendantOf(term)} is true.</li>
 * </ol>
 *
 * @param args command-line arguments, forwarded to {@code oneFileOrNull}
 * @return {@code 0} on success (including an empty input table), {@code -1} when any
 *         {@code Throwable} is raised; the error is logged
 */
@Override
public int doWork(final List<String> args) {
    try {
        final String input = oneFileOrNull(args);
        final List<List<String>> table = new ArrayList<>();
        try (BufferedReader br = super.openBufferedReader(input)) {
            String line;
            while ((line = br.readLine()) != null) {
                final List<String> tokens = CharSplitter.TAB.splitAsStringList(line);
                if (tokens.size() < this.geneColumnName1) {
                    throw new JvarkitException.TokenErrors("expected " + this.geneColumnName1 + " columns", tokens);
                }
                table.add(tokens);
            }
        }
        if (table.isEmpty()) {
            LOG.info("No data. Bye");
            return 0;
        }
        final Set<String> geneNames = table.stream().skip(first_line_is_header ? 1L : 0L).map(T -> T.get(geneColumnName1 - 1)).collect(Collectors.toSet());
        final Map<String, Set<GOOntology.Term>> gene2go = new HashMap<>(geneNames.size());
        final GOOntology mainGoTree = new GOParser().setDebug(false).parseOBO(this.goURI);
        // null means "no restriction"; otherwise only terms related to one of these are reported
        final Set<GOOntology.Term> limitToTerms;
        if (StringUtils.isBlank(this.limitTermStr)) {
            limitToTerms = null;
        } else {
            limitToTerms = Arrays.stream(this.limitTermStr.split("[ ,\t\n]+")).map(S -> {
                // try the accession first, then fall back to the term name
                GOOntology.Term term = mainGoTree.getTermByAccession(S);
                if (term == null) {
                    term = mainGoTree.getTermByName(S);
                }
                if (term == null) {
                    throw new IllegalArgumentException("Cannot find GO term : " + S);
                }
                return term;
            }).collect(Collectors.toSet());
        }
        try (GOAFileIterator goain = GOAFileIterator.newInstance(this.goaUri)) {
            while (goain.hasNext()) {
                final GOAFileIterator.GafRecord rec = goain.next();
                if (rec.getQualifiers().contains("NOT")) {
                    continue;
                }
                if (!geneNames.contains(rec.getObjectSymbol())) {
                    continue;
                }
                final GOOntology.Term term = mainGoTree.getTermByAccession(rec.getGoId());
                if (term == null) {
                    LOG.warn("Cannot find GO term " + rec.getGoId());
                    continue;
                }
                gene2go.computeIfAbsent(rec.getObjectSymbol(), key -> new HashSet<>()).add(term);
            }
        }
        // only warn when there really are genes without any annotation
        final List<String> genesWithoutTerm = geneNames.stream().filter(G -> !gene2go.containsKey(G)).collect(Collectors.toList());
        if (!genesWithoutTerm.isEmpty()) {
            LOG.warn("No GO term was found associated to the following genes:" + String.join(" ", genesWithoutTerm));
        }
        final Reporter reporter = new TextReporter(super.openPathOrStdoutAsPrintWriter(this.outputFile));
        try {
            reporter.beginDoc();
            for (final GOOntology.Term term : mainGoTree.getTerms()) {
                Objects.requireNonNull(term);
                if (limitToTerms != null && limitToTerms.stream().noneMatch(T -> term.isDescendantOf(T))) {
                    continue;
                }
                // genes owning at least one annotation for which isDescendantOf(term) holds
                // NOTE(review): whether isDescendantOf() is true for the term itself is not visible here - confirm
                final Set<String> displayGenes = gene2go.entrySet().stream().filter(KV -> KV.getValue().stream().anyMatch(TERM -> TERM.isDescendantOf(term))).map(KV -> KV.getKey()).collect(Collectors.toSet());
                if (displayGenes.isEmpty()) {
                    continue;
                }
                reporter.report(term, displayGenes, table);
            }
            reporter.endDoc();
        } finally {
            // always release the reporter, even when reporting fails half-way
            reporter.close();
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}

Also used : Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Parameter(com.beust.jcommander.Parameter) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) GOAFileIterator(com.github.lindenb.jvarkit.goa.GOAFileIterator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Map(java.util.Map) XMLStreamException(javax.xml.stream.XMLStreamException) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) GOOntology(com.github.lindenb.jvarkit.go.GOOntology) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) GOParser(com.github.lindenb.jvarkit.go.GOParser) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Objects(java.util.Objects) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) BufferedReader(java.io.BufferedReader) GOAFileIterator(com.github.lindenb.jvarkit.goa.GOAFileIterator) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GOParser(com.github.lindenb.jvarkit.go.GOParser) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) GOOntology(com.github.lindenb.jvarkit.go.GOOntology)

Example 2 with GOParser

use of com.github.lindenb.jvarkit.go.GOParser in project jvarkit by lindenb.

The doWork method of the class VcfBurdenGoEnrichment.

/**
 * Counts, per GO term, the REF/ALT genotypes of affected and unaffected individuals over the
 * variants of a VCF, then writes one tab-delimited line per term with the value returned by
 * {@code Node.fisher()}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>parse the ontology from {@code goURI} and create one {@code Node} per term, a node
 *       being created only once the nodes of all the terms it relates to exist;</li>
 *   <li>read {@code geneFile} (lines {@code gene<TAB>term}; empty lines and lines starting
 *       with {@code #} are ignored) and attach each gene to the node of its term, the term
 *       being looked up with {@code getTermByName};</li>
 *   <li>scan the VCF, extract gene identifiers from the VEP and ANN predictions, and call
 *       {@code Node.visit} with the four genotype counts on every node attached to one of
 *       these genes;</li>
 *   <li>print the nodes sorted by {@code fisher()}.</li>
 * </ol>
 *
 * @param args command-line arguments, forwarded to {@code oneFileOrNull} to locate the VCF
 * @return {@code 0} on success; {@code -1} when an option is missing, the gene file is
 *         malformed, there is no affected or no unaffected individual, or any
 *         {@code Exception} is raised (the error is logged)
 */
@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.goURI)) {
        LOG.error("Undefined GOs uri.");
        return -1;
    }
    if (this.geneFile == null || !this.geneFile.exists()) {
        LOG.error("Undefined gene file option.");
        return -1;
    }
    try {
        final GOOntology gotree = new GOParser().parseOBO(this.goURI);
        final List<GOOntology.Term> terms = new ArrayList<>(gotree.getTerms());
        final Map<GOOntology.Term, Node> term2node = new HashMap<>();
        // build the node TREE: repeatedly sweep the pending terms, creating the node of a term
        // as soon as the nodes of all its relation targets exist
        while (!terms.isEmpty()) {
            final int pendingBefore = terms.size();
            int i = 0;
            while (i < terms.size()) {
                final GOOntology.Term t = terms.get(i);
                if (!t.hasRelations()) {
                    term2node.put(t, new Node(t));
                    terms.remove(i);
                } else if (t.getRelations().stream().allMatch(L -> term2node.containsKey(L.getTo()))) {
                    final Node n = new Node(t);
                    n.parents.addAll(t.getRelations().stream().map(L -> term2node.get(L.getTo())).collect(Collectors.toSet()));
                    term2node.put(t, n);
                    terms.remove(i);
                } else {
                    i++;
                }
            }
            // a sweep that resolves nothing would repeat forever (cyclic relations, or a
            // relation targeting a term absent from the ontology): fail instead of hanging
            if (terms.size() == pendingBefore) {
                throw new IllegalStateException("Cannot build the GO tree: " + pendingBefore + " term(s) have cyclic or unresolved relations");
            }
        }
        final Set<String> unknownAcn = new HashSet<>();
        final Map<String, Set<Node>> gene2node = new HashMap<>();
        try (BufferedReader r = IOUtils.openFileForBufferedReading(this.geneFile)) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }
                final int tab = line.indexOf('\t');
                if (tab == -1) {
                    LOG.error("tab missing in " + line + " of " + this.geneFile);
                    return -1;
                }
                final String gene = line.substring(0, tab).trim();
                if (StringUtil.isBlank(gene)) {
                    LOG.error("Emtpy gene in " + line);
                    return -1;
                }
                // using getTermByName because synonyms were found in GOA
                final String termAcn = line.substring(tab + 1).trim();
                if (unknownAcn.contains(termAcn)) {
                    // already reported once
                    continue;
                }
                final GOOntology.Term term = gotree.getTermByName(termAcn);
                if (term == null) {
                    unknownAcn.add(termAcn);
                    LOG.warning("Don't know this GO term in " + line + " of " + this.geneFile + ". Could be obsolete, synonym, go specific division. Skipping.");
                    continue;
                }
                final Node node = term2node.get(term);
                if (node == null) {
                    LOG.error("Don't know this node in " + line + " of " + this.geneFile);
                    return -1;
                }
                // numGenes is incremented for every accepted line, as in the original code
                node.numGenes++;
                gene2node.computeIfAbsent(gene, key -> new HashSet<>()).add(node);
            }
        }
        // clean up
        unknownAcn.clear();
        final SAMSequenceDictionaryProgress progress;
        try (VCFIterator iter = openVCFIterator(oneFileOrNull(args))) {
            final VCFHeader header = iter.getHeader();
            final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
            final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
            final Set<Pedigree.Person> persons;
            if (this.pedFile != null) {
                final Pedigree pedigree = Pedigree.newParser().parse(this.pedFile);
                persons = new Pedigree.CaseControlExtractor().extract(header, pedigree);
            } else {
                persons = new Pedigree.CaseControlExtractor().extract(header);
            }
            final Set<Pedigree.Person> affected = persons.stream().filter(P -> P.isAffected()).collect(Collectors.toSet());
            final Set<Pedigree.Person> unaffected = persons.stream().filter(P -> P.isUnaffected()).collect(Collectors.toSet());
            if (affected.isEmpty()) {
                LOG.error("No Affected individual");
                return -1;
            }
            if (unaffected.isEmpty()) {
                LOG.error("No unaffected individual");
                return -1;
            }
            // VEP columns whose non-blank values are used as gene identifiers
            final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
            final Predicate<Genotype> isWildGenotype = G -> {
                if (G == null) {
                    return false;
                }
                return G.isHomRef();
            };
            final Predicate<Genotype> isAltGenotype = G -> {
                if (G == null) {
                    return false;
                }
                return G.isCalled() && !G.isHomRef();
            };
            progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!this.variantFilter.test(ctx)) {
                    continue;
                }
                final Set<String> genes = new HashSet<>();
                for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                    final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                    for (final String col : lookColumns) {
                        final String token = pred.getByCol(col);
                        if (!StringUtil.isBlank(token)) {
                            genes.add(token);
                        }
                    }
                }
                for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                    final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                    final String token = pred.getGeneName();
                    if (!StringUtil.isBlank(token)) {
                        genes.add(token);
                    }
                }
                if (genes.isEmpty()) {
                    continue;
                }
                final Set<Node> nodes = genes.stream().filter(G -> gene2node.containsKey(G)).flatMap(G -> gene2node.get(G).stream()).collect(Collectors.toSet());
                if (nodes.isEmpty()) {
                    continue;
                }
                final long unaffected_alt = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
                final long affected_alt = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
                /* not informative: nobody carries an ALT genotype */
                if (unaffected_alt + affected_alt == 0L) {
                    continue;
                }
                final long affected_ref = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
                final long unaffected_ref = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
                // NOTE(review): the reset presumably prevents a node shared by several
                // paths from being counted twice for the same variant - confirm in Node
                nodes.stream().forEach(N -> N.resetVisitedFlag());
                nodes.stream().forEach(N -> N.visit(unaffected_ref, unaffected_alt, affected_ref, affected_alt));
            }
        }
        // the iterator is closed before the progress is finished, as in the original order
        progress.finish();
        LOG.info("Calculating Fisher and dumping.. please wait");
        try (PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile)) {
            pw.println("#go_term\tfisher\tname\tgo_term_depth\tcount_genes_in_this_node" + "\tunaffected_ref_gt" + "\tunaffected_alt_gt" + "\taffected_ref_gt" + "\taffected_alt_gt");
            term2node.values().stream().filter(N -> this.show_never_seeen_term || N.sum() > 0L).sorted((n1, n2) -> Double.compare(n1.fisher(), n2.fisher())).forEach(N -> {
                pw.print(N.goTerm.getAcn());
                pw.print('\t');
                pw.print(N.fisher());
                pw.print("\t");
                pw.print(N.goTerm.getName().replaceAll("[ \',\\-]+", "_"));
                pw.print("\t");
                pw.print(N.goTerm.getMinDepth());
                pw.print('\t');
                pw.print(N.numGenes);
                pw.print('\t');
                pw.print(N.unaffected_ref);
                pw.print('\t');
                pw.print(N.unaffected_alt);
                pw.print('\t');
                pw.print(N.affected_ref);
                pw.print('\t');
                pw.print(N.affected_alt);
                pw.println();
            });
            pw.flush();
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}

Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Arrays(java.util.Arrays) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) VCFIterator(htsjdk.variant.vcf.VCFIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) GOOntology(com.github.lindenb.jvarkit.go.GOOntology) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) PrintWriter(java.io.PrintWriter) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) GOParser(com.github.lindenb.jvarkit.go.GOParser) File(java.io.File) List(java.util.List) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) 
VariantContext(htsjdk.variant.variantcontext.VariantContext) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GOParser(com.github.lindenb.jvarkit.go.GOParser) Genotype(htsjdk.variant.variantcontext.Genotype) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) BufferedReader(java.io.BufferedReader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) GOOntology(com.github.lindenb.jvarkit.go.GOOntology)

Example 3 with GOParser

use of com.github.lindenb.jvarkit.go.GOParser in project jvarkit by lindenb.

The doWork method of the class GoUtils.

@Override
public int doWork(final List<String> args) {
    try {
        this.mainGoTree = new GOParser().setDebug(this.do_debug).parseOBO(this.goURI);
        final Map<GOOntology.Term, UserTerm> userTerms = new HashMap<>();
        for (final String s : this.userAccStrings) {
            if (StringUtil.isBlank(s))
                continue;
            final GOOntology.Term t = this.mainGoTree.getTermByAccessionOrName(s);
            if (t == null) {
                LOG.error("cannot find user term \"" + s + "\"");
                return -1;
            }
            userTerms.put(t, new UserTerm(t));
        }
        final Predicate<GOOntology.Term> keepTerm = T -> {
            boolean keep = false;
            if (userTerms.isEmpty()) {
                keep = true;
            } else if (userTerms.keySet().stream().anyMatch(USERTERM -> (T.isDescendantOf(USERTERM)))) {
                keep = true;
            }
            if (this.inverse)
                keep = !keep;
            return keep;
        };
        if (this.accessionFile != null) {
            final ColorUtils colorUtils = new ColorUtils();
            try (BufferedReader r = IOUtils.openPathForBufferedReading(this.accessionFile)) {
                String line;
                while ((line = r.readLine()) != null) {
                    if (line.isEmpty() || line.startsWith("#"))
                        continue;
                    int last = 0;
                    for (last = 0; last < line.length(); ++last) {
                        if (Character.isWhitespace(line.charAt(last)))
                            break;
                    }
                    final String s = line.substring(0, last);
                    GOOntology.Term t = this.mainGoTree.getTermByAccessionOrName(s);
                    if (t == null) {
                        LOG.error("In " + this.accessionFile + " cannot find user term \"" + s + "\"");
                        return -1;
                    }
                    final UserTerm ut = new UserTerm(t);
                    userTerms.put(t, ut);
                    switch(this.action) {
                        case dump_gexf:
                            {
                                for (final String left : line.substring(last).trim().split("[ \t;]+")) {
                                    if (left.isEmpty()) {
                                    // cont
                                    } else if (left.startsWith("color=") && ut.vizColor == null) {
                                        ut.vizColor = colorUtils.parse(left.substring(6));
                                    } else if (left.startsWith("size=") && ut.vizSize == null) {
                                        ut.vizSize = Double.parseDouble(left.substring(5));
                                    } else {
                                        LOG.warning("Ignoring unknown modifier " + left + " in " + line);
                                    }
                                }
                                break;
                            }
                        default:
                            break;
                    }
                }
            }
        }
        switch(this.action) {
            case dump_gexf:
                {
                    final XMLOutputFactory xof = XMLOutputFactory.newFactory();
                    XMLStreamWriter w = null;
                    FileWriter fw = null;
                    if (this.outputFile == null) {
                        w = xof.createXMLStreamWriter(stdout(), "UTF-8");
                    } else {
                        w = xof.createXMLStreamWriter((fw = new FileWriter(this.outputFile)));
                    }
                    final Function<GOOntology.Term, String> term2str = T -> T.getAcn().replaceAll("[\\:_#]+", "_");
                    w.writeStartDocument("UTF-8", "1.0");
                    w.writeStartElement("gexf");
                    w.writeAttribute("xmlns", GexfConstants.XMLNS);
                    w.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
                    w.writeAttribute("xmlns:viz", GexfConstants.XMLNS_VIZ);
                    w.writeAttribute("xsi:schemaLocation", GexfConstants.XSI_SCHEMA_LOCATION);
                    w.writeAttribute("version", GexfConstants.VERSION);
                    w.writeStartElement("meta");
                    w.writeStartElement("creator");
                    w.writeCharacters(getClass().getName() + " by Pierre Lindenbaum");
                    w.writeEndElement();
                    w.writeStartElement("description");
                    w.writeCharacters("Gene Ontology Tree to Gexf :" + getProgramCommandLine());
                    w.writeEndElement();
                    // meta
                    w.writeEndElement();
                    w.writeStartElement("graph");
                    w.writeAttribute("mode", "static");
                    w.writeAttribute("defaultedgetype", "directed");
                    w.writeStartElement("attributes");
                    w.writeAttribute("class", "edge");
                    w.writeAttribute("mode", "static");
                    // attributes
                    w.writeEndElement();
                    w.writeStartElement("attributes");
                    w.writeAttribute("class", "node");
                    w.writeAttribute("mode", "static");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "0");
                    w.writeAttribute("title", "description");
                    w.writeAttribute("type", "string");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "1");
                    w.writeAttribute("title", "accession");
                    w.writeAttribute("type", "string");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "2");
                    w.writeAttribute("title", "userTerm");
                    w.writeAttribute("type", "boolean");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "3");
                    w.writeAttribute("title", "parentOfUserTerm");
                    w.writeAttribute("type", "boolean");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "4");
                    w.writeAttribute("title", "childOffUserTerm");
                    w.writeAttribute("type", "boolean");
                    w.writeEmptyElement("attribute");
                    w.writeAttribute("id", "5");
                    w.writeAttribute("title", "division");
                    w.writeAttribute("type", "boolean");
                    // attributes
                    w.writeEndElement();
                    w.writeStartElement("nodes");
                    w.writeAttribute("count", String.valueOf(this.mainGoTree.size()));
                    for (final GOOntology.Term term : this.mainGoTree.getTerms()) {
                        final UserTerm ut = userTerms.get(term);
                        w.writeStartElement("node");
                        w.writeAttribute("id", term2str.apply(term));
                        w.writeAttribute("label", term.getName());
                        w.writeStartElement("attvalues");
                        w.writeEmptyElement("attvalue");
                        w.writeAttribute("for", "0");
                        w.writeAttribute("value", term.getDefinition());
                        w.writeEmptyElement("attvalue");
                        w.writeAttribute("for", "1");
                        w.writeAttribute("value", term.getAcn());
                        w.writeEmptyElement("attvalue");
                        w.writeAttribute("for", "2");
                        w.writeAttribute("value", String.valueOf(ut != null));
                        w.writeEmptyElement("attvalue");
                        // is parent of any user term
                        w.writeAttribute("for", "3");
                        w.writeAttribute("value", String.valueOf(userTerms.keySet().stream().anyMatch(T -> T.isDescendantOf(term))));
                        w.writeEmptyElement("attvalue");
                        // is child of any user term
                        w.writeAttribute("for", "4");
                        w.writeAttribute("value", String.valueOf(userTerms.keySet().stream().anyMatch(T -> term.isDescendantOf(T))));
                        w.writeEmptyElement("attvalue");
                        w.writeAttribute("for", "5");
                        w.writeAttribute("value", term.getDivision() == null ? "." : term.getDivision().name());
                        // attvalues
                        w.writeEndElement();
                        double viz_size = 1.0;
                        if (ut != null) {
                            if (ut.vizSize != null) {
                                viz_size = ut.vizSize;
                            }
                            if (ut.vizColor != null) {
                                // viz:color
                                w.writeEmptyElement("viz:color");
                                w.writeAttribute("r", String.valueOf(ut.vizColor.getRed()));
                                w.writeAttribute("g", String.valueOf(ut.vizColor.getGreen()));
                                w.writeAttribute("b", String.valueOf(ut.vizColor.getBlue()));
                                w.writeAttribute("a", String.valueOf("1.0"));
                            }
                        }
                        w.writeEmptyElement("viz:size");
                        w.writeAttribute("value", String.valueOf(viz_size));
                        // node
                        w.writeEndElement();
                    }
                    // nodes
                    w.writeEndElement();
                    w.writeStartElement("edges");
                    w.writeAttribute("count", String.valueOf(this.mainGoTree.getTerms().stream().mapToInt(N -> N.getRelations().size()).sum()));
                    for (final GOOntology.Term term : this.mainGoTree.getTerms()) {
                        for (final GOOntology.Relation rel : term.getRelations()) {
                            w.writeStartElement("edge");
                            w.writeAttribute("id", "E" + term2str.apply(term) + "_" + term2str.apply(rel.getTo()));
                            w.writeAttribute("type", "directed");
                            w.writeAttribute("source", term2str.apply(term));
                            w.writeAttribute("target", term2str.apply(rel.getTo()));
                            w.writeAttribute("label", rel.getType());
                            w.writeAttribute("weight", String.valueOf(1));
                            final Color vizColor = Color.BLACK;
                            // viz:color
                            w.writeEmptyElement("viz:color");
                            w.writeAttribute("r", String.valueOf(vizColor.getRed()));
                            w.writeAttribute("g", String.valueOf(vizColor.getGreen()));
                            w.writeAttribute("b", String.valueOf(vizColor.getBlue()));
                            w.writeAttribute("a", String.valueOf("1.0"));
                            w.writeEndElement();
                        }
                    }
                    // edges
                    w.writeEndElement();
                    // graph
                    w.writeEndElement();
                    // gexf
                    w.writeEndElement();
                    w.writeEndDocument();
                    w.flush();
                    if (fw != null) {
                        fw.flush();
                        CloserUtil.close(fw);
                    } else {
                        System.out.flush();
                    }
                    break;
                }
            case goa:
                {
                    if (!args.isEmpty()) {
                        LOG.error("too many arguments");
                        return -1;
                    }
                    final String input;
                    if (StringUtil.isBlank(this.goaURI)) {
                        input = oneFileOrNull(args);
                    } else {
                        input = this.goaURI;
                    }
                    final Set<String> acns_set = this.mainGoTree.getTerms().stream().filter(keepTerm).map(T -> T.getAcn()).collect(Collectors.toSet());
                    try (BufferedReader br = IOUtils.openURIForBufferedReading(this.goaURI)) {
                        try (GOAFileIterator goain = GOAFileIterator.newInstance(br)) {
                            try (PrintWriter out = super.openFileOrStdoutAsPrintWriter(this.outputFile)) {
                                while (goain.hasNext()) {
                                    final GOAFileIterator.GafRecord rec = goain.next();
                                    if (rec.getQualifiers().contains("NOT"))
                                        continue;
                                    if (!acns_set.contains(rec.getGoId()))
                                        continue;
                                    out.println(rec.toString());
                                }
                                out.flush();
                            }
                        }
                    }
                    break;
                }
            case gff3:
                {
                    if (!args.isEmpty()) {
                        LOG.error("too many arguments");
                        return -1;
                    }
                    if (StringUtil.isBlank(this.goaURI)) {
                        LOG.error("undefined GOA-URI");
                        return -1;
                    }
                    final String input;
                    if (!StringUtils.isBlank(this.gffPath)) {
                        input = oneFileOrNull(args);
                    } else {
                        input = this.gffPath;
                    }
                    final Set<String> acns_set = this.mainGoTree.getTerms().stream().filter(keepTerm).map(T -> T.getAcn()).collect(Collectors.toSet());
                    final Set<String> geneNames = new HashSet<>();
                    try (BufferedReader br = IOUtils.openURIForBufferedReading(this.goaURI)) {
                        try (GOAFileIterator goain = GOAFileIterator.newInstance(br)) {
                            while (goain.hasNext()) {
                                final GOAFileIterator.GafRecord rec = goain.next();
                                if (rec.getQualifiers().contains("NOT"))
                                    continue;
                                if (!acns_set.contains(rec.getGoId()))
                                    continue;
                                geneNames.add(rec.getObjectSymbol());
                            }
                        }
                    }
                    final Gff3Codec gff3 = new Gff3Codec(DecodeDepth.DEEP);
                    try (InputStream is = (input == null ? stdin() : IOUtils.openURIForReading(input))) {
                        final AsciiLineReader asciiLineReader = AsciiLineReader.from(is);
                        final LineIterator lr = new LineIteratorImpl(asciiLineReader);
                        try (OutputStream out = super.openFileOrStdoutAsStream(this.outputFile)) {
                            Gff3Writer gw = new Gff3Writer(out);
                            while (!gff3.isDone(lr)) {
                                dumpGff3(gw, gff3.decode(lr), geneNames);
                            }
                            out.flush();
                        }
                        gff3.close(lr);
                        asciiLineReader.close();
                    }
                    break;
                }
            // dump_table is the default action: its label shares the default branch below
            case dump_table:
            default:
                {
                    if (!args.isEmpty()) {
                        LOG.error("too many arguments");
                        return -1;
                    }
                    try (PrintWriter out = super.openFileOrStdoutAsPrintWriter(this.outputFile)) {
                        out.println("#ACN\tNAME\tDEFINITION\tDIVISION");
                        for (final GOOntology.Term t : this.mainGoTree.getTerms()) {
                            if (keepTerm.test(t)) {
                                out.print(t.getAcn());
                                out.print('\t');
                                out.print(t.getName());
                                out.print('\t');
                                out.print(t.getDefinition());
                                out.print('\t');
                                out.print(t.getDivision() == null ? "." : t.getDivision().name());
                                out.println();
                            }
                        }
                        out.flush();
                    }
                    break;
                }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}

Also used : Color(java.awt.Color) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) Program(com.github.lindenb.jvarkit.util.jcommander.Program) LineIterator(htsjdk.tribble.readers.LineIterator) Parameter(com.beust.jcommander.Parameter) HashMap(java.util.HashMap) Function(java.util.function.Function) HashSet(java.util.HashSet) Gff3Writer(htsjdk.tribble.gff.Gff3Writer) GOAFileIterator(com.github.lindenb.jvarkit.goa.GOAFileIterator) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) GOOntology(com.github.lindenb.jvarkit.go.GOOntology) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) OutputStream(java.io.OutputStream) PrintWriter(java.io.PrintWriter) Gff3Feature(htsjdk.tribble.gff.Gff3Feature) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) FileWriter(java.io.FileWriter) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) GOParser(com.github.lindenb.jvarkit.go.GOParser) File(java.io.File) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) Gff3Codec(htsjdk.tribble.gff.Gff3Codec) DecodeDepth(htsjdk.tribble.gff.Gff3Codec.DecodeDepth) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) GexfConstants(com.github.lindenb.jvarkit.gexf.GexfConstants) ColorUtils(com.github.lindenb.jvarkit.util.swing.ColorUtils) BufferedReader(java.io.BufferedReader) InputStream(java.io.InputStream) GOAFileIterator(com.github.lindenb.jvarkit.goa.GOAFileIterator) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) FileWriter(java.io.FileWriter) OutputStream(java.io.OutputStream) ColorUtils(com.github.lindenb.jvarkit.util.swing.ColorUtils) 
LineIterator(htsjdk.tribble.readers.LineIterator) Function(java.util.function.Function) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) PrintWriter(java.io.PrintWriter) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) InputStream(java.io.InputStream) Color(java.awt.Color) GOParser(com.github.lindenb.jvarkit.go.GOParser) Gff3Codec(htsjdk.tribble.gff.Gff3Codec) BufferedReader(java.io.BufferedReader) Gff3Writer(htsjdk.tribble.gff.Gff3Writer) GOOntology(com.github.lindenb.jvarkit.go.GOOntology)

Aggregations

Parameter (com.beust.jcommander.Parameter): 3; GOOntology (com.github.lindenb.jvarkit.go.GOOntology): 3; GOParser (com.github.lindenb.jvarkit.go.GOParser): 3; Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher): 3; Logger (com.github.lindenb.jvarkit.util.log.Logger): 3; BufferedReader (java.io.BufferedReader): 3; PrintWriter (java.io.PrintWriter): 3; HashMap (java.util.HashMap): 3; HashSet (java.util.HashSet): 3; List (java.util.List): 3; Map (java.util.Map): 3; Set (java.util.Set): 3; Collectors (java.util.stream.Collectors): 3; GOAFileIterator (com.github.lindenb.jvarkit.goa.GOAFileIterator): 2; IOUtils (com.github.lindenb.jvarkit.io.IOUtils): 2; StringUtils (com.github.lindenb.jvarkit.lang.StringUtils): 2; Program (com.github.lindenb.jvarkit.util.jcommander.Program): 2; StringUtil (htsjdk.samtools.util.StringUtil): 2; File (java.io.File): 2; Predicate (java.util.function.Predicate): 2