Search in sources:

Example 1 with ExtendedNewickParser

Use of bacter.util.parsers.ExtendedNewickParser in project bacter by tgvaughan.

From the class ConversionGraph, method fromExtendedNewick:

/**
 * Read in an ACG from a string in extended Newick format.  Assumes
 * that the network is stored with exactly the same metadata as written
 * by the getExtendedNewick() method.
 *
 * @param string extended Newick representation of ACG
 * @param numbered true indicates that the ACG is numbered
 * @param nodeNumberoffset offset subtracted from node labels when assigning node numbers
 */
public void fromExtendedNewick(String string, boolean numbered, int nodeNumberoffset) {
    // Spin up ANTLR
    CharStream input = CharStreams.fromString(string);
    ExtendedNewickLexer lexer = new ExtendedNewickLexer(input);
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    ExtendedNewickParser parser = new ExtendedNewickParser(tokens);
    ParseTree parseTree = parser.tree();
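    // Maps hybrid-node labels from the extended Newick string to the Conversion objects they describe.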
    Map<String, Conversion> convIDMap = new HashMap<>();
    Node root = new ExtendedNewickBaseVisitor<Node>() {

        /**
         * Convert branch lengths to node heights for all nodes in clade.
         *
         * @param node clade parent
         * @return minimum height assigned in clade.
         */
        private double branchLengthsToHeights(Node node) {
            if (node.isRoot())
                node.setHeight(0.0);
            else
                node.setHeight(node.getParent().getHeight() - node.getHeight());
            double minHeight = node.getHeight();
            for (Node child : node.getChildren()) {
                minHeight = Math.min(minHeight, branchLengthsToHeights(child));
            }
            return minHeight;
        }

        /**
         * Remove height offset from all nodes in clade
         * @param node parent of clade
         * @param offset offset to remove
         */
        private void removeOffset(Node node, double offset) {
            node.setHeight(node.getHeight() - offset);
            for (Node child : node.getChildren()) removeOffset(child, offset);
        }

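        /**
         * Return the clonal frame node corresponding to the given node,
         * skipping over any conversion (hybrid) nodes encountered.
         *
         * @param node node in the parsed extended tree
         * @return corresponding clonal frame node
         */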
        private Node getTrueNode(Node node) {
            if (node.isLeaf()) {
                assert !convIDMap.containsKey(node.getID());
                return node;
            }
            if (convIDMap.containsKey(node.getID()))
                return getTrueNode(node.getChild(0));
            int hybridIdx = -1;
            int nonHybridIdx = -1;
            for (int i = 0; i < node.getChildCount(); i++) {
                if (node.getChild(i).isLeaf() && convIDMap.containsKey(node.getChild(i).getID()))
                    hybridIdx = i;
                else
                    nonHybridIdx = i;
            }
            if (hybridIdx > 0)
                return getTrueNode(node.getChild(nonHybridIdx));
            return node;
        }

        /**
         * Traverse the newly constructed tree looking for
         * hybrid nodes and using these to set the heights of
         * Conversion objects.
         *
         * @param node parent of clade
         */
        private void findConversionAttachments(Node node) {
            if (convIDMap.containsKey(node.getID())) {
                Conversion conv = convIDMap.get(node.getID());
                if (node.isLeaf()) {
                    conv.setHeight1(node.getHeight());
                    conv.setHeight2(node.getParent().getHeight());
                    conv.setNode2(getTrueNode(node.getParent()));
                } else
                    conv.setNode1(getTrueNode(node));
            }
            for (Node child : node.getChildren()) findConversionAttachments(child);
        }

        /**
         * Remove all conversion-associated nodes, leaving only
         * the clonal frame.
         *
         * @param node parent of clade
         * @return new parent of same clade
         */
        private Node stripHybridNodes(Node node) {
            Node trueNode = getTrueNode(node);
            List<Node> trueChildren = new ArrayList<>();
            for (Node child : trueNode.getChildren()) {
                trueChildren.add(stripHybridNodes(child));
            }
            trueNode.removeAllChildren(false);
            for (Node trueChild : trueChildren) trueNode.addChild(trueChild);
            return trueNode;
        }

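        /**
         * Number internal nodes in post-order, starting from nextNr.
         *
         * @param node parent of clade
         * @param nextNr next available node number
         * @return next available node number after numbering this clade
         */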
        private int numberInternalNodes(Node node, int nextNr) {
            if (node.isLeaf())
                return nextNr;
            for (Node child : node.getChildren()) nextNr = numberInternalNodes(child, nextNr);
            node.setNr(nextNr);
            return nextNr + 1;
        }

        @Override
        public Node visitTree(ExtendedNewickParser.TreeContext ctx) {
            Node root = visitNode(ctx.node());
            double minHeight = branchLengthsToHeights(root);
            removeOffset(root, minHeight);
            findConversionAttachments(root);
            root = stripHybridNodes(root);
            root.setParent(null);
            if (!numbered)
                numberInternalNodes(root, root.getAllLeafNodes().size());
            return root;
        }

        @Override
        public Node visitNode(ExtendedNewickParser.NodeContext ctx) {
            Node node = new Node();
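            // Hybrid nodes in the extended Newick encode conversion edges;
            // their hybrid label serves as the conversion ID.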
            if (ctx.post().hybrid() != null) {
                String convID = ctx.post().hybrid().getText();
                node.setID(convID);
                Conversion conv;
                if (convIDMap.containsKey(convID))
                    conv = convIDMap.get(convID);
                else {
                    conv = new Conversion();
                    convIDMap.put(convID, conv);
                }
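                // Conversion metadata (affected region and locus) is read from the leaf copy of the hybrid node.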
                if (ctx.node().isEmpty()) {
                    String locusID;
                    for (ExtendedNewickParser.AttribContext attribCtx : ctx.post().meta().attrib()) {
                        switch(attribCtx.attribKey.getText()) {
                            case "region":
                                conv.setStartSite(Integer.parseInt(attribCtx.attribValue().vector().attribValue(0).getText()));
                                conv.setEndSite(Integer.parseInt(attribCtx.attribValue().vector().attribValue(1).getText()));
                                break;
                            case "locus":
                                locusID = attribCtx.attribValue().getText();
                                if (locusID.startsWith("\""))
                                    locusID = locusID.substring(1, locusID.length() - 1);
                                Locus locus = null;
                                for (Locus thisLocus : getLoci()) {
                                    if (thisLocus.getID().equals(locusID))
                                        locus = thisLocus;
                                }
                                if (locus == null)
                                    throw new IllegalArgumentException("Locus with ID " + locusID + " not found.");
                                conv.setLocus(locus);
                                break;
                            default:
                                break;
                        }
                    }
                }
            }
            for (ExtendedNewickParser.NodeContext childCtx : ctx.node()) node.addChild(visitNode(childCtx));
            if (ctx.post().label() != null) {
                node.setID(ctx.post().label().getText());
                node.setNr(Integer.parseInt(ctx.post().label().getText()) - nodeNumberoffset);
            }
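            // Branch lengths are stored in the height field for now;
            // branchLengthsToHeights() converts them to true heights afterwards.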
            node.setHeight(Double.parseDouble(ctx.post().length.getText()));
            return node;
        }
    }.visit(parseTree);
    m_nodes = root.getAllChildNodesAndSelf().toArray(m_nodes);
    nodeCount = m_nodes.length;
    leafNodeCount = root.getAllLeafNodes().size();
    setRoot(root);
    initArrays();
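    // Discard any existing conversions and register those read from the string.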
    for (Locus locus : getLoci()) convs.get(locus).clear();
    for (Conversion conv : convIDMap.values()) addConversion(conv);
}
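
For reference, the ANTLR front end at the top of this method can be exercised on its own. Below is a minimal sketch, not part of the bacter source: it assumes the generated ExtendedNewickLexer and ExtendedNewickParser classes are on the classpath, and the class name ParseTreeDemo and the input string are purely illustrative (a real ACG string would normally come from getExtendedNewick()).

import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;

import bacter.util.parsers.ExtendedNewickLexer;
import bacter.util.parsers.ExtendedNewickParser;

public class ParseTreeDemo {

    public static void main(String[] args) {
        // Illustrative input only; a real ACG string would be produced by
        // ConversionGraph.getExtendedNewick() and include hybrid nodes and metadata.
        String newick = "((A:1.0,B:1.0):1.0,C:2.0);";

        // Same ANTLR pipeline as at the top of fromExtendedNewick().
        CharStream input = CharStreams.fromString(newick);
        ExtendedNewickLexer lexer = new ExtendedNewickLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        ExtendedNewickParser parser = new ExtendedNewickParser(tokens);
        ParseTree parseTree = parser.tree();

        // Print the parse tree in LISP-style form for inspection.
        System.out.println(parseTree.toStringTree(parser));
    }
}

In fromExtendedNewick() the resulting parse tree is handed to the anonymous ExtendedNewickBaseVisitor shown above, which rebuilds the clonal frame and registers the parsed Conversion objects.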
Also used:
CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream)
ExtendedNewickLexer (bacter.util.parsers.ExtendedNewickLexer)
Node (beast.evolution.tree.Node)
CharStream (org.antlr.v4.runtime.CharStream)
ExtendedNewickParser (bacter.util.parsers.ExtendedNewickParser)
ParseTree (org.antlr.v4.runtime.tree.ParseTree)

Aggregations

ExtendedNewickLexer (bacter.util.parsers.ExtendedNewickLexer): 1
ExtendedNewickParser (bacter.util.parsers.ExtendedNewickParser): 1
Node (beast.evolution.tree.Node): 1
CharStream (org.antlr.v4.runtime.CharStream): 1
CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream): 1
ParseTree (org.antlr.v4.runtime.tree.ParseTree): 1