Search in sources:

Example 1 with ParserConstraint

Use of edu.stanford.nlp.parser.common.ParserConstraint in the CoreNLP project by stanfordnlp.

From the class LatticeXMLReader, method load:

/**
 * Loads word lattices from an XML document read from the given stream.
 * One {@code Lattice} is built per {@code <sentence>} element and appended
 * to {@code lattices}.
 *
 * @param stream the XML input to parse
 * @return true if the whole document was read successfully; false if the
 *         XML parser could not be created, or on an I/O or XML syntax error
 */
private boolean load(InputStream stream) {
    DocumentBuilder parser = XMLUtils.getXmlParser();
    if (parser == null)
        return false;
    try {
        Document xmlDocument = parser.parse(stream);
        Element root = xmlDocument.getDocumentElement();
        NodeList sentences = root.getElementsByTagName(SENTENCE);
        for (int i = 0; i < sentences.getLength(); i++) {
            Element sentence = (Element) sentences.item(i);
            Lattice lattice = new Lattice();
            //Create the node map: gather all node ids in sorted order so they
            //can be renumbered densely from 0 below.
            SortedSet<Integer> nodes = new TreeSet<>();
            NodeList xmlNodes = sentence.getElementsByTagName(NODE);
            for (int nodeIdx = 0; nodeIdx < xmlNodes.getLength(); nodeIdx++) {
                Element xmlNode = (Element) xmlNodes.item(nodeIdx);
                int nodeName = Integer.parseInt(xmlNode.getAttribute(NODE_ID));
                nodes.add(nodeName);
            }
            // Map original node ids -> dense 0-based indices. Node ids that are
            // multiples of NODE_OFFSET appear to mark boundaries between words;
            // each boundary (after the first) yields a wildcard ParserConstraint
            // over the preceding span. TODO(review): confirm the NODE_OFFSET
            // convention against the lattice file format.
            Map<Integer, Integer> nodeMap = Generics.newHashMap();
            int realNodeIdx = 0;
            int lastBoundaryNode = -1;
            for (int nodeName : nodes) {
                if (lastBoundaryNode == -1) {
                    // The smallest node id must itself be a boundary node.
                    assert nodeName % NODE_OFFSET == 0;
                    lastBoundaryNode = realNodeIdx;
                } else if (nodeName % NODE_OFFSET == 0) {
                    // NOTE(review): lastBoundaryNode is never advanced after the
                    // first boundary, so every constraint spans from the first
                    // boundary to the current node — verify this is intended.
                    ParserConstraint c = new ParserConstraint(lastBoundaryNode, realNodeIdx, ".*");
                    lattice.addConstraint(c);
                }
                nodeMap.put(nodeName, realNodeIdx);
                realNodeIdx++;
            }
            //Read the edges, translating endpoints through the node map
            NodeList xmlEdges = sentence.getElementsByTagName(EDGE);
            for (int edgeIdx = 0; edgeIdx < xmlEdges.getLength(); edgeIdx++) {
                Element xmlEdge = (Element) xmlEdges.item(edgeIdx);
                String segment = xmlEdge.getAttribute(SEGMENT);
                //Input weights should be log scale
                double weight = Double.parseDouble(xmlEdge.getAttribute(WEIGHT));
                int from = Integer.parseInt(xmlEdge.getAttribute(FROM_NODE));
                // NOTE(review): nodeMap.get() will NPE via unboxing if an edge
                // references a node id that had no <node> element.
                int normFrom = nodeMap.get(from);
                int to = Integer.parseInt(xmlEdge.getAttribute(TO_NODE));
                int normTo = nodeMap.get(to);
                LatticeEdge e = new LatticeEdge(segment, weight, normFrom, normTo);
                // Set attributes below here: copy each edge attribute element
                // onto the LatticeEdge as a key/value pair.
                NodeList xmlAttrs = xmlEdge.getElementsByTagName(E_ATTR_NODE);
                for (int attrIdx = 0; attrIdx < xmlAttrs.getLength(); attrIdx++) {
                    Element xmlAttr = (Element) xmlAttrs.item(attrIdx);
                    String key = xmlAttr.getAttribute(E_ATTR);
                    String value = xmlAttr.getAttribute(E_ATTR_VAL);
                    e.setAttr(key, value);
                }
                lattice.addEdge(e);
            }
            //Configure for parsing in ExhaustivePCFG parser
            lattice.addBoundary();
            lattices.add(lattice);
        }
    } catch (IOException e) {
        System.err.printf("%s: Error reading XML from input stream.%n", this.getClass().getName());
        e.printStackTrace();
        return false;
    } catch (SAXException e) {
        e.printStackTrace();
        return false;
    }
    return true;
}
Also used : ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) Element(org.w3c.dom.Element) NodeList(org.w3c.dom.NodeList) Document(org.w3c.dom.Document) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder)

Example 2 with ParserConstraint

Use of edu.stanford.nlp.parser.common.ParserConstraint in the CoreNLP project by stanfordnlp.

From the class CorefMentionFinder, method findSyntacticHead:

/**
 * Finds the syntactic head of the given mention within the parse tree.
 * Tries, in order: an exact constituent matching the mention span; a
 * constrained reparse of the mention text embedded in an "It was ..."
 * frame (if {@code allowReparsing}); the head of the smallest covering
 * constituent; and finally the last noun-like token of the mention.
 *
 * @param m      the mention whose head is sought
 * @param root   the parse tree of the sentence containing the mention
 * @param tokens the tokens of that sentence
 * @return the leaf (or head) tree node for the mention's head word
 */
public Tree findSyntacticHead(Mention m, Tree root, List<CoreLabel> tokens) {
    // mention ends with 's (or a bare apostrophe): drop the possessive from
    // the span unless it is the entire mention
    int endIdx = m.endIndex;
    if (m.originalSpan.size() > 0) {
        String lastWord = m.originalSpan.get(m.originalSpan.size() - 1).get(CoreAnnotations.TextAnnotation.class);
        if ((lastWord.equals("'s") || lastWord.equals("'")) && m.originalSpan.size() != 1)
            endIdx--;
    }
    Tree exactMatch = findTreeWithSpan(root, m.startIndex, endIdx);
    // An exact constituent match is the best case: take its head directly.
    if (exactMatch != null) {
        return safeHead(exactMatch, endIdx);
    }
    // Otherwise reparse the extent alone, embedded in a minimal copular
    // context, so as to make the parser work better :-)
    if (allowReparsing) {
        int approximateness = 0;
        List<CoreLabel> extentTokens = new ArrayList<>();
        extentTokens.add(initCoreLabel("It"));
        extentTokens.add(initCoreLabel("was"));
        final int ADDED_WORDS = 2;
        for (int i = m.startIndex; i < endIdx; i++) {
            // Add everything except separated dashes! The separated dashes mess with the parser too badly.
            CoreLabel label = tokens.get(i);
            if (!"-".equals(label.word())) {
                // Copy the token rather than sharing it: the parser may mutate
                // labels (e.g. put new indices on them), which would corrupt the
                // caller's token list. This matches RuleBasedCorefMentionFinder.
                extentTokens.add((CoreLabel) label.labelFactory().newLabel(label));
            } else {
                approximateness++;
            }
        }
        extentTokens.add(initCoreLabel("."));
        // constrain the parse to the part we're interested in.
        // Starting from ADDED_WORDS comes from skipping "It was".
        // -1 to exclude the period.
        // We now let it be any kind of nominal constituent, since there
        // are VP and S ones
        ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, Pattern.compile(".*"));
        List<ParserConstraint> constraints = Collections.singletonList(constraint);
        Tree tree = parse(extentTokens, constraints);
        // now unnecessary, as parser uses CoreLabels?
        convertToCoreLabels(tree);
        // remember it has ADDED_WORDS extra words at the beginning
        tree.indexSpans(m.startIndex - ADDED_WORDS);
        Tree subtree = findPartialSpan(tree, m.startIndex);
        // There was a possible problem that with a crazy parse, extentHead could be one of the added words, not a real word!
        // Now we make sure in findPartialSpan that it can't be before the real start, and in safeHead, we disallow something
        // past the right end (that is, just that final period).
        Tree extentHead = safeHead(subtree, endIdx);
        assert (extentHead != null);
        // extentHead is a child in the local extent parse tree. we need to find the corresponding node in the main tree
        // Because we deleted dashes, its index will be >= the index in the extent parse tree
        CoreLabel l = (CoreLabel) extentHead.label();
        Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
        assert (realHead != null);
        return realHead;
    }
    // If reparsing wasn't allowed, try to find a span in the tree
    // which happens to have the head
    Tree wordMatch = findTreeWithSmallestSpan(root, m.startIndex, endIdx);
    if (wordMatch != null) {
        Tree head = safeHead(wordMatch, endIdx);
        if (head != null) {
            int index = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
            if (index >= m.startIndex && index < endIdx) {
                return head;
            }
        }
    }
    // If that didn't work, guess that it's the last word, preferring the
    // last noun before any wh-word
    int lastNounIdx = endIdx - 1;
    for (int i = m.startIndex; i < m.endIndex; i++) {
        if (tokens.get(i).tag().startsWith("N"))
            lastNounIdx = i;
        else if (tokens.get(i).tag().startsWith("W"))
            break;
    }
    List<Tree> leaves = root.getLeaves();
    Tree endLeaf = leaves.get(lastNounIdx);
    return endLeaf;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 3 with ParserConstraint

Use of edu.stanford.nlp.parser.common.ParserConstraint in the CoreNLP project by stanfordnlp.

From the class RuleBasedCorefMentionFinder, method findSyntacticHead:

/**
 * Finds the syntactic head of the given mention within the parse tree.
 * Tries, in order: an exact constituent matching the mention span; a
 * constrained reparse of the mention text embedded in an "It was ..."
 * frame (if {@code allowReparsing}); the head of the smallest covering
 * constituent; and finally the last noun-like token of the mention.
 *
 * @param m      the mention whose head is sought
 * @param root   the parse tree of the sentence containing the mention
 * @param tokens the tokens of that sentence
 * @return the leaf (or head) tree node for the mention's head word
 */
protected Tree findSyntacticHead(Mention m, Tree root, List<CoreLabel> tokens) {
    // mention ends with 's (or a bare apostrophe): drop the possessive from
    // the span unless it is the entire mention
    int endIdx = m.endIndex;
    if (m.originalSpan.size() > 0) {
        String lastWord = m.originalSpan.get(m.originalSpan.size() - 1).get(CoreAnnotations.TextAnnotation.class);
        if ((lastWord.equals("'s") || lastWord.equals("'")) && m.originalSpan.size() != 1)
            endIdx--;
    }
    Tree exactMatch = findTreeWithSpan(root, m.startIndex, endIdx);
    // An exact constituent match is the best case: take its head directly.
    if (exactMatch != null) {
        return safeHead(exactMatch, endIdx);
    }
    // Otherwise reparse the extent alone, embedded in a minimal copular
    // context, so as to make the parser work better :-)
    if (allowReparsing) {
        int approximateness = 0;
        List<CoreLabel> extentTokens = new ArrayList<>();
        extentTokens.add(initCoreLabel("It"));
        extentTokens.add(initCoreLabel("was"));
        final int ADDED_WORDS = 2;
        for (int i = m.startIndex; i < endIdx; i++) {
            // Add everything except separated dashes! The separated dashes mess with the parser too badly.
            CoreLabel label = tokens.get(i);
            if (!"-".equals(label.word())) {
                // necessary to copy tokens in case the parser does things like
                // put new indices on the tokens
                extentTokens.add((CoreLabel) label.labelFactory().newLabel(label));
            } else {
                approximateness++;
            }
        }
        extentTokens.add(initCoreLabel("."));
        // constrain the parse to the part we're interested in.
        // Starting from ADDED_WORDS comes from skipping "It was".
        // -1 to exclude the period.
        // We now let it be any kind of nominal constituent, since there
        // are VP and S ones
        ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, Pattern.compile(".*"));
        List<ParserConstraint> constraints = Collections.singletonList(constraint);
        Tree tree = parse(extentTokens, constraints);
        // now unnecessary, as parser uses CoreLabels?
        convertToCoreLabels(tree);
        // remember it has ADDED_WORDS extra words at the beginning
        tree.indexSpans(m.startIndex - ADDED_WORDS);
        Tree subtree = findPartialSpan(tree, m.startIndex);
        // There was a possible problem that with a crazy parse, extentHead could be one of the added words, not a real word!
        // Now we make sure in findPartialSpan that it can't be before the real start, and in safeHead, we disallow something
        // past the right end (that is, just that final period).
        Tree extentHead = safeHead(subtree, endIdx);
        assert (extentHead != null);
        // extentHead is a child in the local extent parse tree. we need to find the corresponding node in the main tree
        // Because we deleted dashes, its index will be >= the index in the extent parse tree
        CoreLabel l = (CoreLabel) extentHead.label();
        Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
        assert (realHead != null);
        return realHead;
    }
    // If reparsing wasn't allowed, try to find a span in the tree
    // which happens to have the head
    Tree wordMatch = findTreeWithSmallestSpan(root, m.startIndex, endIdx);
    if (wordMatch != null) {
        Tree head = safeHead(wordMatch, endIdx);
        if (head != null) {
            int index = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
            if (index >= m.startIndex && index < endIdx) {
                return head;
            }
        }
    }
    // If that didn't work, guess that it's the last word, preferring the
    // last noun before any wh-word
    int lastNounIdx = endIdx - 1;
    for (int i = m.startIndex; i < m.endIndex; i++) {
        if (tokens.get(i).tag().startsWith("N"))
            lastNounIdx = i;
        else if (tokens.get(i).tag().startsWith("W"))
            break;
    }
    List<Tree> leaves = root.getLeaves();
    Tree endLeaf = leaves.get(lastNounIdx);
    return endLeaf;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 4 with ParserConstraint

Use of edu.stanford.nlp.parser.common.ParserConstraint in the CoreNLP project by stanfordnlp.

From the class PerceptronModel, method findHighestScoringTransitions:

/**
 * Scores every known transition against the active features and returns
 * (up to) the {@code numTransitions} highest-scoring ones.
 *
 * @param state          the current parser state
 * @param features       names of the features active in this state
 * @param requireLegal   if true, only transitions legal in {@code state}
 *                       under {@code constraints} are considered
 * @param numTransitions maximum number of transitions to return
 * @param constraints    parse constraints used for the legality check
 * @return the best-scoring transitions, as index/score pairs
 */
private Collection<ScoredObject<Integer>> findHighestScoringTransitions(State state, List<String> features, boolean requireLegal, int numTransitions, List<ParserConstraint> constraints) {
    // Accumulate each active feature's weight vector into a per-transition
    // score array; features absent from the weight index contribute nothing.
    float[] transitionScores = new float[transitionIndex.size()];
    for (String featureName : features) {
        Weight featureWeight = featureWeights.get(featureName);
        if (featureWeight != null) {
            featureWeight.score(transitionScores);
        }
    }
    // Keep the top numTransitions candidates in a min-heap: once the heap
    // overflows, the lowest-scoring entry is evicted.
    PriorityQueue<ScoredObject<Integer>> best = new PriorityQueue<>(numTransitions + 1, ScoredComparator.ASCENDING_COMPARATOR);
    for (int transition = 0; transition < transitionScores.length; ++transition) {
        boolean eligible = !requireLegal || transitionIndex.get(transition).isLegal(state, constraints);
        if (eligible) {
            best.add(new ScoredObject<>(transition, transitionScores[transition]));
            if (best.size() > numTransitions) {
                best.poll();
            }
        }
    }
    return best;
}
Also used : ScoredObject(edu.stanford.nlp.util.ScoredObject) PriorityQueue(java.util.PriorityQueue) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 5 with ParserConstraint

Use of edu.stanford.nlp.parser.common.ParserConstraint in the CoreNLP project by stanfordnlp.

From the class LexicalizedParserITest, method testConstraints:

/**
 * Test what happens if you put a constraint on the parse: forcing the
 * first two words to form an SBAR should push the parser away from the
 * unconstrained tree while producing an SBAR somewhere in the output.
 */
public void testConstraints() {
    List<CoreLabel> sentence = sampleSausage();
    ParserQuery pq = englishParser.parserQuery();
    // Require tokens [0, 2) to be parsed as an SBAR constituent.
    ParserConstraint sbarConstraint = new ParserConstraint(0, 2, "SBAR|SBAR[^a-zA-Z].*");
    List<ParserConstraint> constraints = new ArrayList<>();
    constraints.add(sbarConstraint);
    pq.setConstraints(constraints);
    pq.parse(sentence);
    StringWriter treeWriter = new StringWriter();
    pennPrint.printTree(pq.getBestParse(), (new PrintWriter(treeWriter)));
    String actualOutput = treeWriter.toString().replaceAll("\\s+", " ").trim();
    // The tree the parser produces when no constraint is supplied.
    String expectedOutput = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
    expectedOutput = expectedOutput.replaceAll("\\s+", " ").trim();
    // Not exactly sure what should come back, but it shouldn't be the
    // original output any more
    assertFalse("Tree should not match the original tree any more", expectedOutput.equals(actualOutput));
    assertTrue("Tree should be forced to contain SBAR", actualOutput.contains("SBAR"));
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) StringWriter(java.io.StringWriter) ArrayList(java.util.ArrayList) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) PrintWriter(java.io.PrintWriter)

Aggregations

ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)11 CoreLabel (edu.stanford.nlp.ling.CoreLabel)5 ArrayList (java.util.ArrayList)5 Tree (edu.stanford.nlp.trees.Tree)4 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)2 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)2 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)2 Matcher (java.util.regex.Matcher)2 ConstraintAnnotation (edu.stanford.nlp.parser.common.ParserAnnotations.ConstraintAnnotation)1 CoreMap (edu.stanford.nlp.util.CoreMap)1 ScoredObject (edu.stanford.nlp.util.ScoredObject)1 PrintWriter (java.io.PrintWriter)1 StringWriter (java.io.StringWriter)1 PriorityQueue (java.util.PriorityQueue)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 Document (org.w3c.dom.Document)1 Element (org.w3c.dom.Element)1 NodeList (org.w3c.dom.NodeList)1 SAXException (org.xml.sax.SAXException)1