Search in sources :

Example 21 with TregexPattern

use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNPorPRP.

/**
 * Adds a {@link Mention} for every parse-tree node matched by
 * {@code npOrPrpMentionPattern} whose span has not been recorded yet and is
 * not inside a named-entity span.
 *
 * <p>The span is derived from the {@code IndexAnnotation} of the first and
 * last leaves of the matched subtree (first index minus one as begin, last
 * index as end). NOTE(review): this presumably yields a half-open token range
 * over 1-based leaf indices assigned by {@code indexLeaves()} — confirm.
 *
 * @param s                  the sentence; its tokens, parse tree and enhanced
 *                           dependencies are read from it
 * @param mentions           receives each newly created mention
 * @param mentionSpanSet     spans already used by a mention; new spans are added
 * @param namedEntitySpanSet spans passed to {@code insideNE} to reject
 *                           candidates
 */
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    // The leaf IndexAnnotation values read below are set up by this call.
    parse.indexLeaves();
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    TregexMatcher matcher = npOrPrpMentionPattern.matcher(parse);
    while (matcher.find()) {
        Tree matched = matcher.getMatch();
        List<Tree> leaves = matched.getLeaves();

        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        int beginIdx = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        int endIdx = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

        // Avoid spans whose final token is a comma.
        if (",".equals(tokens.get(endIdx - 1).word())) {
            endIdx--;
        }

        IntPair span = new IntPair(beginIdx, endIdx);
        if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
            // Already covered by a mention, or rejected by the named-entity check.
            continue;
        }

        // -1 is a placeholder mention id.
        mentions.add(new Mention(-1, beginIdx, endIdx, dependency, new ArrayList<>(tokens.subList(beginIdx, endIdx)), matched));
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 22 with TregexPattern

use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.

the class TregexPoweredTreebankParserParams method getAnnotationString.

/**
 * Build a string of annotations for the given tree.
 *
 * <p>For every name in {@code features}, the compiled pattern registered in
 * {@code annotationPatterns} is matched against {@code root} at node
 * {@code t}; on a match, the associated function's result is appended.
 * Features without a compiled pattern (for example because the pattern
 * failed to parse in {@code compileAnnotations}, which skips such entries)
 * are ignored rather than causing a {@link NullPointerException}.
 *
 * @param t The input tree (with non-language specific annotation
 *          already done, so you need to strip back to basic categories)
 * @param root The root of the current tree (can be null for words)
 * @return A (possibly empty) string of annotations to add to the
 *         given tree
 */
protected String getAnnotationString(Tree t, Tree root) {
    // Accumulate all annotations in this string
    StringBuilder annotationStr = new StringBuilder();
    for (String featureName : features) {
        Pair<TregexPattern, Function<TregexMatcher, String>> behavior = annotationPatterns.get(featureName);
        if (behavior == null) {
            // No compiled pattern is registered under this feature name.
            continue;
        }
        TregexMatcher m = behavior.first().matcher(root);
        if (m.matchesAt(t)) {
            annotationStr.append(behavior.second().apply(m));
        }
    }
    return annotationStr.toString();
}
Also used : Function(java.util.function.Function) SerializableFunction(edu.stanford.nlp.process.SerializableFunction) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 23 with TregexPattern

use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.

the class TregexPoweredTreebankParserParams method compileAnnotations.

/**
 * Compile the {@link #annotations} collection given a
 * particular head finder. Subclasses should call this method at
 * least once before the class is used, and whenever the head finder
 * is changed.
 *
 * <p>Previously compiled patterns are discarded first. A pattern that fails
 * to parse is logged and skipped, so its key will be absent from
 * {@code annotationPatterns} afterwards.
 *
 * @param hf the head finder handed to the {@code TregexPatternCompiler}
 */
protected void compileAnnotations(HeadFinder hf) {
    TregexPatternCompiler compiler = new TregexPatternCompiler(hf);
    annotationPatterns.clear();
    // Position of the current entry in iteration order. Tracked explicitly
    // because annotationPatterns.size() only counts successful compilations
    // and therefore under-reports the position once any pattern has failed.
    int nth = 0;
    for (Map.Entry<String, Pair<String, Function<TregexMatcher, String>>> annotation : annotations.entrySet()) {
        nth++;
        TregexPattern compiled;
        try {
            compiled = compiler.compile(annotation.getValue().first());
        } catch (TregexParseException e) {
            log.info("Parse exception on annotation pattern #" + nth + " initialization: " + e);
            continue;
        }
        Pair<TregexPattern, Function<TregexMatcher, String>> behavior = new Pair<>(compiled, annotation.getValue().second());
        annotationPatterns.put(annotation.getKey(), behavior);
    }
}
Also used : TregexParseException(edu.stanford.nlp.trees.tregex.TregexParseException) Function(java.util.function.Function) SerializableFunction(edu.stanford.nlp.process.SerializableFunction) TregexPatternCompiler(edu.stanford.nlp.trees.tregex.TregexPatternCompiler) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Example 24 with TregexPattern

use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.

the class CoordinationTransformer method MWETransform.

/**
 * Groups every multi-word expression under a single constituent labeled
 * "MWE". The expressions are described by the patterns in MWE_PATTERNS.
 *
 * <p>After the generic patterns, four dedicated pattern/operation pairs
 * (ACCORDING_TO, BUT_ALSO, AT_RBS, AT_ALL) are applied in that order. The
 * values returned by {@code Tsurgeon.processPattern} are not used.
 *
 * @param t the tree to process
 * @return the same {@code t} reference that was passed in
 */
public static Tree MWETransform(Tree t) {
    // Generic multi-word expressions: one shared operation for all patterns.
    for (TregexPattern mwePattern : MWE_PATTERNS) {
        Tsurgeon.processPattern(mwePattern, MWE_OPERATION, t);
    }

    // Special cases, each with its own operation; the order is kept as-is.
    Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t);
    Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t);
    Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t);
    Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t);

    return t;
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern)

Example 25 with TregexPattern

use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.

the class GenerateTrees method readGrammar.

/**
 * Reads a grammar description line by line from {@code bin}.
 *
 * <p>Each line is trimmed; empty lines and lines starting with {@code #} are
 * skipped. A line whose upper-cased text names a {@code Section} constant
 * switches the current section (the initial section is TERMINALS). Entering
 * the TSURGEON section immediately reads one tregex/tsurgeon operation from
 * the reader and stores it in {@code tsurgeons}. Any other line is split on
 * runs of spaces: the first piece is the left-hand side, the remaining pieces
 * are counted as individual productions (TERMINALS) or as one production
 * list (NONTERMINALS).
 *
 * @param bin the reader to consume; it is not closed by this method
 * @throws RuntimeIOException if reading from {@code bin} fails
 * @throws RuntimeException   if a non-header line appears while the current
 *                            section is TSURGEON
 */
public void readGrammar(BufferedReader bin) {
    try {
        String line;
        Section section = Section.TERMINALS;
        while ((line = bin.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty() || line.charAt(0) == '#') {
                // skip blank lines and comments
                continue;
            }

            // Is this line a section header? Only the enum lookup is guarded,
            // so that an IllegalArgumentException raised while reading a
            // tsurgeon operation is not mistaken for "not a section name".
            // Locale.ROOT keeps the upper-casing independent of the default
            // locale (e.g. Turkish dotted/dotless i).
            Section newSection = null;
            try {
                newSection = Section.valueOf(line.toUpperCase(java.util.Locale.ROOT));
            } catch (IllegalArgumentException ignored) {
                // never mind, not an enum
            }
            if (newSection != null) {
                section = newSection;
                if (section == Section.TSURGEON) {
                    // this will read a tregex pattern until it has eaten a
                    // blank line, then read tsurgeon until it has eaten
                    // another blank line.
                    Pair<TregexPattern, TsurgeonPattern> operation = Tsurgeon.getOperationFromReader(bin, compiler);
                    tsurgeons.add(operation);
                }
                continue;
            }

            String[] pieces = line.split(" +");
            switch (section) {
                case TSURGEON:
                    {
                        throw new RuntimeException("Found a non-empty line in a tsurgeon section after reading the operation");
                    }
                case TERMINALS:
                    {
                        Counter<String> productions = terminals.get(pieces[0]);
                        if (productions == null) {
                            productions = new ClassicCounter<>();
                            terminals.put(pieces[0], productions);
                        }
                        // every remaining piece is its own production
                        for (int i = 1; i < pieces.length; ++i) {
                            productions.incrementCount(pieces[i]);
                        }
                        break;
                    }
                case NONTERMINALS:
                    {
                        Counter<List<String>> productions = nonTerminals.get(pieces[0]);
                        if (productions == null) {
                            productions = new ClassicCounter<>();
                            nonTerminals.put(pieces[0], productions);
                        }
                        // the remaining pieces together form one production
                        String[] sublist = Arrays.copyOfRange(pieces, 1, pieces.length);
                        productions.incrementCount(Arrays.asList(sublist));
                        break;
                    }
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) Counter(edu.stanford.nlp.stats.Counter) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern)

Aggregations

TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern)29 TregexMatcher (edu.stanford.nlp.trees.tregex.TregexMatcher)16 Tree (edu.stanford.nlp.trees.Tree)8 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)6 CoreLabel (edu.stanford.nlp.ling.CoreLabel)6 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)6 Pair (edu.stanford.nlp.util.Pair)6 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)5 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)5 ArrayList (java.util.ArrayList)5 TregexParseException (edu.stanford.nlp.trees.tregex.TregexParseException)4 TsurgeonPattern (edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern)4 Mention (edu.stanford.nlp.coref.data.Mention)3 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)3 IntPair (edu.stanford.nlp.util.IntPair)3 IOException (java.io.IOException)3 PrintWriter (java.io.PrintWriter)3 SerializableFunction (edu.stanford.nlp.process.SerializableFunction)2 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)2 TreeReader (edu.stanford.nlp.trees.TreeReader)2