Search in sources :

Example 16 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

the class MWEPreprocessor method countMWEStatistics.

/**
 * Updates the unigram tagger with the given tree and then accumulates
 * MWE statistics for every node of the tree matched by {@code pMWE}.
 *
 * <p>For each counted match, the node label is paired with the node's
 * pre-terminal yield and with its terminal yield (both rendered as strings
 * by {@code SentenceUtils.listToString}), and each pairing is recorded in
 * both directions.
 *
 * @param t the tree to collect statistics from
 * @param unigramTagger counter handed to {@code updateTagger} together with {@code t}
 * @param labelPreterm incremented at (label, pre-terminal yield)
 * @param pretermLabel incremented at (pre-terminal yield, label)
 * @param labelTerm incremented at (label, terminal yield)
 * @param termLabel incremented at (terminal yield, label)
 */
public static void countMWEStatistics(Tree t, TwoDimensionalCounter<String, String> unigramTagger, TwoDimensionalCounter<String, String> labelPreterm, TwoDimensionalCounter<String, String> pretermLabel, TwoDimensionalCounter<String, String> labelTerm, TwoDimensionalCounter<String, String> termLabel) {
    updateTagger(unigramTagger, t);

    // Walk over every matching node and record its statistics.
    for (TregexMatcher mweMatcher = pMWE.matcher(t); mweMatcher.findNextMatchingNode(); ) {
        Tree mweNode = mweMatcher.getMatch();
        String phrasalLabel = mweNode.value();

        // When dummy-tag resolution is enabled, nodes carrying the
        // missing-phrasal placeholder label are left out of the counts.
        boolean isDummyLabel = RESOLVE_DUMMY_TAGS && phrasalLabel.equals(FrenchXMLTreeReader.MISSING_PHRASAL);
        if (!isDummyLabel) {
            String pretermSeq = SentenceUtils.listToString(mweNode.preTerminalYield());
            String termSeq = SentenceUtils.listToString(mweNode.yield());

            labelPreterm.incrementCount(phrasalLabel, pretermSeq);
            pretermLabel.incrementCount(pretermSeq, phrasalLabel);
            labelTerm.incrementCount(phrasalLabel, termSeq);
            termLabel.incrementCount(termSeq, phrasalLabel);
        }
    }
}
Also used : Tree(edu.stanford.nlp.trees.Tree) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 17 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

the class RHSFrequency method main.

/**
 * Command-line entry point. Counts, for every tree node matching the
 * requested category, the sequence of its children's labels, and prints
 * the sequences with their counts in descending order of frequency.
 *
 * <p>Arguments are processed left to right:
 * <ul>
 *   <li>{@code -l <language>} selects the treebank parameters via
 *       {@code Language.valueOf};</li>
 *   <li>{@code -e <encoding>} sets the input/output encoding
 *       (default {@code UTF-8});</li>
 *   <li>any other argument is read as a category followed by the path of
 *       a tree file to load.</li>
 * </ul>
 * Options only affect the treebank if they appear before the first
 * category/path pair, because the treebank is created when that pair is
 * read. On malformed arguments the usage message is printed and the
 * process exits with status -1.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    String encoding = "UTF-8";
    TregexPattern rootMatch = null;
    for (int i = 0; i < args.length; i++) {
        if (args[i].startsWith("-")) {
            switch (args[i]) {
                case "-l":
                    // The option needs a value; fail with usage instead of an index error.
                    if (i + 1 >= args.length) {
                        System.out.println(usage.toString());
                        System.exit(-1);
                    }
                    Language lang = Language.valueOf(args[++i].trim());
                    tlpp = lang.params;
                    break;
                case "-e":
                    if (i + 1 >= args.length) {
                        System.out.println(usage.toString());
                        System.exit(-1);
                    }
                    encoding = args[++i];
                    break;
                default:
                    System.out.println(usage.toString());
                    System.exit(-1);
            }
        } else {
            // A positional argument is a category that must be followed by a tree file.
            if (i + 1 >= args.length) {
                System.out.println(usage.toString());
                System.exit(-1);
            }
            rootMatch = TregexPattern.compile("@" + args[i]);
            if (tb == null) {
                if (tlpp == null) {
                    System.out.println(usage.toString());
                    System.exit(-1);
                } else {
                    tlpp.setInputEncoding(encoding);
                    tlpp.setOutputEncoding(encoding);
                    tb = tlpp.diskTreebank();
                }
            }
            // Pre-increment so that the loop's own i++ lands on the argument
            // right after the path rather than skipping it.
            tb.loadPath(args[++i]);
        }
    }
    // Only options were supplied: there is nothing to match or to iterate over.
    if (tb == null || rootMatch == null) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    Counter<String> rhsCounter = new ClassicCounter<>();
    for (Tree t : tb) {
        TregexMatcher m = rootMatch.matcher(t);
        while (m.findNextMatchingNode()) {
            Tree match = m.getMatch();
            StringBuilder sb = new StringBuilder();
            for (Tree kid : match.children()) {
                sb.append(kid.value()).append(" ");
            }
            // trim() drops the trailing separator added by the loop above.
            rhsCounter.incrementCount(sb.toString().trim());
        }
    }
    List<String> biggestKeys = new ArrayList<>(rhsCounter.keySet());
    Collections.sort(biggestKeys, Counters.toComparatorDescending(rhsCounter));
    // The writer is closed even if printing fails part-way through.
    try (PrintWriter pw = tlpp.pw()) {
        for (String rhs : biggestKeys) {
            pw.printf("%s\t%d%n", rhs, (int) rhsCounter.getCount(rhs));
        }
    }
}
Also used : DiskTreebank(edu.stanford.nlp.trees.DiskTreebank) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) ArrayList(java.util.ArrayList) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) Language(edu.stanford.nlp.international.Language) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Tree(edu.stanford.nlp.trees.Tree) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) PrintWriter(java.io.PrintWriter)

Example 18 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method extractNPorPRP.

/**
 * Adds a mention for each node of the sentence's parse tree that matches
 * {@code npOrPrpMentionPattern}.
 *
 * <p>The token span of a match is derived from the index annotations of
 * its first and last leaves. A span whose last token is a comma is
 * shortened by one token. A match is skipped when its span is already in
 * {@code mentionSpanSet} or when {@code insideNE} reports true for it
 * against {@code namedEntitySpanSet}.
 *
 * @param s the sentence; its tokens, parse tree and enhanced dependencies are read
 * @param mentions receives the newly created mentions
 * @param mentionSpanSet spans already used by a mention; updated with each span added here
 * @param namedEntitySpanSet named-entity spans passed to {@code insideNE}
 */
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    parse.indexLeaves();
    SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

    TregexMatcher npMatcher = npOrPrpMentionPattern.matcher(parse);
    while (npMatcher.find()) {
        Tree node = npMatcher.getMatch();
        List<Tree> leaves = node.getLeaves();

        // Subtracting one from the first leaf's index and using the last
        // leaf's index unchanged yields a [start, end) range into the token list.
        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        int start = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        int rawEnd = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

        // Avoid spans that end with a comma.
        boolean endsWithComma = ",".equals(tokens.get(rawEnd - 1).word());
        int end = endsWithComma ? rawEnd - 1 : rawEnd;

        IntPair span = new IntPair(start, end);
        if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
            continue;
        }

        // Mentions are created with a placeholder id of -1.
        List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(start, end));
        mentions.add(new Mention(-1, start, end, enhancedDeps, spanTokens, node));
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 19 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

the class TregexPoweredTreebankParserParams method getAnnotationString.

/**
   * Build a string of annotations for the given tree.
   *
   * Every active feature whose compiled pattern matches at {@code t}
   * contributes the string produced by its annotation function; the
   * contributions are concatenated in the iteration order of the active
   * features. Features that have no compiled pattern are skipped.
   *
   * @param t The input tree (with non-language specific annotation
   *          already done, so you need to strip back to basic categories)
   * @param root The root of the current tree (can be null for words)
   * @return A (possibly empty) string of annotations to add to the
   *         given tree
   */
protected String getAnnotationString(Tree t, Tree root) {
    // Accumulate all annotations in this string
    StringBuilder annotationStr = new StringBuilder();
    for (String featureName : features) {
        Pair<TregexPattern, Function<TregexMatcher, String>> behavior = annotationPatterns.get(featureName);
        if (behavior == null) {
            // compileAnnotations leaves out patterns that fail to parse, so
            // a feature can be active without a compiled entry; skip it
            // rather than fail with a NullPointerException.
            continue;
        }
        TregexMatcher m = behavior.first().matcher(root);
        if (m.matchesAt(t)) {
            annotationStr.append(behavior.second().apply(m));
        }
    }
    return annotationStr.toString();
}
Also used : Function(java.util.function.Function) SerializableFunction(edu.stanford.nlp.process.SerializableFunction) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 20 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

the class TregexPoweredTreebankParserParams method compileAnnotations.

/**
   * Compile the {@link #annotations} collection given a
   * particular head finder. Subclasses should call this method at
   * least once before the class is used, and whenever the head finder
   * is changed.
   *
   * Previously compiled patterns are discarded first. A pattern that
   * fails to parse is logged together with its position in the
   * iteration order of {@link #annotations} and is left out of the
   * compiled set; the remaining patterns are still compiled.
   *
   * @param hf The head finder handed to the pattern compiler
   */
protected void compileAnnotations(HeadFinder hf) {
    TregexPatternCompiler compiler = new TregexPatternCompiler(hf);
    annotationPatterns.clear();
    // Position of the current entry, counted independently of how many
    // patterns compiled successfully, so that the number reported for a
    // failure stays correct after earlier failures.
    int nth = 0;
    for (Map.Entry<String, Pair<String, Function<TregexMatcher, String>>> annotation : annotations.entrySet()) {
        nth++;
        TregexPattern compiled;
        try {
            compiled = compiler.compile(annotation.getValue().first());
        } catch (TregexParseException e) {
            log.info("Parse exception on annotation pattern #" + nth + " initialization: " + e);
            continue;
        }
        Pair<TregexPattern, Function<TregexMatcher, String>> behavior = new Pair<>(compiled, annotation.getValue().second());
        annotationPatterns.put(annotation.getKey(), behavior);
    }
}
Also used : TregexParseException(edu.stanford.nlp.trees.tregex.TregexParseException) Function(java.util.function.Function) SerializableFunction(edu.stanford.nlp.process.SerializableFunction) TregexPatternCompiler(edu.stanford.nlp.trees.tregex.TregexPatternCompiler) TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Aggregations

TregexMatcher (edu.stanford.nlp.trees.tregex.TregexMatcher)24 TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern)16 Tree (edu.stanford.nlp.trees.Tree)10 CoreLabel (edu.stanford.nlp.ling.CoreLabel)9 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)6 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)5 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)5 Mention (edu.stanford.nlp.coref.data.Mention)3 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)3 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)3 TregexParseException (edu.stanford.nlp.trees.tregex.TregexParseException)3 IntPair (edu.stanford.nlp.util.IntPair)3 Label (edu.stanford.nlp.ling.Label)2 SerializableFunction (edu.stanford.nlp.process.SerializableFunction)2 TreeReader (edu.stanford.nlp.trees.TreeReader)2 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)2 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)2 FrenchTreeReaderFactory (edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory)2 TsurgeonPattern (edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern)2 Pair (edu.stanford.nlp.util.Pair)2