Search in sources :

Example 1 with TsurgeonPattern

use of edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern in project CoreNLP by stanfordnlp.

the class FTBCorrector method loadOps.

/**
 * Parses the edit script held in {@code editStr} into (match pattern, operation) pairs.
 *
 * <p>The script is consumed line by line: one line is compiled as a Tregex pattern, and
 * the lines after it, for as long as {@code continuing} accepts them, are parsed as
 * Tsurgeon operations belonging to that pattern. The line that ends a group is consumed
 * and not interpreted. A pattern followed by no operations is dropped.
 *
 * @return the pairs parsed so far, in script order; if an {@link IOException} occurs the
 *         stack trace is printed and whatever was parsed before the failure is returned
 */
private List<Pair<TregexPattern, TsurgeonPattern>> loadOps() {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<>();
    // try-with-resources: the reader is closed on every exit path, including parse failures.
    try (BufferedReader br = new BufferedReader(new StringReader(editStr))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (DEBUG) {
                log.info("Pattern is " + line);
            }
            TregexPattern matchPattern = TregexPattern.compile(line);
            if (DEBUG) {
                log.info(" [" + matchPattern + "]");
            }
            // A fresh list per pattern, so the result never depends on whether
            // collectOperations copies the list it is handed.
            List<TsurgeonPattern> tsp = new ArrayList<>();
            while (continuing(line = br.readLine())) {
                TsurgeonPattern p = Tsurgeon.parseOperation(line);
                if (DEBUG) {
                    log.info("Operation is " + line + " [" + p + "]");
                }
                tsp.add(p);
            }
            if (!tsp.isEmpty()) {
                TsurgeonPattern tp = Tsurgeon.collectOperations(tsp);
                ops.add(new Pair<>(matchPattern, tp));
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return ops;
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) ArrayList(java.util.ArrayList) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern) Pair(edu.stanford.nlp.util.Pair)

Example 2 with TsurgeonPattern

use of edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern in project CoreNLP by stanfordnlp.

the class InputPanel method runScript.

/**
 * Starts a background thread (stored in {@code searchThread}) that parses the text of
 * {@code tsurgeonScript} as a Tsurgeon script and applies it to every tree matched by
 * the Tregex pattern currently in {@code tregexPattern}.
 *
 * <p>The modified trees are handed to {@code returnToValidState}. If the thread is
 * interrupted, {@code returnToValidState} is called early with either an empty list
 * (before processing starts) or the unmodified matches (during processing). Any
 * exception is reported through {@code doError} and the UI state is reset on the
 * Swing event thread.
 */
private void runScript() {
    setTsurgeonState(true);
    final String script = tsurgeonScript.getText();
    searchThread = new Thread() {

        @Override
        public void run() {
            try {
                final TsurgeonPattern operation;
                // Close the script reader as soon as the operations have been parsed.
                try (BufferedReader reader = new BufferedReader(new StringReader(script))) {
                    operation = Tsurgeon.getTsurgeonOperationsFromReader(reader);
                }
                final String text = tregexPattern.getText().intern();
                SwingUtilities.invokeLater(() -> {
                    InputPanel.this.addRecentTregexPattern(text);
                    useProgressBar(true);
                });
                // "this" is the worker thread itself, passed on to the visitor factory.
                final TRegexGUITreeVisitor visitor = getMatchTreeVisitor(text, this);
                // means the tregex errored out
                if (visitor == null) {
                    return;
                }
                if (this.isInterrupted()) {
                    returnToValidState(text, visitor, new ArrayList<>());
                    return;
                }
                // log.info("Running Script with matches: " + visitor.getMatches());
                List<TreeFromFile> trees = visitor.getMatches();
                final List<TreeFromFile> modifiedTrees = new ArrayList<>();
                for (TreeFromFile tff : trees) {
                    // Check for cancellation between trees; on interrupt hand back the
                    // original, unmodified matches.
                    if (this.isInterrupted()) {
                        returnToValidState(text, visitor, trees);
                        return;
                    }
                    Tree modifiedTree = Tsurgeon.processPattern(visitor.getPattern(), operation, tff.getTree());
                    modifiedTrees.add(new TreeFromFile(modifiedTree, tff.getFilename().intern()));
                }
                returnToValidState(text, visitor, modifiedTrees);
            } catch (Exception e) {
                doError("Sorry, there was an error compiling or running the Tsurgeon script.  Please press Help if you need assistance.", e);
                SwingUtilities.invokeLater(() -> {
                    setTregexState(false);
                    setTsurgeonState(false);
                    InputPanel.this.searchThread = null;
                });
            }
        }
    };
    searchThread.start();
}
Also used : ArrayList(java.util.ArrayList) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern) BufferedReader(java.io.BufferedReader) StringReader(java.io.StringReader) Tree(edu.stanford.nlp.trees.Tree) List(java.util.List)

Example 3 with TsurgeonPattern

use of edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern in project CoreNLP by stanfordnlp.

the class SpanishTreeNormalizer method expandConmigo.

/**
 * Expands the fused forms "conmigo", "contigo" and "consigo" found by
 * {@code conmigoPattern}: the node named {@code conmigo} is relabeled with the bare
 * pronoun ("mí", "ti" or "sí") and a prepositional structure containing "con" is
 * adjoined at the node named {@code sn}.
 *
 * <p>If the matched word starts with an upper-case 'C', the replacement pronoun is
 * upper-cased. Matches whose word is not one of the three recognised forms are left
 * untouched.
 *
 * @param t the tree to transform
 * @return the tree produced by applying the operations for every match
 */
private static Tree expandConmigo(Tree t) {
    TregexMatcher matcher = conmigoPattern.matcher(t);
    while (matcher.find()) {
        Tree conmigoNode = matcher.getNode("conmigo");
        String word = conmigoNode.value();
        String newPronoun;
        // Literal-first comparison keeps this safe if the node has no value.
        if ("conmigo".equalsIgnoreCase(word)) {
            newPronoun = "mí";
        } else if ("contigo".equalsIgnoreCase(word)) {
            newPronoun = "ti";
        } else if ("consigo".equalsIgnoreCase(word)) {
            newPronoun = "sí";
        } else {
            // Unrecognised form: skip it rather than dereference a null pronoun or
            // relabel the node to the literal string "null".
            continue;
        }
        if (word.charAt(0) == 'C') {
            // Locale.ROOT avoids locale-specific casing (e.g. Turkish dotted capital I for "ti").
            newPronoun = newPronoun.toUpperCase(java.util.Locale.ROOT);
        }
        String tsurgeon = String.format("[relabel conmigo /%s/]" + "[adjoinF (sp (prep (sp000 con)) foot@) sn]", newPronoun);
        TsurgeonPattern pattern = Tsurgeon.parseOperation(tsurgeon);
        t = pattern.matcher().evaluate(t, matcher);
    }
    return t;
}
Also used : TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern)

Example 4 with TsurgeonPattern

use of edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern in project CoreNLP by stanfordnlp.

the class SpanishTreeNormalizer method expandCliticPronounsInner.

/**
 * Expands clitic pronouns on verbs matching the given pattern.
 *
 * <p>For each match, the value of the node named {@code vb} is split into a stem and
 * its attached pronouns. Each pronoun is inserted as a new subtree relative to the
 * node named {@code target}, and {@code vb} is then relabeled with the stem. Matches
 * whose verb is not strippable, or for which no split is produced, are skipped.
 *
 * @param t the tree to transform
 * @param pattern Tregex pattern; the code reads the named nodes {@code vb} and
 *        {@code clause} from each match, and the generated operations refer to
 *        {@code target}
 * @return the tree produced by applying the operations for every match
 */
private static Tree expandCliticPronounsInner(Tree t, TregexPattern pattern) {
    TregexMatcher matcher = pattern.matcher(t);
    while (matcher.find()) {
        Tree verbNode = matcher.getNode("vb");
        String verb = verbNode.value();
        if (!SpanishVerbStripper.isStrippable(verb)) {
            continue;
        }
        SpanishVerbStripper.StrippedVerb split = verbStripper.separatePronouns(verb);
        if (split == null) {
            continue;
        }
        // Context for the pronoun disambiguator: the space-separated yield of the
        // node the pattern bound to "clause".
        StringBuilder clauseYieldBuilder = new StringBuilder();
        for (Label label : matcher.getNode("clause").yield()) {
            clauseYieldBuilder.append(label.value()).append(" ");
        }
        // Drop the trailing separator only if something was appended; an empty
        // yield previously caused a StringIndexOutOfBoundsException here.
        if (clauseYieldBuilder.length() > 0) {
            clauseYieldBuilder.setLength(clauseYieldBuilder.length() - 1);
        }
        String clauseYield = clauseYieldBuilder.toString();
        // Insert clitic pronouns as leaves of pronominal phrases which are
        // siblings of `target`. Iterate in reverse order since pronouns are
        // attached to immediate right of `target`
        List<String> pronouns = split.getPronouns();
        for (int i = pronouns.size() - 1; i >= 0; i--) {
            String pronoun = pronouns.get(i);
            String newTreeStr = null;
            if (AnCoraPronounDisambiguator.isAmbiguous(pronoun)) {
                AnCoraPronounDisambiguator.PersonalPronounType type = AnCoraPronounDisambiguator.disambiguatePersonalPronoun(split, i, clauseYield);
                // NOTE(review): there is no default branch; if the enum has constants
                // other than these three, newTreeStr stays null and the String.format
                // call after this switch throws. Confirm the enum is fully covered.
                switch(type) {
                    case OBJECT:
                        newTreeStr = "(sn (grup.nom (pp000000 %s)))";
                        break;
                    case REFLEXIVE:
                        newTreeStr = "(morfema.pronominal (pp000000 %s))";
                        break;
                    case UNKNOWN:
                        // Mark for manual disambiguation
                        newTreeStr = "(PRONOUN? (pp000000 %s))";
                        break;
                }
            } else {
                // Unambiguous clitic pronouns are all indirect / direct
                // object pronouns.. convenient!
                newTreeStr = "(sn (grup.nom (pp000000 %s)))";
            }
            String patternString = "[insert " + String.format(newTreeStr, pronoun) + " $- target]";
            TsurgeonPattern insertPattern = Tsurgeon.parseOperation(patternString);
            t = insertPattern.matcher().evaluate(t, matcher);
        }
        // Finally replace the verb's label with the stem left after removing the pronouns.
        TsurgeonPattern relabelOperation = Tsurgeon.parseOperation(String.format("[relabel vb /%s/]", split.getStem()));
        t = relabelOperation.matcher().evaluate(t, matcher);
    }
    return t;
}
Also used : SpanishVerbStripper(edu.stanford.nlp.international.spanish.SpanishVerbStripper) AnCoraPronounDisambiguator(edu.stanford.nlp.international.spanish.process.AnCoraPronounDisambiguator) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Label(edu.stanford.nlp.ling.Label) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern)

Example 5 with TsurgeonPattern

use of edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern in project CoreNLP by stanfordnlp.

the class ATBCorrector method loadOps.

/**
 * Parses the edit script held in {@code editStr} into (match pattern, operation) pairs.
 *
 * <p>The script is consumed line by line: one line is compiled as a Tregex pattern, and
 * the lines after it, for as long as {@code continuing} accepts them, are parsed as
 * Tsurgeon operations belonging to that pattern. The line that ends a group is consumed
 * and not interpreted. A pattern followed by no operations is dropped.
 *
 * @return the pairs parsed so far, in script order; if an {@link IOException} occurs the
 *         stack trace is printed and whatever was parsed before the failure is returned
 */
private List<Pair<TregexPattern, TsurgeonPattern>> loadOps() {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<>();
    // try-with-resources: the reader is closed on every exit path, including parse failures.
    try (BufferedReader br = new BufferedReader(new StringReader(editStr))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (DEBUG) {
                log.info("Pattern is " + line);
            }
            TregexPattern matchPattern = TregexPattern.compile(line);
            if (DEBUG) {
                log.info(" [" + matchPattern + "]");
            }
            // A fresh list per pattern, so the result never depends on whether
            // collectOperations copies the list it is handed.
            List<TsurgeonPattern> tsp = new ArrayList<>();
            while (continuing(line = br.readLine())) {
                TsurgeonPattern p = Tsurgeon.parseOperation(line);
                if (DEBUG) {
                    log.info("Operation is " + line + " [" + p + "]");
                }
                tsp.add(p);
            }
            if (!tsp.isEmpty()) {
                TsurgeonPattern tp = Tsurgeon.collectOperations(tsp);
                ops.add(new Pair<>(matchPattern, tp));
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return ops;
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) ArrayList(java.util.ArrayList) TsurgeonPattern(edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern) Pair(edu.stanford.nlp.util.Pair)

Aggregations

TsurgeonPattern (edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern): 7, TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern): 4, ArrayList (java.util.ArrayList): 4, Pair (edu.stanford.nlp.util.Pair): 3, TregexMatcher (edu.stanford.nlp.trees.tregex.TregexMatcher): 2, BufferedReader (java.io.BufferedReader): 2, IOException (java.io.IOException): 2, StringReader (java.io.StringReader): 2, SpanishVerbStripper (edu.stanford.nlp.international.spanish.SpanishVerbStripper): 1, AnCoraPronounDisambiguator (edu.stanford.nlp.international.spanish.process.AnCoraPronounDisambiguator): 1, RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 1, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 1, Label (edu.stanford.nlp.ling.Label): 1, ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1, Counter (edu.stanford.nlp.stats.Counter): 1, Tree (edu.stanford.nlp.trees.Tree): 1, List (java.util.List): 1