use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class RuleBasedCorefMentionFinder method extractNPorPRP.
/**
 * Adds a mention for every parse-tree node matched by
 * {@code npOrPrpMentionPattern} whose token span has not been recorded yet
 * and is not rejected by {@code insideNE}.
 *
 * @param s the sentence; its tokens, parse tree and enhanced dependencies are read
 * @param mentions receives each newly created mention
 * @param mentionSpanSet spans already used by a mention; new spans are added to it
 * @param namedEntitySpanSet spans passed to {@code insideNE} to filter candidates
 */
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  // The leaf labels' IndexAnnotation is read below, so index the leaves first.
  parse.indexLeaves();
  SemanticGraph deps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

  for (TregexMatcher matcher = npOrPrpMentionPattern.matcher(parse); matcher.find(); ) {
    Tree node = matcher.getMatch();
    List<Tree> leaves = node.getLeaves();

    // Span is [beginIdx, endIdx) over the token list: the first leaf's index
    // is shifted down by one, the last leaf's index is used as the exclusive end.
    CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
    int beginIdx = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
    CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
    int endIdx = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

    // Avoid spans that end with a comma: drop one trailing "," token.
    boolean endsWithComma = ",".equals(tokens.get(endIdx - 1).word());
    if (endsWithComma) {
      endIdx -= 1;
    }

    IntPair span = new IntPair(beginIdx, endIdx);
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }

    // The mention is created with a placeholder id of -1.
    List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(beginIdx, endIdx));
    mentions.add(new Mention(-1, beginIdx, endIdx, deps, spanTokens, node));
    mentionSpanSet.add(span);
  }
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class TregexPoweredTreebankParserParams method getAnnotationString.
/**
 * Build a string of annotations for the given tree.
 *
 * <p>Each name in {@code features} is looked up in {@code annotationPatterns}.
 * When the compiled pattern matches at {@code t}, the string produced by the
 * associated function is appended. Features without a compiled pattern (for
 * example because {@code compileAnnotations} skipped a pattern that failed to
 * parse) are ignored rather than causing a {@code NullPointerException}.
 *
 * @param t The input tree (with non-language specific annotation
 *          already done, so you need to strip back to basic categories)
 * @param root The root of the current tree (can be null for words)
 * @return A (possibly empty) string of annotations to add to the
 *         given tree
 */
protected String getAnnotationString(Tree t, Tree root) {
  // Accumulate all annotations in this string
  StringBuilder annotationStr = new StringBuilder();
  for (String featureName : features) {
    Pair<TregexPattern, Function<TregexMatcher, String>> behavior = annotationPatterns.get(featureName);
    if (behavior == null) {
      // No compiled pattern is registered under this feature name; skip it.
      continue;
    }
    TregexMatcher m = behavior.first().matcher(root);
    if (m.matchesAt(t)) {
      annotationStr.append(behavior.second().apply(m));
    }
  }
  return annotationStr.toString();
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class TregexPoweredTreebankParserParams method compileAnnotations.
/**
 * Compile the {@link #annotations} collection given a
 * particular head finder. Subclasses should call this method at
 * least once before the class is used, and whenever the head finder
 * is changed.
 *
 * <p>Previously compiled patterns are discarded first. A pattern that fails
 * to parse is logged and skipped, so it is left out of the compiled set; the
 * remaining patterns are still compiled.
 *
 * @param hf the head finder handed to the pattern compiler
 */
protected void compileAnnotations(HeadFinder hf) {
  TregexPatternCompiler compiler = new TregexPatternCompiler(hf);
  annotationPatterns.clear();
  // Position of the current entry in iteration order. Counting entries rather
  // than successful compilations keeps the logged ordinal right after a failure.
  int nth = 0;
  for (Map.Entry<String, Pair<String, Function<TregexMatcher, String>>> annotation : annotations.entrySet()) {
    nth++;
    TregexPattern compiled;
    try {
      compiled = compiler.compile(annotation.getValue().first());
    } catch (TregexParseException e) {
      log.info("Parse exception on annotation pattern #" + nth + " initialization: " + e);
      continue;
    }
    Pair<TregexPattern, Function<TregexMatcher, String>> behavior = new Pair<>(compiled, annotation.getValue().second());
    annotationPatterns.put(annotation.getKey(), behavior);
  }
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class CoordinationTransformer method MWETransform.
/**
 * Groups every multi-word expression under a single constituent labeled "MWE".
 * The general expressions come from MWE_PATTERNS; four further pattern/operation
 * pairs are applied afterwards, in a fixed order.
 *
 * @param t the tree handed to each Tsurgeon operation
 * @return the same {@code t} reference that was passed in
 */
public static Tree MWETransform(Tree t) {
  // Every pattern in MWE_PATTERNS shares the one MWE_OPERATION.
  for (TregexPattern mwePattern : MWE_PATTERNS) {
    Tsurgeon.processPattern(mwePattern, MWE_OPERATION, t);
  }

  // Expressions that need their own dedicated operation.
  Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t);
  Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t);
  Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t);
  Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t);

  // The values returned by processPattern are not used; the argument is returned.
  return t;
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class GenerateTrees method readGrammar.
/**
 * Reads a grammar description from {@code bin} into this object's
 * {@code terminals}, {@code nonTerminals} and {@code tsurgeons} collections.
 *
 * <p>Blank lines and lines starting with {@code #} are skipped. A line that
 * is (case-insensitively) the name of a {@code Section} constant switches to
 * that section; reading starts in {@code TERMINALS}. Other lines are split
 * on runs of spaces, with the first piece used as the key:
 * <ul>
 *   <li>{@code TERMINALS}: each remaining piece is counted as one production.</li>
 *   <li>{@code NONTERMINALS}: the remaining pieces together are counted as one
 *       production.</li>
 *   <li>{@code TSURGEON}: the operation is read straight from the reader when
 *       the section header is seen; any further content line is an error.</li>
 * </ul>
 *
 * @param bin the reader to consume; it is not closed by this method
 * @throws RuntimeIOException if reading from {@code bin} fails
 * @throws RuntimeException if a content line appears in a tsurgeon section
 *         after its operation has been read
 */
public void readGrammar(BufferedReader bin) {
  try {
    String line;
    Section section = Section.TERMINALS;
    while ((line = bin.readLine()) != null) {
      line = line.trim();
      if (line.isEmpty() || line.charAt(0) == '#') {
        // skip blank lines and comments
        continue;
      }

      // Only the enum lookup is guarded: an IllegalArgumentException raised
      // while reading a tsurgeon operation must not be mistaken for "not a
      // section header". Locale.ROOT keeps the upper-casing independent of the
      // default locale (e.g. Turkish dotted/dotless i).
      Section newSection = null;
      try {
        newSection = Section.valueOf(line.toUpperCase(java.util.Locale.ROOT));
      } catch (IllegalArgumentException ignored) {
        // never mind, not an enum
      }
      if (newSection != null) {
        section = newSection;
        if (section == Section.TSURGEON) {
          // this will read a tregex pattern until it has eaten a blank
          // line, then read tsurgeon until it has eaten another
          // blank line.
          Pair<TregexPattern, TsurgeonPattern> operation = Tsurgeon.getOperationFromReader(bin, compiler);
          tsurgeons.add(operation);
        }
        continue;
      }

      String[] pieces = line.split(" +");
      switch (section) {
        case TSURGEON: {
          throw new RuntimeException("Found a non-empty line in a tsurgeon section after reading the operation");
        }
        case TERMINALS: {
          Counter<String> productions = terminals.get(pieces[0]);
          if (productions == null) {
            productions = new ClassicCounter<>();
            terminals.put(pieces[0], productions);
          }
          for (int i = 1; i < pieces.length; ++i) {
            productions.incrementCount(pieces[i]);
          }
          break;
        }
        case NONTERMINALS: {
          Counter<List<String>> productions = nonTerminals.get(pieces[0]);
          if (productions == null) {
            productions = new ClassicCounter<>();
            nonTerminals.put(pieces[0], productions);
          }
          String[] sublist = Arrays.copyOfRange(pieces, 1, pieces.length);
          productions.incrementCount(Arrays.asList(sublist));
          break;
        }
      }
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
Aggregations