use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class Tsurgeon method getOperationFromReader.
/**
 * Reads one operation from a tsurgeon script: the text of a tregex pattern,
 * followed by the tsurgeon operations that belong to it. The pattern text is
 * compiled before the operations are read from the same reader.
 *
 * @param reader Reader to read the pattern text and the operations from
 * @param compiler Compiler used to turn the pattern text into a {@link TregexPattern}
 * @return A pair of the compiled tregex pattern and the tsurgeon pattern, or
 *         {@code null} when the pattern text read from the reader is empty
 * @throws IOException If any IO problem
 */
public static Pair<TregexPattern, TsurgeonPattern> getOperationFromReader(BufferedReader reader, TregexPatternCompiler compiler) throws IOException {
  final String tregexText = getTregexPatternFromReader(reader);
  if (!tregexText.isEmpty()) {
    // Compile first, then consume the operations that follow the pattern.
    final TregexPattern tregex = compiler.compile(tregexText);
    final TsurgeonPattern operations = getTsurgeonOperationsFromReader(reader);
    return new Pair<>(tregex, operations);
  }
  // Empty pattern text: nothing (more) to return.
  return null;
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class SemanticHeadFinder method determineNonTrivialHead.
/**
* Determine which daughter of the current parse tree is the
* head. It assumes that the daughters already have had their
* heads determined. Special rules are tried in this order, and the
* first one that produces a head wins:
* <ol>
* <li>CONJP: each pattern in {@code headOfConjpTregex} is matched at {@code t};
* on a match, the node named "head" is returned.</li>
* <li>SBARQ / SINV, only when {@code makeCopulaHead} is false: the same
* procedure with {@code headOfCopulaTregex}.</li>
* <li>VP / SQ / SINV: a verbal-auxiliary rule (case 1), then a copular-verb
* rule (case 2). Both search the children that remain after filtering
* with {@code REMOVE_TMP_AND_ADV}.</li>
* </ol>
* If none of these yields a head, the superclass implementation decides (case 3).
*
* @param t The parse tree to examine the daughters of.
* This is assumed to never be a leaf
* @param parent The parent of {@code t}; it is passed on to {@code isExistential},
* {@code isWHQ} and the superclass implementation
* @return The parse tree that is the head
*/
@Override
protected Tree determineNonTrivialHead(Tree t, Tree parent) {
// Category of this node; every rule below is selected by comparing against it.
String motherCat = tlp.basicCategory(t.label().value());
if (DEBUG) {
log.info("At " + motherCat + ", my parent is " + parent);
}
// downstream code was written assuming "not" would be the head...
if (motherCat.equals("CONJP")) {
for (TregexPattern pattern : headOfConjpTregex) {
TregexMatcher matcher = pattern.matcher(t);
if (matcher.matchesAt(t)) {
return matcher.getNode("head");
}
}
// if none of the above patterns match, use the standard method
}
if (motherCat.equals("SBARQ") || motherCat.equals("SINV")) {
// The copula patterns are only consulted when the copula is NOT to be made the head.
if (!makeCopulaHead) {
for (TregexPattern pattern : headOfCopulaTregex) {
TregexMatcher matcher = pattern.matcher(t);
if (matcher.matchesAt(t)) {
return matcher.getNode("head");
}
}
}
// if none of the above patterns match, use the standard method
}
// do VPs with auxiliary as special case
// Note: SINV is handled here as well, after falling through the SBARQ/SINV block above.
if ((motherCat.equals("VP") || motherCat.equals("SQ") || motherCat.equals("SINV"))) {
Tree[] kids = t.children();
if (DEBUG) {
log.info("Semantic head finder: at VP");
log.info("Class is " + t.getClass().getName());
t.pennPrint(System.err);
//log.info("hasVerbalAuxiliary = " + hasVerbalAuxiliary(kids, verbalAuxiliaries));
}
// looks for auxiliaries
// Filtered copy of kids; computed on first use and shared by case 1 and case 2.
Tree[] tmpFilteredChildren = null;
if (hasVerbalAuxiliary(kids, verbalAuxiliaries, true) || hasPassiveProgressiveAuxiliary(kids)) {
// String[] how = new String[] {"left", "VP", "ADJP", "NP"};
// Including NP etc seems okay for copular sentences but is
// problematic for other auxiliaries, like 'he has an answer'
// NOTE(review): the first element of "how" is presumably the search direction
// and the rest the candidate categories for traverseLocate - confirm there.
String[] how;
if (hasVerbalAuxiliary(kids, copulars, true)) {
// Only allow ADJP in copular constructions
// In constructions like "It gets cold", "get" should be the head
how = new String[] { "left", "VP", "ADJP" };
} else {
how = new String[] { "left", "VP" };
}
// Always true at this point (the variable was initialised to null just above);
// the check mirrors the lazy initialisation used in case 2.
if (tmpFilteredChildren == null) {
tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
}
Tree pti = traverseLocate(tmpFilteredChildren, how, false);
if (DEBUG) {
log.info("Determined head (case 1) for " + t.value() + " is: " + pti);
}
// A null result is not an error: control falls through to the copula rule below.
if (pti != null) {
return pti;
// } else {
// log.info("------");
// log.info("SemanticHeadFinder failed to reassign head for");
// t.pennPrint(System.err);
// log.info("------");
}
}
// looks for copular verbs
// NOTE(review): the boolean passed to hasVerbalAuxiliary is false here but true in
// case 1; its meaning is defined in hasVerbalAuxiliary - confirm there.
if (hasVerbalAuxiliary(kids, copulars, false) && !isExistential(t, parent) && !isWHQ(t, parent)) {
String[] how;
// SQ uses "right" where the other categories use "left"; the category list is the same.
if (motherCat.equals("SQ")) {
how = new String[] { "right", "VP", "ADJP", "NP", "WHADJP", "WHNP" };
} else {
how = new String[] { "left", "VP", "ADJP", "NP", "WHADJP", "WHNP" };
}
// Avoid undesirable heads by filtering them from the list of potential children
if (tmpFilteredChildren == null) {
tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
}
Tree pti = traverseLocate(tmpFilteredChildren, how, false);
// In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative)
if (motherCat.equals("SQ") && pti != null && pti.label() != null && pti.label().value().startsWith("NP")) {
boolean foundAnotherNp = false;
// Scan the unfiltered children from the left and stop at the candidate itself;
// any child whose label starts with "NP" before that point counts.
for (Tree kid : kids) {
if (kid == pti) {
break;
} else if (kid.label() != null && kid.label().value().startsWith("NP")) {
foundAnotherNp = true;
break;
}
}
// No NP to the left: reject the candidate so the standard method is used instead.
if (!foundAnotherNp) {
pti = null;
}
}
if (DEBUG) {
log.info("Determined head (case 2) for " + t.value() + " is: " + pti);
}
if (pti != null) {
return pti;
} else {
if (DEBUG) {
log.info("------");
log.info("SemanticHeadFinder failed to reassign head for");
t.pennPrint(System.err);
log.info("------");
}
}
}
}
// No special rule produced a head: defer to the superclass implementation.
Tree hd = super.determineNonTrivialHead(t, parent);
if (DEBUG) {
log.info("Determined head (case 3) for " + t.value() + " is: " + hd);
}
return hd;
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class GrammaticalRelation method getRelatedNodes.
/**
 * Collects the nodes to which {@code t} bears this grammatical relation,
 * with {@code t} acting as the governor. Every pattern in
 * {@code targetPatterns} is tried, and every match found at {@code t}
 * contributes its node named "target" to the result.
 *
 * @param t Target for finding dependents of t related by this GR
 * @param root The root of the Tree
 * @param headFinder The HeadFinder handed to each TregexMatcher, so that the
 *                   same one is used through the entire process of building
 *                   the dependencies
 * @return A Collection of dependent nodes to which t bears this GR
 * @throws AssertionError if a pattern matches but binds no node named "target"
 */
public Collection<TreeGraphNode> getRelatedNodes(TreeGraphNode t, TreeGraphNode root, HeadFinder headFinder) {
  Set<TreeGraphNode> dependents = new ArraySet<>();
  for (TregexPattern pattern : targetPatterns) {
    // All patterns are consulted, even when earlier ones already found dependents.
    TregexMatcher matcher = pattern.matcher(root, headFinder);
    while (matcher.findAt(t)) {
      TreeGraphNode dependent = (TreeGraphNode) matcher.getNode("target");
      if (dependent == null) {
        throw new AssertionError("Expression has no target: " + pattern);
      }
      dependents.add(dependent);
      if (!DEBUG) {
        continue;
      }
      log.info("found " + this + "(" + t + "-" + t.headWordNode() + ", " + dependent + "-" + dependent.headWordNode() + ") using pattern " + pattern);
      // Report every other named node of the match as well.
      for (String nodeName : matcher.getNodeNames()) {
        if (!nodeName.equals("target")) {
          log.info(" node " + nodeName + ": " + matcher.getNode(nodeName));
        }
      }
    }
  }
  return dependents;
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method isPleonasticDebug.
/**
 * Checks a mention against every pattern in {@code pleonasticPatterns} and
 * writes a diagnostic record of the outcome to {@code sbLog}.
 * <p>
 * Mentions whose span text is not "it" (ignoring case) are rejected at once
 * and nothing is logged for them. Otherwise all patterns are evaluated; the
 * logged "matched pattern" is the index of the last pattern that matched, or
 * -1 if none did, and "correct?" is {@code m.hasTwin != result}.
 *
 * @param m The mention to test
 * @param tree The tree passed to {@code checkPleonastic} together with each pattern
 * @param sbLog Buffer receiving the diagnostic record
 * @return {@code true} if at least one pattern matched
 */
public static boolean isPleonasticDebug(Mention m, Tree tree, StringBuilder sbLog) {
  if (!m.spanToString().equalsIgnoreCase("it")) {
    return false;
  }

  // No early exit: every pattern is checked and the last hit is remembered.
  int lastMatchIdx = -1;
  int idx = 0;
  for (TregexPattern candidate : pleonasticPatterns) {
    if (checkPleonastic(m, tree, candidate)) {
      lastMatchIdx = idx;
    }
    idx++;
  }
  final boolean pleonastic = lastMatchIdx >= 0;

  final String summary = "PLEONASTIC IT: mention ID: " + m.mentionID + "\thastwin: " + m.hasTwin + "\tpleonastic it? " + pleonastic + "\tcorrect? " + (m.hasTwin != pleonastic) + "\tmatched pattern: " + lastMatchIdx + "\n";
  sbLog.append(summary);
  sbLog.append(m.contextParseTree.pennString()).append("\n").append("PLEONASTIC IT END\n");
  return pleonastic;
}
use of edu.stanford.nlp.trees.tregex.TregexPattern in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method extractEnumerations.
/**
 * Adds a mention for each enumeration member found in the sentence's parse
 * tree by {@code enumerationsMentionPattern}.
 * <p>
 * For every match, the subtrees bound to "m1" and "m2" are recorded under
 * their token span; a span seen more than once keeps the subtree recorded
 * last. A mention is then created for each span that is not already in
 * {@code mentionSpanSet} and for which {@code insideNE} is false. New
 * mentions are appended to {@code mentions} and their spans are added to
 * {@code mentionSpanSet}.
 *
 * @param s The sentence; its tokens, parse tree and dependency graphs are read from it
 * @param mentions Output list to which new mentions are appended
 * @param mentionSpanSet Spans that already have a mention; updated in place
 * @param namedEntitySpanSet Named-entity spans passed to {@code insideNE}
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    // Fall back to the basic graph that was already looked up above.
    enhancedDependency = basicDependency;
  }

  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  while (matcher.find()) {
    // Fetch both named nodes before recording either, as the original code did.
    Tree m1 = matcher.getNode("m1");
    Tree m2 = matcher.getNode("m2");
    spanToMentionSubTree.put(enumerationLeafSpan(m1), m1);
    spanToMentionSubTree.put(enumerationLeafSpan(m2), m2);
  }

  for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
    IntPair span = spanMention.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }
    int dummyMentionId = -1;
    int begin = span.get(0);
    int end = span.get(1);
    // The span doubles as the subList bounds of the mention's tokens.
    Mention m = new Mention(dummyMentionId, begin, end, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(begin, end)), spanMention.getValue());
    mentions.add(m);
    mentionSpanSet.add(span);
  }
}

/**
 * Computes the token span covered by a subtree from the IndexAnnotation of
 * its first and last leaf: begin is the first leaf's index minus one, end is
 * the last leaf's index.
 */
private static IntPair enumerationLeafSpan(Tree subtree) {
  List<Tree> leaves = subtree.getLeaves();
  int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
  int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
  return new IntPair(beginIdx, endIdx);
}
Aggregations