Use of edu.stanford.nlp.parser.common.ParserConstraint in project CoreNLP by stanfordnlp.
From class ShiftReduceParserITest, method testBasicConstraint:
public void testBasicConstraint() {
List<CoreLabel> sentence = SentenceUtils.toCoreLabelList("It", "was", "Carolina", "Reapers", ".");
englishTagger.tagCoreLabels(sentence);
Tree result = englishParser.apply(sentence);
// pretty much need to make the test rely on the parser being consistent
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina) (NNP Reapers))) (. .)))", result.toString());
ParserConstraint constraint = new ParserConstraint(2, 4, ".*");
List<ParserConstraint> constraints = Collections.singletonList(constraint);
ParserQuery pq = englishParser.parserQuery();
pq.setConstraints(constraints);
assertTrue(pq.parse(sentence));
result = pq.getBestParse();
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina) (NNP Reapers))) (. .)))", result.toString());
constraint = new ParserConstraint(2, 4, "NP");
constraints = Collections.singletonList(constraint);
pq = englishParser.parserQuery();
pq.setConstraints(constraints);
assertTrue(pq.parse(sentence));
result = pq.getBestParse();
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina) (NNP Reapers))) (. .)))", result.toString());
constraint = new ParserConstraint(2, 4, "ADJP");
constraints = Collections.singletonList(constraint);
pq = englishParser.parserQuery();
pq.setConstraints(constraints);
assertTrue(pq.parse(sentence));
result = pq.getBestParse();
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (ADJP (NP (NNP Carolina) (NNP Reapers)))) (. .)))", result.toString());
constraint = new ParserConstraint(1, 3, "VP");
constraints = Collections.singletonList(constraint);
pq = englishParser.parserQuery();
pq.setConstraints(constraints);
assertTrue(pq.parse(sentence));
result = pq.getBestParse();
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina))) (NP (NNP Reapers)) (. .)))", result.toString());
}
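A note on the span convention the assertions above depend on: a ParserConstraint is a half-open token span [start, end) plus a label pattern, so (2, 4, ...) covers exactly "Carolina" and "Reapers" in the five-token sentence. The sketch below is illustrative, not part of the test; it uses only the public members visible elsewhere on this page (the int fields start and end, plus state, the compiled label Pattern that the parsers match against constituent labels).
import edu.stanford.nlp.parser.common.ParserConstraint;
// Sketch: constraint spans are half-open over token indices, and the label
// regex is compiled into the Pattern field "state".
ParserConstraint anyLabel = new ParserConstraint(2, 4, ".*");  // tokens 2..3 must form a constituent, any label
ParserConstraint adjp = new ParserConstraint(2, 4, "ADJP");    // tokens 2..3 must sit under an ADJP
assert adjp.start == 2 && adjp.end == 4;                       // public int fields
assert adjp.state.matcher("ADJP").matches();                   // "state" is the compiled label pattern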
Use of edu.stanford.nlp.parser.common.ParserConstraint in project CoreNLP by stanfordnlp.
From class BinaryTransition, method isLegal:
/**
* Legal as long as there are at least two items on the state's stack.
*/
public boolean isLegal(State state, List<ParserConstraint> constraints) {
// some of these conditions are quoted directly from Zhang & Clark (2009)
if (state.finished) {
return false;
}
if (state.stack.size() <= 1) {
return false;
}
// at least one of the two nodes on top of stack must be non-temporary
if (ShiftReduceUtils.isTemporary(state.stack.peek()) && ShiftReduceUtils.isTemporary(state.stack.pop().peek())) {
return false;
}
if (ShiftReduceUtils.isTemporary(state.stack.peek())) {
if (side == Side.LEFT) {
return false;
}
if (!ShiftReduceUtils.isEquivalentCategory(label, state.stack.peek().value())) {
return false;
}
}
if (ShiftReduceUtils.isTemporary(state.stack.pop().peek())) {
if (side == Side.RIGHT) {
return false;
}
if (!ShiftReduceUtils.isEquivalentCategory(label, state.stack.pop().peek().value())) {
return false;
}
}
// don't allow a binarized (temporary) node if the reduce would leave a stack
// of size 1 and a queue of size 0, since it could never be debinarized
if (state.stack.size() == 2 && isBinarized() && state.endOfQueue()) {
return false;
}
// nodes from binary reduce must be left-headed
if (state.stack.size() == 2 && isBinarized() && side == Side.RIGHT) {
return false;
}
// reduce can be applied only if the resulting node is non-temporary
if (state.endOfQueue() && state.stack.size() > 2 && ShiftReduceUtils.isTemporary(state.stack.pop().pop().peek()) && isBinarized()) {
return false;
}
// nodes from binary reduce must be left-headed
if (state.stack.size() > 2 && ShiftReduceUtils.isTemporary(state.stack.pop().pop().peek()) && isBinarized() && side == Side.RIGHT) {
return false;
}
if (constraints == null) {
return true;
}
final Tree top = state.stack.peek();
final int leftTop = ShiftReduceUtils.leftIndex(top);
final int rightTop = ShiftReduceUtils.rightIndex(top);
final Tree next = state.stack.pop().peek();
final int leftNext = ShiftReduceUtils.leftIndex(next);
// The constraints affect binary transitions in the following ways.
// If the transition would cross a constraint boundary, that is illegal.
// If the transition spans exactly the constraint but the labels do not
// match, that is illegal.  If the transition is on the left boundary of
// a constraint and would make a temporary node, that is also illegal.
for (ParserConstraint constraint : constraints) {
if (leftTop == constraint.start) {
// can't binary reduce away from a tree which doesn't match a constraint
if (rightTop == constraint.end - 1) {
if (!ShiftReduceUtils.constraintMatchesTreeTop(top, constraint)) {
return false;
} else {
continue;
}
} else if (rightTop >= constraint.end) {
continue;
} else {
// can't binary reduce if it would make the tree cross the left boundary
return false;
}
}
// top element starts before the constraint, so there's no harm to be done by binary reduce
if (leftTop < constraint.start) {
continue;
}
// top element is past the end of the constraint, so it must already be satisfied
if (leftTop >= constraint.end) {
continue;
}
// the next case is no good because it crosses the boundary
if (leftNext < constraint.start) {
return false;
}
if (leftNext > constraint.start) {
continue;
}
// can't transition to a binarized node when there's a constraint that matches.
if (rightTop == constraint.end - 1 && isBinarized()) {
return false;
}
}
return true;
}
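To make the constraint logic above concrete, here is a hypothetical standalone predicate, an illustration rather than a CoreNLP method, covering the three cases for a reduce that would build a node over the inclusive token span [newLeft, newRight]. It simplifies one detail: the real code calls ShiftReduceUtils.constraintMatchesTreeTop, which also accepts a matching label further down a unary chain.
import java.util.List;
import edu.stanford.nlp.parser.common.ParserConstraint;
// Hypothetical sketch of the three constraint cases; not CoreNLP API.
static boolean violatesConstraints(List<ParserConstraint> constraints,
                                   int newLeft, int newRight,
                                   String label, boolean binarized) {
  for (ParserConstraint c : constraints) {
    int cLeft = c.start;
    int cRight = c.end - 1;                        // c.end is exclusive
    boolean overlap = newLeft <= cRight && cLeft <= newRight;
    boolean nodeContains = newLeft <= cLeft && cRight <= newRight;
    boolean nodeInside = cLeft <= newLeft && newRight <= cRight;
    if (overlap && !nodeContains && !nodeInside) {
      return true;                                 // crosses a constraint boundary
    }
    if (newLeft == cLeft && newRight == cRight) {
      if (binarized) {
        return true;                               // exact span, but only a temporary node
      }
      if (!c.state.matcher(label).matches()) {
        return true;                               // exact span, but the label does not match
      }
    }
  }
  return false;
}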
Use of edu.stanford.nlp.parser.common.ParserConstraint in project CoreNLP by stanfordnlp.
From class ParserAnnotatorITest, method testConstraints:
/**
* Test what happens if you put a constraint on the parse
*/
public void testConstraints() {
String expectedResult = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
Annotation annotation = new Annotation("My dog also likes eating sausage.");
noParserPipeline.annotate(annotation);
CoreMap sentence = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0);
parserOnlyPipeline.annotate(annotation);
assertEquals(expectedResult, sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString());
ParserConstraint constraint = new ParserConstraint(0, 2, "SBAR|SBAR[^a-zA-Z].*");
List<ParserConstraint> constraints = new ArrayList<ParserConstraint>();
constraints.add(constraint);
sentence.set(ConstraintAnnotation.class, constraints);
parserOnlyPipeline.annotate(annotation);
String result = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString();
assertFalse("Tree should not match the original tree any more", expectedResult.equals(result));
assertTrue("Tree should be forced to contain SBAR", result.indexOf("SBAR") >= 0);
}
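The fixtures noParserPipeline and parserOnlyPipeline come from the suite's setup, which this excerpt omits, and ConstraintAnnotation is edu.stanford.nlp.parser.common.ParserAnnotations.ConstraintAnnotation. A plausible reconstruction of the setup follows; the property values are assumptions, not copied from the fixture.
import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
// Sketch: annotate up to POS tags first, then run a parse-only pipeline
// after a ConstraintAnnotation has been set on the sentence in between.
Properties noParserProps = new Properties();
noParserProps.setProperty("annotators", "tokenize,ssplit,pos,lemma");
StanfordCoreNLP noParserPipeline = new StanfordCoreNLP(noParserProps);
Properties parserOnlyProps = new Properties();
parserOnlyProps.setProperty("annotators", "parse");
parserOnlyProps.setProperty("enforceRequirements", "false");  // don't re-run the prerequisite annotators
StanfordCoreNLP parserOnlyPipeline = new StanfordCoreNLP(parserOnlyProps);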
Use of edu.stanford.nlp.parser.common.ParserConstraint in project CoreNLP by stanfordnlp.
From class GenericDataSetReader, method findSyntacticHead:
/**
* Finds the syntactic head of the given entity mention.
*
* @param ent The entity mention
* @param root The Tree for the entire sentence in which it occurs.
* @param tokens The Sentence in which it occurs
* @return The tree object corresponding to the head. This MUST be a child of root.
* It will be a leaf in the parse tree.
*/
public Tree findSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
if (!useNewHeadFinder) {
return originalFindSyntacticHead(ent, root, tokens);
}
logger.fine("Searching for tree matching " + ent);
Tree exactMatch = findTreeWithSpan(root, ent.getExtentTokenStart(), ent.getExtentTokenEnd());
// found an exact match
if (exactMatch != null) {
logger.fine("Mention \"" + ent + "\" mapped to tree: " + printTree(exactMatch));
return safeHead(exactMatch);
}
// no exact match found
// in this case, we parse the actual extent of the mention, embedded in a sentence
// context, so as to make the parser work better :-)
int approximateness = 0;
List<CoreLabel> extentTokens = new ArrayList<>();
extentTokens.add(initCoreLabel("It"));
extentTokens.add(initCoreLabel("was"));
final int ADDED_WORDS = 2;
for (int i = ent.getExtentTokenStart(); i < ent.getExtentTokenEnd(); i++) {
// Add everything except separated dashes! The separated dashes mess with the parser too badly.
CoreLabel label = tokens.get(i);
if (!"-".equals(label.word())) {
extentTokens.add(tokens.get(i));
} else {
approximateness++;
}
}
extentTokens.add(initCoreLabel("."));
// constrain the parse to the part we're interested in.
// Starting from ADDED_WORDS comes from skipping "It was".
// -1 to exclude the period.
// We now let it be any kind of nominal constituent, since there
// are VP and S ones
ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, ".*");
List<ParserConstraint> constraints = Collections.singletonList(constraint);
Tree tree = parse(extentTokens, constraints);
logger.fine("No exact match found. Local parse:\n" + tree.pennString());
convertToCoreLabels(tree);
// remember it has ADDED_WORDS extra words at the beginning
tree.indexSpans(ent.getExtentTokenStart() - ADDED_WORDS);
Tree subtree = findPartialSpan(tree, ent.getExtentTokenStart());
Tree extentHead = safeHead(subtree);
logger.fine("Head is: " + extentHead);
assert (extentHead != null);
// extentHead is a child in the local extent parse tree. We need to find the corresponding node in the main tree
// Because we deleted dashes, its index will be >= the index in the extent parse tree
CoreLabel l = (CoreLabel) extentHead.label();
// Tree realHead = findTreeWithSpan(root, l.get(CoreAnnotations.BeginIndexAnnotation.class), l.get(CoreAnnotations.EndIndexAnnotation.class));
Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
if (realHead != null)
logger.fine("Chosen head: " + realHead);
return realHead;
}
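A worked example of the index arithmetic, with hypothetical numbers: suppose the mention "the big dog" covers the half-open token span 5..8 of the original sentence.
// extentTokens = [It, was, the, big, dog, .]  (size 6), ADDED_WORDS = 2
// The constraint covers ADDED_WORDS .. size - 1, i.e. tokens 2..4, "the big dog":
ParserConstraint c = new ParserConstraint(2, 5, ".*");
// tree.indexSpans(5 - ADDED_WORDS) numbers "It" as 3 and "was" as 4, so "the"
// gets span index 5 again -- the same index it has in the main tree, which is
// what lets funkyFindLeafWithApproximateSpan locate the head there.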
Use of edu.stanford.nlp.parser.common.ParserConstraint in project CoreNLP by stanfordnlp.
From class ExhaustivePCFGParser, method doInsideChartCell:
private void doInsideChartCell(final int diff, final int start) {
final boolean lengthNormalization = op.testOptions.lengthNormalization;
if (spillGuts) {
tick("Binaries for span " + diff + " start " + start + " ...");
}
int end = start + diff;
final List<ParserConstraint> constraints = getConstraints();
if (constraints != null) {
for (ParserConstraint c : constraints) {
if ((start > c.start && start < c.end && end > c.end) || (end > c.start && end < c.end && start < c.start)) {
return;
}
}
}
// 2011-11-26 jdk1.6: caching/hoisting a bunch of variables gives you about 15% speed up!
// caching this saves a bit of time in the inner loop, maybe 1.8%
int[] narrowRExtent_start = narrowRExtent[start];
// caching this saved 2% in the inner loop
int[] wideRExtent_start = wideRExtent[start];
int[] narrowLExtent_end = narrowLExtent[end];
int[] wideLExtent_end = wideLExtent[end];
float[][] iScore_start = iScore[start];
float[] iScore_start_end = iScore_start[end];
for (int leftState = 0; leftState < numStates; leftState++) {
int narrowR = narrowRExtent_start[leftState];
if (narrowR >= end) {
// can this left constituent leave space for a right constituent?
continue;
}
BinaryRule[] leftRules = bg.splitRulesWithLC(leftState);
// if (spillGuts) System.out.println("Found " + leftRules.length + " left rules for state " + stateIndex.get(leftState));
for (BinaryRule rule : leftRules) {
int rightChild = rule.rightChild;
int narrowL = narrowLExtent_end[rightChild];
if (narrowL < narrowR) {
// can this right constituent fit next to the left constituent?
continue;
}
int min2 = wideLExtent_end[rightChild];
int min = (narrowR > min2 ? narrowR : min2);
// Erik Frey 2009-12-17: This is unnecessary: narrowR is <= narrowL (established in previous check) and wideLExtent[e][r] is always <= narrowLExtent[e][r] by design, so the check will never evaluate true.
// if (min > narrowL) { // can this right constituent stretch far enough to reach the left constituent?
// continue;
// }
int max1 = wideRExtent_start[leftState];
int max = (max1 < narrowL ? max1 : narrowL);
if (min > max) {
// can this left constituent stretch far enough to reach the right constituent?
continue;
}
float pS = rule.score;
int parentState = rule.parent;
float oldIScore = iScore_start_end[parentState];
float bestIScore = oldIScore;
// always set below for this rule
boolean foundBetter;
if (!lengthNormalization) {
// find the split that can use this rule to make the max score
for (int split = min; split <= max; split++) {
if (constraints != null) {
boolean skip = false;
for (ParserConstraint c : constraints) {
if (((start < c.start && end >= c.end) || (start <= c.start && end > c.end)) && split > c.start && split < c.end) {
skip = true;
break;
}
if ((start == c.start && split == c.end)) {
String tag = stateIndex.get(leftState);
Matcher m = c.state.matcher(tag);
if (!m.matches()) {
skip = true;
break;
}
}
if ((split == c.start && end == c.end)) {
String tag = stateIndex.get(rightChild);
Matcher m = c.state.matcher(tag);
if (!m.matches()) {
skip = true;
break;
}
}
}
if (skip) {
continue;
}
}
float lS = iScore_start[split][leftState];
if (lS == Float.NEGATIVE_INFINITY) {
continue;
}
float rS = iScore[split][end][rightChild];
if (rS == Float.NEGATIVE_INFINITY) {
continue;
}
float tot = pS + lS + rS;
if (spillGuts) {
log.info("Rule " + rule + " over [" + start + "," + end + ") has log score " + tot + " from L[" + stateIndex.get(leftState) + "=" + leftState + "] = " + lS + " R[" + stateIndex.get(rightChild) + "=" + rightChild + "] = " + rS);
}
if (tot > bestIScore) {
bestIScore = tot;
}
}
// for split point
foundBetter = bestIScore > oldIScore;
} else {
// find split that uses this rule to make the max *length normalized* score
int bestWordsInSpan = wordsInSpan[start][end][parentState];
float oldNormIScore = oldIScore / bestWordsInSpan;
float bestNormIScore = oldNormIScore;
for (int split = min; split <= max; split++) {
float lS = iScore_start[split][leftState];
if (lS == Float.NEGATIVE_INFINITY) {
continue;
}
float rS = iScore[split][end][rightChild];
if (rS == Float.NEGATIVE_INFINITY) {
continue;
}
float tot = pS + lS + rS;
int newWordsInSpan = wordsInSpan[start][split][leftState] + wordsInSpan[split][end][rightChild];
float normTot = tot / newWordsInSpan;
if (normTot > bestNormIScore) {
bestIScore = tot;
bestNormIScore = normTot;
bestWordsInSpan = newWordsInSpan;
}
}
// for split point
foundBetter = bestNormIScore > oldNormIScore;
if (foundBetter) {
wordsInSpan[start][end][parentState] = bestWordsInSpan;
}
}
// fi op.testOptions.lengthNormalization
if (foundBetter) {
// this way of making "parentState" is better than previous
iScore_start_end[parentState] = bestIScore;
if (spillGuts)
log.info("Could build " + stateIndex.get(parentState) + " from " + start + " to " + end + " score " + bestIScore);
if (oldIScore == Float.NEGATIVE_INFINITY) {
if (start > narrowLExtent_end[parentState]) {
narrowLExtent_end[parentState] = wideLExtent_end[parentState] = start;
} else if (start < wideLExtent_end[parentState]) {
wideLExtent_end[parentState] = start;
}
if (end < narrowRExtent_start[parentState]) {
narrowRExtent_start[parentState] = wideRExtent_start[parentState] = end;
} else if (end > wideRExtent_start[parentState]) {
wideRExtent_start[parentState] = end;
}
}
}
// end if foundBetter
}
// end for leftRules
}
// do right restricted rules
for (int rightState = 0; rightState < numStates; rightState++) {
int narrowL = narrowLExtent_end[rightState];
if (narrowL <= start) {
continue;
}
BinaryRule[] rightRules = bg.splitRulesWithRC(rightState);
// if (spillGuts) System.out.println("Found " + rightRules.length + " right rules for state " + stateIndex.get(rightState));
for (BinaryRule rule : rightRules) {
// if (spillGuts) System.out.println("Considering rule for " + start + " to " + end + ": " + rightRules[i]);
int leftChild = rule.leftChild;
int narrowR = narrowRExtent_start[leftChild];
if (narrowR > narrowL) {
continue;
}
int min2 = wideLExtent_end[rightState];
int min = (narrowR > min2 ? narrowR : min2);
// Erik Frey 2009-12-17: This is unnecessary: narrowR is <= narrowL (established in previous check) and wideLExtent[e][r] is always <= narrowLExtent[e][r] by design, so the check will never evaluate true.
// if (min > narrowL) {
// continue;
// }
int max1 = wideRExtent_start[leftChild];
int max = (max1 < narrowL ? max1 : narrowL);
if (min > max) {
continue;
}
float pS = rule.score;
int parentState = rule.parent;
float oldIScore = iScore_start_end[parentState];
float bestIScore = oldIScore;
// always initialized below
boolean foundBetter;
//System.out.println("Start "+start+" end "+end+" min "+min+" max "+max);
if (!lengthNormalization) {
// find the split that can use this rule to make the max score
for (int split = min; split <= max; split++) {
if (constraints != null) {
boolean skip = false;
for (ParserConstraint c : constraints) {
if (((start < c.start && end >= c.end) || (start <= c.start && end > c.end)) && split > c.start && split < c.end) {
skip = true;
break;
}
if ((start == c.start && split == c.end)) {
String tag = stateIndex.get(leftChild);
Matcher m = c.state.matcher(tag);
if (!m.matches()) {
//if (!tag.startsWith(c.state+"^")) {
skip = true;
break;
}
}
if ((split == c.start && end == c.end)) {
String tag = stateIndex.get(rightState);
Matcher m = c.state.matcher(tag);
if (!m.matches()) {
//if (!tag.startsWith(c.state+"^")) {
skip = true;
break;
}
}
}
if (skip) {
continue;
}
}
float lS = iScore_start[split][leftChild];
// jab [2014]: oddly enough, removing these tests helps the chinese parser but not the english parser.
if (lS == Float.NEGATIVE_INFINITY) {
continue;
}
float rS = iScore[split][end][rightState];
if (rS == Float.NEGATIVE_INFINITY) {
continue;
}
float tot = pS + lS + rS;
if (tot > bestIScore) {
bestIScore = tot;
}
}
// end for split
foundBetter = bestIScore > oldIScore;
} else {
// find split that uses this rule to make the max *length normalized* score
int bestWordsInSpan = wordsInSpan[start][end][parentState];
float oldNormIScore = oldIScore / bestWordsInSpan;
float bestNormIScore = oldNormIScore;
for (int split = min; split <= max; split++) {
float lS = iScore_start[split][leftChild];
if (lS == Float.NEGATIVE_INFINITY) {
continue;
}
float rS = iScore[split][end][rightState];
if (rS == Float.NEGATIVE_INFINITY) {
continue;
}
float tot = pS + lS + rS;
int newWordsInSpan = wordsInSpan[start][split][leftChild] + wordsInSpan[split][end][rightState];
float normTot = tot / newWordsInSpan;
if (normTot > bestNormIScore) {
bestIScore = tot;
bestNormIScore = normTot;
bestWordsInSpan = newWordsInSpan;
}
}
// end for split
foundBetter = bestNormIScore > oldNormIScore;
if (foundBetter) {
wordsInSpan[start][end][parentState] = bestWordsInSpan;
}
}
// end if lengthNormalization
if (foundBetter) {
// this way of making "parentState" is better than previous
iScore_start_end[parentState] = bestIScore;
if (spillGuts)
log.info("Could build " + stateIndex.get(parentState) + " from " + start + " to " + end + " with score " + bestIScore);
if (oldIScore == Float.NEGATIVE_INFINITY) {
if (start > narrowLExtent_end[parentState]) {
narrowLExtent_end[parentState] = wideLExtent_end[parentState] = start;
} else if (start < wideLExtent_end[parentState]) {
wideLExtent_end[parentState] = start;
}
if (end < narrowRExtent_start[parentState]) {
narrowRExtent_start[parentState] = wideRExtent_start[parentState] = end;
} else if (end > wideRExtent_start[parentState]) {
wideRExtent_start[parentState] = end;
}
}
}
// end if foundBetter
}
// for rightRules
}
// for rightState
if (spillGuts) {
tick("Unaries for span " + diff + "...");
}
// do unary rules -- one could promote this loop and put start inside
for (int state = 0; state < numStates; state++) {
float iS = iScore_start_end[state];
if (iS == Float.NEGATIVE_INFINITY) {
continue;
}
UnaryRule[] unaries = ug.closedRulesByChild(state);
for (UnaryRule ur : unaries) {
if (constraints != null) {
boolean skip = false;
for (ParserConstraint c : constraints) {
if ((start == c.start && end == c.end)) {
String tag = stateIndex.get(ur.parent);
Matcher m = c.state.matcher(tag);
if (!m.matches()) {
//if (!tag.startsWith(c.state+"^")) {
skip = true;
break;
}
}
}
if (skip) {
continue;
}
}
int parentState = ur.parent;
float pS = ur.score;
float tot = iS + pS;
float cur = iScore_start_end[parentState];
// always set below
boolean foundBetter;
if (lengthNormalization) {
int totWordsInSpan = wordsInSpan[start][end][state];
float normTot = tot / totWordsInSpan;
int curWordsInSpan = wordsInSpan[start][end][parentState];
float normCur = cur / curWordsInSpan;
foundBetter = normTot > normCur;
if (foundBetter) {
wordsInSpan[start][end][parentState] = wordsInSpan[start][end][state];
}
} else {
foundBetter = (tot > cur);
}
if (foundBetter) {
if (spillGuts)
log.info("Could build " + stateIndex.get(parentState) + " from " + start + " to " + end + " with score " + tot);
iScore_start_end[parentState] = tot;
if (cur == Float.NEGATIVE_INFINITY) {
if (start > narrowLExtent_end[parentState]) {
narrowLExtent_end[parentState] = wideLExtent_end[parentState] = start;
} else if (start < wideLExtent_end[parentState]) {
wideLExtent_end[parentState] = start;
}
if (end < narrowRExtent_start[parentState]) {
narrowRExtent_start[parentState] = wideRExtent_start[parentState] = end;
} else if (end > wideRExtent_start[parentState]) {
wideRExtent_start[parentState] = end;
}
}
}
// end if foundBetter
}
// for UnaryRule r
}
// for unary rules
}
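The cell- and split-level pruning above is internal machinery; from the outside, constraints reach ExhaustivePCFGParser through the ParserQuery of a LexicalizedParser. A minimal driver sketch follows; the model path is the stock distribution path, so adjust it to your setup.
import java.util.Collections;
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.common.ParserConstraint;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
// Minimal sketch of constrained PCFG parsing from user code.
LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
List<CoreLabel> tokens = SentenceUtils.toCoreLabelList("My", "dog", "also", "likes", "eating", "sausage", ".");
ParserQuery pq = lp.parserQuery();
pq.setConstraints(Collections.singletonList(new ParserConstraint(0, 2, "NP")));
if (pq.parse(tokens)) {
  Tree best = pq.getBestParse();  // tokens 0..1, "My dog", will form an NP constituent
}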