Use of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp:
the segment method of the RelationTripleSegmenter class.
/**
 * <p>
 * Try to segment this sentence as a relation triple.
 * This sentence must already match one of a few strict patterns for a valid OpenIE extraction.
 * If it does not, then no relation triple is created.
 * That is, this is <b>not</b> a relation extractor; it is just a utility to segment what is already a
 * (subject, relation, object) triple into these three parts.
 * </p>
 *
 * <p>
 * This method will attempt to use both the verb-centric patterns and the ACL-centric patterns.
 * </p>
 *
 * @param parse The sentence to process, as a dependency tree.
 * @param confidence An optional confidence to pass on to the relation triple.
 * @param consumeAll if true, force the entire parse to be consumed by the pattern.
 * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple.
 */
public Optional<RelationTriple> segment(SemanticGraph parse, Optional<Double> confidence, boolean consumeAll) {
  // Work on a copy so the caller's graph is left untouched.
  parse = new SemanticGraph(parse);
  // Special case "there is <something>". Arguably this is a job for the clause splitter,
  // but the <something> is sometimes not _really_ its own clause.
  IndexedWord root = parse.getFirstRoot();
  boolean rootIsBe =
      (root.lemma() != null && root.lemma().equalsIgnoreCase("be")) ||
      (root.lemma() == null &&
          ("is".equalsIgnoreCase(root.word()) ||
           "are".equalsIgnoreCase(root.word()) ||
           "were".equalsIgnoreCase(root.word()) ||
           "be".equalsIgnoreCase(root.word())));
  if (rootIsBe) {
    // Look for the "there is" construction hanging off the copular root.
    boolean sawExpletiveThere = false;   // saw an expl arc to the word "there"
    boolean sawUnexpectedArc = false;    // too much other material hangs off the root
    Optional<SemanticGraphEdge> subjectEdge = Optional.empty();
    for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(root)) {
      String relation = edge.getRelation().toString();
      if (relation.equals("expl") && edge.getDependent().word().equalsIgnoreCase("there")) {
        sawExpletiveThere = true;
      } else if (relation.equals("nsubj")) {
        subjectEdge = Optional.of(edge);
      } else {
        sawUnexpectedArc = true;
      }
    }
    // Split off "there is" by re-rooting the parse at the subject.
    if (sawExpletiveThere && subjectEdge.isPresent() && !sawUnexpectedArc) {
      ClauseSplitterSearchProblem.splitToChildOfEdge(parse, subjectEdge.get());
    }
  }
  // Run the verb-centric patterns first; fall back to the ACL-centric patterns.
  Optional<RelationTriple> extraction = segmentVerb(parse, confidence, consumeAll);
  if (!extraction.isPresent()) {
    extraction = segmentACL(parse, confidence, consumeAll);
  }
  // Discard the extraction if every one of its tokens carries a downward polarity annotation.
  if (extraction.isPresent()) {
    boolean allDownwardPolarity = true;
    for (CoreLabel token : extraction.get()) {
      if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null ||
          !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
        allDownwardPolarity = false;
        break;
      }
    }
    if (allDownwardPolarity) {
      return Optional.empty();
    }
  }
  // Return whatever the patterns produced (possibly empty).
  return extraction;
}
Use of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp:
the paddedWords method of the SentenceFragment class.
/**
 * Return the tokens in this fragment, but padded with null so that the index in this
 * sentence matches the index of the parse tree.
 *
 * @return A list in which element {@code i} is the token with (1-based) index {@code i + 1},
 *         or {@code null} if this fragment contains no token with that index.
 */
public List<CoreLabel> paddedWords() {
  // Size the list by the largest index seen in EITHER the parse tree or this fragment's
  // own words. The original only consulted the parse tree, which could throw an
  // IndexOutOfBoundsException below if a word's index exceeded every tree vertex's index
  // (the two collections are maintained separately).
  int maxIndex = -1;
  for (IndexedWord vertex : parseTree.vertexSet()) {
    maxIndex = Math.max(maxIndex, vertex.index());
  }
  for (CoreLabel token : this.words) {
    maxIndex = Math.max(maxIndex, token.index());
  }
  // Pre-fill with nulls so every slot up to maxIndex exists.
  List<CoreLabel> tokens = new ArrayList<>(maxIndex);
  for (int i = 0; i < maxIndex; ++i) {
    tokens.add(null);
  }
  // Drop each token into the slot matching its 1-based index.
  for (CoreLabel token : this.words) {
    tokens.set(token.index() - 1, token);
  }
  return tokens;
}
Use of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp:
the cleanTree method of the Util class.
/**
 * Fix some bizarre peculiarities with certain trees.
 * So far, these include:
 * <ul>
 * <li>Sometimes there's a node from a word to itself. This seems wrong.</li>
 * </ul>
 * In addition, this method deletes dangling punctuation vertices, prunes "extra" edges so
 * that every vertex keeps at most one parent, and brute-forces the graph into a tree
 * rooted at its registered roots.
 *
 * @param tree The tree to clean (in place!).
 * @return A list of extra edges, which are valid but were removed.
 */
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
// assert !isCyclic(tree);
// Clean nodes: queue up punctuation vertices that have no children for deletion.
List<IndexedWord> toDelete = new ArrayList<>();
for (IndexedWord vertex : tree.vertexSet()) {
// Clean punctuation
if (vertex.tag() == null) {
continue;
}
// Only the first character of the POS tag is needed to recognize a punctuation tag.
char tag = vertex.backingLabel().tag().charAt(0);
if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') {
if (!tree.outgoingEdgeIterator(vertex).hasNext()) {
// This should really never happen, but it does.
toDelete.add(vertex);
}
}
}
toDelete.forEach(tree::removeVertex);
// Clean edges: remove self-edges (governor index == dependent index), re-homing the
// children of copy nodes onto the surviving vertex.
Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator();
List<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>> toAdd = new ArrayList<>();
toDelete.clear();
while (iter.hasNext()) {
SemanticGraphEdge edge = iter.next();
if (edge.getDependent().index() == edge.getGovernor().index()) {
// Clean up copy-edges: move the copy's outgoing edges to its original before
// deleting the copy vertex.
if (edge.getDependent().isCopy(edge.getGovernor())) {
for (SemanticGraphEdge toCopy : tree.outgoingEdgeIterable(edge.getDependent())) {
toAdd.add(Triple.makeTriple(edge.getGovernor(), toCopy.getDependent(), toCopy));
}
toDelete.add(edge.getDependent());
}
if (edge.getGovernor().isCopy(edge.getDependent())) {
for (SemanticGraphEdge toCopy : tree.outgoingEdgeIterable(edge.getGovernor())) {
toAdd.add(Triple.makeTriple(edge.getDependent(), toCopy.getDependent(), toCopy));
}
toDelete.add(edge.getGovernor());
}
// Clean self-edges
iter.remove();
} else if (edge.getRelation().toString().equals("punct")) {
// Clean punctuation (again), now that some edges/vertices are gone.
if (!tree.outgoingEdgeIterator(edge.getDependent()).hasNext()) {
// This should really never happen, but it does.
iter.remove();
}
}
}
// (delete the copy vertices, then add the edges we wanted to add)
toDelete.forEach(tree::removeVertex);
for (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> edge : toAdd) {
tree.addEdge(edge.first, edge.second, edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
}
// Handle extra edges.
// Two cases:
// (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
// in this case, keep the extra
// (2) otherwise, delete the extra
List<SemanticGraphEdge> extraEdges = new ArrayList<>();
for (SemanticGraphEdge edge : tree.edgeIterable()) {
if (edge.isExtra()) {
List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(edge.getDependent());
// Choose exactly one incoming edge to keep for this dependent.
SemanticGraphEdge toKeep = null;
for (SemanticGraphEdge candidate : incomingEdges) {
if (toKeep == null) {
toKeep = candidate;
} else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
// Case (1): prefer a subj/obj candidate over a kept conj edge.
toKeep = candidate;
} else if (!candidate.isExtra() && !(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
// Case (2): otherwise prefer a non-extra candidate, unless the candidate is a
// conj edge that would clobber a kept subj/obj edge.
toKeep = candidate;
}
}
// Every incoming edge other than the chosen one is scheduled for removal.
for (SemanticGraphEdge candidate : incomingEdges) {
if (candidate != toKeep) {
extraEdges.add(candidate);
}
}
}
}
extraEdges.forEach(tree::removeEdge);
// Add apposition edges (simple coref): for each removed edge touching an apposition,
// record a counterpart edge through the appositive noun in the returned list. Note these
// synthesized edges are added to extraEdges only, not to the tree itself.
for (SemanticGraphEdge extraEdge : new ArrayList<>(extraEdges)) {
// note[gabor] prevent concurrent modification exception
for (SemanticGraphEdge candidateAppos : tree.incomingEdgeIterable(extraEdge.getDependent())) {
if (candidateAppos.getRelation().toString().equals("appos")) {
extraEdges.add(new SemanticGraphEdge(extraEdge.getGovernor(), candidateAppos.getGovernor(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra()));
}
}
for (SemanticGraphEdge candidateAppos : tree.outgoingEdgeIterable(extraEdge.getDependent())) {
if (candidateAppos.getRelation().toString().equals("appos")) {
extraEdges.add(new SemanticGraphEdge(extraEdge.getGovernor(), candidateAppos.getDependent(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra()));
}
}
}
// Brute force ensure tree
// Remove incoming edges from roots
List<SemanticGraphEdge> rootIncomingEdges = new ArrayList<>();
for (IndexedWord root : tree.getRoots()) {
for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(root)) {
rootIncomingEdges.add(incomingEdge);
}
}
rootIncomingEdges.forEach(tree::removeEdge);
// Loop until it becomes a tree: each pass removes dangling non-root vertices and
// trims every vertex down to a single parent; repeat until a fixed point.
boolean changed = true;
while (changed) {
// I just want trees to be trees; is that so much to ask!?
changed = false;
List<IndexedWord> danglingNodes = new ArrayList<>();
List<SemanticGraphEdge> invalidEdges = new ArrayList<>();
for (IndexedWord vertex : tree.vertexSet()) {
// Collect statistics: does this vertex have zero, one, or multiple parents?
Iterator<SemanticGraphEdge> incomingIter = tree.incomingEdgeIterator(vertex);
boolean hasIncoming = incomingIter.hasNext();
boolean hasMultipleIncoming = false;
if (hasIncoming) {
incomingIter.next();
hasMultipleIncoming = incomingIter.hasNext();
}
// Register actions: a parentless non-root is dangling; a multi-parent vertex keeps
// only its first incoming edge (the iterator is already past it).
if (!hasIncoming && !tree.getRoots().contains(vertex)) {
danglingNodes.add(vertex);
} else {
if (hasMultipleIncoming) {
for (SemanticGraphEdge edge : new IterableIterator<>(incomingIter)) {
invalidEdges.add(edge);
}
}
}
}
// Perform actions (outside the vertex iteration, to avoid concurrent modification).
for (IndexedWord vertex : danglingNodes) {
tree.removeVertex(vertex);
changed = true;
}
for (SemanticGraphEdge edge : invalidEdges) {
tree.removeEdge(edge);
changed = true;
}
}
// This is a common parse error: a duplicated dobj where one copy points at "that".
for (IndexedWord vertex : tree.vertexSet()) {
SemanticGraphEdge thatEdge = null;
int dobjCount = 0;
for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(vertex)) {
if ("that".equalsIgnoreCase(edge.getDependent().word())) {
thatEdge = edge;
}
if ("dobj".equals(edge.getRelation().toString())) {
dobjCount += 1;
}
}
if (dobjCount > 1 && thatEdge != null) {
// Case: there are two dobj edges, one of which goes to the word "that"
// Action: rewrite the dobj edge to "that" to be a "mark" edge.
tree.removeEdge(thatEdge);
tree.addEdge(thatEdge.getGovernor(), thatEdge.getDependent(), GrammaticalRelation.valueOf(thatEdge.getRelation().getLanguage(), "mark"), thatEdge.getWeight(), thatEdge.isExtra());
}
}
// Return the removed-but-valid edges collected above.
assert isTree(tree);
return extraEdges;
}
Use of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp:
the isCyclic method of the Util class.
/**
 * Determine if a tree is cyclic.
 *
 * @param tree The tree to check.
 * @return True if the tree has at least one cycle in it.
 */
public static boolean isCyclic(SemanticGraph tree) {
  for (IndexedWord start : tree.vertexSet()) {
    // Roots have no governor to walk up from.
    if (tree.getRoots().contains(start)) {
      continue;
    }
    // Walk up the chain of governors from this vertex; revisiting any node means
    // we have found a cycle.
    Set<IndexedWord> visited = new HashSet<>();
    visited.add(start);
    IndexedWord current = tree.incomingEdgeIterator(start).next().getGovernor();
    while (current != null) {
      // Set.add returns false when the element was already present.
      if (!visited.add(current)) {
        return true;
      }
      Iterator<SemanticGraphEdge> up = tree.incomingEdgeIterator(current);
      current = up.hasNext() ? up.next().getGovernor() : null;
    }
  }
  return false;
}
Use of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp:
the isTree method of the Util class.
/**
 * A little utility function to make sure a SemanticGraph is a tree.
 *
 * @param tree The tree to check.
 * @return True if this {@link edu.stanford.nlp.semgraph.SemanticGraph} is a tree (versus a DAG, or Graph).
 */
public static boolean isTree(SemanticGraph tree) {
  for (IndexedWord vertex : tree.vertexSet()) {
    // A root must have no incoming edges; every other vertex must have exactly one.
    Iterator<SemanticGraphEdge> incoming = tree.incomingEdgeIterator(vertex);
    if (tree.getRoots().contains(vertex)) {
      if (incoming.hasNext()) {
        return false;
      }
    } else {
      if (!incoming.hasNext()) {
        return false;
      }
      incoming.next();
      if (incoming.hasNext()) {
        return false;
      }
    }
    // The incoming and outgoing edge sets must mirror each other (reference identity).
    for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(vertex)) {
      if (!containsByIdentity(tree.incomingEdgeIterable(edge.getDependent()), edge)) {
        return false;
      }
    }
    for (SemanticGraphEdge edge : tree.incomingEdgeIterable(vertex)) {
      if (!containsByIdentity(tree.outgoingEdgeIterable(edge.getGovernor()), edge)) {
        return false;
      }
    }
  }
  // Finally, a tree may not contain cycles.
  return !isCyclic(tree);
}

/**
 * Check whether {@code edges} contains {@code target}, compared by reference identity.
 *
 * @param edges The edges to search through.
 * @param target The edge to look for.
 * @return True if the exact {@code target} object appears in {@code edges}.
 */
private static boolean containsByIdentity(Iterable<SemanticGraphEdge> edges, SemanticGraphEdge target) {
  for (SemanticGraphEdge candidate : edges) {
    if (candidate == target) {
      return true;
    }
  }
  return false;
}
Aggregations