use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method processComplex2WP.
/**
 * Processes all the two-word prepositions in TWO_WORD_PREPS_COMPLEX.
 *
 * <p>For every token index recorded in {@code bigrams} for one of these
 * prepositions, the nodes at that index (w1) and the following index (w2)
 * are looked up and matched against TWO_WORD_PREPS_COMPLEX_PATTERN. When a
 * match binds {@code gov2} and an edge {@code w1 -> gov2} exists, the graph
 * is rewritten so that {@code gov2} takes w1's place (as root, or as a
 * dependent of w1's governor), the remaining children of w1 are moved under
 * {@code gov2}, and w1/w2 are passed to {@code createMultiWordExpression}
 * with the CASE_MARKER relation.
 *
 * <p>All preconditions are checked before the graph is touched, so a
 * candidate that cannot be fully rewritten is skipped without leaving the
 * graph partially modified.
 *
 * @param sg The semantic graph; modified in place.
 * @param bigrams Maps a bigram to the indices of its first token.
 */
private static void processComplex2WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> bigrams) {
  for (String bigram : TWO_WORD_PREPS_COMPLEX) {
    HashSet<Integer> positions = bigrams.get(bigram);
    if (positions == null) {
      continue;
    }
    for (Integer i : positions) {
      IndexedWord w1 = sg.getNodeByIndexSafe(i);
      IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);
      if (w1 == null || w2 == null) {
        continue;
      }

      /* A fresh matcher is needed per position because earlier iterations
       * may already have changed the graph. */
      SemgrexMatcher matcher = TWO_WORD_PREPS_COMPLEX_PATTERN.matcher(sg);
      IndexedWord gov = null;
      IndexedWord gov2 = null;
      while (matcher.find()) {
        if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2"))) {
          gov = matcher.getNode("gov");
          gov2 = matcher.getNode("gov2");
          break;
        }
      }
      if (gov2 == null) {
        continue;
      }

      SemanticGraphEdge edge = sg.getEdge(w1, gov2);
      if (edge == null) {
        continue;
      }

      /* Resolve the governor BEFORE mutating the graph. Bailing out after
       * the edge has been removed would leave gov2 detached. */
      boolean w1IsRoot = sg.getRoots().contains(w1);
      if (!w1IsRoot) {
        if (gov == null) {
          gov = sg.getParent(w1);
        }
        if (gov == null) {
          continue;
        }
      }

      /* Attach the head of the prepositional phrase to
       * the head of w1. */
      sg.removeEdge(edge);
      if (w1IsRoot) {
        sg.getRoots().remove(w1);
        sg.addRoot(gov2);
      } else {
        /* Determine the relation to use. If it is a relation that can
         * join two clauses and w1 is the head of a copular construction, then
         * use the relation of w1 and its parent. Otherwise use the relation of edge. */
        GrammaticalRelation reln = edge.getRelation();
        if (sg.hasChildWithReln(w1, COPULA)) {
          /* There may be no direct gov -> w1 edge; in that case keep the
           * relation of the removed edge rather than failing. */
          SemanticGraphEdge govEdge = sg.getEdge(gov, w1);
          if (govEdge != null && clauseRelations.contains(govEdge.getRelation())) {
            reln = govEdge.getRelation();
          }
        }
        sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
      }

      /* Make children of w1 dependents of gov2. */
      for (SemanticGraphEdge edge2 : sg.getOutEdgesSorted(w1)) {
        sg.removeEdge(edge2);
        sg.addEdge(gov2, edge2.getDependent(), edge2.getRelation(), edge2.getWeight(), edge2.isExtra());
      }
      createMultiWordExpression(sg, gov2, CASE_MARKER, w1, w2);
    }
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method processNames.
/**
 * Looks for NPs that should have the {@code name} relation and
 * a) changes the structure such that the leftmost token becomes the head
 * b) changes the relation from {@code compound} to {@code name}.
 *
 * <p>Does nothing when {@code USE_NAME} is off, or when the first root of
 * the graph is missing or carries no NER annotation. Matching runs on a
 * soft copy of the graph while the rewriting itself is applied to
 * {@code sg}; the copy is refreshed after each pattern that produced a head.
 *
 * Requires NER tags.
 *
 * @param sg A semantic graph.
 */
private static void processNames(SemanticGraph sg) {
  if (!USE_NAME) {
    return;
  }

  // NER availability is judged from the first root token only.
  IndexedWord firstRoot = sg.getFirstRoot();
  boolean nerAvailable = firstRoot != null
      && firstRoot.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class);
  if (!nerAvailable) {
    return;
  }

  SemanticGraph snapshot = sg.makeSoftCopy();
  for (SemgrexPattern namePattern : NAME_PATTERNS) {
    SemgrexMatcher nameMatcher = namePattern.matcher(snapshot);
    IndexedWord currentHead = null;
    List<IndexedWord> parts = new ArrayList<>();

    while (nameMatcher.find()) {
      IndexedWord first = nameMatcher.getNode("w1");
      IndexedWord second = nameMatcher.getNode("w2");

      // A different w1 node (by identity) starts a new name; flush the previous one.
      if (currentHead != first) {
        if (currentHead != null) {
          processNamesHelper(sg, currentHead, parts);
          parts = new ArrayList<>();
        }
        currentHead = first;
      }

      // Only tokens sharing the head's NER tag belong to the name.
      if (second.ner().equals(first.ner())) {
        parts.add(second);
      }
    }

    if (currentHead == null) {
      continue;
    }
    // Flush the last name and re-snapshot so the next pattern sees the rewritten graph.
    processNamesHelper(sg, currentHead, parts);
    snapshot = sg.makeSoftCopy();
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class NaturalLogicWeightsITest method mkSegment.
/**
 * Builds a star-shaped segment: one governor vertex created from {@code root}
 * and one outgoing edge per entry of {@code outEdges}.
 *
 * @param root Passed to {@code mockWord} to create the governor vertex.
 * @param outEdges Pairs whose first element names the English grammatical
 *                 relation and whose second element is passed to
 *                 {@code mockWord} to create the dependent vertex.
 * @return A pair of the first created edge and the list of all created edges.
 */
private Pair<SemanticGraphEdge, List<SemanticGraphEdge>> mkSegment(String root, Pair<String, String>... outEdges) {
  IndexedWord governor = new IndexedWord(mockWord(root));
  List<SemanticGraphEdge> segment = Arrays.stream(outEdges)
      .map(spec -> {
        IndexedWord dependent = new IndexedWord(mockWord(spec.second));
        GrammaticalRelation relation = GrammaticalRelation.valueOf(Language.English, spec.first);
        return new SemanticGraphEdge(governor, dependent, relation, Double.NEGATIVE_INFINITY, false);
      })
      .collect(Collectors.toList());
  return Pair.makePair(segment.get(0), segment);
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class DependencyIndexITest method checkTree.
/**
 * Checks that the tokens "Mary had a little lamb ." carry the indices 1..6,
 * first on the CoreLabel leaves of {@code tree} and then on every node that
 * appears in the CC-processed typed dependencies built from the tree.
 * Leaves whose label is not a CoreLabel are ignored.
 *
 * @param tree The parse tree to check.
 */
private static void checkTree(Tree tree) {
  // Pass 1: indices stored on the leaves themselves.
  for (Tree leaf : tree.getLeaves()) {
    if (!(leaf.label() instanceof CoreLabel)) {
      continue;
    }
    CoreLabel label = (CoreLabel) leaf.label();
    int leafIndex = label.get(CoreAnnotations.IndexAnnotation.class);
    String leafText = label.get(CoreAnnotations.TextAnnotation.class);
    assertExpectedIndex(leafText, leafIndex);
  }

  // Pass 2: indices on the nodes of the dependency structure.
  TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
  GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
  GrammaticalStructure structure = factory.newGrammaticalStructure(tree);
  Collection<TypedDependency> dependencies =
      structure.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);

  // Collect governors and dependents into one set so each node is checked once.
  Set<IndexedWord> seenNodes = Generics.newHashSet();
  for (TypedDependency dependency : dependencies) {
    seenNodes.add(dependency.gov());
    seenNodes.add(dependency.dep());
  }

  for (IndexedWord node : seenNodes) {
    String nodeText = node.value();
    int nodeIndex = node.get(CoreAnnotations.IndexAnnotation.class);
    assertExpectedIndex(nodeText, nodeIndex);
  }
}

/** Asserts the expected index for a known token text; unknown texts are not checked. */
private static void assertExpectedIndex(String text, int index) {
  switch (text) {
    case "Mary":
      assertEquals(1, index);
      break;
    case "had":
      assertEquals(2, index);
      break;
    case "a":
      assertEquals(3, index);
      break;
    case "little":
      assertEquals(4, index);
      break;
    case "lamb":
      assertEquals(5, index);
      break;
    case ".":
      assertEquals(6, index);
      break;
    default:
      break;
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class SemanticGraph method toReadableString.
/**
 * Renders this graph as a three-column text table (dep, reln, gov).
 * Each column is left-aligned and padded to at least 20 characters. After
 * the two header rows, one row is written per root (with "root" as both
 * relation and governor), followed by one row per edge in the order given
 * by {@code edgeListSorted()}.
 *
 * @return The table as a string, one row per line.
 */
private String toReadableString() {
  final String rowFormat = "%-20s%-20s%-20s%n";
  final StringBuilder table = new StringBuilder();

  table.append(String.format(rowFormat, "dep", "reln", "gov"));
  table.append(String.format(rowFormat, "---", "----", "---"));

  for (IndexedWord rootNode : getRoots()) {
    String dependent = rootNode.toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX);
    table.append(String.format(rowFormat, dependent, "root", "root"));
  }

  for (SemanticGraphEdge e : this.edgeListSorted()) {
    String dependent = e.getTarget().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX);
    String relation = e.getRelation().toString();
    String governor = e.getSource().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX);
    table.append(String.format(rowFormat, dependent, relation, governor));
  }

  return table.toString();
}
Aggregations