use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class CoNLLUDocumentWriter method printSemanticGraph.
/**
 * Renders a semantic graph as one CoNLL-U sentence block.
 *
 * <p>The output consists of the graph's comment lines, followed by one tab-separated,
 * ten-column line per vertex (in {@code vertexListSorted()} order), followed by an empty
 * line. Columns are: index, word, lemma, coarse tag, POS tag, features, governor index,
 * relation, additional dependencies, misc. A token whose CoNLLUTokenSpanAnnotation starts
 * at the token's own index is preceded by a {@code start-end} range line carrying the
 * token's original text.
 *
 * <p>Every line is terminated by a single LF and integers are written with ASCII digits,
 * independent of the platform line separator and the default locale.
 *
 * @param sg the graph to print
 * @param unescapeParenthesis if true, matches of LRB_PATTERN / RRB_PATTERN in the word and
 *     lemma columns are replaced by "(" and ")" respectively
 * @return the CoNLL-U text of the sentence, including the trailing empty line
 */
public String printSemanticGraph(SemanticGraph sg, boolean unescapeParenthesis) {
  StringBuilder sb = new StringBuilder();
  /* Print comments. */
  for (String comment : sg.getComments()) {
    sb.append(comment).append('\n');
  }
  for (IndexedWord token : sg.vertexListSorted()) {
    /* Check for multiword tokens: only the first token of a span emits the range line. */
    if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
      IntPair tokenSpan = token.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
      if (tokenSpan.getSource() == token.index()) {
        sb.append(tokenSpan.getSource()).append('-').append(tokenSpan.getTarget())
            .append('\t').append(token.originalText())
            .append("\t_\t_\t_\t_\t_\t_\t_\t_\n");
      }
    }
    /* Try to find main governor and additional dependencies. */
    int govIdx = -1;
    GrammaticalRelation reln = null;
    HashMap<Integer, String> additionalDeps = new HashMap<>();
    for (IndexedWord parent : sg.getParents(token)) {
      SemanticGraphEdge edge = sg.getEdge(parent, token);
      if (govIdx == -1 && !edge.isExtra()) {
        /* The first non-extra incoming edge becomes the main dependency. */
        govIdx = parent.index();
        reln = edge.getRelation();
      } else {
        additionalDeps.put(parent.index(), edge.getRelation().toString());
      }
    }
    String additionalDepsString = CoNLLUUtils.toExtraDepsString(additionalDeps);
    String word = token.word();
    String featuresString = CoNLLUUtils.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
    String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
    String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
    String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
    String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
    String relnName = reln == null ? "_" : reln.toString();
    /* Root: no main governor was found and the graph lists the token as a root. */
    if (govIdx == -1 && sg.getRoots().contains(token)) {
      govIdx = 0;
      relnName = GrammaticalRelation.ROOT.toString();
    }
    if (unescapeParenthesis) {
      word = word.replaceAll(LRB_PATTERN, "(");
      word = word.replaceAll(RRB_PATTERN, ")");
      lemma = lemma.replaceAll(LRB_PATTERN, "(");
      lemma = lemma.replaceAll(RRB_PATTERN, ")");
    }
    /*
     * Assemble the line with plain appends rather than String.format: "%n" would emit the
     * platform line separator and "%d" is subject to locale-specific digit substitution.
     */
    sb.append(token.index()).append('\t')
        .append(word).append('\t')
        .append(lemma).append('\t')
        .append(upos).append('\t')
        .append(pos).append('\t')
        .append(featuresString).append('\t')
        .append(govIdx).append('\t')
        .append(relnName).append('\t')
        .append(additionalDepsString).append('\t')
        .append(misc).append('\n');
  }
  sb.append('\n');
  return sb.toString();
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalDependenciesFeatureAnnotator method getGraphFeatures.
/**
 * Collects the features of {@code word} that are decided by looking at the graph {@code sg}.
 *
 * <p>Depending on the word's tag and value this may set "Case", "Person", "PronType",
 * "VerbForm", "Tense", "Reflex" and "Voice".
 *
 * @param sg the graph containing {@code word}
 * @param word the token to inspect
 * @return a new map from feature name to feature value (possibly empty)
 */
private static HashMap<String, String> getGraphFeatures(SemanticGraph sg, IndexedWord word) {
  HashMap<String, String> result = new HashMap<>();

  /* Case of the pronouns "you" and "it". */
  boolean youOrIt = word.tag().equals("PRP")
      && (word.value().equalsIgnoreCase("you") || word.value().equalsIgnoreCase("it"));
  if (youOrIt) {
    result.put("Case", pronounCase(sg, word));
  }

  /* Person of "was"; left unset when it cannot be determined. */
  if (word.tag().equals("VBD") && word.value().equalsIgnoreCase("was")) {
    String wasPersonValue = wasPerson(sg, word);
    if (wasPersonValue != null) {
      result.put("Person", wasPersonValue);
    }
  }

  /* Relative and interrogative pronouns. */
  result.putAll(getRelAndIntPronFeatures(sg, word));

  /* Gerund vs. present participle: a "be" auxiliary marks a participle. */
  if (word.tag().equals("VBG")) {
    boolean presentParticiple = hasBeAux(sg, word);
    result.put("VerbForm", presentParticiple ? "Part" : "Ger");
    if (presentParticiple) {
      result.put("Tense", "Pres");
    }
  }

  /* Reflexive pronouns: reflexive unless attached as an NP adverbial modifier. */
  if (word.value().matches(SELF_REGEX) && word.tag().equals("PRP")) {
    IndexedWord head = sg.getParent(word);
    if (head != null
        && sg.getEdge(head, word).getRelation() != UniversalEnglishGrammaticalRelations.NP_ADVERBIAL_MODIFIER) {
      result.put("Case", "Acc");
      result.put("Reflex", "Yes");
    }
  }

  /* Voice: a past participle with a passive auxiliary child is passive. */
  if (word.tag().equals("VBN")
      && sg.hasChildWithReln(word, UniversalEnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER)) {
    result.put("Voice", "Pass");
  }

  return result;
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalDependenciesFeatureAnnotator method main.
/**
 * Command-line entry point. Reads dependency graphs from a CoNLL-U file and trees from a tree
 * file in parallel, adds features to each graph via {@code addFeatures}, and prints every
 * graph to standard output in CoNLL-U format.
 *
 * <p>Usage: {@code CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]}. With fewer than two
 * arguments the usage text is logged and the method returns.
 *
 * @param args the CoNLL-U file, the tree file, then optional flags
 * @throws IOException if reading the CoNLL-U file fails
 * @throws RuntimeException if a graph has no matching tree or the tree's yield size differs
 *     from the graph's size
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    log.info("Usage: ");
    log.info("java ");
    log.info(UniversalDependenciesFeatureAnnotator.class.getCanonicalName());
    log.info(" CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]");
    return;
  }
  String coNLLUFile = args[0];
  String treeFile = args[1];
  boolean addUPOS = false;
  boolean escapeParens = false;
  for (int i = 2; i < args.length; i++) {
    if (args[i].equals("-addUPOS")) {
      addUPOS = true;
    } else if (args[i].equals("-escapeParenthesis")) {
      escapeParens = true;
    }
  }
  UniversalDependenciesFeatureAnnotator featureAnnotator = new UniversalDependenciesFeatureAnnotator();
  /* try-with-resources so the reader is closed on normal exit and on failure. */
  try (Reader r = IOUtils.readerFromString(coNLLUFile)) {
    CoNLLUDocumentReader depReader = new CoNLLUDocumentReader();
    CoNLLUDocumentWriter depWriter = new CoNLLUDocumentWriter();
    Iterator<SemanticGraph> it = depReader.getIterator(r);
    Iterator<Tree> treeIt = treebankIterator(treeFile);
    while (it.hasNext()) {
      SemanticGraph sg = it.next();
      /* A tree file with fewer trees than graphs is reported as a misalignment below. */
      Tree t = treeIt.hasNext() ? treeIt.next() : null;
      if (t == null || t.yield().size() != sg.size()) {
        StringBuilder sentenceSb = new StringBuilder();
        for (IndexedWord word : sg.vertexListSorted()) {
          sentenceSb.append(word.get(CoreAnnotations.TextAnnotation.class));
          sentenceSb.append(' ');
        }
        /* Guard the tree rendering: t may be null in this branch. */
        String treeText = t == null ? "<none>" : t.pennString();
        throw new RuntimeException("CoNLL-U file and tree file are not aligned. \n" + "Sentence: " + sentenceSb + '\n' + "Tree: " + treeText);
      }
      /* NOTE(review): the meaning of the literal `true` argument is not visible here. */
      featureAnnotator.addFeatures(sg, t, true, addUPOS);
      /* The writer takes "unescape" while the flag requests escaping, hence the negation. */
      System.out.print(depWriter.printSemanticGraph(sg, !escapeParens));
    }
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalDependenciesFeatureAnnotator method pronounCase.
/**
 * Decides between nominative and accusative case for the pronoun "you" or "it".
 *
 * <p>Returns "Acc" when the pronoun's incoming relation descends from OBJECT, or when that
 * relation descends from NOMINAL_MODIFIER (or is ROOT) and the pronoun has a CASE_MARKER
 * child. Returns "Nom" in every other situation, including when the pronoun has no parent
 * or no edge from its parent.
 */
private static String pronounCase(SemanticGraph sg, IndexedWord word) {
  /* Work on the graph's own node for this index. */
  IndexedWord node = sg.getNodeByIndex(word.index());

  IndexedWord governor = sg.getParent(node);
  if (governor == null) {
    return "Nom";
  }
  SemanticGraphEdge incoming = sg.getEdge(governor, node);
  if (incoming == null) {
    return "Nom";
  }

  /* The pronoun is an object. */
  if (UniversalEnglishGrammaticalRelations.OBJECT.isAncestor(incoming.getRelation())) {
    return "Acc";
  }

  /* The pronoun is the head of a prepositional phrase. */
  boolean nominalModifierOrRoot =
      UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER.isAncestor(incoming.getRelation())
          || incoming.getRelation() == GrammaticalRelation.ROOT;
  if (nominalModifierOrRoot
      && sg.hasChildWithReln(node, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
    return "Acc";
  }

  return "Nom";
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalDependenciesFeatureAnnotator method getRelAndIntPronFeatures.
/**
 * Determines the "PronType" feature of words whose tag starts with "W".
 *
 * <p>The word is treated as relative ("Rel") when its parent is attached to its own parent
 * by a RELATIVE_CLAUSE_MODIFIER edge. Otherwise "that" yields "Dem" and any other word
 * yields "Int". Words with any other tag produce an empty map.
 */
private static HashMap<String, String> getRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word) {
  HashMap<String, String> result = new HashMap<>();
  if (!word.tag().startsWith("W")) {
    return result;
  }

  /* Look two levels up: the edge into the parent decides whether this is a relative clause. */
  IndexedWord head = sg.getParent(word);
  IndexedWord grandHead = (head == null) ? null : sg.getParent(head);
  boolean relative = grandHead != null
      && sg.getEdge(grandHead, head).getRelation()
          .equals(UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER);

  String pronType;
  if (relative) {
    pronType = "Rel";
  } else if (word.value().equalsIgnoreCase("that")) {
    pronType = "Dem";
  } else {
    pronType = "Int";
  }
  result.put("PronType", pronType);
  return result;
}
Aggregations