Use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
In the class TSVUtils, the method parseJsonTree:
/**
* Parse a JSON formatted tree into a SemanticGraph.
* @param jsonString The JSON string tree to parse, e.g:
* "[{\"\"dependent\"\": 7, \"\"dep\"\": \"\"root\"\", \"\"governorgloss\"\": \"\"root\"\", \"\"governor\"\": 0, \"\"dependentgloss\"\": \"\"sport\"\"}, {\"\"dependent\"\": 1, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 2, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"is\"\"}, {\"\"dependent\"\": 3, \"\"dep\"\": \"\"neg\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"not\"\"}, {\"\"dependent\"\": 4, \"\"dep\"\": \"\"det\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"a\"\"}, {\"\"dependent\"\": 5, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"physical\"\", \"\"governor\"\": 6, \"\"dependentgloss\"\": \"\"predominantly\"\"}, {\"\"dependent\"\": 6, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"physical\"\"}, {\"\"dependent\"\": 9, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"yet\"\"}, {\"\"dependent\"\": 10, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"neither\"\"}, {\"\"dependent\"\": 11, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"are\"\"}, {\"\"dependent\"\": 12, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"shooting\"\"}, {\"\"dependent\"\": 13, \"\"dep\"\": \"\"cc\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"and\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"conj:and\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 16, \"\"dep\"\": \"\"nsubjpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"which\"\"}, {\"\"dependent\"\": 18, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"fact\"\", \"\"governor\"\": 19, \"\"dependentgloss\"\": \"\"in\"\"}, {\"\"dependent\"\": 19, \"\"dep\"\": \"\"nmod:in\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"fact\"\"}, {\"\"dependent\"\": 21, \"\"dep\"\": \"\"aux\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"has\"\"}, {\"\"dependent\"\": 22, \"\"dep\"\": \"\"auxpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"been\"\"}, {\"\"dependent\"\": 23, \"\"dep\"\": \"\"dep\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"nicknamed\"\"}, {\"\"dependent\"\": 25, \"\"dep\"\": \"\"dobj\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 26, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"ice\"\", \"\"governor\"\": 27, \"\"dependentgloss\"\": \"\"on\"\"}, {\"\"dependent\"\": 27, \"\"dep\"\": \"\"nmod:on\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"ice\"\"}, {\"\"dependent\"\": 29, \"\"dep\"\": 
\"\"amod\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"5\"\"}]");
* @param tokens The tokens of the sentence, to form the backing labels of the tree.
* @return A semantic graph of the sentence, according to the given tree.
*/
public static SemanticGraph parseJsonTree(String jsonString, List<CoreLabel> tokens) {
  // Escape quoted string parts
  JsonReader json = Json.createReader(new StringReader(jsonString));
  SemanticGraph tree = new SemanticGraph();
  JsonArray array = json.readArray();
  if (array == null || array.isEmpty()) {
    return tree;
  }
  IndexedWord[] vertices = new IndexedWord[tokens.size() + 2];
  // Add edges
  for (int i = 0; i < array.size(); i++) {
    JsonObject entry = array.getJsonObject(i);
    // Parse row
    int dependentIndex = entry.getInt("dependent");
    if (vertices[dependentIndex] == null) {
      if (dependentIndex > tokens.size()) {
        // Bizarre mismatch in sizes; the malt parser seems to do this often
        return new SemanticGraph();
      }
      vertices[dependentIndex] = new IndexedWord(tokens.get(dependentIndex - 1));
    }
    IndexedWord dependent = vertices[dependentIndex];
    int governorIndex = entry.getInt("governor");
    if (governorIndex > tokens.size()) {
      // Bizarre mismatch in sizes; the malt parser seems to do this often
      return new SemanticGraph();
    }
    if (vertices[governorIndex] == null && governorIndex > 0) {
      vertices[governorIndex] = new IndexedWord(tokens.get(governorIndex - 1));
    }
    IndexedWord governor = vertices[governorIndex];
    String relation = entry.getString("dep");
    // Process row
    if (governorIndex == 0) {
      tree.addRoot(dependent);
    } else {
      tree.addVertex(dependent);
      if (!tree.containsVertex(governor)) {
        tree.addVertex(governor);
      }
      if (!"ref".equals(relation)) {
        tree.addEdge(governor, dependent, GrammaticalRelation.valueOf(Language.English, relation), Double.NEGATIVE_INFINITY, false);
      }
    }
  }
  return tree;
}
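A minimal usage sketch for parseJsonTree, assuming a hypothetical three-token sentence and a hand-written arc list. The demo class name and sentence data are illustrative only, and the class is assumed to sit in the same package as TSVUtils (so no import is shown for it). Only the "dependent", "governor", and "dep" keys are read by the method above, so the gloss fields from the Javadoc example are omitted here.

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.semgraph.SemanticGraph;

public class ParseJsonTreeDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Hypothetical sentence "chess is fun", one CoreLabel per token.
    List<CoreLabel> tokens = new ArrayList<>();
    String[] words = {"chess", "is", "fun"};
    for (int i = 0; i < words.length; i++) {
      CoreLabel token = new CoreLabel();
      token.setWord(words[i]);
      token.setValue(words[i]);
      token.setIndex(i + 1);  // 1-based, matching the "dependent"/"governor" indices
      tokens.add(token);
    }
    // One JSON object per arc; governor 0 marks the root, as in the Javadoc example above.
    String json = "[{\"dependent\": 3, \"dep\": \"root\", \"governor\": 0},"
        + " {\"dependent\": 1, \"dep\": \"nsubj\", \"governor\": 3},"
        + " {\"dependent\": 2, \"dep\": \"cop\", \"governor\": 3}]";
    SemanticGraph graph = TSVUtils.parseJsonTree(json, tokens);
    System.out.println(graph);
  }
}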
Use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
In the class UniversalDependenciesConverter, the method main:
/**
*
* Converts a constituency tree to the English basic, enhanced, or
* enhanced++ Universal dependencies representation, or an English basic
* Universal dependencies tree to the enhanced or enhanced++ representation.
*
* Command-line options:<br>
* {@code -treeFile}: File with PTB-formatted constituency trees<br>
* {@code -conlluFile}: File with basic dependency trees in CoNLL-U format<br>
* {@code -outputRepresentation}: "basic" (default), "enhanced", or "enhanced++"
*
*/
public static void main(String[] args) {
  Properties props = StringUtils.argsToProperties(args);
  String treeFileName = props.getProperty("treeFile");
  String conlluFileName = props.getProperty("conlluFile");
  String outputRepresentation = props.getProperty("outputRepresentation", "basic");
  Iterator<SemanticGraph> sgIterator;  // = null;
  if (treeFileName != null) {
    MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
    tb.loadPath(treeFileName);
    Iterator<Tree> treeIterator = tb.iterator();
    sgIterator = new TreeToSemanticGraphIterator(treeIterator);
  } else if (conlluFileName != null) {
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    try {
      sgIterator = reader.getIterator(IOUtils.readerFromString(conlluFileName));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  } else {
    System.err.println("No input file specified!");
    System.err.println("");
    System.err.printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", UniversalDependenciesConverter.class.getCanonicalName());
    return;
  }
  CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
  while (sgIterator.hasNext()) {
    SemanticGraph sg = sgIterator.next();
    if (treeFileName != null) {
      // add UPOS tags
      Tree tree = ((TreeToSemanticGraphIterator) sgIterator).getCurrentTree();
      Tree uposTree = UniversalPOSMapper.mapTree(tree);
      List<Label> uposLabels = uposTree.preTerminalYield();
      for (IndexedWord token : sg.vertexListSorted()) {
        int idx = token.index() - 1;
        String uposTag = uposLabels.get(idx).value();
        token.set(CoreAnnotations.CoarseTagAnnotation.class, uposTag);
      }
    } else {
      addLemmata(sg);
      if (USE_NAME) {
        addNERTags(sg);
      }
    }
    if (outputRepresentation.equalsIgnoreCase("enhanced")) {
      sg = convertBasicToEnhanced(sg);
    } else if (outputRepresentation.equalsIgnoreCase("enhanced++")) {
      sg = convertBasicToEnhancedPlusPlus(sg);
    }
    System.out.print(writer.printSemanticGraph(sg));
  }
}
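A hedged sketch of driving the converter programmatically with the flags documented above; the input file name is hypothetical, as is the demo class, and the call assumes UniversalDependenciesConverter is imported from its CoreNLP package.

public class ConverterDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Hypothetical run: convert PTB constituency trees in trees.mrg to the enhanced++
    // Universal Dependencies representation, printed as CoNLL-U on stdout.
    // Equivalent command line: java <canonical name of UniversalDependenciesConverter>
    //   -treeFile trees.mrg -outputRepresentation enhanced++
    UniversalDependenciesConverter.main(new String[] {
        "-treeFile", "trees.mrg",              // hypothetical input file with PTB trees
        "-outputRepresentation", "enhanced++"  // or "basic" (default) or "enhanced"
    });
  }
}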
Use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
In the class UniversalDependenciesConverter, the method convertTreeToBasic:
private static SemanticGraph convertTreeToBasic(Tree tree) {
  addLemmata(tree);
  addNERTags(tree);
  SemanticGraph sg = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE, null, false, true);
  addLemmata(sg);
  if (USE_NAME) {
    addNERTags(sg);
  }
  return sg;
}
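A minimal sketch of the SemanticGraphFactory.makeFromTree call that this private helper wraps, without the lemma/NER bookkeeping. The toy constituency tree and the demo class are illustrative only; the factory call itself is copied verbatim from the helper above.

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.Tree;

public class TreeToBasicDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Toy PTB-style constituency tree (illustrative only).
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Chess)) (VP (VBZ is) (NP (DT a) (NN sport)))))");
    // Same factory call as convertTreeToBasic above.
    SemanticGraph basic = SemanticGraphFactory.makeFromTree(
        tree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE, null, false, true);
    System.out.println(basic);
  }
}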
Use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
In the class TSVSentenceIterator, the method toCoreMap:
public static CoreMap toCoreMap(List<SentenceField> fields, List<String> entries) {
  CoreMap map = new ArrayCoreMap(fields.size());
  Optional<List<CoreLabel>> tokens = Optional.empty();
  // First pass - process all token level stuff.
  for (Pair<SentenceField, String> entry : Iterables.zip(fields, entries)) {
    SentenceField field = entry.first;
    String value = unescapeSQL(entry.second);
    switch (field) {
      case WORDS:
        {
          List<String> values = TSVUtils.parseArray(value);
          if (!tokens.isPresent()) {
            tokens = Optional.of(new ArrayList<>(values.size()));
            for (int i = 0; i < values.size(); i++) tokens.get().add(new CoreLabel());
          }
          int beginChar = 0;
          for (int i = 0; i < values.size(); i++) {
            tokens.get().get(i).setValue(values.get(i));
            tokens.get().get(i).setWord(values.get(i));
            tokens.get().get(i).setBeginPosition(beginChar);
            tokens.get().get(i).setEndPosition(beginChar + values.get(i).length());
            beginChar += values.get(i).length() + 1;
          }
        }
        break;
      case LEMMAS:
        {
          List<String> values = TSVUtils.parseArray(value);
          if (!tokens.isPresent()) {
            tokens = Optional.of(new ArrayList<>(values.size()));
            for (int i = 0; i < values.size(); i++) tokens.get().add(new CoreLabel());
          }
          for (int i = 0; i < values.size(); i++) {
            tokens.get().get(i).setLemma(values.get(i));
          }
        }
        break;
      case POS_TAGS:
        {
          List<String> values = TSVUtils.parseArray(value);
          if (!tokens.isPresent()) {
            tokens = Optional.of(new ArrayList<>(values.size()));
            for (int i = 0; i < values.size(); i++) tokens.get().add(new CoreLabel());
          }
          for (int i = 0; i < values.size(); i++) {
            tokens.get().get(i).setTag(values.get(i));
          }
        }
        break;
      case NER_TAGS:
        {
          List<String> values = TSVUtils.parseArray(value);
          if (!tokens.isPresent()) {
            tokens = Optional.of(new ArrayList<>(values.size()));
            for (int i = 0; i < values.size(); i++) tokens.get().add(new CoreLabel());
          }
          for (int i = 0; i < values.size(); i++) {
            tokens.get().get(i).setNER(values.get(i));
          }
        }
        break;
      default:
        // ignore.
        break;
    }
  }
  // Document specific stuff.
  Optional<String> docId = Optional.empty();
  Optional<String> sentenceId = Optional.empty();
  Optional<Integer> sentenceIndex = Optional.empty();
  for (Pair<SentenceField, String> entry : Iterables.zip(fields, entries)) {
    SentenceField field = entry.first;
    String value = unescapeSQL(entry.second);
    switch (field) {
      case ID:
        sentenceId = Optional.of(value);
        break;
      case DOC_ID:
        docId = Optional.of(value);
        break;
      case SENTENCE_INDEX:
        sentenceIndex = Optional.of(Integer.parseInt(value));
        break;
      case GLOSS:
        value = value.replace("\\n", "\n").replace("\\t", "\t");
        map.set(CoreAnnotations.TextAnnotation.class, value);
        break;
      default:
        // ignore.
        break;
    }
  }
  // High level document stuff
  map.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceId.orElse("-1"));
  map.set(CoreAnnotations.DocIDAnnotation.class, docId.orElse("???"));
  map.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(0));
  // Doc-char
  if (tokens.isPresent()) {
    for (Pair<SentenceField, String> entry : Iterables.zip(fields, entries)) {
      SentenceField field = entry.first;
      String value = unescapeSQL(entry.second);
      switch (field) {
        case DOC_CHAR_BEGIN:
          {
            List<String> values = TSVUtils.parseArray(value);
            for (int i = 0; i < tokens.get().size(); i++) {
              tokens.get().get(i).setBeginPosition(Integer.parseInt(values.get(i)));
            }
          }
          break;
        case DOC_CHAR_END:
          {
            List<String> values = TSVUtils.parseArray(value);
            for (int i = 0; i < tokens.get().size(); i++) {
              tokens.get().get(i).setEndPosition(Integer.parseInt(values.get(i)));
            }
          }
          break;
        default:
          // ignore.
          break;
      }
    }
  }
  // Final token level stuff.
  if (tokens.isPresent()) {
    for (int i = 0; i < tokens.get().size(); i++) {
      tokens.get().get(i).set(CoreAnnotations.DocIDAnnotation.class, docId.orElse("???"));
      tokens.get().get(i).set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1));
      tokens.get().get(i).set(CoreAnnotations.IndexAnnotation.class, i + 1);
      tokens.get().get(i).set(CoreAnnotations.TokenBeginAnnotation.class, i);
      tokens.get().get(i).set(CoreAnnotations.TokenEndAnnotation.class, i + 1);
    }
  }
  // Dependency trees
  if (tokens.isPresent()) {
    map.set(CoreAnnotations.TokensAnnotation.class, tokens.get());
    map.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    map.set(CoreAnnotations.TokenEndAnnotation.class, tokens.get().size());
    for (Pair<SentenceField, String> entry : Iterables.zip(fields, entries)) {
      SentenceField field = entry.first;
      String value = unescapeSQL(entry.second);
      switch (field) {
        case DEPENDENCIES_BASIC:
          {
            SemanticGraph graph = TSVUtils.parseJsonTree(value, tokens.get());
            map.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
            // if (!map.containsKey(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class))
            //   map.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph);
            // if (!map.containsKey(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class))
            //   map.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, graph);
          }
          break;
        case DEPENDENCIES_COLLAPSED:
          {
            SemanticGraph graph = TSVUtils.parseJsonTree(value, tokens.get());
            map.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph);
          }
          break;
        case DEPENDENCIES_COLLAPSED_CC:
          {
            SemanticGraph graph = TSVUtils.parseJsonTree(value, tokens.get());
            // if (!map.containsKey(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class))
            //   map.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
            // map.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph);
            map.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, graph);
          }
          break;
        case DEPENDENCIES_ALTERNATE:
          {
            SemanticGraph graph = TSVUtils.parseJsonTree(value, tokens.get());
            map.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, graph);
          }
          break;
        default:
          // ignore.
          break;
      }
    }
  }
  return map;
}
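A hedged sketch of calling toCoreMap with two columns. The SentenceField constants come from the switch statements above; the demo class, the location of the SentenceField enum (assumed to be importable alongside TSVSentenceIterator), and the brace-delimited array literal accepted by TSVUtils.parseArray are all assumptions.

import java.util.Arrays;
import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class ToCoreMapDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Two columns: the sentence gloss and its token array.
    // The array literal syntax is an assumption about what TSVUtils.parseArray accepts.
    List<SentenceField> fields = Arrays.asList(SentenceField.GLOSS, SentenceField.WORDS);
    List<String> entries = Arrays.asList(
        "Chess is not a sport",
        "{Chess,is,not,a,sport}");
    CoreMap sentence = TSVSentenceIterator.toCoreMap(fields, entries);
    System.out.println(sentence.get(CoreAnnotations.TextAnnotation.class));
    System.out.println(sentence.get(CoreAnnotations.TokensAnnotation.class));
  }
}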
Use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
In the class IETestUtils, the method parseCoNLL:
/**
* Parse a CoNLL formatted string into a SemanticGraph.
* This is useful for tests so that you don't need to load the model (and are robust to
* model changes).
*
* @param conll The CoNLL format for the tree.
* @return A semantic graph, as well as the flat tokens of the sentence.
*/
public static Pair<SemanticGraph, List<CoreLabel>> parseCoNLL(String conll) {
  List<CoreLabel> sentence = new ArrayList<>();
  SemanticGraph tree = new SemanticGraph();
  for (String line : conll.split("\n")) {
    if (line.trim().equals("")) {
      continue;
    }
    String[] fields = line.trim().split("\\s+");
    int index = Integer.parseInt(fields[0]);
    String word = fields[1];
    CoreLabel label = mkWord(word, index);
    sentence.add(label);
    if (fields[2].equals("0")) {
      tree.addRoot(new IndexedWord(label));
    } else {
      tree.addVertex(new IndexedWord(label));
    }
    if (fields.length > 4) {
      label.setTag(fields[4]);
    }
    if (fields.length > 5) {
      label.setNER(fields[5]);
    }
    if (fields.length > 6) {
      label.setLemma(fields[6]);
    }
  }
  int i = 0;
  for (String line : conll.split("\n")) {
    if (line.trim().equals("")) {
      continue;
    }
    String[] fields = line.trim().split("\\s+");
    int parent = Integer.parseInt(fields[2]);
    String reln = fields[3];
    if (parent > 0) {
      tree.addEdge(new IndexedWord(sentence.get(parent - 1)), new IndexedWord(sentence.get(i)), new GrammaticalRelation(Language.UniversalEnglish, reln, null, null), 1.0, false);
    }
    i += 1;
  }
  return Pair.makePair(tree, sentence);
}
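A minimal usage sketch for parseCoNLL with a hypothetical two-token sentence. The column order follows the field indices read above (index, word, governor index with 0 for the root, relation, then optional POS tag, NER tag, and lemma); the demo class is illustrative, and IETestUtils is assumed to be imported from its package in the test sources.

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.util.Pair;

public class ParseCoNLLDemo {  // hypothetical demo class
  public static void main(String[] args) {
    // Two tokens: "cats" depends on the root "play" via nsubj.
    String conll =
        "1\tcats\t2\tnsubj\tNNS\tO\tcat\n" +
        "2\tplay\t0\troot\tVBP\tO\tplay\n";
    Pair<SemanticGraph, List<CoreLabel>> parsed = IETestUtils.parseCoNLL(conll);
    SemanticGraph tree = parsed.first;
    List<CoreLabel> tokens = parsed.second;
    System.out.println(tree);
    System.out.println(tokens);
  }
}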