Usage example of edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion in the stanfordnlp/CoreNLP project: the train method of the EntityClassifier class.
/**
 * Trains an entity classifier from the gold scene-graph annotations.
 * For every region of every image, aligns relation triples against the
 * enhanced dependency graph and turns each resolvable head word into a
 * training datum. The fitted model is serialized to {@code modelPath} and
 * its (training-set) accuracy is printed to stderr.
 *
 * @param images     images carrying gold scene-graph regions
 * @param modelPath  destination file for the serialized classifier
 * @param embeddings word embeddings used for feature extraction and matching
 * @throws IOException if the model file cannot be written
 */
private static void train(List<SceneGraphImage> images, String modelPath, Embedding embeddings) throws IOException {
  RVFDataset<String, String> dataset = new RVFDataset<String, String>();
  SceneGraphSentenceMatcher sentenceMatcher = new SceneGraphSentenceMatcher(embeddings);
  for (SceneGraphImage image : images) {
    for (SceneGraphImageRegion region : image.regions) {
      SemanticGraph graph = region.getEnhancedSemanticGraph();
      SemanticGraphEnhancer.enhance(graph);
      for (Triple<IndexedWord, IndexedWord, String> triple : sentenceMatcher.getRelationTriples(region)) {
        IndexedWord node = graph.getNodeByIndexSafe(triple.first.index());
        if (node == null) {
          continue; // head token of this triple is absent from the enhanced graph
        }
        dataset.add(getDatum(node, triple.first.get(SceneGraphCoreAnnotations.GoldEntityAnnotation.class), embeddings));
      }
    }
  }
  // Logistic regression via quasi-Newton optimization (15 iterations of memory).
  LinearClassifierFactory<String, String> factory =
      new LinearClassifierFactory<String, String>(new QNMinimizer(15), 1e-4, false, REG_STRENGTH);
  Classifier<String, String> classifier = factory.trainClassifier(dataset);
  IOUtils.writeObjectToFile(classifier, modelPath);
  System.err.println(classifier.evaluateAccuracy(dataset));
}
Usage example of edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion in the stanfordnlp/CoreNLP project: the main method of the GenerateAlignmentData class.
/**
 * Reads scene-graph images (one JSON object per line) from the input file and
 * writes parallel alignment data: one line per attribute/relationship with the
 * tokenized region phrase in the sentences file and the corresponding
 * attribute/relationship text in the graphs file.
 *
 * <p>Usage: {@code GenerateAlignmentData <input.json> <sentencesOut> <graphsOut>}
 *
 * @param args args[0] = input JSON file, args[1] = sentences output file,
 *             args[2] = graphs output file
 * @throws IOException if any of the files cannot be read or written
 */
public static void main(String[] args) throws IOException {
  Properties props = new Properties();
  props.put("annotators", "tokenize,ssplit");
  props.put("ssplit.eolonly", "true");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  String filename = args[0];
  String sentences = args[1];
  String graphs = args[2];
  // try-with-resources: the original never closed the reader or the two
  // PrintWriters, so buffered output could be lost on exit.
  try (BufferedReader reader = IOUtils.readerFromString(filename);
       PrintWriter sentencesFile = IOUtils.getPrintWriter(sentences);
       PrintWriter graphsFile = IOUtils.getPrintWriter(graphs)) {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      SceneGraphImage img = SceneGraphImage.readFromJSON(line);
      if (img == null) {
        continue; // skip malformed JSON lines
      }
      for (SceneGraphImageRegion region : img.regions) {
        Annotation doc = new Annotation(region.phrase);
        pipeline.annotate(doc);
        CoreMap sentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        String tokenizedSentence = StringUtils.join(tokens.stream().map(CoreLabel::word), " ");
        // Emit one (sentence, graph-text) pair per attribute and per relationship.
        for (SceneGraphImageAttribute attr : region.attributes) {
          sentencesFile.printf("%s%n", tokenizedSentence);
          graphsFile.printf("%s%n", StringUtils.join(attr.text));
        }
        for (SceneGraphImageRelationship reln : region.relationships) {
          sentencesFile.printf("%s%n", tokenizedSentence);
          graphsFile.printf("%s%n", StringUtils.join(reln.text));
        }
      }
    }
  }
}
Usage example of edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion in the stanfordnlp/CoreNLP project: the main method of the SceneGraphImageDependencyParser class.
/**
 * Parses every region of every scene-graph image in the input file with the
 * default neural dependency parser and prints the augmented images back out
 * as JSON, one per line.
 *
 * @param args args[0] = input file with one SceneGraphImage JSON object per line
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);
  String filename = args[0];
  // try-with-resources: the original leaked the reader.
  try (BufferedReader reader = IOUtils.readerFromString(filename)) {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      SceneGraphImage img = SceneGraphImage.readFromJSON(line);
      if (img == null) {
        continue; // skip malformed JSON lines
      }
      for (SceneGraphImageRegion region : img.regions) {
        // Only regions that were previously tokenized can be parsed.
        if (region.tokens != null) {
          region.gs = parser.predict(region.tokens);
        }
      }
      System.out.println(img.toJSON());
    }
  }
}
Usage example of edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion in the stanfordnlp/CoreNLP project: the main method of the SceneGraphImagePCFGParser class.
/**
 * Parses every tokenized region of every scene-graph image with the PCFG
 * constituency parser, converts the trees to grammatical structures, and
 * prints the augmented images back out as JSON, one per line.
 *
 * @param args args[0] = input file with one SceneGraphImage JSON object per line
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  LexicalizedParser parser = LexicalizedParser.getParserFromSerializedFile(PCFG_MODEL);
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
  String filename = args[0];
  // try-with-resources: the original leaked the reader.
  try (BufferedReader reader = IOUtils.readerFromString(filename)) {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      SceneGraphImage img = SceneGraphImage.readFromJSON(line);
      if (img == null) {
        continue; // skip malformed JSON lines
      }
      for (SceneGraphImageRegion region : img.regions) {
        if (region.tokens != null) {
          // Drop stale POS tags so the parser assigns its own.
          for (CoreLabel token : region.tokens) {
            token.remove(CoreAnnotations.PartOfSpeechAnnotation.class);
          }
          Tree t = parser.apply(region.tokens);
          region.gs = gsf.newGrammaticalStructure(t);
        }
      }
      System.out.println(img.toJSON());
    }
  }
}
Usage example of edu.stanford.nlp.scenegraph.image.SceneGraphImageRegion in the stanfordnlp/CoreNLP project: the lemmatize method of the SceneGraphImageCleaner class.
/**
 * Normalizes the textual fields of a SceneGraphImage in place. Each fragment
 * (attribute, subject, object, predicate, object name) is embedded into a
 * fixed carrier sentence, run through the annotation pipeline, and the tokens
 * corresponding to the fragment are stored back as its gloss. Region phrases
 * are simply lower-cased and tokenized.
 *
 * @param img the image whose attributes, relationships, objects and regions
 *            are annotated in place
 */
public void lemmatize(SceneGraphImage img) {
  StanfordCoreNLP pipeline = getPipeline();
  /* attributes */
  for (SceneGraphImageAttribute attr : img.attributes) {
    String attribute = removeDeterminersAndNumbers(removeFinalPunctuation(attr.attribute));
    // "She is <attr> ." -- attribute tokens are everything after "She is",
    // excluding the final period.
    List<CoreLabel> tokens = annotateSentence(pipeline, String.format("She is %s .\n", attribute));
    attr.attributeGloss = tokens.subList(2, tokens.size() - 1);
    String subject = removeDeterminersAndNumbers(removeFinalPunctuation(attr.text[0]));
    attr.subjectGloss = nounPhraseGloss(pipeline, subject);
    attr.subject.labels.add(attr.subjectGloss);
  }
  /* relations */
  for (SceneGraphImageRelationship reln : img.relationships) {
    String object = removeDeterminersAndNumbers(removeFinalPunctuation(reln.text[2]));
    // "She is the <object> ." -- object tokens follow "She is the".
    List<CoreLabel> tokens = annotateSentence(pipeline, String.format("She is the %s .\n", object));
    reln.objectGloss = tokens.subList(3, tokens.size() - 1);
    reln.object.labels.add(reln.objectGloss);
    String subject = removeDeterminersAndNumbers(removeFinalPunctuation(reln.text[0]));
    reln.subjectGloss = nounPhraseGloss(pipeline, subject);
    reln.subject.labels.add(reln.subjectGloss);
    String predicate = removeDeterminersAndNumbers(removeFinalPunctuation(reln.predicate));
    // "A horse <pred> an apple ." -- predicate tokens sit between the two noun phrases.
    tokens = annotateSentence(pipeline, String.format("A horse %s an apple .", predicate));
    reln.predicateGloss = tokens.subList(2, tokens.size() - 3);
  }
  /* objects: label every name if some names are still unlabelled */
  for (SceneGraphImageObject object : img.objects) {
    if (object.names.size() > object.labels.size()) {
      for (String name : object.names) {
        String x = removeDeterminersAndNumbers(removeFinalPunctuation(name));
        object.labels.add(nounPhraseGloss(pipeline, x));
      }
    }
  }
  /* regions: plain lower-cased tokenization of the raw phrase */
  StanfordCoreNLP tokenizerPipeline = getTokenizerPipeline();
  for (SceneGraphImageRegion region : img.regions) {
    Annotation doc = new Annotation(region.phrase.toLowerCase());
    tokenizerPipeline.annotate(doc);
    CoreMap sentenceAnn = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    region.tokens = sentenceAnn.get(CoreAnnotations.TokensAnnotation.class);
  }
}

/** Runs {@code pipeline} on {@code sentence} and returns the tokens of its first sentence. */
private static List<CoreLabel> annotateSentence(StanfordCoreNLP pipeline, String sentence) {
  Annotation doc = new Annotation(sentence);
  pipeline.annotate(doc);
  CoreMap sentenceAnn = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  return sentenceAnn.get(CoreAnnotations.TokensAnnotation.class);
}

/**
 * Embeds a noun phrase into the carrier sentence "The X is tall ." and returns
 * the tokens belonging to X (everything between "The" and "is tall .").
 */
private static List<CoreLabel> nounPhraseGloss(StanfordCoreNLP pipeline, String nounPhrase) {
  List<CoreLabel> tokens = annotateSentence(pipeline, String.format("The %s is tall .", nounPhrase));
  return tokens.subList(1, tokens.size() - 3);
}
Aggregations