Example use of edu.stanford.nlp.coref.data.DocumentMaker in the CoreNLP project by stanfordnlp.
Method runFromScratch of the class CorefDocumentProcessor.
/**
 * Clears the shared annotator pool and then runs this processor over documents
 * produced by a freshly constructed {@link DocumentMaker}.
 *
 * @param props properties handed to the new {@code DocumentMaker}
 * @param dictionaries dictionaries handed to the new {@code DocumentMaker}
 * @throws Exception if building the document maker or the run itself fails
 */
public default void runFromScratch(Properties props, Dictionaries dictionaries) throws Exception {
  // Some annotators produce slightly different outputs when run twice over the
  // same input, so the annotator pool is emptied before anything is built.
  StanfordCoreNLP.clearAnnotatorPool();

  DocumentMaker documentMaker = new DocumentMaker(props, dictionaries);
  run(documentMaker);
}
Example use of edu.stanford.nlp.coref.data.DocumentMaker in the CoreNLP project by stanfordnlp.
Method generateFeatureVectors of the class SingletonPredictor.
/**
 * Builds the labelled training set for the singleton classifier from every document
 * supplied by a {@link DocumentMaker} configured with {@code props} (the input is
 * described as a CoNLL file).
 *
 * <p>Mentions belonging to a gold coreference cluster are added with label {@code "1"}.
 * Predicted mentions whose head word is not the head word of any gold mention are added
 * with label {@code "0"}. In both cases a mention is skipped when its head word's tag
 * starts with {@code "V"} or when its head index cannot be found in the mention's
 * enhanced dependency graph.
 *
 * @param props properties used to build the dictionaries and the document maker
 * @return dataset of feature vectors, after {@code summaryStatistics()} has been invoked on it
 * @throws Exception if any of the underlying loading or document-reading calls fails
 */
private static GeneralDataset<String, String> generateFeatureVectors(Properties props) throws Exception {
  GeneralDataset<String, String> dataset = new Dataset<>();
  Dictionaries dict = new Dictionaries(props);
  DocumentMaker docMaker = new DocumentMaker(props, dict);

  // nextDoc() returning null marks the end of the input.
  for (Document doc = docMaker.nextDoc(); doc != null; doc = docMaker.nextDoc()) {
    setTokenIndices(doc);

    // Positive examples (label "1"): mentions taken from the gold coreference clusters.
    Map<Integer, CorefCluster> goldClusters = doc.goldCorefClusters;
    for (CorefCluster cluster : goldClusters.values()) {
      for (Mention mention : cluster.getCorefMentions()) {
        // Keep only non-verbal mentions whose head is present in the dependency graph.
        boolean usable = !mention.headWord.tag().startsWith("V")
            && mention.enhancedDependency.getNodeByIndexSafe(mention.headWord.index()) != null;
        if (usable) {
          dataset.add(new BasicDatum<>(mention.getSingletonFeatures(dict), "1"));
        }
      }
    }

    // Negative examples (label "0"): predicted mentions that share no head with a gold mention.
    ArrayList<CoreLabel> goldHeads = new ArrayList<>();
    for (Mention goldMention : doc.goldMentionsByID.values()) {
      goldHeads.add(goldMention.headWord);
    }
    for (Mention candidate : doc.predictedMentionsByID.values()) {
      SemanticGraph graph = candidate.enhancedDependency;
      IndexedWord head = graph.getNodeByIndexSafe(candidate.headWord.index());
      boolean headInGraph = head != null && graph.vertexSet().contains(head);
      // Checked in order: head must be in the graph, the mention must not be verbal,
      // and its head must not belong to a gold mention (otherwise it is not a singleton).
      boolean singleton = headInGraph
          && !candidate.headWord.tag().startsWith("V")
          && !goldHeads.contains(candidate.headWord);
      if (singleton) {
        dataset.add(new BasicDatum<>(candidate.getSingletonFeatures(dict), "0"));
      }
    }
  }

  dataset.summaryStatistics();
  return dataset;
}
Example use of edu.stanford.nlp.coref.data.DocumentMaker in the CoreNLP project by stanfordnlp.
Method main of the class FromFileCorefAlgorithm.
/**
 * Command-line entry point: builds a {@code CorefSystem} around a
 * {@link DocumentMaker} and a {@code FromFileCorefAlgorithm}, then calls
 * {@code runOnConll} on it.
 *
 * @param args {@code args[0]} is passed as the value of {@code -props} when loading the
 *     properties; {@code args[1]} is passed to the {@code FromFileCorefAlgorithm}
 *     constructor (presumably the path of the file it reads; confirm against that class).
 *     Any further arguments are ignored.
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if loading resources or running the system fails
 */
public static void main(String[] args) throws Exception {
  // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException that
  // would otherwise surface only after the dictionaries and document maker were built.
  if (args == null || args.length < 2) {
    int supplied = (args == null) ? 0 : args.length;
    throw new IllegalArgumentException(
        "Expected 2 arguments (<properties file> <FromFileCorefAlgorithm input>), got " + supplied);
  }

  Properties props = StringUtils.argsToProperties(new String[] { "-props", args[0] });

  // Constructed in the same order the original nested expression evaluated them.
  Dictionaries dictionaries = new Dictionaries(props);
  DocumentMaker documentMaker = new DocumentMaker(props, dictionaries);
  FromFileCorefAlgorithm algorithm = new FromFileCorefAlgorithm(args[1]);

  // NOTE(review): the meaning of the two boolean flags is defined by the CorefSystem constructor.
  CorefSystem system = new CorefSystem(documentMaker, algorithm, true, false);
  system.runOnConll(props);
}
Aggregations