use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
the class DcorefExactOutputITest method main.
/**
 * Command-line entry point for regenerating the expected output.
 * <br>
 * Runs the pipeline over the text read from {@code args[0]} and hands the
 * resulting coreference chains, together with {@code args[1]}, to
 * {@code saveResults}.
 *
 * @param args exactly two entries: the input path and the output path
 * @throws IllegalArgumentException if {@code args} does not hold exactly two entries
 * @throws Exception declared for failures while reading, annotating or saving
 */
public static void main(String[] args) throws Exception {
  final boolean hasBothPaths = args.length == 2;
  if (!hasBothPaths) {
    System.err.println("Expected args <input> <output>");
    throw new IllegalArgumentException();
  }

  final Properties config = new Properties();
  config.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
  final StanfordCoreNLP corefPipeline = new StanfordCoreNLP(config);

  // A sample input is
  // "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm"
  final String text = IOUtils.slurpFile(args[0]);
  final Map<Integer, CorefChain> corefChains =
      corefPipeline.process(text).get(CorefCoreAnnotations.CorefChainAnnotation.class);
  saveResults(args[1], corefChains);
}
use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
the class TokensRegexAnnotatorDemo method main.
/**
 * Demo entry point: runs a pipeline that ends with a TokensRegex annotator
 * (registered under the custom name {@code tokensregexdemo}) and prints
 * per-token results.
 *
 * @param args optional positional arguments:
 *             {@code args[0]} rules file (defaults to the bundled colors rules),
 *             {@code args[1]} file whose contents are annotated (defaults to a
 *             built-in sample sentence),
 *             {@code args[2]} output file (defaults to standard output)
 * @throws IOException if the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
  String rules;
  if (args.length > 0) {
    rules = args[0];
  } else {
    rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/colors.rules.txt";
  }

  // Remember whether we own the underlying stream: a file writer must be
  // closed when we are done, whereas System.out must stay open.
  final boolean writeToFile = args.length > 2;
  final PrintWriter out = writeToFile ? new PrintWriter(args[2]) : new PrintWriter(System.out);
  try {
    Properties properties = new Properties();
    properties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,tokensregexdemo");
    properties.setProperty("customAnnotatorClass.tokensregexdemo", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
    properties.setProperty("tokensregexdemo.rules", rules);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

    Annotation annotation;
    if (args.length > 1) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
    } else {
      annotation = new Annotation("Both blue and light blue are nice colors.");
    }
    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    out.println();
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
    out.println("The top level annotation");
    out.println(annotation.toShorterString());

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      // NOTE: depending on the rules in use, there may be other annotations
      // that are of interest other than just the tokens and what we print out here
      for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        // Print out words, lemma, ne, and normalized ne
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
        out.println("token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
      }
    }
  } finally {
    // Runs on the failure path too, so anything already written is not lost.
    if (writeToFile) {
      out.close(); // close() also flushes; releases the file handle
    } else {
      out.flush(); // never close System.out
    }
  }
}
use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
the class GetPatternsFromDataMultiClass method runPOSNEROnTokens.
/**
 * Annotates the given sentences with a pipeline assembled from the flags and
 * wraps each annotated sentence in a {@code DataInstance}.
 * <br>
 * The pipeline always runs {@code pos} and {@code lemma}. It adds
 * {@code parse} when {@code useTargetParserParentRestriction} is set,
 * otherwise {@code depparse} when {@code type} is {@code DEP}, and adds
 * {@code ner} when {@code useTargetNERRestriction} is set.
 *
 * @param sentsCM sentences used to build the document that is annotated
 * @param posModelPath value for the {@code pos.model} property; skipped when {@code null}
 * @param useTargetNERRestriction whether to add the {@code ner} annotator
 * @param prefix string prepended to each sentence's doc id to form its map key
 * @param useTargetParserParentRestriction whether to add {@code parse} and call
 *        {@code inferParentParseTag} on each sentence's tree
 * @param numThreads value stored under both the {@code nthreads} and {@code threads} properties
 * @param type pattern type, passed on to {@code DataInstance.getNewInstance}
 * @return map from {@code prefix + docId} to the data instance for that sentence
 */
public static Map<String, DataInstance> runPOSNEROnTokens(List<CoreMap> sentsCM, String posModelPath, boolean useTargetNERRestriction, String prefix, boolean useTargetParserParentRestriction, String numThreads, PatternFactory.PatternType type) {
  Annotation document = new Annotation(sentsCM);
  Properties pipelineProps = new Properties();

  List<String> annotatorNames = new ArrayList<>();
  annotatorNames.add("pos");
  annotatorNames.add("lemma");
  if (useTargetParserParentRestriction) {
    annotatorNames.add("parse");
  } else if (type.equals(PatternFactory.PatternType.DEP)) {
    annotatorNames.add("depparse");
  }
  if (useTargetNERRestriction) {
    annotatorNames.add("ner");
  }

  pipelineProps.setProperty("annotators", StringUtils.join(annotatorNames, ","));
  pipelineProps.setProperty("parse.maxlen", "80");
  // The thread count is stored under both property names.
  pipelineProps.setProperty("nthreads", numThreads);
  pipelineProps.setProperty("threads", numThreads);
  if (posModelPath != null) {
    pipelineProps.setProperty("pos.model", posModelPath);
  }

  StanfordCoreNLP annotator = new StanfordCoreNLP(pipelineProps, false);
  Redwood.log(Redwood.DBG, "Annotating text");
  annotator.annotate(document);
  Redwood.log(Redwood.DBG, "Done annotating text");

  Map<String, DataInstance> instancesByKey = new HashMap<>();
  List<CoreMap> annotatedSentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : annotatedSentences) {
    if (useTargetParserParentRestriction) {
      inferParentParseTag(sentence.get(TreeAnnotation.class));
    }
    DataInstance instance = DataInstance.getNewInstance(type, sentence);
    String key = prefix + sentence.get(CoreAnnotations.DocIDAnnotation.class);
    instancesByKey.put(key, instance);
  }
  return instancesByKey;
}
use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project cogcomp-nlp by CogComp.
the class StanfordCorefHandler method initialize.
/**
 * Builds the pipeline used by this handler, configured with the annotator
 * chain ending in {@code mention,coref}.
 *
 * @param rm resource manager; not read by this method
 */
@Override
public void initialize(ResourceManager rm) {
  final String annotatorChain = "tokenize,ssplit,pos,lemma,ner,parse,mention,coref";
  final Properties pipelineConfig = new Properties();
  pipelineConfig.setProperty("annotators", annotatorChain);
  this.pipeline = new StanfordCoreNLP(pipelineConfig);
}
use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project cogcomp-nlp by CogComp.
the class StanfordRelationsHandler method initialize.
/**
 * Builds the pipeline used by this handler, configured with the annotator
 * chain ending in {@code depparse,relation}.
 *
 * @param rm resource manager; not read by this method
 */
@Override
public void initialize(ResourceManager rm) {
  final String annotatorChain = "tokenize,ssplit,pos,lemma,ner,parse,depparse,relation";
  final Properties pipelineConfig = new Properties();
  pipelineConfig.setProperty("annotators", annotatorChain);
  this.pipeline = new StanfordCoreNLP(pipelineConfig);
}
Aggregations