Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class MachineReading, method makeMachineReading.
public static MachineReading makeMachineReading(String[] args) throws IOException {
  // install global parameters
  MachineReading mr = new MachineReading(args);
  //TODO:
  ArgumentParser.fillOptions(MachineReadingProperties.class, args);
  //Arguments.parse(args, mr);
  log.info("PERCENTAGE OF TRAIN: " + MachineReadingProperties.percentageOfTrain);
  // convert args to properties
  Properties props = StringUtils.argsToProperties(args);
  if (props == null) {
    throw new RuntimeException("ERROR: failed to find Properties in the given arguments!");
  }
  String logLevel = props.getProperty("logLevel", "INFO");
  setLoggerLevel(Level.parse(logLevel.toUpperCase()));
  // install reader specific parameters
  GenericDataSetReader reader = mr.makeReader(props);
  GenericDataSetReader auxReader = mr.makeAuxReader();
  Level readerLogLevel = Level.parse(MachineReadingProperties.readerLogLevel.toUpperCase());
  reader.setLoggerLevel(readerLogLevel);
  if (auxReader != null) {
    auxReader.setLoggerLevel(readerLogLevel);
  }
  log.info("The reader log level is set to " + readerLogLevel);
  //Execution.fillOptions(GenericDataSetReaderProps.class, args);
  //Arguments.parse(args, reader);
  // create the pre-processing pipeline
  StanfordCoreNLP pipe = new StanfordCoreNLP(props, false);
  reader.setProcessor(pipe);
  if (auxReader != null) {
    auxReader.setProcessor(pipe);
  }
  // create the results printers
  mr.makeResultsPrinters(args);
  return mr;
}
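Note that the pipeline is built with StanfordCoreNLP(props, false): the second argument disables enforcement of annotator requirements. As a usage sketch, the factory accepts standard CoreNLP-style -key value arguments; the argument values below are illustrative only, and a real run would also need the reader-specific properties that makeReader expects.

// A minimal, hypothetical driver for the factory; not the project's own entry point.
// Reader- and dataset-specific properties required by makeReader are omitted here.
String[] args = { "-annotators", "tokenize,ssplit,pos,lemma,ner,parse", "-logLevel", "INFO" };
MachineReading mr = MachineReading.makeMachineReading(args);
// mr now holds a configured reader, preprocessing pipeline, and results printers.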
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class AceReader, method main.
// simple testing code
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  AceReader r = new AceReader(new StanfordCoreNLP(props, false), false);
  r.setLoggerLevel(Level.INFO);
  r.parse("/scr/nlp/data/ACE2005/");
  // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
  // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
  log.info("done");
}
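The parse method of GenericDataSetReader returns an Annotation covering the whole corpus, so its result can be inspected with the usual CoreMap accessors. A minimal sketch, assuming a locally available copy of the ACE 2005 data (the path below is hypothetical):

Annotation corpus = r.parse("/path/to/ACE2005/");  // hypothetical local path
for (CoreMap sentence : corpus.get(CoreAnnotations.SentencesAnnotation.class)) {
  // Each sentence carries the token, POS, and NER annotations added by the pipeline.
  System.out.println(sentence.get(CoreAnnotations.TextAnnotation.class));
}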
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class OpenIEDemo, method main.
public static void main(String[] args) throws Exception {
  // Create the Stanford CoreNLP pipeline
  Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // Annotate an example document.
  String text;
  if (args.length > 0) {
    text = IOUtils.slurpFile(args[0]);
  } else {
    text = "Obama was born in Hawaii. He is our president.";
  }
  Annotation doc = new Annotation(text);
  pipeline.annotate(doc);
  // Loop over sentences in the document
  int sentNo = 0;
  for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println("Sentence #" + ++sentNo + ": " + sentence.get(CoreAnnotations.TextAnnotation.class));
    // Print SemanticGraph
    System.out.println(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
    // Get the OpenIE triples for the sentence
    Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    // Print the triples
    for (RelationTriple triple : triples) {
      System.out.println(triple.confidence + "\t" + triple.subjectLemmaGloss() + "\t" + triple.relationLemmaGloss() + "\t" + triple.objectLemmaGloss());
    }
    // Alternately, to only run e.g., the clause splitter:
    List<SentenceFragment> clauses = new OpenIE(props).clausesInSentence(sentence);
    for (SentenceFragment clause : clauses) {
      System.out.println(clause.parseTree.toString(SemanticGraph.OutputFormat.LIST));
    }
    System.out.println();
  }
}
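If only the triples are needed, the same extraction is also reachable through CoreNLP's simple API, which constructs and caches the required annotators lazily. A minimal sketch; treat the exact accessor set on edu.stanford.nlp.simple.Sentence as an assumption about your CoreNLP version:

import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.simple.Sentence;

public class OpenIESimpleSketch {
  public static void main(String[] args) {
    Sentence sent = new Sentence("Obama was born in Hawaii.");
    // openieTriples() runs tokenization through OpenIE on first access.
    for (RelationTriple triple : sent.openieTriples()) {
      System.out.println(triple.confidence + "\t" + triple.subjectGloss() + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
    }
  }
}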
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class TokensRegexDemo, method main.
public static void main(String[] args) throws IOException {
  String rules;
  if (args.length > 0) {
    rules = args[0];
  } else {
    rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
  }
  PrintWriter out;
  if (args.length > 2) {
    out = new PrintWriter(args[2]);
  } else {
    out = new PrintWriter(System.out);
  }
  CoreMapExpressionExtractor<MatchedExpression> extractor = CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
  Annotation annotation;
  if (args.length > 1) {
    annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
  } else {
    annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
  }
  pipeline.annotate(annotation);
  // An Annotation is a Map and you can get and use the various analyses individually.
  out.println();
  // The toString() method on an Annotation just prints the text of the Annotation,
  // but you can see what is in it with other methods like toShorterString().
  out.println("The top level annotation");
  out.println(annotation.toShorterString());
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence);
    for (MatchedExpression matched : matchedExpressions) {
      // Print out matched text and value
      out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
      // Print out token information
      CoreMap cm = matched.getAnnotation();
      for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        out.println(" Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
      }
    }
  }
  out.flush();
}
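For one-off patterns there is no need to go through a rules file and CoreMapExpressionExtractor; a TokenSequencePattern can be compiled and matched directly against a token list. A minimal sketch reusing the annotation and out from the demo above (the pattern string follows TokensRegex syntax; the specific pattern is illustrative):

// Match a cardinal number, the literal word "plus", then another cardinal number.
TokenSequencePattern pattern = TokenSequencePattern.compile("[{tag:/CD/}] /plus/ [{tag:/CD/}]");
List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
while (matcher.find()) {
  out.println("Direct match: " + matcher.group());
}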
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.
The class TokensRegexRetokenizeDemo, method main.
public static void main(String[] args) throws IOException {
  PrintWriter out;
  String rules;
  if (args.length > 0) {
    rules = args[0];
  } else {
    rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/retokenize.rules.txt";
  }
  if (args.length > 2) {
    out = new PrintWriter(args[2]);
  } else {
    out = new PrintWriter(System.out);
  }
  String text;
  if (args.length > 1) {
    text = IOUtils.slurpFileNoExceptions(args[1]);
  } else {
    text = "Do we tokenize on hyphens? one-two-three-four. How about dates? 03-16-2015.";
  }
  Properties propertiesDefaultTokenize = new Properties();
  propertiesDefaultTokenize.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
  // Pass the properties built above; the no-argument constructor would ignore them.
  StanfordCoreNLP pipelineDefaultRetokenize = new StanfordCoreNLP(propertiesDefaultTokenize);
  out.println("Default tokenization: ");
  runPipeline(pipelineDefaultRetokenize, text, out);
  Properties properties = new Properties();
  properties.setProperty("annotators", "tokenize,retokenize,ssplit,pos,lemma,ner");
  properties.setProperty("customAnnotatorClass.retokenize", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
  properties.setProperty("retokenize.rules", rules);
  StanfordCoreNLP pipelineWithRetokenize = new StanfordCoreNLP(properties);
  out.println();
  out.println("Always tokenize hyphens: ");
  runPipeline(pipelineWithRetokenize, text, out);
}
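The runPipeline helper is referenced above but not shown in this excerpt. A plausible minimal reconstruction, assuming it simply annotates the text and prints each sentence's tokens (the actual demo may print different fields):

// Hypothetical reconstruction of the helper used above.
public static void runPipeline(StanfordCoreNLP pipeline, String text, PrintWriter out) {
  Annotation annotation = new Annotation(text);
  pipeline.annotate(annotation);
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      out.print(token.word() + " ");
    }
    out.println();
  }
  out.flush();
}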