Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.
The main method of the class TokensRegexAnnotatorDemo.
public static void main(String[] args) throws IOException {
// Usage: TokensRegexAnnotatorDemo [rulesFile [inputFile [outputFile]]]
// Rules file defaults to the bundled colors demo rules.
String rules = (args.length > 0)
    ? args[0]
    : "edu/stanford/nlp/ling/tokensregex/demo/rules/colors.rules.txt";
// Output goes to the third argument if given, otherwise to stdout.
PrintWriter out = (args.length > 2) ? new PrintWriter(args[2]) : new PrintWriter(System.out);
// Build a pipeline whose final stage is a custom TokensRegexAnnotator driven by the rules file.
Properties properties = new Properties();
properties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,tokensregexdemo");
properties.setProperty("customAnnotatorClass.tokensregexdemo", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
properties.setProperty("tokensregexdemo.rules", rules);
StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
// Annotate either the file named by the second argument or a small built-in example.
Annotation annotation = (args.length > 1)
    ? new Annotation(IOUtils.slurpFileNoExceptions(args[1]))
    : new Annotation("Both blue and light blue are nice colors.");
pipeline.annotate(annotation);
// An Annotation is a Map and you can get and use the various analyses individually.
// toString() on an Annotation prints just its text; toShorterString() shows what it contains.
out.println();
out.println("The top level annotation");
out.println(annotation.toShorterString());
out.println();
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
// For each token print word, lemma, POS, NE tag, and normalized NE tag.
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
out.println("token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
}
}
out.flush();
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.
The main method of the class TokensRegexDemo.
public static void main(String[] args) throws IOException {
// Usage: TokensRegexDemo [rulesFile [inputFile [outputFile]]]
String rules = (args.length > 0)
    ? args[0]
    : "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
// Write to the third argument if given, otherwise to stdout.
PrintWriter out = (args.length > 2) ? new PrintWriter(args[2]) : new PrintWriter(System.out);
// The extractor applies the TokensRegex rules to already-annotated sentences.
CoreMapExpressionExtractor<MatchedExpression> extractor =
    CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
StanfordCoreNLP pipeline =
    new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
// Annotate either the file named by the second argument or a small built-in example.
Annotation annotation = (args.length > 1)
    ? new Annotation(IOUtils.slurpFileNoExceptions(args[1]))
    : new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
pipeline.annotate(annotation);
// An Annotation is a Map and you can get and use the various analyses individually.
// toString() on an Annotation prints just its text; toShorterString() shows what it contains.
out.println();
out.println("The top level annotation");
out.println(annotation.toShorterString());
int sentenceIndex = 0;
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
sentenceIndex++;
out.println("Sentence #" + sentenceIndex);
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
out.println(" Token: " + "word=" + token.get(CoreAnnotations.TextAnnotation.class) + ", pos=" + token.get(CoreAnnotations.PartOfSpeechAnnotation.class) + ", ne=" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
}
// Run the rule extractor over the sentence and report each matched expression.
for (MatchedExpression matched : extractor.extractExpressions(sentence)) {
// Matched text and its computed value.
out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
// Token-level detail of the matched span.
CoreMap cm = matched.getAnnotation();
for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
out.println(" Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
}
}
}
out.flush();
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.
The main method of the class TokensRegexMatcher.
public static void main(String[] args) throws IOException {
// Usage: TokensRegexMatcher rulesFile inputFile [outFile]
if (args.length < 2) {
System.err.println("TokensRegexMatcher rules file [outFile]");
return;
}
String rules = args[0];
// Write to the third argument if given, otherwise to stdout.
PrintWriter out;
if (args.length > 2) {
out = new PrintWriter(args[2]);
} else {
out = new PrintWriter(System.out);
}
StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
Annotation annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
pipeline.annotate(annotation);
// Load each line of the rules file as a TokenSequencePattern.
List<TokenSequencePattern> tokenSequencePatterns = new ArrayList<>();
for (String line : ObjectBank.getLineIterator(rules)) {
tokenSequencePatterns.add(TokenSequencePattern.compile(line));
}
// Build the combined matcher once: it depends only on the patterns, but was
// previously reconstructed inside the per-sentence loop.
MultiPatternMatcher<CoreMap> multiMatcher = TokenSequencePattern.getMultiPatternMatcher(tokenSequencePatterns);
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
int i = 0;
for (CoreMap sentence : sentences) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
out.println("Sentence #" + ++i);
out.print(" Tokens:");
for (CoreLabel token : tokens) {
out.print(' ');
out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
}
out.println();
// Report every non-overlapping match, with all capture groups.
List<SequenceMatchResult<CoreMap>> answers = multiMatcher.findNonOverlapping(tokens);
int j = 0;
for (SequenceMatchResult<CoreMap> matched : answers) {
out.println(" Match #" + ++j);
for (int k = 0; k <= matched.groupCount(); k++) {
out.println(" group " + k + " = " + matched.group(k));
}
}
}
out.flush();
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.
The main method of the class TokensRegexMatcherDemo.
public static void main(String[] args) {
// Demo: extract (person, age) pairs from text with hand-written TokensRegex patterns.
StanfordCoreNLP pipeline =
    new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
Annotation annotation = new Annotation("Casey is 21. Sally Atkinson's age is 30.");
pipeline.annotate(annotation);
// Two patterns, each capturing named groups $who and $age.
String[] patterns = { "(?$who [ ner: PERSON]+ ) /is/ (?$age [ pos: CD ] )", "(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )" };
List<TokenSequencePattern> tokenSequencePatterns = new ArrayList<>();
for (String pattern : patterns) {
tokenSequencePatterns.add(TokenSequencePattern.compile(pattern));
}
MultiPatternMatcher<CoreMap> multiMatcher = TokenSequencePattern.getMultiPatternMatcher(tokenSequencePatterns);
int sentenceCount = 0;
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
sentenceCount++;
System.out.println("Sentence #" + sentenceCount);
System.out.print(" Tokens:");
for (CoreLabel token : tokens) {
System.out.print(' ');
System.out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
}
System.out.println();
// Report each non-overlapping match and its named groups.
int matchCount = 0;
for (SequenceMatchResult<CoreMap> matched : multiMatcher.findNonOverlapping(tokens)) {
matchCount++;
System.out.println(" Match #" + matchCount);
System.out.println(" match: " + matched.group(0));
System.out.println(" who: " + matched.group("$who"));
System.out.println(" age: " + matched.group("$age"));
}
}
}
Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.
The main method of the class TokensRegexRetokenizeDemo.
public static void main(String[] args) throws IOException {
// Usage: TokensRegexRetokenizeDemo [rulesFile [inputFile [outputFile]]]
// Compares default tokenization against a pipeline with a TokensRegex retokenize step.
PrintWriter out;
String rules;
if (args.length > 0) {
rules = args[0];
} else {
rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/retokenize.rules.txt";
}
if (args.length > 2) {
out = new PrintWriter(args[2]);
} else {
out = new PrintWriter(System.out);
}
String text;
if (args.length > 1) {
text = IOUtils.slurpFileNoExceptions(args[1]);
} else {
text = "Do we tokenize on hyphens? one-two-three-four. How about dates? 03-16-2015.";
}
// Baseline pipeline with the default tokenizer.
Properties propertiesDefaultTokenize = new Properties();
propertiesDefaultTokenize.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
// Bug fix: these properties were built but never used -- the no-arg constructor
// ignored them. Pass them explicitly so the baseline uses the intended annotators.
StanfordCoreNLP pipelineDefaultRetokenize = new StanfordCoreNLP(propertiesDefaultTokenize);
out.println("Default tokenization: ");
runPipeline(pipelineDefaultRetokenize, text, out);
// Pipeline with a custom retokenize annotator driven by the rules file.
Properties properties = new Properties();
properties.setProperty("annotators", "tokenize,retokenize,ssplit,pos,lemma,ner");
properties.setProperty("customAnnotatorClass.retokenize", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
properties.setProperty("retokenize.rules", rules);
StanfordCoreNLP pipelineWithRetokenize = new StanfordCoreNLP(properties);
out.println();
out.println("Always tokenize hyphens: ");
runPipeline(pipelineWithRetokenize, text, out);
// Bug fix: flush so buffered output is not lost when writing to a file
// (the other demos in this family all flush; this one did not).
out.flush();
}
Aggregations