Search in sources :

Example 61 with StanfordCoreNLP

Use of edu.stanford.nlp.pipeline.StanfordCoreNLP in the CoreNLP project by stanfordnlp.

The main method of the class TokensRegexAnnotatorDemo.

/**
 * Demo of running a StanfordCoreNLP pipeline with a custom TokensRegex annotator.
 *
 * Usage: TokensRegexAnnotatorDemo [rulesFile [inputFile [outputFile]]]
 * - args[0]: TokensRegex rules file (defaults to the bundled colors demo rules)
 * - args[1]: input text file (defaults to a fixed demo sentence)
 * - args[2]: output file (defaults to stdout)
 *
 * @throws IOException if the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    String rules;
    if (args.length > 0) {
        rules = args[0];
    } else {
        rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/colors.rules.txt";
    }
    // Track whether the writer wraps a file so we know to close it at the end;
    // the original code only flushed, leaking the file descriptor.
    boolean outIsFile = args.length > 2;
    PrintWriter out;
    if (outIsFile) {
        out = new PrintWriter(args[2]);
    } else {
        out = new PrintWriter(System.out);
    }
    try {
        Properties properties = new Properties();
        // Register the TokensRegexAnnotator under the name "tokensregexdemo".
        properties.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,tokensregexdemo");
        properties.setProperty("customAnnotatorClass.tokensregexdemo", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
        properties.setProperty("tokensregexdemo.rules", rules);
        StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
        Annotation annotation;
        if (args.length > 1) {
            annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
        } else {
            annotation = new Annotation("Both blue and light blue are nice colors.");
        }
        pipeline.annotate(annotation);
        // An Annotation is a Map and you can get and use the various analyses individually.
        // The toString() method on an Annotation just prints the text of the Annotation
        // But you can see what is in it with other methods like toShorterString()
        out.println();
        out.println("The top level annotation");
        out.println(annotation.toShorterString());
        out.println();
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            // that are of interest other than just the tokens and what we print out here
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                // Print out words, lemma, ne, and normalized ne
                String word = token.get(CoreAnnotations.TextAnnotation.class);
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                String normalized = token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
                out.println("token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne + ", normalized=" + normalized);
            }
        }
    } finally {
        out.flush();
        // Close only a file-backed writer; closing a wrapper around System.out
        // would close the process's standard output stream.
        if (outIsFile) {
            out.close();
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) PrintWriter(java.io.PrintWriter)

Example 62 with StanfordCoreNLP

use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

The main method of the class TokensRegexDemo.

/**
 * Demo of extracting expressions with a CoreMapExpressionExtractor built from
 * a TokensRegex rules file.
 *
 * Usage: TokensRegexDemo [rulesFile [inputFile [outputFile]]]
 *
 * @throws IOException if the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    // Rules file: first CLI argument, or the bundled arithmetic-expression demo rules.
    final String rules = (args.length > 0) ? args[0] : "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
    // Output: third CLI argument as a file, otherwise standard out.
    final PrintWriter writer = (args.length > 2) ? new PrintWriter(args[2]) : new PrintWriter(System.out);
    CoreMapExpressionExtractor<MatchedExpression> extractor =
            CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    // Input text: second CLI argument as a file, otherwise a fixed demo sentence.
    final String text = (args.length > 1)
            ? IOUtils.slurpFileNoExceptions(args[1])
            : "( ( five plus three plus four ) * 2 ) divided by three";
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    // An Annotation is a Map and you can get and use the various analyses individually.
    writer.println();
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
    writer.println("The top level annotation");
    writer.println(document.toShorterString());
    List<CoreMap> sentenceList = document.get(CoreAnnotations.SentencesAnnotation.class);
    int sentenceNum = 0;
    for (CoreMap sentence : sentenceList) {
        sentenceNum++;
        writer.println("Sentence #" + sentenceNum);
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            writer.println("  Token: " + "word=" + token.get(CoreAnnotations.TextAnnotation.class) + ",  pos=" + token.get(CoreAnnotations.PartOfSpeechAnnotation.class) + ", ne=" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }
        for (MatchedExpression matched : extractor.extractExpressions(sentence)) {
            // Print out matched text and value
            writer.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
            // Print out token information for the matched span
            CoreMap span = matched.getAnnotation();
            for (CoreLabel token : span.get(CoreAnnotations.TokensAnnotation.class)) {
                writer.println("  Matched token: " + "word=" + token.get(CoreAnnotations.TextAnnotation.class)
                        + ", lemma=" + token.get(CoreAnnotations.LemmaAnnotation.class)
                        + ", pos=" + token.get(CoreAnnotations.PartOfSpeechAnnotation.class)
                        + ", ne=" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
            }
        }
    }
    writer.flush();
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) PrintWriter(java.io.PrintWriter)

Example 63 with StanfordCoreNLP

use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

The main method of the class TokensRegexMatcher.

/**
 * Reads TokenSequencePatterns (one per line) from a rules file and reports all
 * non-overlapping matches per sentence of the input file.
 *
 * Usage: TokensRegexMatcher rules file [outFile]
 *
 * @throws IOException if the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.err.println("TokensRegexMatcher rules file [outFile]");
        return;
    }
    String rules = args[0];
    PrintWriter out;
    if (args.length > 2) {
        out = new PrintWriter(args[2]);
    } else {
        out = new PrintWriter(System.out);
    }
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    Annotation annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
    pipeline.annotate(annotation);
    // Load lines of file as TokenSequencePatterns
    List<TokenSequencePattern> tokenSequencePatterns = new ArrayList<>();
    for (String line : ObjectBank.getLineIterator(rules)) {
        tokenSequencePatterns.add(TokenSequencePattern.compile(line));
    }
    // Build the combined matcher once: it depends only on the patterns, not on
    // the sentence, so constructing it inside the sentence loop (as the original
    // code did) repeated the same work for every sentence.
    MultiPatternMatcher<CoreMap> multiMatcher = TokenSequencePattern.getMultiPatternMatcher(tokenSequencePatterns);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    int i = 0;
    for (CoreMap sentence : sentences) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        out.println("Sentence #" + ++i);
        out.print("  Tokens:");
        for (CoreLabel token : tokens) {
            out.print(' ');
            out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
        }
        out.println();
        List<SequenceMatchResult<CoreMap>> answers = multiMatcher.findNonOverlapping(tokens);
        int j = 0;
        for (SequenceMatchResult<CoreMap> matched : answers) {
            out.println("  Match #" + ++j);
            // group(0) is the whole match; groups 1..groupCount() are capture groups.
            for (int k = 0; k <= matched.groupCount(); k++) {
                out.println("    group " + k + " = " + matched.group(k));
            }
        }
    }
    out.flush();
}
Also used : ArrayList(java.util.ArrayList) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) SequenceMatchResult(edu.stanford.nlp.ling.tokensregex.SequenceMatchResult) PrintWriter(java.io.PrintWriter)

Example 64 with StanfordCoreNLP

use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

The main method of the class TokensRegexMatcherDemo.

/**
 * Demo of matching hard-coded TokensRegex patterns (person + age) against a
 * fixed two-sentence text, printing the tokens and the named capture groups
 * {@code $who} and {@code $age} of each match to standard out.
 */
public static void main(String[] args) {
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    Annotation annotation = new Annotation("Casey is 21. Sally Atkinson's age is 30.");
    pipeline.annotate(annotation);
    // Each pattern captures a PERSON span as $who and a cardinal number as $age.
    String[] patterns = {
        "(?$who [ ner: PERSON]+ ) /is/ (?$age [ pos: CD ] )",
        "(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )"
    };
    List<TokenSequencePattern> compiledPatterns = new ArrayList<>();
    for (String pattern : patterns) {
        compiledPatterns.add(TokenSequencePattern.compile(pattern));
    }
    MultiPatternMatcher<CoreMap> matcher = TokenSequencePattern.getMultiPatternMatcher(compiledPatterns);
    int sentenceNum = 0;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        sentenceNum++;
        System.out.println("Sentence #" + sentenceNum);
        System.out.print("  Tokens:");
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            System.out.print(' ');
            System.out.print(token.toShortString("Text", "PartOfSpeech", "NamedEntityTag"));
        }
        System.out.println();
        int matchNum = 0;
        for (SequenceMatchResult<CoreMap> match : matcher.findNonOverlapping(tokens)) {
            matchNum++;
            System.out.println("  Match #" + matchNum);
            System.out.println("    match: " + match.group(0));
            System.out.println("      who: " + match.group("$who"));
            System.out.println("      age: " + match.group("$age"));
        }
    }
}
Also used : ArrayList(java.util.ArrayList) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) SequenceMatchResult(edu.stanford.nlp.ling.tokensregex.SequenceMatchResult)

Example 65 with StanfordCoreNLP

use of edu.stanford.nlp.pipeline.StanfordCoreNLP in project CoreNLP by stanfordnlp.

The main method of the class TokensRegexRetokenizeDemo.

/**
 * Demo comparing default tokenization against retokenization driven by a
 * TokensRegex rules file (e.g. always splitting on hyphens).
 *
 * Usage: TokensRegexRetokenizeDemo [rulesFile [inputFile [outputFile]]]
 *
 * @throws IOException if the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    PrintWriter out;
    String rules;
    if (args.length > 0) {
        rules = args[0];
    } else {
        rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/retokenize.rules.txt";
    }
    if (args.length > 2) {
        out = new PrintWriter(args[2]);
    } else {
        out = new PrintWriter(System.out);
    }
    String text;
    if (args.length > 1) {
        text = IOUtils.slurpFileNoExceptions(args[1]);
    } else {
        text = "Do we tokenize on hyphens? one-two-three-four.  How about dates? 03-16-2015.";
    }
    Properties propertiesDefaultTokenize = new Properties();
    propertiesDefaultTokenize.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
    // Bug fix: these properties were previously built but never used — the
    // no-arg StanfordCoreNLP() constructor silently fell back to the default
    // annotator list instead of the one configured above.
    StanfordCoreNLP pipelineDefaultRetokenize = new StanfordCoreNLP(propertiesDefaultTokenize);
    out.println("Default tokenization: ");
    runPipeline(pipelineDefaultRetokenize, text, out);
    // Second pipeline inserts a custom TokensRegexAnnotator ("retokenize")
    // between tokenization and sentence splitting to apply the rules file.
    Properties properties = new Properties();
    properties.setProperty("annotators", "tokenize,retokenize,ssplit,pos,lemma,ner");
    properties.setProperty("customAnnotatorClass.retokenize", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
    properties.setProperty("retokenize.rules", rules);
    StanfordCoreNLP pipelineWithRetokenize = new StanfordCoreNLP(properties);
    out.println();
    out.println("Always tokenize hyphens: ");
    runPipeline(pipelineWithRetokenize, text, out);
}
Also used : Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) PrintWriter(java.io.PrintWriter)

Aggregations

StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)71 Properties (java.util.Properties)44 Annotation (edu.stanford.nlp.pipeline.Annotation)40 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)33 CoreMap (edu.stanford.nlp.util.CoreMap)33 Test (org.junit.Test)15 CoreLabel (edu.stanford.nlp.ling.CoreLabel)12 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)12 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)10 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)6 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)6 StanfordTextProcessor (com.graphaware.nlp.processor.stanford.StanfordTextProcessor)5 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)5 PrintWriter (java.io.PrintWriter)5 ArrayList (java.util.ArrayList)5 AnnotatedText (com.graphaware.nlp.domain.AnnotatedText)3 CorefChain (edu.stanford.nlp.coref.data.CorefChain)3 GoldAnswerAnnotation (edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation)3 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)3 TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern)3