Search in sources :

Example 66 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class OpenIEServlet method doGet.

/**
   * Actually perform the GET request, given all the relevant information (already sanity checked).
   * This is the meat of the servlet code.
   *
   * <p>The query is trimmed; an empty query produces no output at all. If the query does not
   * already end in {@code '.'}, {@code '!'} or {@code '?'}, a period is appended before annotating.
   * The primary pipeline is run first, and the backoff pipeline is run only when the primary one
   * yielded no triples. Exactly one JSON object is then written to {@code out}, with the keys
   * {@code ok}, {@code entailments}, {@code triples} and {@code msg}, both on success and on failure.
   *
   * @param out The writer to write the output to.
   * @param q The query string.
   */
private void doGet(PrintWriter out, String q) {
    // Clean the string a bit
    q = q.trim();
    if (q.length() == 0) {
        return;
    }
    // Make sure the text ends in sentence-final punctuation before annotating it.
    char lastChar = q.charAt(q.length() - 1);
    if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
        q = q + ".";
    }
    // Annotate
    Annotation ann = new Annotation(q);
    try {
        // Collect results
        Set<String> entailments = new HashSet<>();
        // LinkedHashSet: triples are written out in insertion order.
        Set<String> triples = new LinkedHashSet<>();
        // pipeline must come before backoff
        runWithPipeline(pipeline, ann, triples, entailments);
        if (triples.size() == 0) {
            // backoff must come after pipeline
            runWithPipeline(backoff, ann, triples, entailments);
        }
        // Write results
        // NOTE(review): the set elements are joined verbatim, so they are presumably already
        // JSON-encoded by runWithPipeline -- confirm against that method.
        out.println("{ " + "\"ok\":true, " + "\"entailments\": [" + StringUtils.join(entailments, ",") + "], " + "\"triples\": [" + StringUtils.join(triples, ",") + "], " + "\"msg\": \"\"" + " }");
    } catch (Throwable t) {
        // Servlet boundary: report any failure to the client instead of propagating it.
        // Keys are quoted so that the error payload is valid JSON, matching the success payload.
        // NOTE(review): assumes quote() returns a complete JSON string literal (and copes with a
        // null message) -- confirm against its definition.
        out.println("{ \"ok\":false, \"entailments\": [], \"triples\": [], \"msg\": " + quote(t.getMessage()) + " }");
    }
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 67 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class TokensRegexDemo method main.

/**
 * Runs a TokensRegex expression extractor over some text and prints every matched expression
 * together with its tokens.
 *
 * @param args optional positional arguments: {@code args[0]} is the rules file (defaults to the
 *     bundled {@code expr.rules.txt}), {@code args[1]} is a file whose contents are annotated
 *     (defaults to a built-in arithmetic sentence), {@code args[2]} is a file to write the output
 *     to (defaults to standard output)
 * @throws IOException declared by the original signature; e.g. the output file cannot be opened
 */
public static void main(String[] args) throws IOException {
    String rules;
    if (args.length > 0) {
        rules = args[0];
    } else {
        rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
    }
    // Only a writer that we opened on a file is ours to close; System.out must stay open.
    boolean writesToFile = args.length > 2;
    PrintWriter out;
    if (writesToFile) {
        out = new PrintWriter(args[2]);
    } else {
        out = new PrintWriter(System.out);
    }
    try {
        CoreMapExpressionExtractor<MatchedExpression> extractor = CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
        StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
        Annotation annotation;
        if (args.length > 1) {
            annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
        } else {
            annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
        }
        pipeline.annotate(annotation);
        // An Annotation is a Map and you can get and use the various analyses individually.
        out.println();
        // The toString() method on an Annotation just prints the text of the Annotation
        // But you can see what is in it with other methods like toShorterString()
        out.println("The top level annotation");
        out.println(annotation.toShorterString());
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence);
            for (MatchedExpression matched : matchedExpressions) {
                // Print out matched text and value
                out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
                // Print out token information
                CoreMap cm = matched.getAnnotation();
                for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
                    String word = token.get(CoreAnnotations.TextAnnotation.class);
                    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                    String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                    String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                    out.println("  Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
                }
            }
        }
    } finally {
        // Runs on failure too, so output produced before an exception is not lost in the buffer.
        if (writesToFile) {
            // close() flushes and releases the file handle.
            out.close();
        } else {
            out.flush();
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) PrintWriter(java.io.PrintWriter)

Example 68 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class TokensRegexRetokenizeDemo method runPipeline.

/**
 * Annotates {@code text} with {@code pipeline} and prints a short summary of the resulting
 * annotation followed by one line per token (word, lemma, POS tag, NE tag and normalized NE tag).
 *
 * @param pipeline the pipeline used to annotate the text
 * @param text the raw text to annotate
 * @param out the writer that receives the report; it is flushed, not closed, before returning
 */
public static void runPipeline(StanfordCoreNLP pipeline, String text, PrintWriter out) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    // Header: a blank line, a title, then the compact rendering of the whole annotation
    // (toShorterString() rather than toString()).
    out.println();
    out.println("The top level annotation");
    out.println(document.toShorterString());

    // Body: one report line for every token of every sentence.
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel tok : sentenceTokens) {
            String surface = tok.get(CoreAnnotations.TextAnnotation.class);
            String baseForm = tok.get(CoreAnnotations.LemmaAnnotation.class);
            String tag = tok.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String entity = tok.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            String entityNormalized = tok.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);

            // A missing annotation is rendered as the text "null", as with string concatenation.
            StringBuilder line = new StringBuilder("token: ");
            line.append("word=").append(surface);
            line.append(", lemma=").append(baseForm);
            line.append(", pos=").append(tag);
            line.append(", ne=").append(entity);
            line.append(", normalized=").append(entityNormalized);
            out.println(line.toString());
        }
    }
    out.flush();
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 69 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class SentimentPipeline method main.

/**
 * Runs the tree-based sentiment model on some text.
 *
 * <p>Recognized flags (matched case-insensitively): {@code -sentimentModel}, {@code -parserModel},
 * {@code -file}, {@code -fileList} (comma-separated), {@code -stdin}, {@code -input},
 * {@code -output} (comma-separated), {@code -filterUnknown}, {@code -tlppClass} and {@code -help}.
 * Exactly one of {@code -file}, {@code -fileList} or {@code -stdin} must be given.
 *
 * @param args the command line arguments
 * @throws IOException if reading the input or writing a {@code .out} file fails
 * @throws IllegalArgumentException on an unknown flag, when not exactly one input source is
 *     selected, or when {@code -stdin} is combined with {@code -input TREES}
 */
public static void main(String[] args) throws IOException {
    String parserModel = null;
    String sentimentModel = null;
    String filename = null;
    String fileList = null;
    boolean stdin = false;
    boolean filterUnknown = false;
    List<Output> outputFormats = Collections.singletonList(Output.ROOT);
    Input inputFormat = Input.TEXT;
    String tlppClass = DEFAULT_TLPP_CLASS;
    // Each branch advances argIndex itself: by 2 for flags that take a value, by 1 otherwise.
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
            sentimentModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
            parserModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-file")) {
            filename = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
            fileList = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
            stdin = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-input")) {
            inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase());
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            String[] formats = args[argIndex + 1].split(",");
            outputFormats = new ArrayList<>();
            for (String format : formats) {
                outputFormats.add(Output.valueOf(format.toUpperCase()));
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
            filterUnknown = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
            tlppClass = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-help")) {
            help();
            System.exit(0);
        } else {
            // Report the offending argument itself. Indexing argIndex + 1 would name the wrong
            // token and run off the end of the array when the unknown flag is the last one.
            log.info("Unknown argument " + args[argIndex]);
            help();
            throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
        }
    }
    // We construct two pipelines.  One handles tokenization, if
    // necessary.  The other takes tokenized sentences and converts
    // them to sentiment trees.
    Properties pipelineProps = new Properties();
    Properties tokenizerProps = null;
    if (sentimentModel != null) {
        pipelineProps.setProperty("sentiment.model", sentimentModel);
    }
    if (parserModel != null) {
        pipelineProps.setProperty("parse.model", parserModel);
    }
    if (inputFormat == Input.TREES) {
        pipelineProps.setProperty("annotators", "binarizer, sentiment");
        pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
        pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
        pipelineProps.setProperty("enforceRequirements", "false");
    } else {
        pipelineProps.setProperty("annotators", "parse, sentiment");
        pipelineProps.setProperty("enforceRequirements", "false");
        tokenizerProps = new Properties();
        tokenizerProps.setProperty("annotators", "tokenize, ssplit");
    }
    if (stdin && tokenizerProps != null) {
        tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
    }
    // Exactly one input source must have been selected.
    int count = 0;
    if (filename != null) {
        count++;
    }
    if (fileList != null) {
        count++;
    }
    if (stdin) {
        count++;
    }
    if (count > 1) {
        throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
    }
    if (count == 0) {
        throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
    }
    if (stdin && tokenizerProps == null) {
        // The stdin branch below feeds every line through the tokenizer, which is only built for
        // non-tree input; fail with a clear message instead of a NullPointerException later on.
        throw new IllegalArgumentException("-stdin cannot be combined with -input TREES");
    }
    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    if (filename != null) {
        // Process a file.  The pipeline will do tokenization, which
        // means it will split it into sentences as best as possible
        // with the tokenizer.
        List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
        for (Annotation annotation : annotations) {
            pipeline.annotate(annotation);
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                System.out.println(sentence);
                outputTree(System.out, sentence, outputFormats);
            }
        }
    } else if (fileList != null) {
        // for each file.
        for (String file : fileList.split(",")) {
            List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
            // Results go to "<file>.out"; try-with-resources flushes and closes the stream even
            // when annotation or output fails part-way through.
            try (PrintStream pout = new PrintStream(new FileOutputStream(file + ".out"))) {
                for (Annotation annotation : annotations) {
                    pipeline.annotate(annotation);
                    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                        pout.println(sentence);
                        outputTree(pout, sentence, outputFormats);
                    }
                }
            }
        }
    } else {
        // Process stdin.  Each line will be treated as a single sentence.
        log.info("Reading in text from stdin.");
        log.info("Please enter one sentence per line.");
        log.info("Processing will end when EOF is reached.");
        BufferedReader reader = IOUtils.readerFromStdin("utf-8");
        for (String line; (line = reader.readLine()) != null; ) {
            line = line.trim();
            if (!line.isEmpty()) {
                Annotation annotation = tokenizer.process(line);
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    outputTree(System.out, sentence, outputFormats);
                }
            } else {
                // Output blank lines for blank lines so the tool can be
                // used for line-by-line text processing
                System.out.println();
            }
        }
    }
}
Also used : PrintStream(java.io.PrintStream) Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) FileOutputStream(java.io.FileOutputStream) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 70 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class Sentence method openieTriples.

/**
   * Returns the OpenIE triples for this sentence, running the OpenIE annotator on the
   * enclosing document first.
   * Be aware that this call can be slower than it looks, because the underlying Protobuf
   * representation is converted back into {@link CoreLabel}s along the way.
   *
   * @param props The properties to use for the OpenIE annotator.
   * @return A collection of {@link RelationTriple} objects representing the OpenIE triples in the sentence.
   */
public Collection<RelationTriple> openieTriples(Properties props) {
    document.runOpenie(props);
    synchronized (impl) {
        // NOTE(review): the returned labels are not used below; the call is kept because it may
        // have side effects this method relies on -- confirm against asCoreLabels().
        List<CoreLabel> coreLabels = asCoreLabels();
        final Annotation annotatedDocument = document.asAnnotation();
        // Decode each serialized triple against the annotated document and this sentence's index.
        Stream<RelationTriple> decodedTriples = impl.getOpenieTripleList()
            .stream()
            .map(protoTriple -> ProtobufAnnotationSerializer.fromProto(protoTriple, annotatedDocument, this.sentenceIndex()));
        return decodedTriples.collect(Collectors.toList());
    }
}
Also used : java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) ProtobufAnnotationSerializer(edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer) edu.stanford.nlp.util(edu.stanford.nlp.util) BiFunction(java.util.function.BiFunction) CoreNLPProtos(edu.stanford.nlp.pipeline.CoreNLPProtos) Tree(edu.stanford.nlp.trees.Tree) SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) Function(java.util.function.Function) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) OutputStream(java.io.OutputStream) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Polarity(edu.stanford.nlp.naturalli.Polarity) OperatorSpec(edu.stanford.nlp.naturalli.OperatorSpec) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Consumer(java.util.function.Consumer) Annotator(edu.stanford.nlp.pipeline.Annotator) SemanticGraphFactory(edu.stanford.nlp.semgraph.SemanticGraphFactory) Stream(java.util.stream.Stream) Annotation(edu.stanford.nlp.pipeline.Annotation) TokenSequenceMatcher(edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher) SentenceFragment(edu.stanford.nlp.naturalli.SentenceFragment) InputStream(java.io.InputStream) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Annotation(edu.stanford.nlp.pipeline.Annotation)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)91 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)58 CoreMap (edu.stanford.nlp.util.CoreMap)50 CoreLabel (edu.stanford.nlp.ling.CoreLabel)30 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)27 ArrayList (java.util.ArrayList)25 Properties (java.util.Properties)25 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)19 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)14 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)13 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)11 Tree (edu.stanford.nlp.trees.Tree)10 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)8 IOException (java.io.IOException)8 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)7 CorefChain (edu.stanford.nlp.coref.data.CorefChain)6 EntityMentionsAnnotation (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation)6 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)6