Search in sources :

Example 96 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

In the class SentimentPipeline, the method main:

/**
 * Runs the tree-based sentiment model on some text.
 */
/**
 * Runs the tree-based sentiment model on some text.
 *
 * <p>Input may come from a single file ({@code -file}), a comma-separated list of
 * files ({@code -fileList}, each written to {@code <file>.out}), or standard input
 * ({@code -stdin}, one sentence per line). Exactly one of the three must be given.
 *
 * @param args command-line flags; see {@code help()} for the full list
 * @throws IOException if reading input or writing per-file output fails
 */
public static void main(String[] args) throws IOException {
    String parserModel = null;
    String sentimentModel = null;
    String filename = null;
    String fileList = null;
    boolean stdin = false;
    boolean filterUnknown = false;
    List<Output> outputFormats = Collections.singletonList(Output.ROOT);
    Input inputFormat = Input.TEXT;
    String tlppClass = DEFAULT_TLPP_CLASS;
    // Manual flag parsing: each branch advances argIndex past the flag (and its
    // value, if it takes one).
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
            sentimentModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
            parserModel = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-file")) {
            filename = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-fileList")) {
            fileList = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-stdin")) {
            stdin = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-input")) {
            inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase(Locale.ROOT));
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            String[] formats = args[argIndex + 1].split(",");
            outputFormats = new ArrayList<>();
            for (String format : formats) {
                outputFormats.add(Output.valueOf(format.toUpperCase(Locale.ROOT)));
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) {
            filterUnknown = true;
            argIndex++;
        } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) {
            tlppClass = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-help")) {
            help();
            System.exit(0);
        } else {
            // BUG FIX: this previously reported args[argIndex + 1], which names
            // the token AFTER the unrecognized flag and throws
            // ArrayIndexOutOfBoundsException when the unknown flag is the last
            // argument. The offending token is args[argIndex] itself.
            log.info("Unknown argument " + args[argIndex]);
            help();
            throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
        }
    }
    // We construct two pipelines.  One handles tokenization, if
    // necessary.  The other takes tokenized sentences and converts
    // them to sentiment trees.
    Properties pipelineProps = new Properties();
    Properties tokenizerProps = null;
    if (sentimentModel != null) {
        pipelineProps.setProperty("sentiment.model", sentimentModel);
    }
    if (parserModel != null) {
        pipelineProps.setProperty("parse.model", parserModel);
    }
    if (inputFormat == Input.TREES) {
        // Pre-parsed trees: skip the parser, just binarize and run sentiment.
        pipelineProps.setProperty("annotators", "binarizer, sentiment");
        pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
        pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
        pipelineProps.setProperty("enforceRequirements", "false");
    } else {
        // Raw text: parse into binary trees, plus a separate tokenizer pipeline.
        pipelineProps.setProperty("annotators", "parse, sentiment");
        pipelineProps.setProperty("parse.binaryTrees", "true");
        pipelineProps.setProperty("parse.buildgraphs", "false");
        pipelineProps.setProperty("enforceRequirements", "false");
        tokenizerProps = new Properties();
        tokenizerProps.setProperty("annotators", "tokenize, ssplit");
    }
    if (stdin && tokenizerProps != null) {
        // One sentence per input line in interactive mode.
        tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
    }
    // Exactly one input source must be selected.
    int count = 0;
    if (filename != null)
        count++;
    if (fileList != null)
        count++;
    if (stdin)
        count++;
    if (count > 1) {
        throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
    }
    if (count == 0) {
        throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
    }
    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    if (filename != null) {
        // Process a file.  The pipeline will do tokenization, which
        // means it will split it into sentences as best as possible
        // with the tokenizer.
        List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
        for (Annotation annotation : annotations) {
            pipeline.annotate(annotation);
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                System.out.println(sentence);
                outputTree(System.out, sentence, outputFormats);
            }
        }
    } else if (fileList != null) {
        // For each file, write results to a sibling "<file>.out".
        for (String file : fileList.split(",")) {
            List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
            FileOutputStream fout = new FileOutputStream(file + ".out");
            PrintStream pout = new PrintStream(fout);
            for (Annotation annotation : annotations) {
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    pout.println(sentence);
                    outputTree(pout, sentence, outputFormats);
                }
            }
            pout.flush();
            fout.close();
        }
    } else {
        // Process stdin.  Each line will be treated as a single sentence.
        log.info("Reading in text from stdin.");
        log.info("Please enter one sentence per line.");
        log.info("Processing will end when EOF is reached.");
        BufferedReader reader = IOUtils.readerFromStdin("utf-8");
        for (String line; (line = reader.readLine()) != null; ) {
            line = line.trim();
            if (!line.isEmpty()) {
                Annotation annotation = tokenizer.process(line);
                pipeline.annotate(annotation);
                for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    outputTree(System.out, sentence, outputFormats);
                }
            } else {
                // Output blank lines for blank lines so the tool can be
                // used for line-by-line text processing
                System.out.println();
            }
        }
    }
}
Also used : PrintStream(java.io.PrintStream) Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) FileOutputStream(java.io.FileOutputStream) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 97 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

In the class ProcessTokensRegexRequestTest, the method testTwoRequests:

/**
 * Test two patterns that get one result each
 */
/**
 * Test two patterns that get one result each.
 *
 * <p>Runs "/small/" and "/test/" against a single tokenized sentence and checks
 * that each pattern yields exactly one match with the expected token span.
 */
@Test
public void testTwoRequests() {
    Annotation ann = pipeline.process("This is a small test");
    CoreNLPProtos.TokensRegexRequest request = buildRequest(ann, "/small/", "/test/");
    CoreNLPProtos.TokensRegexResponse response = ProcessTokensRegexRequest.processRequest(request);
    // BUG FIX: JUnit's contract is assertEquals(expected, actual); the arguments
    // were reversed throughout, which produces misleading failure messages
    // ("expected <actual> but was <expected>") when a test breaks.
    Assert.assertEquals(2, response.getMatchList().size());
    // First pattern: "/small/" matches token 3 ("small") in sentence 0.
    CoreNLPProtos.TokensRegexResponse.PatternMatch patternMatch = response.getMatchList().get(0);
    Assert.assertEquals(1, patternMatch.getMatchList().size());
    CoreNLPProtos.TokensRegexResponse.Match match = patternMatch.getMatchList().get(0);
    Assert.assertEquals(0, match.getSentence());
    Assert.assertEquals("small", match.getMatch().getText());
    Assert.assertEquals(3, match.getMatch().getBegin());
    Assert.assertEquals(4, match.getMatch().getEnd());
    // Second pattern: "/test/" matches token 4 ("test") in sentence 0.
    patternMatch = response.getMatchList().get(1);
    Assert.assertEquals(1, patternMatch.getMatchList().size());
    match = patternMatch.getMatchList().get(0);
    Assert.assertEquals(0, match.getSentence());
    Assert.assertEquals("test", match.getMatch().getText());
    Assert.assertEquals(4, match.getMatch().getBegin());
    Assert.assertEquals(5, match.getMatch().getEnd());
}
Also used : CoreNLPProtos(edu.stanford.nlp.pipeline.CoreNLPProtos) Annotation(edu.stanford.nlp.pipeline.Annotation) Test(org.junit.Test)

Example 98 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

In the class OpenIEServlet, the method doGet:

/**
 * Actually perform the GET request, given all the relevant information (already sanity checked).
 * This is the meat of the servlet code.
 *
 * <p>The query is trimmed and given terminal punctuation if missing, annotated
 * with the primary pipeline, and — if no triples were extracted — with the
 * backoff pipeline. The result is written as a single JSON object.
 *
 * @param out The writer to write the output to.
 * @param q The query string.
 */
private void doGet(PrintWriter out, String q) {
    // Clean the string a bit
    q = q.trim();
    if (q.isEmpty()) {
        return;
    }
    // The extractor expects sentence-final punctuation; append a period if absent.
    char lastChar = q.charAt(q.length() - 1);
    if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
        q = q + ".";
    }
    // Annotate
    Annotation ann = new Annotation(q);
    try {
        // Collect results
        Set<String> entailments = new HashSet<>();
        // LinkedHashSet keeps triples in extraction order for stable output.
        Set<String> triples = new LinkedHashSet<>();
        // pipeline must come before backoff
        runWithPipeline(pipeline, ann, triples, entailments);
        if (triples.size() == 0) {
            // backoff must come after pipeline
            runWithPipeline(backoff, ann, triples, entailments);
        }
        // Write results
        out.println("{ " + "\"ok\":true, " + "\"entailments\": [" + StringUtils.join(entailments, ",") + "], " + "\"triples\": [" + StringUtils.join(triples, ",") + "], " + "\"msg\": \"\"" + " }");
    } catch (Throwable t) {
        // BUG FIX: the error payload previously used unquoted keys
        // ({ok:false, ...}), which is not valid JSON and inconsistent with the
        // success branch; strict JSON clients would fail to parse it.
        out.println("{ \"ok\":false, \"entailments\": [], \"triples\": [], \"msg\": " + quote(t.getMessage()) + " }");
    }
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 99 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

In the class DcorefPronounResolver, the method resolvePronouns:

@Override
protected HashMap<Integer, Integer> resolvePronouns(List<CoreLabel> tokens) {
    // Wrap the token list into a single-sentence Annotation and run the
    // coreference pipeline over it.
    CoreMap sentence = new CoreLabel();
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, 1);
    List<CoreMap> sentenceList = new ArrayList<CoreMap>(1);
    sentenceList.add(sentence);
    Annotation document = new Annotation(sentenceList);
    pipeline.annotate(document);
    // Map from each pronoun's token index to the token index of the first
    // non-pronoun mention in its coref chain.
    HashMap<Integer, Integer> pronPairs = new HashMap<Integer, Integer>(1);
    Map<Integer, CorefChain> corefChains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        // First non-pronoun mention seen in this chain, in textual order.
        CoreLabel antecedent = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            CoreLabel head = tokens.get(mention.headIndex - 1);
            boolean isPronoun = head.tag().startsWith("PRP");
            if (isPronoun && antecedent != null) {
                pronPairs.put(head.index(), antecedent.index());
            } else if (!isPronoun && antecedent == null) {
                antecedent = head;
            }
        }
    }
    return pronPairs;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention) CorefChain(edu.stanford.nlp.dcoref.CorefChain) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.dcoref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 100 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

In the class GenericDataSetReader, the method parse:

/**
 * Parses one file or directory with data from one domain
 * @param path
 * @throws IOException
 */
/**
 * Parses one file or directory with data from one domain.
 *
 * @param path file or directory to read
 * @throws IOException if reading fails (any underlying exception is wrapped)
 */
public final Annotation parse(String path) throws IOException {
    Annotation dataset;
    try {
        // read() must return a dataset Annotation; each sentence in that
        // dataset must contain:
        // - TokensAnnotation
        // - EntityMentionAnnotation
        // - RelationMentionAnnotation
        // - EventMentionAnnotation
        // The remaining annotations (parse, NER) are generated in
        // preProcessSentences.
        dataset = this.read(path);
    } catch (Exception ex) {
        // Wrap any reader failure as an IOException, preserving the cause.
        throw new IOException(ex);
    }
    if (preProcessSentences) {
        preProcessSentences(dataset);
        if (MachineReadingProperties.trainUsePipelineNER) {
            logger.severe("Changing NER tags using the CoreNLP pipeline.");
            modifyUsingCoreNLPNER(dataset);
        }
    }
    return dataset;
}
Also used : IOException(java.io.IOException) Annotation(edu.stanford.nlp.pipeline.Annotation) IOException(java.io.IOException)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)138 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)84 CoreMap (edu.stanford.nlp.util.CoreMap)77 CoreLabel (edu.stanford.nlp.ling.CoreLabel)48 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)43 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)31 ArrayList (java.util.ArrayList)31 Properties (java.util.Properties)28 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)18 Test (org.junit.Test)18 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)15 Tree (edu.stanford.nlp.trees.Tree)14 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)12 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)11 IOException (java.io.IOException)11 CorefChain (edu.stanford.nlp.coref.data.CorefChain)10 RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations)10