Search in sources :

Example 6 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project neo4j-nlp-stanfordnlp by graphaware.

the class StanfordTextProcessor method annotateTags.

/**
 * Runs the {@code TOKENIZER} pipeline over {@code text} and collects a tag for each token
 * of the first sentence only.
 *
 * <p>Tokens for which {@code getTag} yields {@code null}, or whose lemma is rejected by
 * {@code checkLemmaIsValid}, are left out. If no sentence is produced the list is empty.
 *
 * @param text the raw text to annotate
 * @param lang the language passed through to {@code getTag} for every token
 * @return a new mutable list of the accepted tags, in token order
 */
@Override
public List<Tag> annotateTags(String text, String lang) {
    List<Tag> accepted = new ArrayList<>();
    Annotation annotated = new Annotation(text);
    pipelines.get(TOKENIZER).annotate(annotated);
    // Only the first sentence contributes tags; any further sentences are ignored.
    annotated.get(CoreAnnotations.SentencesAnnotation.class)
            .stream()
            .findFirst()
            .ifPresent(first -> first.get(CoreAnnotations.TokensAnnotation.class)
                    .stream()
                    .map(token -> getTag(lang, token))
                    .filter(candidate -> candidate != null)
                    .filter(candidate -> checkLemmaIsValid(candidate.getLemma()))
                    .forEach(accepted::add));
    return accepted;
}
Also used : java.util(java.util) CorefChain(edu.stanford.nlp.coref.data.CorefChain) Log(org.neo4j.logging.Log) Tree(edu.stanford.nlp.trees.Tree) Matcher(java.util.regex.Matcher) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Pair(edu.stanford.nlp.util.Pair) CoreMap(edu.stanford.nlp.util.CoreMap) FileUtils(com.graphaware.nlp.util.FileUtils) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) com.graphaware.nlp.domain(com.graphaware.nlp.domain) Stream(java.util.stream.Stream) NERModelTool(com.graphaware.nlp.processor.stanford.model.NERModelTool) Word(edu.stanford.nlp.ling.Word) AbstractTextProcessor(com.graphaware.nlp.processor.AbstractTextProcessor) Annotation(edu.stanford.nlp.pipeline.Annotation) LoggerFactory(com.graphaware.common.log.LoggerFactory) StringUtils(edu.stanford.nlp.util.StringUtils) NLPTextProcessor(com.graphaware.nlp.annotation.NLPTextProcessor) PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) DEFAULT_BACKGROUND_SYMBOL(edu.stanford.nlp.sequences.SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) 
CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 7 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReader method main.

/**
 * Reads and dumps output, mainly for debugging.
 *
 * <p>Command-line properties (parsed with {@code StringUtils.argsToProperties}):
 * <ul>
 *   <li>{@code i} - input path handed to the {@code CoNLLDocumentReader} (required)</li>
 *   <li>{@code o} - output file that receives the tab-separated dump (required)</li>
 *   <li>{@code ext} - optional file extension; when present it is turned into the
 *       pattern {@code ".*" + ext + "$"} passed to {@code Options}</li>
 * </ul>
 * If {@code i} or {@code o} is missing, usage is printed and the JVM exits with status -1.
 *
 * <p>The output writer is managed by try-with-resources so it is flushed and closed even
 * when reading or writing a document throws.
 *
 * @param args command-line arguments
 * @throws IOException if the output file cannot be opened or reading the corpus fails
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    // Hard-coded off: flip to true locally to get per-document/per-sentence logging.
    boolean debug = false;
    String filepath = props.getProperty("i");
    String outfile = props.getProperty("o");
    if (filepath == null || outfile == null) {
        usage();
        System.exit(-1);
    }
    CorpusStats corpusStats = new CorpusStats();
    int docCnt = 0;
    int sentCnt = 0;
    int tokenCnt = 0;
    try (PrintWriter fout = new PrintWriter(outfile)) {
        logger.info("Writing to " + outfile);
        String ext = props.getProperty("ext");
        Options options;
        if (ext != null) {
            options = new Options(".*" + ext + "$");
        } else {
            options = new Options();
        }
        options.annotateTreeCoref = true;
        options.annotateTreeNer = true;
        CoNLLDocumentReader reader = new CoNLLDocumentReader(filepath, options);
        for (CoNLLDocument doc; (doc = reader.getNextDocument()) != null; ) {
            corpusStats.process(doc);
            docCnt++;
            Annotation anno = doc.getAnnotation();
            if (debug) {
                logger.info("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
            }
            for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
                if (debug) {
                    logger.info("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                    logger.info("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
                }
                writeTabSep(fout, sentence, doc.corefChainMap);
                sentCnt++;
                tokenCnt += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
            }
            if (debug) {
                for (CoreMap ner : doc.nerChunks) {
                    logger.info("NER Chunk: " + ner);
                }
                for (String id : doc.corefChainMap.keySet()) {
                    logger.info("Coref: " + id + " = " + StringUtils.join(doc.corefChainMap.get(id), ";"));
                }
            }
        }
    }
    // The writer is closed at this point; only summary statistics remain.
    logger.info("Total document count: " + docCnt);
    logger.info("Total sentence count: " + sentCnt);
    logger.info("Total token count: " + tokenCnt);
    logger.info(corpusStats);
}
Also used : Properties(java.util.Properties) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap) PrintWriter(java.io.PrintWriter)

Example 8 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class RothCONLL04Reader method main.

/**
 * Smoke test: parses the CoNLL04 corpus at a fixed path with a pipeline configured from
 * the command-line properties and prints the resulting dataset to standard output.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws Exception if anything goes wrong while building the pipeline or parsing
 */
public static void main(String[] args) throws Exception {
    // just a simple test, to make sure stuff works
    Properties pipelineProps = StringUtils.argsToProperties(args);
    RothCONLL04Reader corpusReader = new RothCONLL04Reader();
    corpusReader.setLoggerLevel(Level.INFO);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    corpusReader.setProcessor(pipeline);
    Annotation parsed = corpusReader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
    String dump = AnnotationUtils.datasetToString(parsed);
    System.out.println(dump);
}
Also used : Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 9 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class AnnotationUtils method createDataset.

/**
 * Builds a fresh {@link Annotation} over empty text and attaches the supplied
 * sentences to it via {@code addSentences}.
 *
 * @param sentences the sentences (as CoreMaps) to wrap
 * @return the newly created annotation holding the sentences
 */
public static Annotation createDataset(List<CoreMap> sentences) {
    final Annotation wrapper = new Annotation("");
    addSentences(wrapper, sentences);
    return wrapper;
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 10 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class CoreNLPServlet method addResults.

/**
 * Annotates the request's {@code input} parameter with the pipeline and writes the
 * result to the response in the format named by the {@code outputFormat} parameter.
 *
 * <p>Nothing is written when {@code input} is missing or blank after trimming. Input
 * longer than {@code MAXIMUM_QUERY_LENGTH} produces only a "too long" notice. A missing
 * or blank {@code outputFormat} falls back to {@code defaultFormat}; formats other than
 * {@code xml}, {@code json}, {@code conll} and {@code pretty} use the visualisation output.
 *
 * @param request the servlet request carrying {@code input} and {@code outputFormat}
 * @param response the servlet response whose writer receives the output
 * @throws ServletException declared for the output helpers
 * @throws IOException if obtaining the writer or producing output fails
 */
public void addResults(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    String rawInput = request.getParameter("input");
    String input = (rawInput == null) ? "" : rawInput.trim();
    if (input.isEmpty()) {
        return;
    }

    PrintWriter writer = response.getWriter();
    boolean tooLong = input.length() > MAXIMUM_QUERY_LENGTH;
    if (tooLong) {
        writer.print("<div>This query is too long.  If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/corenlp.html\">publicly released distribution</a>.</div>");
        return;
    }

    Annotation annotated = new Annotation(input);
    pipeline.annotate(annotated);

    String requestedFormat = request.getParameter("outputFormat");
    boolean formatMissing = requestedFormat == null || requestedFormat.trim().isEmpty();
    String format = formatMissing ? this.defaultFormat : requestedFormat;

    switch (format) {
        case "xml":
            outputXml(writer, annotated);
            break;
        case "json":
            outputJson(writer, annotated);
            break;
        case "conll":
            outputCoNLL(writer, annotated);
            break;
        case "pretty":
            outputPretty(writer, annotated);
            break;
        default:
            // Any unrecognised format name gets the visualisation view.
            outputVisualise(writer, annotated);
            break;
    }
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation) PrintWriter(java.io.PrintWriter)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)138 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)84 CoreMap (edu.stanford.nlp.util.CoreMap)77 CoreLabel (edu.stanford.nlp.ling.CoreLabel)48 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)43 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)31 ArrayList (java.util.ArrayList)31 Properties (java.util.Properties)28 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)18 Test (org.junit.Test)18 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)15 Tree (edu.stanford.nlp.trees.Tree)14 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)12 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)11 IOException (java.io.IOException)11 CorefChain (edu.stanford.nlp.coref.data.CorefChain)10 RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations)10