Search in sources :

Example 16 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class QuantifiableEntityExtractorITest method createDocument.

/**
 * Wraps the given text in a new {@link Annotation} and runs it through the
 * {@code pipeline} that is in scope for this method.
 *
 * @param text the raw text to annotate
 * @return the same {@link Annotation} instance after {@code pipeline.annotate} has been applied
 */
protected static Annotation createDocument(String text) {
    final Annotation document = new Annotation(text);
    pipeline.annotate(document);
    return document;
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 17 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class SpanishTokenizerAnnotatorITest method testSpanish.

/**
 * Tokenizes the text "Damelo" with a tokenize-only pipeline whose {@code tokenize.language}
 * is "es", and checks the resulting token texts, in order, against {@code spanishTokens}.
 * Fails if a token differs or if fewer tokens are produced than expected.
 */
@Test
public void testSpanish() {
    Annotation document = new Annotation("Damelo");

    // Tokenizer-only pipeline configured for Spanish.
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize");
    config.setProperty("tokenize.language", "es");
    StanfordCoreNLP spanishPipeline = new StanfordCoreNLP(config);
    spanishPipeline.annotate(document);

    // Walk the expected and actual tokens side by side.
    Iterator<String> expected = spanishTokens.iterator();
    Iterator<CoreLabel> actual = document.get(CoreAnnotations.TokensAnnotation.class).iterator();
    while (actual.hasNext()) {
        CoreLabel token = actual.next();
        assertEquals("Bung token in new CoreLabel usage", expected.next(), token.get(CoreAnnotations.TextAnnotation.class));
    }

    // Every expected token must have been matched.
    assertFalse("Too few tokens in new CoreLabel usage", expected.hasNext());
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Properties(java.util.Properties) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Test(org.junit.Test)

Example 18 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class Util method annotate.

/**
 * Runs the given pipeline over a single sentence.
 *
 * <p>A temporary {@link Annotation} is built for the call: its text is the sentence's
 * {@code TokensAnnotation} joined with single spaces, its token list is that same
 * {@code TokensAnnotation} value, and its sentence list is a singleton list holding
 * {@code sentence} itself. The temporary document is not returned or stored.
 *
 * @param sentence the sentence to annotate; its {@code TokensAnnotation} is read to build the document
 * @param pipeline the pipeline whose {@code annotate} method is invoked on the temporary document
 */
public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
    // Document text: the sentence's tokens, space-separated.
    String joinedTokens = StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), " ");
    Annotation document = new Annotation(joinedTokens);

    // Hand the pipeline the existing tokens and the sentence object itself.
    document.set(CoreAnnotations.TokensAnnotation.class, sentence.get(CoreAnnotations.TokensAnnotation.class));
    document.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));

    pipeline.annotate(document);
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 19 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class GetPatternsFromDataMultiClass method runPOSNERParseOnTokens.

/**
 * Runs a CoreNLP pipeline over sentences that already have tokens.
 *
 * <p>The annotator list always contains {@code pos} and {@code lemma}. {@code parse} is added
 * when {@code Flags.useTargetParserParentRestriction} is true; otherwise {@code depparse} is
 * added when the pattern type is {@code DEP}. {@code ner} is added when
 * {@code Flags.useTargetNERRestriction} is true.
 *
 * <p>Each entry of {@code sents} is annotated on its own, wrapped in a one-sentence document
 * built from the instance's token list. A failure on one sentence is logged and that sentence
 * is skipped; the remaining sentences are still processed.
 *
 * @param sents         sentences to annotate, keyed by sentence id
 * @param propsoriginal caller's properties; the pattern type, the two restriction flags, the
 *                      POS model path and the thread count are read from it.
 *                      {@code Flags.patternType} must be present, since it is passed straight
 *                      to {@code PatternType.valueOf}
 * @return the same {@code sents} map that was passed in
 */
public static Map<String, DataInstance> runPOSNERParseOnTokens(Map<String, DataInstance> sents, Properties propsoriginal) {
    PatternFactory.PatternType type = PatternFactory.PatternType.valueOf(propsoriginal.getProperty(Flags.patternType));
    boolean useTargetParserParentRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetParserParentRestriction));
    boolean useTargetNERRestriction = Boolean.parseBoolean(propsoriginal.getProperty(Flags.useTargetNERRestriction));
    // Read from the caller's properties. The pipeline properties created below start out empty,
    // so looking the model path up there always yielded null and "pos.model" was never set.
    String posModelPath = propsoriginal.getProperty(Flags.posModelPath);
    String numThreads = propsoriginal.getProperty(Flags.numThreads);

    List<String> anns = new ArrayList<>();
    anns.add("pos");
    anns.add("lemma");
    if (useTargetParserParentRestriction) {
        anns.add("parse");
    } else if (type.equals(PatternFactory.PatternType.DEP)) {
        anns.add("depparse");
    }
    if (useTargetNERRestriction) {
        anns.add("ner");
    }

    Properties props = new Properties();
    props.setProperty("annotators", StringUtils.join(anns, ","));
    props.setProperty("parse.maxlen", "80");
    // Properties rejects null values with a NullPointerException, so only set the thread
    // options when a thread count was actually supplied.
    if (numThreads != null) {
        props.setProperty("nthreads", numThreads);
        props.setProperty("threads", numThreads);
    }
    if (posModelPath != null) {
        props.setProperty("pos.model", posModelPath);
    }
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);

    Redwood.log(Redwood.DBG, "Annotating text");
    for (Map.Entry<String, DataInstance> en : sents.entrySet()) {
        // Wrap the instance's tokens in a one-sentence document for the pipeline.
        // NOTE(review): the wrapper sentence is discarded afterwards, so results presumably reach
        // the DataInstance through the shared token objects — confirm.
        List<CoreMap> temp = new ArrayList<>();
        CoreMap s = new ArrayCoreMap();
        s.set(CoreAnnotations.TokensAnnotation.class, en.getValue().getTokens());
        temp.add(s);
        Annotation doc = new Annotation(temp);
        try {
            pipeline.annotate(doc);
            if (useTargetParserParentRestriction) {
                inferParentParseTag(s.get(TreeAnnotation.class));
            }
        } catch (Exception e) {
            // Deliberately best-effort: one bad sentence must not abort the whole run.
            log.warn("Ignoring error: for sentence  " + StringUtils.joinWords(en.getValue().getTokens(), " "));
            log.warn(e);
        }
    }
    Redwood.log(Redwood.DBG, "Done annotating text");
    return sents;
}
Also used : StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) GoldAnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation) SQLException(java.sql.SQLException) InvocationTargetException(java.lang.reflect.InvocationTargetException) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 20 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class GetPatternsFromDataMultiClass method tokenize.

/**
 * Reads lines from {@code textReader}, annotates them as one document, and stores one
 * {@code DataInstance} per resulting sentence in {@code sents} under the key
 * {@code sentIDPrefix + index} (index starting at 0).
 *
 * <p>The shared {@code pipeline} is created on first use with {@code tokenize}, {@code ssplit},
 * {@code pos} and {@code lemma}, plus {@code parse}, {@code depparse} and {@code ner} as selected
 * by the arguments. Later calls reuse it, so their pipeline-related arguments have no effect.
 *
 * <p>When {@code batchProcessSents} is true, at most {@code numMaxSentencesPerBatchFile} lines
 * (but always at least one) are consumed per call. Lines beyond that stay in {@code textReader}
 * for the next call. If any sentences were produced they are serialized to
 * {@code saveSentencesSerDirFile + "/sents_" + n}, registered in {@code Data.sentsFiles} and
 * {@code Data.sentId2File}, and {@code sents} is cleared.
 *
 * @param textReader                       source of input lines
 * @param posModelPath                     value for {@code pos.model}, or null to leave it unset
 * @param lowercase                        whether each line is lower-cased before annotation
 * @param useTargetNERRestriction          whether to add the {@code ner} annotator
 * @param sentIDPrefix                     prefix of the generated sentence ids
 * @param useTargetParserParentRestriction whether to add {@code parse} and call
 *                                         {@code inferParentParseTag} on each sentence's tree
 * @param numThreads                       value for the {@code threads} property, or null to leave it unset
 * @param batchProcessSents                whether to limit the lines read and write the sentences to a file
 * @param numMaxSentencesPerBatchFile      maximum number of input lines consumed per call in batch mode
 * @param saveSentencesSerDirFile          directory for the serialized batch files
 * @param sents                            map that receives the new sentences; cleared after a batch is written
 * @param numFilesTillNow                  number of batch files written before this call
 * @param type                             pattern type; {@code DEP} adds the {@code depparse} annotator
 * @return {@code numFilesTillNow}, incremented by one if a batch file was written
 */
public static int tokenize(Iterator<String> textReader, String posModelPath, boolean lowercase, boolean useTargetNERRestriction, String sentIDPrefix, boolean useTargetParserParentRestriction, String numThreads, boolean batchProcessSents, int numMaxSentencesPerBatchFile, File saveSentencesSerDirFile, Map<String, DataInstance> sents, int numFilesTillNow, PatternFactory.PatternType type) throws InterruptedException, ExecutionException, IOException {
    if (pipeline == null) {
        Properties props = new Properties();
        List<String> anns = new ArrayList<>();
        anns.add("tokenize");
        anns.add("ssplit");
        anns.add("pos");
        anns.add("lemma");
        if (useTargetParserParentRestriction) {
            anns.add("parse");
        }
        if (type.equals(PatternFactory.PatternType.DEP)) {
            anns.add("depparse");
        }
        if (useTargetNERRestriction) {
            anns.add("ner");
        }
        props.setProperty("annotators", StringUtils.join(anns, ","));
        props.setProperty("parse.maxlen", "80");
        if (numThreads != null) {
            props.setProperty("threads", numThreads);
        }
        props.setProperty("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
        if (posModelPath != null) {
            props.setProperty("pos.model", posModelPath);
        }
        pipeline = new StanfordCoreNLP(props);
    }

    StringBuilder text = new StringBuilder();
    int numLines = 0;
    while (textReader.hasNext()) {
        // Check the batch limit BEFORE consuming the next line. Checking afterwards pulled one
        // extra line off the iterator and dropped it, so it never reached any batch.
        // The numLines > 0 guard keeps every call consuming at least one line, so a caller that
        // loops on textReader.hasNext() always makes progress.
        if (batchProcessSents && numLines > 0 && numLines >= numMaxSentencesPerBatchFile) {
            break;
        }
        String line = textReader.next();
        numLines++;
        if (lowercase) {
            line = line.toLowerCase();
        }
        text.append(line).append('\n');
    }

    Annotation doc = new Annotation(text.toString());
    pipeline.annotate(doc);

    int numSentences = 0;
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
        if (useTargetParserParentRestriction) {
            inferParentParseTag(s.get(TreeAnnotation.class));
        }
        DataInstance d = DataInstance.getNewInstance(type, s);
        // Ids are zero-based: the first sentence is sentIDPrefix + 0.
        sents.put(sentIDPrefix + numSentences, d);
        numSentences++;
    }
    // Report the real sentence count, not the last index (which was one too low).
    Redwood.log(Redwood.DBG, "Done annotating text with " + numSentences + " sentences");

    if (sents.size() > 0 && batchProcessSents) {
        numFilesTillNow++;
        File file = new File(saveSentencesSerDirFile + "/sents_" + numFilesTillNow);
        IOUtils.writeObjectToFile(sents, file);
        Data.sentsFiles.add(file);
        for (String sentid : sents.keySet()) {
            assert !Data.sentId2File.containsKey(sentid) : "Data.sentId2File already contains " + sentid + ". Make sure sentIds are unique!";
            Data.sentId2File.put(sentid, file);
        }
        // The sentences now live on disk; empty the caller's map so they are not kept in memory.
        sents.clear();
    }
    return numFilesTillNow;
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) GoldAnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation) 138, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations) 84, CoreMap (edu.stanford.nlp.util.CoreMap) 77, CoreLabel (edu.stanford.nlp.ling.CoreLabel) 48, StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP) 43, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 31, ArrayList (java.util.ArrayList) 31, Properties (java.util.Properties) 28, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph) 21, TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations) 18, Test (org.junit.Test) 18, SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) 15, Tree (edu.stanford.nlp.trees.Tree) 14, TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) 12, TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) 12, List (java.util.List) 12, CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations) 11, IOException (java.io.IOException) 11, CorefChain (edu.stanford.nlp.coref.data.CorefChain) 10, RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) 10