Search in sources :

Example 71 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class DependencyParser method genDictionaries.

/**
 * Builds the word, part-of-speech tag, and dependency-label dictionaries
 * from a corpus, then triggers ID generation for them.
 *
 * <p>After this method returns, {@code knownWords} and {@code knownPos}
 * each begin with {@code Config.UNKNOWN}, {@code Config.NULL} and
 * {@code Config.ROOT} (in that order), and {@code knownLabels} begins
 * with {@code Config.NULL} followed by the root label.
 *
 * @param sents sentences whose tokens supply the words and POS tags
 * @param trees dependency trees whose arcs supply the relation labels
 */
private void genDictionaries(List<CoreMap> sents, List<DependencyTree> trees) {
    // Flatten the whole corpus into one running list of words and one of tags.
    List<String> allWords = new ArrayList<>();
    List<String> allTags = new ArrayList<>();
    for (CoreMap sentence : sents) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            allWords.add(token.word());
            allTags.add(token.tag());
        }
    }

    // Arcs whose head is 0 carry the root label (the last one seen wins);
    // every other arc contributes its label to the running label list.
    List<String> allLabels = new ArrayList<>();
    String rootLabel = null;
    for (DependencyTree tree : trees) {
        for (int node = 1; node <= tree.n; ++node) {
            if (tree.getHead(node) == 0) {
                rootLabel = tree.getLabel(node);
            } else {
                allLabels.add(tree.getLabel(node));
            }
        }
    }

    // Build the dictionaries; only the word dictionary gets a frequency cutoff.
    knownWords = Util.generateDict(allWords, config.wordCutOff);
    knownPos = Util.generateDict(allTags);
    knownLabels = Util.generateDict(allLabels);

    // Put the root label first, then drop the first later occurrence of it
    // so that it is not listed twice when it also appears on a non-root arc.
    knownLabels.add(0, rootLabel);
    int idx = 1;
    while (idx < knownLabels.size()) {
        if (knownLabels.get(idx).equals(rootLabel)) {
            knownLabels.remove(idx);
            break;
        }
        ++idx;
    }

    // Reserve the leading slots for the special symbols.
    knownWords.add(0, Config.UNKNOWN);
    knownWords.add(1, Config.NULL);
    knownWords.add(2, Config.ROOT);

    knownPos.add(0, Config.UNKNOWN);
    knownPos.add(1, Config.NULL);
    knownPos.add(2, Config.ROOT);

    knownLabels.add(0, Config.NULL);

    generateIDs();

    log.info(Config.SEPARATOR);
    log.info("#Word: " + knownWords.size());
    log.info("#POS:" + knownPos.size());
    log.info("#Label: " + knownLabels.size());
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 72 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class Util method writeConllFile.

/**
 * Writes sentences and their dependency trees to a file in a CoNLL-style,
 * tab-separated layout.
 *
 * <p>Each token becomes one line with ten columns: 1-based token index,
 * word, {@code _}, tag, tag, {@code _}, head index, dependency label,
 * {@code _}, {@code _}. A blank line follows every sentence.
 *
 * @param outFile   path of the file to write
 * @param sentences sentences to write; tokens are read from
 *                  {@code CoreAnnotations.TokensAnnotation}
 * @param trees     dependency trees, where {@code trees.get(i)} belongs to
 *                  {@code sentences.get(i)}
 * @throws RuntimeIOException wrapping any exception raised while opening
 *                            the file or producing the output
 */
public static void writeConllFile(String outFile, List<CoreMap> sentences, List<DependencyTree> trees) {
    // try-with-resources guarantees the writer is closed even when the loop
    // throws (e.g. a missing tree); previously the file handle leaked then.
    try (PrintWriter output = IOUtils.getPrintWriter(outFile)) {
        for (int i = 0; i < sentences.size(); i++) {
            CoreMap sentence = sentences.get(i);
            DependencyTree tree = trees.get(i);
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            // Tree nodes are 1-based, so token j lives at list index j - 1.
            for (int j = 1, size = tokens.size(); j <= size; ++j) {
                CoreLabel token = tokens.get(j - 1);
                output.printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n", j, token.word(), token.tag(), token.tag(), tree.getHead(j), tree.getLabel(j));
            }
            output.println();
        }
    } catch (Exception e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 73 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class DependencyParserCoreNLPDemo method main.

/**
 * Runs the dependency parser through a CoreNLP pipeline and logs the basic
 * dependencies of every sentence in list format.
 *
 * @param args optional; when present, {@code args[0]} is the path of a
 *             UTF-8 text file to parse, otherwise a built-in sample
 *             sentence is used
 */
public static void main(String[] args) {
    String text = args.length > 0
            ? IOUtils.slurpFileNoExceptions(args[0], "utf-8")
            : "I can almost always tell when movies use fake dinosaurs.";
    Annotation document = new Annotation(text);

    // Tokenize, split sentences, POS-tag, then dependency-parse with the default model.
    Properties pipelineProps = PropertiesUtils.asProperties(
            "annotators", "tokenize,ssplit,pos,depparse",
            "depparse.model", DependencyParser.DEFAULT_MODEL);
    AnnotationPipeline pipeline = new StanfordCoreNLP(pipelineProps);
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph dependencies = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        String listing = dependencies.toString(SemanticGraph.OutputFormat.LIST);
        log.info(IOUtils.eolChar + listing);
    }
}
Also used : AnnotationPipeline(edu.stanford.nlp.pipeline.AnnotationPipeline) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Example 74 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class NumberNormalizerITest method testNumbers.

/**
 * Checks that cardinal number expressions in a fixed text are found, typed
 * as {@code NUMBER}, cover the expected character span, and normalize to
 * the expected value.
 */
public void testNumbers() throws IOException {
    // Input text: one numeric expression (or run of them) per line
    String testText =
        "two dozen\n"
        + "six hundred,\n"
        + "four hundred, and twelve.\n"
        + "4 million six hundred fifty thousand, two hundred and eleven.\n"
        + "6 hundred billion, five million six hundred fifty thousand, three hundred and seventy six\n"
        + "5,786,345\n"
        + "twenty-five.\n"
        //  + "one and half million\n"
        + "1.3 million.\n"
        + "one thousand two hundred and twenty four\n"
        + "10 thousand million.\n"
        + "3.625\n"
        + "zero\n"
        + "-15\n"
        + "one two three four.\n"
        + "one hundred and fifty five\n"
        + "a hundred\n";

    // Expected normalized values, in order of appearance. Values are
    // compared via toString(), so the int / double distinction matters.
    Iterator<? extends Number> expectedNumbers = Arrays.asList(
        24.0,
        600.0,
        412.0,
        4650211.0,
        600005650376.0,
        5786345,
        25.0,
        1300000.0,
        1224.0,
        10000000000.0,
        3.625,
        0,
        -15.0,
        1, 2, 3, 4,
        155.0,
        100).iterator();

    // Expected covered text for each expression, in the same order
    Iterator<String> expectedTexts = Arrays.asList(
        "two dozen",
        "six hundred",
        "four hundred, and twelve",
        "4 million six hundred fifty thousand, two hundred and eleven",
        "6 hundred billion, five million six hundred fifty thousand, three hundred and seventy six",
        "5,786,345",
        "twenty-five",
        //  "one and half million",
        "1.3 million",
        "one thousand two hundred and twenty four",
        "10 thousand million",
        "3.625",
        "zero",
        "-15",
        "one", "two", "three", "four",
        "one hundred and fifty five",
        "hundred").iterator();

    Annotation document = createDocument(testText);
    NumberNormalizer.findAndAnnotateNumericExpressions(document);

    final String expectedType = "NUMBER";
    for (CoreMap numerized : document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
        // Only entries that carry a composite numeric type are checked.
        if (!numerized.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
            continue;
        }
        Number expectedNumber = expectedNumbers.next();
        String expectedText = expectedTexts.next();

        int begin = numerized.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int end = numerized.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        String text = document.get(CoreAnnotations.TextAnnotation.class).substring(begin, end);

        assertEquals(expectedText, text);
        assertEquals(expectedType, numerized.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
        assertEquals(expectedNumber.toString(), numerized.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
    }

    // Every expected value must have been matched by some annotation.
    assertFalse(expectedNumbers.hasNext());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 75 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class NumberNormalizerITest method testOrdinals.

/**
 * Checks that ordinal expressions in a fixed text are found, typed as
 * {@code ORDINAL}, cover the expected character span, and normalize to
 * the expected value.
 */
public void testOrdinals() throws IOException {
    // Input text: ordinals separated by commas and/or newlines
    String testText =
        "0th, 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th, 10th\n"
        + "zeroth, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth\n"
        + "11th, 12th, 13th, 14th, 15th, 16th, 17th, 18th, 19th, 20th\n"
        + "Eleventh, twelfth, thirteenth, Fourteenth, fifteenth, Sixteenth, seventeenth, eighteenth, nineteenth, twentieth\n"
        + "Twenty-first, twenty first, twenty second, twenty third, twenty fourth\n"
        + "thirtieth, thirty first, thirty-second,"
        + "fortieth, one hundredth, two hundredth, one hundred and fifty first, one hundred fifty first";

    // TODO: Fix consistency of number representation
    // Expected normalized values, in order of appearance. Values are
    // compared via toString(), so the int / double distinction matters.
    Iterator<? extends Number> expectedNumbers = Arrays.asList(
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21.0, 21.0, 22.0, 23.0, 24.0,
        30, 31.0, 32.0,
        40, 100.0, 200.0, 151.0, 151.0).iterator();

    // The expected covered texts are the comma/newline-separated pieces of the input itself.
    String[] pieces = testText.split("\\s*[,\\n]+\\s*");
    Iterator<String> expectedTexts = Arrays.asList(pieces).iterator();

    Annotation document = createDocument(testText);
    NumberNormalizer.findAndAnnotateNumericExpressions(document);

    final String expectedType = "ORDINAL";
    for (CoreMap numerized : document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
        // Only entries that carry a composite numeric type are checked.
        if (!numerized.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
            continue;
        }
        Number expectedNumber = expectedNumbers.next();
        String expectedText = expectedTexts.next();

        int begin = numerized.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int end = numerized.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        String text = document.get(CoreAnnotations.TextAnnotation.class).substring(begin, end);

        assertEquals(expectedText, text);
        assertEquals("Type for " + expectedText, expectedType, numerized.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
        assertEquals(expectedNumber.toString(), numerized.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
    }

    // Every expected value must have been matched by some annotation.
    assertFalse(expectedNumbers.hasNext());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap): 253, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 172, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 102, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 61, TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 53, ArrayList (java.util.ArrayList): 53, Annotation (edu.stanford.nlp.pipeline.Annotation): 49, Tree (edu.stanford.nlp.trees.Tree): 28, Properties (java.util.Properties): 23, StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP): 20, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 20, List (java.util.List): 20, Mention (edu.stanford.nlp.coref.data.Mention): 17, ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap): 17, CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 13, ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 12, SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation): 11, MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations): 9, IndexedWord (edu.stanford.nlp.ling.IndexedWord): 9, IntPair (edu.stanford.nlp.util.IntPair): 9