Search in sources :

Example 1 with MatchedExpression

Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in the project CoreNLP by stanfordnlp.

From the class QuantifiableEntityExtractorITest, method runAndCheck:

/**
 * Runs the extractor over every sentence and compares the matched expressions with the
 * expected quantities.
 *
 * <p>For each sentence a document is built with {@code createDocument} and passed to
 * {@code extractor.extract}. When {@code expected} is {@code null}, the text and value of each
 * match are printed and {@code assertTrue} is called with a constant {@code false}. Otherwise the
 * matches are compared position by position with {@code expected[si]} (text, normalized value
 * and unit type), followed by a check that the number of matches equals the number expected.
 *
 * <p>NOTE(review): this excerpt contains no closing braces, so the nesting described here is
 * read off the indentation only — confirm against the original source file.
 *
 * @param prefix    label prepended to the printed output and to every assertion message
 * @param sentences input texts; one document is created per entry
 * @param expected  expected quantities, indexed first by sentence and then by match position,
 *                  or {@code null} to only print what was matched
 * @throws Exception declared by the signature; the excerpt does not show which call raises it
 */
public void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) throws Exception {
    for (int si = 0; si < sentences.length; si++) {
        String sentence = sentences[si];
        Annotation annotation = createDocument(sentence);
        List<MatchedExpression> matchedExpressions = extractor.extract(annotation);
        // Print out matched text and value
        if (expected == null) {
            // No expectations supplied: dump every match so it can be inspected.
            for (int i = 0; i < matchedExpressions.size(); i++) {
                String text = matchedExpressions.get(i).getText();
                Object value = matchedExpressions.get(i).getValue();
                System.out.println(prefix + ": Got expression " + text + " with value " + value);
            // The condition is the literal false, so this assertion cannot pass.
            assertTrue(prefix + ": No expected provided", false);
        } else {
            // Compare only the positions present in both lists; the size mismatch, if any,
            // is reported by the length assertion further down.
            int minMatchable = Math.min(expected[si].length, matchedExpressions.size());
            for (int i = 0; i < minMatchable; i++) {
                ExpectedQuantity expectedQuantity = expected[si][i];
                MatchedExpression matched = matchedExpressions.get(i);
                // The match value is unwrapped with get() and cast to SimpleQuantifiableEntity.
                SimpleQuantifiableEntity actualQuantity = (SimpleQuantifiableEntity) matched.getValue().get();
                assertEquals(prefix + ".matched." + si + "." + i + ".text", expectedQuantity.text, matched.getText());
                // The normalized value is compared against the entity's toString() form.
                assertEquals(prefix + ".matched." + si + "." + i + ".normalizedValue", expectedQuantity.normalizedValue, actualQuantity.toString());
                assertEquals(prefix + ".matched." + si + "." + i + ".type", expectedQuantity.type, actualQuantity.getUnit().type);
            // Number of matches must equal the number of expected quantities for this sentence.
            assertEquals(prefix + ".length." + si, expected[si].length, matchedExpressions.size());
Also used : MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression)

Example 2 with MatchedExpression

Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in the project CoreNLP by stanfordnlp.

From the class KBPTokensregexExtractor, method classify:

/**
 * Classifies the relation between the subject and object spans of {@code input} by running
 * the per-relation TokensRegex extractors stored in {@code rules}.
 *
 * <p>The subject and object tokens are tagged first. Then each {@code RelationType} is tried if
 * it has an entry in {@code rules}, its {@code entityType} equals {@code input.subjectType}, and
 * its {@code validNamedEntityLabels} contains {@code input.objectType}. The first such relation
 * that yields a non-empty extraction list determines the result.
 *
 * <p>NOTE(review): this excerpt contains no closing braces, and the bodies of both
 * {@code "O".equals(...)} checks and both "Un-Annotate" loops are not visible — confirm the
 * missing statements against the original source file.
 *
 * @param input the sentence together with its subject/object spans and types
 * @return a pair of the matching relation's {@code canonicalName} and the weight of its best
 *         match, or {@code NO_RELATION} paired with {@code 1.0} when nothing is returned
 *         from inside the rule loop
 */
public Pair<String, Double> classify(KBPInput input) {
    // Annotate Sentence
    CoreMap sentenceAsMap = input.sentence.asCoreMap(Sentence::nerTags);
    List<CoreLabel> tokens = sentenceAsMap.get(CoreAnnotations.TokensAnnotation.class);
    // Annotate where the subject is
    for (int i : input.subjectSpan) {
        tokens.get(i).set(Subject.class, "true");
        // Tokens whose NER tag is "O" get special handling; that handling is not visible here.
        if ("O".equals(tokens.get(i).ner())) {
    // Annotate where the object is
    for (int i : input.objectSpan) {
        // NOTE(review): Object here is used as an annotation key alongside Subject; presumably a
        // project-declared class shadowing java.lang.Object — confirm.
        tokens.get(i).set(Object.class, "true");
        // Same "O" check as for the subject; the body is not visible here.
        if ("O".equals(tokens.get(i).ner())) {
    // Run Rules
    for (RelationType rel : RelationType.values()) {
        // Skip relations with no rules or whose subject/object types do not fit this input.
        if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) {
            CoreMapExpressionExtractor extractor = rules.get(rel);
            // The extractor is held as a raw type, hence the unchecked assignment below.
            @SuppressWarnings("unchecked") List<MatchedExpression> extractions = extractor.extractExpressions(sentenceAsMap);
            if (extractions != null && extractions.size() > 0) {
                // Pick a single match using the weight-based scorer.
                MatchedExpression best = MatchedExpression.getBestMatched(extractions, MatchedExpression.EXPR_WEIGHT_SCORER);
                // Un-Annotate Sentence
                for (CoreLabel token : tokens) {
                return Pair.makePair(rel.canonicalName, best.getWeight());
    // Un-Annotate Sentence
    for (CoreLabel token : tokens) {
    // Fallback result when no return was taken inside the rule loop.
    return Pair.makePair(NO_RELATION, 1.0);
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreMapExpressionExtractor(edu.stanford.nlp.ling.tokensregex.CoreMapExpressionExtractor) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) Sentence(edu.stanford.nlp.simple.Sentence) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression)

Example 3 with MatchedExpression

Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in the project CoreNLP by stanfordnlp.

From the class TokensRegexDemo, method main:

/**
 * Demo entry point: loads TokensRegex rules, builds a CoreNLP pipeline and prints every matched
 * expression together with per-token details for each sentence of the input text.
 *
 * <p>NOTE(review): this excerpt contains no closing braces and no visible call that runs
 * {@code pipeline} on {@code annotation}; such statements appear to be missing — confirm
 * against the original source file.
 *
 * @param args optional arguments: {@code args[0]} is the rules location (a built-in default
 *             path is used otherwise), {@code args[1]} is a file whose contents become the
 *             input text (a built-in sample sentence is used otherwise), {@code args[2]} is
 *             the output file (standard output is used otherwise)
 * @throws IOException declared by the signature; presumably raised when a file cannot be
 *                     opened — confirm
 */
public static void main(String[] args) throws IOException {
    // Rules location: first argument, or the bundled demo rules path.
    String rules;
    if (args.length > 0) {
        rules = args[0];
    } else {
        rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
    // Output destination: third argument as a file name, or standard output.
    PrintWriter out;
    if (args.length > 2) {
        out = new PrintWriter(args[2]);
    } else {
        out = new PrintWriter(System.out);
    CoreMapExpressionExtractor<MatchedExpression> extractor = CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    // Input text: contents of the file named by the second argument, or a sample sentence.
    Annotation annotation;
    if (args.length > 1) {
        annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
    } else {
        annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
    // An Annotation is a Map and you can get and use the various analyses individually.
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
    out.println("The top level annotation");
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Apply the loaded rules to this sentence.
        List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence);
        for (MatchedExpression matched : matchedExpressions) {
            // Print out matched text and value
            out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
            // Print out token information
            CoreMap cm = matched.getAnnotation();
            for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
                String word = token.get(CoreAnnotations.TextAnnotation.class);
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                out.println("  Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) Annotation(edu.stanford.nlp.pipeline.Annotation) PrintWriter(java.io.PrintWriter)


MatchedExpression (edu.stanford.nlp.ling.tokensregex.MatchedExpression)3 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 CoreLabel (edu.stanford.nlp.ling.CoreLabel)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 CoreMapExpressionExtractor (edu.stanford.nlp.ling.tokensregex.CoreMapExpressionExtractor)1 Annotation (edu.stanford.nlp.pipeline.Annotation)1 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)1 Sentence (edu.stanford.nlp.simple.Sentence)1 PrintWriter (java.io.PrintWriter)1