Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in project CoreNLP by stanfordnlp:
the class QuantifiableEntityExtractorITest, method runAndCheck.
public void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) throws Exception {
  for (int si = 0; si < sentences.length; si++) {
    String sentence = sentences[si];
    Annotation annotation = createDocument(sentence);
    List<MatchedExpression> matchedExpressions = extractor.extract(annotation);
    // Print out matched text and value
    if (expected == null) {
      for (int i = 0; i < matchedExpressions.size(); i++) {
        String text = matchedExpressions.get(i).getText();
        Object value = matchedExpressions.get(i).getValue();
        System.out.println(prefix + ": Got expression " + text + " with value " + value);
      }
      assertTrue(prefix + ": No expected provided", false);
    } else {
      int minMatchable = Math.min(expected[si].length, matchedExpressions.size());
      for (int i = 0; i < minMatchable; i++) {
        ExpectedQuantity expectedQuantity = expected[si][i];
        MatchedExpression matched = matchedExpressions.get(i);
        SimpleQuantifiableEntity actualQuantity = (SimpleQuantifiableEntity) matched.getValue().get();
        assertEquals(prefix + ".matched." + si + "." + i + ".text", expectedQuantity.text, matched.getText());
        assertEquals(prefix + ".matched." + si + "." + i + ".normalizedValue", expectedQuantity.normalizedValue, actualQuantity.toString());
        assertEquals(prefix + ".matched." + si + "." + i + ".type", expectedQuantity.type, actualQuantity.getUnit().type);
      }
      assertEquals(prefix + ".length." + si, expected[si].length, matchedExpressions.size());
    }
  }
}
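A hypothetical call to this helper might look like the sketch below; the sentence, the ExpectedQuantity constructor arguments, and the normalized value shown are illustrative assumptions rather than data from the actual test.

// Illustrative sketch only; ExpectedQuantity's constructor order (text, normalizedValue, type)
// and the normalized-value string are assumed, not copied from the real test data.
String[] sentences = { "The package weighed five kilograms." };
ExpectedQuantity[][] expected = {
  { new ExpectedQuantity("five kilograms", "5000.0 g", "MASS") }
};
runAndCheck("mass", sentences, expected);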
Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in project CoreNLP by stanfordnlp:
the class KBPTokensregexExtractor, method classify.
@Override
public Pair<String, Double> classify(KBPInput input) {
  // Annotate Sentence
  CoreMap sentenceAsMap = input.sentence.asCoreMap(Sentence::nerTags);
  List<CoreLabel> tokens = sentenceAsMap.get(CoreAnnotations.TokensAnnotation.class);
  // Annotate where the subject is
  for (int i : input.subjectSpan) {
    tokens.get(i).set(Subject.class, "true");
    if ("O".equals(tokens.get(i).ner())) {
      tokens.get(i).setNER(input.subjectType.name);
    }
  }
  // Annotate where the object is
  for (int i : input.objectSpan) {
    tokens.get(i).set(Object.class, "true");
    if ("O".equals(tokens.get(i).ner())) {
      tokens.get(i).setNER(input.objectType.name);
    }
  }
  // Run Rules
  for (RelationType rel : RelationType.values()) {
    if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) {
      CoreMapExpressionExtractor extractor = rules.get(rel);
      @SuppressWarnings("unchecked")
      List<MatchedExpression> extractions = extractor.extractExpressions(sentenceAsMap);
      if (extractions != null && extractions.size() > 0) {
        MatchedExpression best = MatchedExpression.getBestMatched(extractions, MatchedExpression.EXPR_WEIGHT_SCORER);
        // Un-Annotate Sentence
        for (CoreLabel token : tokens) {
          token.remove(Subject.class);
          token.remove(Object.class);
        }
        return Pair.makePair(rel.canonicalName, best.getWeight());
      }
    }
  }
  // Un-Annotate Sentence
  for (CoreLabel token : tokens) {
    token.remove(Subject.class);
    token.remove(Object.class);
  }
  return Pair.makePair(NO_RELATION, 1.0);
}
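The Subject and Object keys set on tokens above are custom annotation classes used to mark the subject and object spans so the TokensRegex rules can refer to them. A minimal sketch of how such a marker key could be declared follows; the actual declarations are inner classes of KBPTokensregexExtractor and may differ in detail.

// Sketch of a String-valued marker annotation key (assumed shape, not the exact CoreNLP declaration).
// The CoreAnnotation interface comes from edu.stanford.nlp.ling.CoreAnnotation.
public static class Subject implements CoreAnnotation<String> {
  @Override
  public Class<String> getType() {
    return String.class;
  }
}

With such a key in place, each relation's rules can match tokens carrying the Subject or Object mark and assign a weight, which classify then returns together with the relation's canonical name.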
Use of edu.stanford.nlp.ling.tokensregex.MatchedExpression in project CoreNLP by stanfordnlp:
the class TokensRegexDemo, method main.
public static void main(String[] args) throws IOException {
  String rules;
  if (args.length > 0) {
    rules = args[0];
  } else {
    rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
  }
  PrintWriter out;
  if (args.length > 2) {
    out = new PrintWriter(args[2]);
  } else {
    out = new PrintWriter(System.out);
  }
  CoreMapExpressionExtractor<MatchedExpression> extractor = CoreMapExpressionExtractor.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
  Annotation annotation;
  if (args.length > 1) {
    annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
  } else {
    annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
  }
  pipeline.annotate(annotation);
  // An Annotation is a Map and you can get and use the various analyses individually.
  out.println();
  // The toString() method on an Annotation just prints the text of the Annotation,
  // but you can see what is in it with other methods like toShorterString().
  out.println("The top level annotation");
  out.println(annotation.toShorterString());
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence);
    for (MatchedExpression matched : matchedExpressions) {
      // Print out matched text and value
      out.println("Matched expression: " + matched.getText() + " with value " + matched.getValue());
      // Print out token information
      CoreMap cm = matched.getAnnotation();
      for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        out.println(" Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
      }
    }
  }
  out.flush();
}
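For reference, a TokensRegex extraction rules file is a sequence of environment settings and rule entries; the snippet below is a simplified, assumed sketch of the syntax only and is not the contents of the bundled expr.rules.txt.

ENV.defaults["stage"] = 1
{ ruleType: "tokens", pattern: ( /five/ ), result: 5 }
{ ruleType: "tokens", pattern: ( /three/ ), result: 3 }

Each rule associates a token-level pattern with a result value; when a pattern matches, the extractor returns a MatchedExpression whose getValue() wraps that result, which is what the demo prints alongside the matched text.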