use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class POSMikheevCounter method buildTableHelper.
/**
 * Builds the suffix tables from a source corpus file by counting how many times each suffix-POS
 * association occurs in that corpus.
 * <p>
 * Only tokens of at least 5 characters whose last three characters are all letters are counted.
 * Capitalized tokens are counted in {@code firstCapitalized} when they start their sentence and in
 * {@code notFirstCapitalized} otherwise; non-capitalized tokens are counted in {@code table},
 * unless they contain a hyphen, in which case that token is skipped. For every counted token the
 * lower-cased 3-character suffix is recorded, and the 4-character suffix as well when the token
 * has at least 6 characters and its fourth-from-last character is a letter.
 *
 * @param fileName file name of the source corpus
 * @throws Exception if reading the corpus or accessing its annotations fails
 **/
private void buildTableHelper(String fileName) throws Exception {
    PennTreebankPOSReader reader = new PennTreebankPOSReader(this.corpusName);
    reader.readFile(fileName);
    List<TextAnnotation> tas = reader.getTextAnnotations();
    for (TextAnnotation ta : tas) {
        for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
            String form = ta.getToken(tokenId);
            String tag = ((SpanLabelView) ta.getView(ViewNames.POS)).getLabel(tokenId);
            if (form.length() < 5) {
                continue;
            }

            // The 3-character suffix must consist of letters only.
            boolean allLetters = true;
            for (int i = form.length() - 3; i < form.length() && allLetters; ++i) {
                allLetters = Character.isLetter(form.charAt(i));
            }
            if (!allLetters) {
                continue;
            }

            // Pick the table this token contributes to.
            HashMap<String, TreeMap<String, Integer>> t;
            if (WordHelpers.isCapitalized(ta, tokenId)) {
                int headOfSentence = ta.getSentence(ta.getSentenceId(tokenId)).getStartSpan();
                t = (tokenId == headOfSentence) ? firstCapitalized : notFirstCapitalized;
            } else {
                if (form.contains("-")) {
                    // Skip only this token. A `return` here would abandon every remaining
                    // token, sentence and document of the corpus.
                    continue;
                }
                t = table;
            }

            form = form.toLowerCase();
            count(t, form.substring(form.length() - 3), tag);
            if (form.length() >= 6 && Character.isLetter(form.charAt(form.length() - 4))) {
                count(t, form.substring(form.length() - 4), tag);
            }
        }
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class StanfordOpenIEHandler method addView.
/**
 * Annotates the raw text of {@code ta} with the Stanford pipeline and stores the extracted
 * relation triples in a {@link SpanLabelView} registered under {@code viewName}.
 * <p>
 * For every triple three constituents (subject, object, relation) are obtained through
 * {@code getConstituent}, decorated with the triple's gloss attributes, and connected by a
 * {@code "subj"} and an {@code "obj"} relation, both scored with the triple's confidence.
 *
 * @param ta the text annotation that receives the new view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation stanfordDoc = new Annotation(ta.text);
    pipeline.annotate(stanfordDoc);

    SpanLabelView openIeView = new SpanLabelView(viewName, ta);
    for (CoreMap stanfordSentence : stanfordDoc.get(CoreAnnotations.SentencesAnnotation.class)) {
        Collection<RelationTriple> extracted =
                stanfordSentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        for (RelationTriple t : extracted) {
            // Subject argument and its attributes.
            Constituent subjConstituent =
                    getConstituent(t.subjectGloss(), t.subjectTokenSpan(), stanfordSentence, ta);
            subjConstituent.addAttribute("subjectGloss", t.subjectGloss());
            subjConstituent.addAttribute("subjectLemmaGloss", t.subjectLemmaGloss());
            subjConstituent.addAttribute("subjectLink", t.subjectLink());

            // Object argument and its attributes.
            Constituent objConstituent =
                    getConstituent(t.objectGloss(), t.objectTokenSpan(), stanfordSentence, ta);
            objConstituent.addAttribute("objectGloss", t.objectGloss());
            objConstituent.addAttribute("objectLemmaGloss", t.objectLemmaGloss());
            objConstituent.addAttribute("objectLink", t.objectLink());

            // Relation phrase and its attributes.
            Constituent relConstituent =
                    getConstituent(t.relationGloss(), t.relationTokenSpan(), stanfordSentence, ta);
            relConstituent.addAttribute("relationGloss", t.relationGloss());
            relConstituent.addAttribute("relationLemmaGloss", t.relationLemmaGloss());

            // Link the relation phrase to both of its arguments.
            Relation subjEdge = new Relation("subj", relConstituent, subjConstituent, t.confidence);
            Relation objEdge = new Relation("obj", relConstituent, objConstituent, t.confidence);
            openIeView.addRelation(subjEdge);
            openIeView.addRelation(objEdge);

            openIeView.addConstituent(subjConstituent);
            openIeView.addConstituent(objConstituent);
            openIeView.addConstituent(relConstituent);
        }
    }
    ta.addView(viewName, openIeView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class PrepSRLAnnotator method addView.
/**
 * Collects preposition candidates from the token view, classifies each one, and adds a
 * {@link SpanLabelView} named {@code viewName} holding a span label for every candidate whose
 * predicted role differs from {@code DataReader.CANDIDATE}.
 *
 * @param ta the text annotation that receives the new view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    List<Constituent> prepCandidates = new ArrayList<>();
    for (Constituent tokenConstituent : ta.getView(ViewNames.TOKENS).getConstituents()) {
        int start = tokenConstituent.getStartSpan();

        // Single-token preposition.
        if (PrepSRLDataReader.isPrep(ta, start)) {
            prepCandidates.add(
                    tokenConstituent.cloneForNewViewWithDestinationLabel(viewName, DataReader.CANDIDATE));
        }

        // Bigram and trigram checks run for every token, whether or not the token itself is
        // a preposition.
        Constituent bigram = PrepSRLDataReader.isBigramPrep(ta, start, viewName);
        if (bigram != null) {
            prepCandidates.add(bigram);
        }
        Constituent trigram = PrepSRLDataReader.isTrigramPrep(ta, start, viewName);
        if (trigram != null) {
            prepCandidates.add(trigram);
        }
    }

    SpanLabelView roleView = new SpanLabelView(viewName, viewName + "-annotator", ta, 1.0, true);
    for (Constituent candidate : prepCandidates) {
        String predictedRole = classifier.discreteValue(candidate);
        if (predictedRole.equals(DataReader.CANDIDATE)) {
            // The classifier left the placeholder label in place: nothing to record.
            continue;
        }
        roleView.addSpanLabel(candidate.getStartSpan(), candidate.getEndSpan(), predictedRole, 1.0);
    }
    ta.addView(viewName, roleView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class Quantifier method addView.
/**
 * Adds a {@code ViewNames.QUANTITIES} view to {@code ta}. Each span returned by
 * {@code getSpans} for the tokenized text becomes one span label whose label text is
 * {@code span.object.toString()} and whose score is 1.
 * <p>
 * Asserts (only when assertions are enabled) that {@code ta} already has a sentence view.
 *
 * @param ta the text annotation that receives the new view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert (ta.hasView(ViewNames.SENTENCE));

    SpanLabelView quantities = new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);
    for (QuantSpan quantity : getSpans(ta.getTokenizedText(), true, ta)) {
        // Map the character offsets of the quantity onto token indices.
        // NOTE(review): whether span.end maps to an inclusive or exclusive token index is not
        // visible here - confirm against QuantSpan and SpanLabelView.addSpanLabel.
        int firstToken = ta.getTokenIdFromCharacterOffset(quantity.start);
        int lastToken = ta.getTokenIdFromCharacterOffset(quantity.end);
        quantities.addSpanLabel(firstToken, lastToken, quantity.object.toString(), 1d);
    }
    ta.addView(ViewNames.QUANTITIES, quantities);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class PredicateDetector method getLemma.
/**
 * Decides whether the token at {@code tokenId} is a predicate and, if so, returns its lemma.
 * <p>
 * Tokens whose POS is a verb tag (other than {@code "AUX"}) start out as predicates and are then
 * filtered by a series of rules for contractions, modals and auxiliary uses of "be", "have" and
 * "do". Non-verb tokens starting with {@code "re-"} are predicates when the token with
 * {@code "re-"} removed is found in {@code WordNetPlusLemmaViewGenerator.lemmaDict}.
 *
 * @param ta the text annotation containing the token
 * @param tokenId index of the token to examine
 * @return an {@code Option} holding the lemma when the token is a predicate, otherwise
 *         {@code Option.empty()}
 */
public Option<String> getLemma(TextAnnotation ta, int tokenId) {
String pos = WordHelpers.getPOS(ta, tokenId);
String token = ta.getToken(tokenId).toLowerCase();
String lemma = WordHelpers.getLemma(ta, tokenId);
boolean predicate = false;
// any token that is a verb is a predicate
if (POSUtils.isPOSVerb(pos) && !pos.equals("AUX")) {
// normalize contracted forms: 's / 're / 'm become "be"; 'd, "wo" (from "won't") and 'll
// become the placeholder lemma "xmodal"
if (token.equals("'s") || token.equals("'re") || token.equals("'m"))
lemma = "be";
else if (token.equals("'d") || lemma.equals("wo") || lemma.equals("'ll"))
lemma = "xmodal";
// modals (by lemma or by the MD tag) and the contraction 've are never predicates
predicate = !(lemma.equals("xmodal") || pos.equals("MD") || token.equals("'ve"));
// ignore all instances of has + "to be" if they are followed by a
// verb or if the token is "be" followed by a verb
boolean doVerb = lemma.equals("do");
boolean be = lemma.equals("be");
boolean have = lemma.equals("have");
// rules that look at the following token; skipped for the last token of the text
if (tokenId < ta.size() - 1) {
if (be) {
SpanLabelView chunk = (SpanLabelView) ta.getView(ViewNames.SHALLOW_PARSE);
for (Constituent c : chunk.getConstituentsCoveringToken(tokenId)) {
// if a chunk covering this token does not end with this
// token, then there is another verb here
if (c.getEndSpan() - 1 != tokenId) {
predicate = false;
break;
}
}
}
// ignore "have + be"
if (have && WordHelpers.getLemma(ta, tokenId + 1).equals("be")) {
predicate = false;
}
// ignore "have/do + verb"
if ((have || doVerb) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 1)))
predicate = false;
// ignore "according to"
if (token.equals("according") && ta.getToken(tokenId + 1).toLowerCase().equals("to"))
predicate = false;
}
// rules that look two tokens ahead
if (tokenId < ta.size() - 2) {
// ignore don't + V or haven't + V
if (doVerb || have) {
String nextToken = ta.getToken(tokenId + 1).toLowerCase();
if ((nextToken.equals("n't") || nextToken.equals("not")) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 2)))
predicate = false;
}
}
} else if (token.startsWith("re-")) {
// non-verb token with a "re-" prefix: it is a predicate when the remainder is a known lemma;
// the returned lemma is still the one obtained from WordHelpers.getLemma above
// NOTE(review): replace() removes every occurrence of "re-", not only the leading prefix
// (e.g. inside "pre-") - confirm this is intended
String trim = token.replace("re-", "");
predicate = WordNetPlusLemmaViewGenerator.lemmaDict.contains(trim);
}
if (predicate) {
return new Option<>(lemma);
} else {
return Option.empty();
}
}
Aggregations