use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class FileIOAnnotationJob method getNERString.
/**
 * Renders the annotated text with every constituent of {@code view} written in bracket
 * notation, i.e. {@code [LABEL tokenized-surface-form ] }, and all text outside the
 * constituents copied verbatim from {@code textAnnotation.getText()}.
 *
 * <p>Constituents are processed in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. Text that follows the last
 * constituent is included in the result, so a text without any constituents is returned
 * unchanged.
 *
 * @return the text with bracketed constituents
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    StringBuilder sb = new StringBuilder();
    String text = textAnnotation.getText();
    // Character offset up to which the raw text has already been emitted.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();
        // A constituent that overlaps (or is nested in) the previous one starts before
        // 'where'; copying text.substring(where, start) would then throw, so only copy
        // the gap when there actually is one.
        if (start > where) {
            sb.append(text, where, start);
        }
        sb.append("[").append(c.getLabel()).append(" ").append(c.getTokenizedSurfaceForm()).append(" ] ");
        // Never move backwards, otherwise already emitted text would be duplicated.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Emit whatever follows the last constituent instead of silently dropping it.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class LBJavaFeatureExtractor method classify.
/**
 * Extracts the features of a {@code Constituent} and converts them with
 * {@code FeatureUtilities.getLBJFeatures}.
 *
 * <p>Extraction is best-effort: if anything goes wrong, the failure is logged at debug
 * level (including the causing exception) and an empty {@code FeatureVector} is returned.
 *
 * @param o the instance to extract features from; must be a {@code Constituent}
 * @return the converted features, or an empty {@code FeatureVector} if extraction failed
 * @throws IllegalArgumentException if {@code o} is not a {@code Constituent}
 */
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent)) {
        throw new IllegalArgumentException("Instance must be of type Constituent");
    }
    Constituent instance = (Constituent) o;
    try {
        return FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (Exception e) {
        // Deliberately not rethrown: callers get an empty vector. The exception is passed
        // as the trailing argument so the logger records the cause and stack trace.
        logger.debug("Couldn't generate feature {} for constituent {}", getName(), instance, e);
        return new FeatureVector();
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class StanfordParseHandler method checkLength.
/**
 * Optionally enforces the sentence length limit on a text annotation.
 *
 * <p>Nothing happens unless {@code throwExceptionOnSentenceLengthCheck} is set. When it is,
 * {@code HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit} is consulted; a
 * non-null result is reported via {@code HandlerUtils.getSentenceLengthError}, logged as an
 * error and rethrown as an {@code AnnotatorException} carrying the same message.
 *
 * @param textAnnotation the annotation to check
 * @param throwExceptionOnSentenceLengthCheck whether the check is performed at all
 * @param maxParseSentenceLength the limit handed to the length check
 * @throws AnnotatorException if the check is enabled and returns a constituent
 */
static void checkLength(TextAnnotation textAnnotation, boolean throwExceptionOnSentenceLengthCheck, int maxParseSentenceLength) throws AnnotatorException {
    if (!throwExceptionOnSentenceLengthCheck) {
        return;
    }
    // Presumably the sentence that violates the limit, or null when all sentences pass.
    Constituent offending = HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit(textAnnotation, maxParseSentenceLength);
    if (offending == null) {
        return;
    }
    String errorMessage = HandlerUtils.getSentenceLengthError(textAnnotation.getId(), offending.getSurfaceForm(), maxParseSentenceLength);
    logger.error(errorMessage);
    throw new AnnotatorException(errorMessage);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class PredicateDetector method getLemma.
/**
 * Decides whether the token at {@code tokenId} is a predicate and, if so, returns its lemma.
 *
 * <p>A token is a candidate when {@code POSUtils.isPOSVerb} accepts its POS tag and the tag
 * is not "AUX". For candidates the lemma is overridden to "be" for the tokens 's, 're and 'm,
 * and to "xmodal" for the token 'd or the lemmas "wo" and "'ll". A candidate is rejected when
 * its lemma is "xmodal", its tag is "MD", its token is 've, or one of the context rules
 * inside the method (be/have/do followed by another verb, "according to") applies.
 *
 * <p>A non-candidate whose token starts with "re-" is a predicate when the token with every
 * "re-" removed is contained in {@code WordNetPlusLemmaViewGenerator.lemmaDict}; the lemma
 * returned in that case is the one reported by {@code WordHelpers.getLemma}.
 *
 * @param ta the text annotation; its SHALLOW_PARSE view is read when the lemma is "be" and
 *        {@code tokenId < ta.size() - 1}
 * @param tokenId index of the token to inspect
 * @return an {@code Option} holding the lemma if the token is a predicate, otherwise
 *         {@code Option.empty()}
 */
public Option<String> getLemma(TextAnnotation ta, int tokenId) {
String pos = WordHelpers.getPOS(ta, tokenId);
String token = ta.getToken(tokenId).toLowerCase();
String lemma = WordHelpers.getLemma(ta, tokenId);
boolean predicate = false;
// verb tokens (other than those tagged AUX) are candidate predicates
if (POSUtils.isPOSVerb(pos) && !pos.equals("AUX")) {
// normalize the lemma of clitic forms before applying the rules below
if (token.equals("'s") || token.equals("'re") || token.equals("'m"))
lemma = "be";
else if (token.equals("'d") || lemma.equals("wo") || lemma.equals("'ll"))
lemma = "xmodal";
// modals and 've are never predicates; the rules below can only turn the flag off
predicate = !(lemma.equals("xmodal") || pos.equals("MD") || token.equals("'ve"));
// ignore all instances of has + "to be" if they are followed by a
// verb or if the token is "be" followed by a verb
boolean doVerb = lemma.equals("do");
boolean be = lemma.equals("be");
boolean have = lemma.equals("have");
// rules that look at the next token (assumes ta.size() is the token count -- TODO confirm)
if (tokenId < ta.size() - 1) {
if (be) {
SpanLabelView chunk = (SpanLabelView) ta.getView(ViewNames.SHALLOW_PARSE);
for (Constituent c : chunk.getConstituentsCoveringToken(tokenId)) {
// if the covering chunk does not end at this
// token, then there is another verb here
if (c.getEndSpan() - 1 != tokenId) {
predicate = false;
break;
}
}
}
// ignore "have + be"
if (have && WordHelpers.getLemma(ta, tokenId + 1).equals("be")) {
predicate = false;
}
// ignore "have/do + verb"
if ((have || doVerb) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 1)))
predicate = false;
// ignore "according to"
if (token.equals("according") && ta.getToken(tokenId + 1).toLowerCase().equals("to"))
predicate = false;
}
// rules that look two tokens ahead
if (tokenId < ta.size() - 2) {
// ignore don't + V or haven't + V
if (doVerb || have) {
String nextToken = ta.getToken(tokenId + 1).toLowerCase();
if ((nextToken.equals("n't") || nextToken.equals("not")) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 2)))
predicate = false;
}
}
} else if (token.startsWith("re-")) {
// non-verb tokens with a "re-" prefix: look the remainder up in the lemma dictionary
// (String.replace removes every occurrence of "re-", not only the prefix)
String trim = token.replace("re-", "");
predicate = WordNetPlusLemmaViewGenerator.lemmaDict.contains(trim);
}
if (predicate) {
return new Option<>(lemma);
} else {
return Option.empty();
}
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class PredicateDetector method getPredicates.
/**
 * Collects a single-token constituent for every token that {@code getLemma} accepts as a
 * predicate.
 *
 * <p>Each constituent is created with empty label and view name, spans exactly the token
 * {@code [i, i + 1)}, and carries the lemma under the attribute key
 * {@code PredicateArgumentView.LemmaIdentifier}.
 *
 * @param ta the text annotation whose tokens are scanned
 * @return the predicate constituents in token order; empty if none were found
 * @throws Exception declared for callers; propagated from the calls made here
 */
public List<Constituent> getPredicates(TextAnnotation ta) throws Exception {
    List<Constituent> predicates = new ArrayList<>();
    for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
        Option<String> lemma = getLemma(ta, tokenId);
        if (!lemma.isPresent()) {
            continue;
        }
        Constituent predicate = new Constituent("", "", ta, tokenId, tokenId + 1);
        predicate.addAttribute(PredicateArgumentView.LemmaIdentifier, lemma.get());
        predicates.add(predicate);
    }
    return predicates;
}
Aggregations