Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class LBJavaFeatureExtractor, method classify.
/**
 * Extracts the LBJava feature vector for a single {@link Constituent}.
 *
 * <p>Extraction is best-effort: if {@code getFeatures} or the conversion performed by
 * {@code FeatureUtilities.getLBJFeatures} throws, the failure is logged at debug level
 * (together with the exception) and a freshly constructed {@link FeatureVector} is returned
 * instead of propagating the error.
 *
 * @param o the instance to extract features from; must be a {@link Constituent}
 * @return the converted features, or a new {@link FeatureVector} if extraction failed
 * @throws IllegalArgumentException if {@code o} is not a {@link Constituent}
 */
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent)) {
        throw new IllegalArgumentException("Instance must be of type Constituent");
    }
    Constituent instance = (Constituent) o;
    try {
        return FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (Exception e) {
        // The exception is passed as the trailing argument (beyond the two placeholders) so the
        // cause of the failure is recorded instead of being silently discarded.
        logger.debug("Couldn't generate feature {} for constituent {}", getName(), instance, e);
        return new FeatureVector();
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class BIOTester, method getConstituent.
/**
 * Builds one "BIO_Mention" constituent that starts at {@code curToken} and is extended over the
 * following tokens of the "BIO" view for as long as they continue the mention.
 *
 * <p>In gold mode the continuation test and the entity type are read from the tokens' "BIO"
 * attribute; otherwise they come from {@code inference(token, classifier)} and the entity type
 * is {@code mostCommon(...)} over the types collected from the covered tokens.
 *
 * <p>Side effect: every token examined while extending the mention (including the token that
 * ends the scan) gets its "preBIOLevel1" and "preBIOLevel2" attributes set.
 *
 * @param curToken The token of the start of a mention (either gold/predicted)
 * @param classifier The selected classifier from joint_inference
 * @param isGold Indicates if getting the gold mention or not
 * @return A constituent of the entire mention head. The size may be larger than 1.
 */
public static Constituent getConstituent(Constituent curToken, Classifier classifier, boolean isGold) {
View bioView = curToken.getTextAnnotation().getView("BIO");
// Gold entity type: the part after "-" in the gold "BIO" attribute; stays "NA" when the gold tag
// starts with "O" or when not in gold mode.
String goldType = "NA";
if (isGold) {
if (!curToken.getAttribute("BIO").startsWith("O")) {
goldType = (curToken.getAttribute("BIO").split("-"))[1];
}
}
// Types predicted for each token of the mention; only filled in prediction mode.
List<String> predictedTypes = new ArrayList<>();
if (!isGold) {
// NOTE(review): assumes the predicted tag contains a "-" (e.g. not a bare "O"); otherwise
// the [1] index throws. Presumably callers only pass mention-start tokens -- confirm.
predictedTypes.add((inference(curToken, classifier).split("-"))[1]);
}
int startIdx = curToken.getStartSpan();
// Presumably an exclusive end index (a one-token mention is [startIdx, startIdx + 1)) -- confirm.
int endIdx = startIdx + 1;
// Extend the mention if and only if the predicted BIOLU tag of the start token is "B".
// NOTE(review): this gate uses the predicted tag even when isGold is true -- confirm intended.
if (inference(curToken, classifier).startsWith("B") && endIdx < bioView.getEndSpan()) {
// Rolling history of the two preceding tags, written onto each following token before it is
// examined.
String preBIOLevel2_dup = curToken.getAttribute("preBIOLevel1");
String preBIOLevel1_dup = inference(curToken, classifier);
Constituent pointerToken = null;
while (endIdx < bioView.getEndSpan()) {
pointerToken = bioView.getConstituentsCoveringToken(endIdx).get(0);
pointerToken.addAttribute("preBIOLevel1", preBIOLevel1_dup);
pointerToken.addAttribute("preBIOLevel2", preBIOLevel2_dup);
if (isGold) {
// Gold mode: stop at the first token whose gold tag is not "I..." or "L...".
String curGold = pointerToken.getAttribute("BIO");
if (!(curGold.startsWith("I") || curGold.startsWith("L"))) {
break;
}
} else {
// Prediction mode: stop at the first token whose predicted tag is not "I..." or "L...".
String curPrediction = inference(pointerToken, classifier);
if (!(curPrediction.startsWith("I") || curPrediction.startsWith("L"))) {
break;
}
predictedTypes.add(curPrediction.split("-")[1]);
}
// Shift the tag history by one token; the level-1 tag is always the predicted tag, also in
// gold mode.
preBIOLevel2_dup = preBIOLevel1_dup;
preBIOLevel1_dup = inference(pointerToken, classifier);
endIdx++;
}
}
String entityType = goldType;
String entityMentionType = curToken.getAttribute("EntityMentionType");
if (!isGold) {
entityType = mostCommon(predictedTypes);
String className = classifier.getClass().toString();
// The className variable is in form "...bio_classifier_[TYPE]"
// Take the last three characters which stands for the mention level.
entityMentionType = className.substring(className.length() - 3).toUpperCase();
}
// The label is "<mention level>-<entity type>"; both parts are also stored as attributes.
Constituent wholeMention = new Constituent(entityMentionType + "-" + entityType, 1.0f, "BIO_Mention", curToken.getTextAnnotation(), startIdx, endIdx);
wholeMention.addAttribute("EntityType", entityType);
wholeMention.addAttribute("EntityMentionType", entityMentionType);
return wholeMention;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class BIOTester, method statistics.
/**
 * Prints mention counts for the ACE, ERE and TAC data sets to standard output.
 *
 * <p>ACE ("data/all") and ERE ("data/ere/data") mentions are bucketed by their
 * "EntityMentionType" attribute (NAM / NOM / PRO). For TAC, every constituent of the "MENTIONS"
 * view is counted, once for "data/tac/2016.nam" and once for "data/tac/2016.nom".
 *
 * <p>If reading fails part-way, the stack trace is printed and the counts gathered up to that
 * point are still reported.
 */
public static void statistics() {
    int aceNamCount = 0;
    int aceNomCount = 0;
    int aceProCount = 0;
    int ereNamCount = 0;
    int ereNomCount = 0;
    int ereProCount = 0;
    int tacNamCount = 0;
    int tacNomCount = 0;
    try {
        // ACE corpus: one counter per mention level.
        ACEReaderWithTrueCaseFixer aceDocs = new ACEReaderWithTrueCaseFixer("data/all", false);
        for (TextAnnotation doc : aceDocs) {
            for (Constituent mention : doc.getView(ViewNames.MENTION_ACE)) {
                switch (mention.getAttribute("EntityMentionType")) {
                    case "NAM":
                        aceNamCount++;
                        break;
                    case "NOM":
                        aceNomCount++;
                        break;
                    case "PRO":
                        aceProCount++;
                        break;
                    default:
                        break;
                }
            }
        }

        // ERE corpus: same bucketing, but documents arrive wrapped in XmlTextAnnotation.
        EREMentionRelationReader ereDocs = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        for (XmlTextAnnotation wrapped : ereDocs) {
            TextAnnotation doc = wrapped.getTextAnnotation();
            for (Constituent mention : doc.getView(ViewNames.MENTION_ERE)) {
                switch (mention.getAttribute("EntityMentionType")) {
                    case "NAM":
                        ereNamCount++;
                        break;
                    case "NOM":
                        ereNomCount++;
                        break;
                    case "PRO":
                        ereProCount++;
                        break;
                    default:
                        break;
                }
            }
        }

        // TAC data: the mention level is determined by which file is read, so every mention counts.
        ColumnFormatReader tacNamDocs = new ColumnFormatReader("data/tac/2016.nam");
        for (TextAnnotation doc : tacNamDocs) {
            for (Constituent mention : doc.getView("MENTIONS")) {
                tacNamCount++;
            }
        }
        ColumnFormatReader tacNomDocs = new ColumnFormatReader("data/tac/2016.nom");
        for (TextAnnotation doc : tacNomDocs) {
            for (Constituent mention : doc.getView("MENTIONS")) {
                tacNomCount++;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("ACE_NAM: " + aceNamCount);
    System.out.println("ACE_NOM: " + aceNomCount);
    System.out.println("ACE_PRO: " + aceProCount);
    System.out.println("ERE_NAM: " + ereNamCount);
    System.out.println("ERE_NOM: " + ereNomCount);
    System.out.println("ERE_PRO: " + ereProCount);
    System.out.println("TAC_NAM: " + tacNamCount);
    System.out.println("TAC_NOM: " + tacNomCount);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class AnnotatorTester, method test_custom_annotator.
/**
 * Runs a {@code MentionAnnotator} constructed with the arguments
 * ("", "models/TAC_NOM", "", "", "") over the documents read from
 * "data/partition_with_dev/dev" and prints how its mentions compare to the gold ACE mentions.
 *
 * <p>A predicted mention counts as correct when its "EntityHeadStartSpan" and
 * "EntityHeadEndSpan" attributes equal the span of a gold mention's head; the search stops at the
 * first such gold mention. For a correct mention, the entity type and the full extent are then
 * compared as well.
 *
 * <p>If an exception occurs, its stack trace is printed and the totals gathered so far are
 * reported.
 */
public static void test_custom_annotator() {
    POSAnnotator posTagger = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatches = 0;
    int typeMatches = 0;
    int extentMatches = 0;
    try {
        ACEReader devDocs = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator annotator = new MentionAnnotator("", "models/TAC_NOM", "", "", "");
        for (TextAnnotation ta : devDocs) {
            ta.addView(posTagger);
            annotator.addView(ta);
            labeledCount += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : ta.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : ta.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // The gold label is copied into "EntityType" so both sides can be compared
                    // through the same attribute.
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, ta, "B");
                    if (goldHead == null) {
                        continue;
                    }
                    // Skip gold mentions whose head span differs from the predicted head span.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()
                            || Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatches++;
                    String predictedType = predicted.getAttribute("EntityType");
                    if (predictedType.equals(gold.getAttribute("EntityType"))) {
                        typeMatches++;
                    }
                    boolean sameStart = predicted.getStartSpan() == gold.getStartSpan();
                    if (sameStart && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatches++;
                    }
                    // Only the first matching gold mention is scored for each prediction.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatches);
    System.out.println("Type Correct: " + typeMatches);
    System.out.println("Extent Correct: " + extentMatches);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class AnnotatorTester, method test_basic_annotator.
/**
 * Runs a {@code MentionAnnotator} constructed with the single argument "ACE_NONTYPE" over the
 * documents read from "data/partition_with_dev/dev" and prints how its mentions compare to the
 * gold ACE mentions.
 *
 * <p>NOTE(review): the previous description said this uses the ACE model trained with type and
 * should have fairly high performance, but the code passes "ACE_NONTYPE" -- confirm which is
 * intended.
 *
 * <p>A predicted mention counts as correct when its "EntityHeadStartSpan" and
 * "EntityHeadEndSpan" attributes equal the span of a gold mention's head; the search stops at the
 * first such gold mention. For a correct mention, the entity type and the full extent are then
 * compared as well.
 *
 * <p>If an exception occurs, its stack trace is printed and the totals gathered so far are
 * reported.
 */
public static void test_basic_annotator() {
    POSAnnotator posTagger = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatches = 0;
    int typeMatches = 0;
    int extentMatches = 0;
    try {
        ACEReader devDocs = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator annotator = new MentionAnnotator("ACE_NONTYPE");
        for (TextAnnotation ta : devDocs) {
            ta.addView(posTagger);
            annotator.addView(ta);
            labeledCount += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : ta.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : ta.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // The gold label is copied into "EntityType" so both sides can be compared
                    // through the same attribute.
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, ta, "B");
                    if (goldHead == null) {
                        continue;
                    }
                    // Skip gold mentions whose head span differs from the predicted head span.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()
                            || Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatches++;
                    String predictedType = predicted.getAttribute("EntityType");
                    if (predictedType.equals(gold.getAttribute("EntityType"))) {
                        typeMatches++;
                    }
                    boolean sameStart = predicted.getStartSpan() == gold.getStartSpan();
                    if (sameStart && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatches++;
                    }
                    // Only the first matching gold mention is scored for each prediction.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatches);
    System.out.println("Type Correct: " + typeMatches);
    System.out.println("Extent Correct: " + extentMatches);
}
Aggregations