use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
the class BIOTester method test_hybrid.
/**
 * Tests the model trained on hybrid ACE/ERE data and evaluated on hybrid ACE/ERE data,
 * producing results for the two corpora separately as well as combined.
 *
 * <p>Five folds are run. In each fold the NAM, NOM and PRO classifiers are trained from
 * {@code BIOCombinedReader}s opened in "ALL-TRAIN" mode, and every example returned by the
 * "ALL-EVAL" reader is tagged with {@code joint_inference}. The two previously predicted tags
 * are attached to each example as the attributes "preBIOLevel1" and "preBIOLevel2" before
 * inference. Counts are accumulated across all folds.
 *
 * <p>A mention is counted as correct when a gold mention and a predicted mention both start at
 * the same example and have identical start and end spans. Type correctness is only counted
 * for such boundary-correct mentions. A document is attributed to ACE when its id starts with
 * "bn" or "nw", and to ERE otherwise.
 *
 * <p>Precision, recall and F1 are printed as 0.0 (rather than NaN) when their denominator is
 * zero.
 *
 * @throws DatastoreException propagated from the readers, training or inference code called here
 * @throws JWNLException propagated from the readers, training or inference code called here
 * @throws IOException propagated from the readers, training or inference code called here
 * @throws InvalidEndpointException propagated from the readers, training or inference code called here
 * @throws InvalidPortException propagated from the readers, training or inference code called here
 */
public static void test_hybrid() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    int totalLabeled = 0;
    int totalPredicted = 0;
    int totalCorrect = 0;
    int aceLabeled = 0;
    int ereLabeled = 0;
    int acePredicted = 0;
    int erePredicted = 0;
    int aceCorrect = 0;
    int ereCorrect = 0;
    int aceTypeCorrect = 0;
    int ereTypeCorrect = 0;

    for (int fold = 0; fold < 5; fold++) {
        // All readers are opened before any training starts, as in the original ordering.
        Parser testParser = new BIOCombinedReader(fold, "ALL-EVAL", "ALL");
        Parser trainParserNam = new BIOCombinedReader(fold, "ALL-TRAIN", "NAM");
        Parser trainParserNom = new BIOCombinedReader(fold, "ALL-TRAIN", "NOM");
        Parser trainParserPro = new BIOCombinedReader(fold, "ALL-TRAIN", "PRO");

        Learner[] candidates = new Learner[3];
        candidates[0] = train_nam_classifier(trainParserNam);
        candidates[1] = train_nom_classifier(trainParserNom);
        candidates[2] = train_pro_classifier(trainParserPro);

        // Tag history is reset at the start of every fold.
        String preBIOLevel1 = "";
        String preBIOLevel2 = "";
        for (Object example = testParser.next(); example != null; example = testParser.next()) {
            Constituent token = (Constituent) example;
            token.addAttribute("preBIOLevel1", preBIOLevel1);
            token.addAttribute("preBIOLevel2", preBIOLevel2);

            Pair<String, Integer> prediction = joint_inference(token, candidates);
            String bioTag = prediction.getFirst();
            int learnerIdx = prediction.getSecond();
            preBIOLevel2 = preBIOLevel1;
            preBIOLevel1 = bioTag;

            // "B" and "U" are the tags that open a mention.
            boolean predictedStart = bioTag.startsWith("B") || bioTag.startsWith("U");
            if (predictedStart) {
                totalPredicted++;
                if (isAceDocument(token)) {
                    acePredicted++;
                } else {
                    erePredicted++;
                }
            }

            String correctTag = token.getAttribute("BIO");
            boolean goldStart = correctTag.startsWith("B") || correctTag.startsWith("U");
            if (goldStart) {
                totalLabeled++;
                if (isAceDocument(token)) {
                    aceLabeled++;
                } else {
                    ereLabeled++;
                }
            }

            if (!(goldStart && predictedStart)) {
                continue;
            }

            // Third argument selects the gold (true) or the predicted (false) mention.
            Constituent goldMention = getConstituent(token, candidates[learnerIdx], true);
            Constituent predictMention = getConstituent(token, candidates[learnerIdx], false);
            boolean boundaryCorrect = goldMention.getStartSpan() == predictMention.getStartSpan()
                    && goldMention.getEndSpan() == predictMention.getEndSpan();
            boolean typeCorrect = goldMention.getAttribute("EntityType")
                    .equals(predictMention.getAttribute("EntityType"));

            if (boundaryCorrect) {
                totalCorrect++;
                boolean ace = isAceDocument(token);
                if (ace) {
                    aceCorrect++;
                } else {
                    ereCorrect++;
                }
                if (typeCorrect) {
                    if (ace) {
                        aceTypeCorrect++;
                    } else {
                        ereTypeCorrect++;
                    }
                }
            }
        }
    }

    System.out.println("Total Labeled Mention: " + totalLabeled);
    System.out.println("Total Predicted Mention: " + totalPredicted);
    System.out.println("Total Correct Mention: " + totalCorrect);
    printHybridScores(totalCorrect, totalPredicted, totalLabeled);

    System.out.println("Total Labeled Mention ACE: " + aceLabeled);
    System.out.println("Total Predicted Mention ACE: " + acePredicted);
    System.out.println("Total Correct Mention ACE: " + aceCorrect);
    System.out.println("Total Type Correct ACE: " + aceTypeCorrect);
    printHybridScores(aceCorrect, acePredicted, aceLabeled);

    System.out.println("Total Labeled Mention ERE: " + ereLabeled);
    System.out.println("Total Predicted Mention ERE: " + erePredicted);
    System.out.println("Total Correct Mention ERE: " + ereCorrect);
    System.out.println("Total Type Correct ERE: " + ereTypeCorrect);
    printHybridScores(ereCorrect, erePredicted, ereLabeled);
}

/**
 * Tells whether the document owning {@code token} is counted as ACE by {@code test_hybrid}:
 * its id starts with "bn" or "nw". Every other document is counted as ERE.
 */
private static boolean isAceDocument(Constituent token) {
    String documentId = token.getTextAnnotation().getId();
    return documentId.startsWith("bn") || documentId.startsWith("nw");
}

/**
 * Prints precision, recall and F1 for the given counts, using 0.0 wherever the
 * corresponding denominator is zero so that no NaN is printed.
 */
private static void printHybridScores(int correct, int predicted, int labeled) {
    double p = predicted == 0 ? 0.0 : (double) correct / (double) predicted;
    double r = labeled == 0 ? 0.0 : (double) correct / (double) labeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("F1: " + f);
}
use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
the class BIOTester method test_cv.
/**
 * Cross-validation tester.
 *
 * <p>Five folds are run. In each fold the NAM, NOM and PRO classifiers are trained from
 * {@code BIOCombinedReader}s opened in "ERE-TRAIN" mode and every example returned by the
 * "ERE-EVAL" reader is printed and then tagged with {@code joint_inference}. Gold tags are
 * obtained from a {@code bio_label} instance. Per-fold counts are added to running totals,
 * from which precision, recall and F1 are printed at the end (0.0 instead of NaN when a
 * denominator is zero).
 *
 * <p>The method also counts "violations": predicted tags that do not fit the previously
 * predicted tag according to the checks in the loop body.
 *
 * @throws DatastoreException propagated from the readers, training or inference code called here
 * @throws JWNLException propagated from the readers, training or inference code called here
 * @throws IOException propagated from the readers, training or inference code called here
 * @throws InvalidEndpointException propagated from the readers, training or inference code called here
 * @throws InvalidPortException propagated from the readers, training or inference code called here
 */
public static void test_cv() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    int totalLabeled = 0;
    int totalPredicted = 0;
    int totalCorrect = 0;
    int violations = 0;

    for (int fold = 0; fold < 5; fold++) {
        Parser testParser = new BIOCombinedReader(fold, "ERE-EVAL", "ALL");
        bio_label output = new bio_label();
        System.out.println("Start training fold " + fold);
        Parser trainParserNam = new BIOCombinedReader(fold, "ERE-TRAIN", "NAM");
        Parser trainParserNom = new BIOCombinedReader(fold, "ERE-TRAIN", "NOM");
        Parser trainParserPro = new BIOCombinedReader(fold, "ERE-TRAIN", "PRO");

        Learner[] candidates = new Learner[3];
        candidates[0] = train_nam_classifier(trainParserNam);
        candidates[1] = train_nom_classifier(trainParserNom);
        candidates[2] = train_pro_classifier(trainParserPro);

        int foldLabeled = 0;
        int foldPredicted = 0;
        int foldCorrect = 0;
        System.out.println("Start evaluating fold " + fold);

        // Tag history is reset at the start of every fold.
        String preBIOLevel1 = "";
        String preBIOLevel2 = "";
        for (Object example = testParser.next(); example != null; example = testParser.next()) {
            Constituent token = (Constituent) example;
            System.out.println(token.toString());
            token.addAttribute("preBIOLevel1", preBIOLevel1);
            token.addAttribute("preBIOLevel2", preBIOLevel2);

            Pair<String, Integer> prediction = joint_inference(token, candidates);
            String bioTag = prediction.getFirst();

            // The tag tests are mutually exclusive, so at most one violation is
            // recorded per example.
            boolean previousOpen = preBIOLevel1.equals("I") || preBIOLevel1.equals("B");
            if (bioTag.equals("I") || bioTag.equals("L")) {
                // "I"/"L" must follow "B" or "I".
                if (!previousOpen) {
                    violations++;
                }
            } else if (bioTag.equals("U") || bioTag.equals("O")) {
                // "U"/"O" must not follow "B" or "I".
                if (previousOpen) {
                    violations++;
                }
            } else if (bioTag.equals("B")) {
                // Only "B" directly after "I" is flagged here.
                if (preBIOLevel1.equals("I")) {
                    violations++;
                }
            }

            preBIOLevel2 = preBIOLevel1;
            preBIOLevel1 = bioTag;

            // "B" and "U" are the tags that open a mention.
            boolean predictedStart = bioTag.startsWith("B") || bioTag.startsWith("U");
            if (predictedStart) {
                foldPredicted++;
            }
            String correctTag = output.discreteValue(example);
            boolean goldStart = correctTag.startsWith("B") || correctTag.startsWith("U");
            if (goldStart) {
                foldLabeled++;
            }

            if (goldStart && predictedStart) {
                int candidateIdx = prediction.getSecond();
                // Third argument selects the gold (true) or the predicted (false) mention.
                Constituent goldMention = getConstituent(token, candidates[candidateIdx], true);
                Constituent predictMention = getConstituent(token, candidates[candidateIdx], false);
                if (goldMention.getStartSpan() == predictMention.getStartSpan()
                        && goldMention.getEndSpan() == predictMention.getEndSpan()) {
                    foldCorrect++;
                }
            }
        }

        totalLabeled += foldLabeled;
        totalPredicted += foldPredicted;
        totalCorrect += foldCorrect;
    }

    System.out.println("Total Labeled Mention: " + totalLabeled);
    System.out.println("Total Predicted Mention: " + totalPredicted);
    System.out.println("Total Correct Mention: " + totalCorrect);
    // Report 0.0 rather than NaN when nothing was predicted, labeled or matched.
    double p = totalPredicted == 0 ? 0.0 : (double) totalCorrect / (double) totalPredicted;
    double r = totalLabeled == 0 ? 0.0 : (double) totalCorrect / (double) totalLabeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("F1: " + f);
    System.out.println("violations: " + violations);
}
use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
the class BIOTester method test_ts.
/**
 * Test-set tester.
 *
 * <p>Trains the NAM, NOM and PRO classifiers from {@code BIOReader}s opened on
 * {@code getPath("all", "ACE", 0)} in "ACE05-TRAIN" mode and tags every example returned by a
 * reader opened on {@code getPath("dev", "ACE", 0)} in "ACE05-EVAL" mode using
 * {@code joint_inference}.
 *
 * <p>Prints the labeled / predicted / correct mention counts, precision, recall and F1
 * (0.0 instead of NaN when a denominator is zero), followed by one line per learner
 * (NAM, NOM, PRO) of the form {@code wrong-type/boundary-correct}. For boundary-correct PRO
 * mentions whose entity type differs from gold, the sentence and the two types are printed
 * as they are encountered.
 *
 * @throws JWNLException propagated from the readers, training or inference code called here
 * @throws IOException propagated from the readers, training or inference code called here
 * @throws DatastoreException propagated from the readers, training or inference code called here
 * @throws InvalidEndpointException propagated from the readers, training or inference code called here
 * @throws InvalidPortException propagated from the readers, training or inference code called here
 */
public static void test_ts() throws InvalidPortException, InvalidEndpointException, DatastoreException, IOException, JWNLException {
    boolean isBIO = false;
    int totalLabeled = 0;
    int totalPredicted = 0;
    int totalCorrect = 0;
    // Indexed like the candidates array: 0 = NAM, 1 = NOM, 2 = PRO.
    int[] correctByLearner = new int[3];
    int[] falseTypeByLearner = new int[3];

    Parser testParser = new BIOReader(getPath("dev", "ACE", 0), "ACE05-EVAL", "ALL", isBIO);
    Parser trainParserNam = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NAM", isBIO);
    Parser trainParserNom = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NOM", isBIO);
    Parser trainParserPro = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "PRO", isBIO);

    Learner[] candidates = new Learner[3];
    candidates[0] = train_nam_classifier(trainParserNam);
    candidates[1] = train_nom_classifier(trainParserNom);
    candidates[2] = train_pro_classifier(trainParserPro);

    String preBIOLevel1 = "";
    String preBIOLevel2 = "";
    for (Object example = testParser.next(); example != null; example = testParser.next()) {
        Constituent token = (Constituent) example;
        token.addAttribute("preBIOLevel1", preBIOLevel1);
        token.addAttribute("preBIOLevel2", preBIOLevel2);

        Pair<String, Integer> prediction = joint_inference(token, candidates);
        String bioTag = prediction.getFirst();
        int learnerIdx = prediction.getSecond();
        preBIOLevel2 = preBIOLevel1;
        preBIOLevel1 = bioTag;

        // "B" and "U" are the tags that open a mention.
        boolean predictedStart = bioTag.startsWith("B") || bioTag.startsWith("U");
        if (predictedStart) {
            totalPredicted++;
        }
        String correctTag = token.getAttribute("BIO");
        boolean goldStart = correctTag.startsWith("B") || correctTag.startsWith("U");
        if (goldStart) {
            totalLabeled++;
        }
        if (!(goldStart && predictedStart)) {
            continue;
        }

        // Third argument selects the gold (true) or the predicted (false) mention.
        Constituent goldMention = getConstituent(token, candidates[learnerIdx], true);
        Constituent predictMention = getConstituent(token, candidates[learnerIdx], false);
        boolean boundaryCorrect = goldMention.getStartSpan() == predictMention.getStartSpan()
                && goldMention.getEndSpan() == predictMention.getEndSpan();
        boolean typeCorrect = goldMention.getAttribute("EntityType")
                .equals(predictMention.getAttribute("EntityType"));

        if (boundaryCorrect) {
            totalCorrect++;
            // learnerIdx already indexed candidates above, so it is within 0..2 here.
            correctByLearner[learnerIdx]++;
            if (!typeCorrect) {
                falseTypeByLearner[learnerIdx]++;
                if (learnerIdx == 2) {
                    // Dump the context of PRO mentions that received the wrong entity type.
                    System.out.println(goldMention.getTextAnnotation().getSentenceFromToken(goldMention.getStartSpan()).toString());
                    System.out.println(goldMention.toString() + " " + goldMention.getAttribute("EntityType") + " " + predictMention.getAttribute("EntityType"));
                    System.out.println();
                }
            }
        }
    }

    System.out.println("Total Labeled Mention: " + totalLabeled);
    System.out.println("Total Predicted Mention: " + totalPredicted);
    System.out.println("Total Correct Mention: " + totalCorrect);
    // Report 0.0 rather than NaN when nothing was predicted, labeled or matched.
    double p = totalPredicted == 0 ? 0.0 : (double) totalCorrect / (double) totalPredicted;
    double r = totalLabeled == 0 ? 0.0 : (double) totalCorrect / (double) totalLabeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("F1: " + f);
    System.out.println("NAM: " + falseTypeByLearner[0] + "/" + correctByLearner[0]);
    System.out.println("NOM: " + falseTypeByLearner[1] + "/" + correctByLearner[1]);
    System.out.println("PRO: " + falseTypeByLearner[2] + "/" + correctByLearner[2]);
}
use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
the class BIOTester method test_tac.
/**
 * Tester for the TAC column-format data.
 *
 * <p>Trains a single NOM classifier from a {@code BIOReader} opened on "data/all" in "ACE05"
 * mode and tags every example returned by a reader opened on "data/tac/2016.nom" in
 * "ColumnFormat" mode using {@code inference}. The two previously predicted tags are attached
 * to each example as "preBIOLevel1" and "preBIOLevel2" before inference.
 *
 * <p>A mention is counted as correct when a gold and a predicted mention start at the same
 * example and have identical start and end spans. Prints the three mention counts followed by
 * precision, recall and F1 (0.0 instead of NaN when a denominator is zero).
 *
 * @throws JWNLException propagated from the readers, training or inference code called here
 * @throws IOException propagated from the readers, training or inference code called here
 * @throws DatastoreException propagated from the readers, training or inference code called here
 * @throws InvalidEndpointException propagated from the readers, training or inference code called here
 * @throws InvalidPortException propagated from the readers, training or inference code called here
 */
public static void test_tac() throws InvalidPortException, InvalidEndpointException, DatastoreException, IOException, JWNLException {
    int totalLabeled = 0;
    int totalPredicted = 0;
    int totalCorrect = 0;

    Parser trainParser = new BIOReader("data/all", "ACE05", "NOM", false);
    Parser testParser = new BIOReader("data/tac/2016.nom", "ColumnFormat", "ALL", false);
    bio_classifier_nom classifier = train_nom_classifier(trainParser);

    String preLevel1 = "";
    String preLevel2 = "";
    for (Object example = testParser.next(); example != null; example = testParser.next()) {
        Constituent token = (Constituent) example;
        token.addAttribute("preBIOLevel1", preLevel1);
        token.addAttribute("preBIOLevel2", preLevel2);

        String predictedTag = inference(token, classifier);
        String goldTag = token.getAttribute("BIO");

        // "B" and "U" are the tags that open a mention.
        boolean predictedStart = predictedTag.startsWith("B") || predictedTag.startsWith("U");
        if (predictedStart) {
            totalPredicted++;
        }
        boolean goldStart = goldTag.startsWith("B") || goldTag.startsWith("U");
        if (goldStart) {
            totalLabeled++;
        }

        if (predictedStart && goldStart) {
            // Third argument selects the gold (true) or the predicted (false) mention.
            Constituent goldMention = getConstituent(token, classifier, true);
            Constituent predictedMention = getConstituent(token, classifier, false);
            if (goldMention.getStartSpan() == predictedMention.getStartSpan()
                    && goldMention.getEndSpan() == predictedMention.getEndSpan()) {
                totalCorrect++;
            }
        }

        // Shift the tag history only after the mention lookups for this example.
        preLevel2 = preLevel1;
        preLevel1 = predictedTag;
    }

    System.out.println("Total Labeled Mention: " + totalLabeled);
    System.out.println("Total Predicted Mention: " + totalPredicted);
    System.out.println("Total Correct Mention: " + totalCorrect);
    // Report 0.0 rather than NaN when nothing was predicted, labeled or matched.
    double p = totalPredicted == 0 ? 0.0 : (double) totalCorrect / (double) totalPredicted;
    double r = totalLabeled == 0 ? 0.0 : (double) totalCorrect / (double) totalLabeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("F1: " + f);
}
use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
the class BIOTester method test_ere.
/**
 * ERE corpus tester.
 *
 * <p>Trains the NAM, NOM and PRO classifiers from {@code BIOReader}s opened on
 * {@code getPath("all", "ACE", 0)} in "ACE05-TRAIN" mode and tags every example returned by a
 * reader opened on {@code getPath("all", "ERE", 0)} in "ERE-EVAL" mode using
 * {@code joint_inference}.
 *
 * <p>A mention is counted as correct when a gold and a predicted mention start at the same
 * example and have identical start and end spans; a type match is only counted for such
 * mentions. Prints the four counts followed by precision, recall and F1 (0.0 instead of NaN
 * when a denominator is zero).
 *
 * @throws JWNLException propagated from the readers, training or inference code called here
 * @throws IOException propagated from the readers, training or inference code called here
 * @throws DatastoreException propagated from the readers, training or inference code called here
 * @throws InvalidEndpointException propagated from the readers, training or inference code called here
 * @throws InvalidPortException propagated from the readers, training or inference code called here
 */
public static void test_ere() throws InvalidPortException, InvalidEndpointException, DatastoreException, IOException, JWNLException {
    int totalLabeled = 0;
    int totalPredicted = 0;
    int totalCorrect = 0;
    int totalCorrectTypeMatch = 0;

    Parser testParser = new BIOReader(getPath("all", "ERE", 0), "ERE-EVAL", "ALL", false);
    Parser trainParserNam = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NAM", false);
    Parser trainParserNom = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NOM", false);
    Parser trainParserPro = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "PRO", false);

    Learner[] candidates = new Learner[3];
    candidates[0] = train_nam_classifier(trainParserNam);
    candidates[1] = train_nom_classifier(trainParserNom);
    candidates[2] = train_pro_classifier(trainParserPro);

    String preBIOLevel1 = "";
    String preBIOLevel2 = "";
    for (Object example = testParser.next(); example != null; example = testParser.next()) {
        Constituent token = (Constituent) example;
        token.addAttribute("preBIOLevel1", preBIOLevel1);
        token.addAttribute("preBIOLevel2", preBIOLevel2);

        Pair<String, Integer> prediction = joint_inference(token, candidates);
        String goldTag = token.getAttribute("BIO");
        String predictedTag = prediction.getFirst();
        preBIOLevel2 = preBIOLevel1;
        preBIOLevel1 = predictedTag;

        // "B" and "U" are the tags that open a mention.
        boolean goldStart = goldTag.startsWith("B") || goldTag.startsWith("U");
        if (goldStart) {
            totalLabeled++;
        }
        boolean predictedStart = predictedTag.startsWith("B") || predictedTag.startsWith("U");
        if (predictedStart) {
            totalPredicted++;
        }
        if (!(goldStart && predictedStart)) {
            continue;
        }

        // The learner chosen by joint inference is used for both mention lookups;
        // the third argument selects the gold (true) or the predicted (false) mention.
        Learner chosen = candidates[prediction.getSecond()];
        Constituent goldMention = getConstituent(token, chosen, true);
        Constituent predictedMention = getConstituent(token, chosen, false);
        boolean boundaryMatch = goldMention.getStartSpan() == predictedMention.getStartSpan()
                && goldMention.getEndSpan() == predictedMention.getEndSpan();
        boolean typeMatch = goldMention.getAttribute("EntityType")
                .equals(predictedMention.getAttribute("EntityType"));

        if (boundaryMatch) {
            totalCorrect++;
            if (typeMatch) {
                totalCorrectTypeMatch++;
            }
        }
    }

    System.out.println("Total Labeled Mention: " + totalLabeled);
    System.out.println("Total Predicted Mention: " + totalPredicted);
    System.out.println("Total Correct Mention: " + totalCorrect);
    System.out.println("Total Correct Type Match: " + totalCorrectTypeMatch);
    // Report 0.0 rather than NaN when nothing was predicted, labeled or matched.
    double p = totalPredicted == 0 ? 0.0 : (double) totalCorrect / (double) totalPredicted;
    double r = totalLabeled == 0 ? 0.0 : (double) totalCorrect / (double) totalLabeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("F1: " + f);
}
Aggregations