Search in sources :

Example 11 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LBJavaFeatureExtractor method classify.

/**
 * Extracts this extractor's features for a constituent and converts them to an LBJava
 * {@link FeatureVector}.
 *
 * <p>Extraction is best-effort: if anything goes wrong while generating or converting the
 * features, the failure is logged at debug level and an empty vector is returned instead of
 * propagating the exception.
 *
 * @param o the instance to extract features from; must be a {@link Constituent}
 * @return the result of converting the extracted features, or an empty vector when extraction failed
 * @throws IllegalArgumentException if {@code o} is not a {@link Constituent}
 */
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent)) {
        throw new IllegalArgumentException("Instance must be of type Constituent");
    }
    Constituent instance = (Constituent) o;
    try {
        return FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (Exception e) {
        // Hand the exception to the logger as the trailing argument so the cause is not lost.
        // NOTE(review): assumes an SLF4J-style logger that treats a trailing Throwable as the
        // exception to report -- confirm against the logger declared in this class.
        logger.debug("Couldn't generate feature {} for constituent {}", getName(), instance, e);
        return new FeatureVector();
    }
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 12 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class BIOTester method getConstituent.

/**
 * Assembles the full mention head that begins at {@code curToken}.
 *
 * <p>The span starts at the token's start span and covers one token by default. When the
 * classifier's prediction for {@code curToken} starts with "B", the span is extended through the
 * "BIO" view for as long as each following token is tagged "I" or "L" (gold "BIO" attribute when
 * {@code isGold}, classifier prediction otherwise). Every token visited during the extension gets
 * its "preBIOLevel1" and "preBIOLevel2" attributes overwritten with the two preceding tags.
 *
 * <p>NOTE(review): the extension is gated on the <em>predicted</em> tag of {@code curToken} even
 * when {@code isGold} is true, and the running "preBIOLevel1" value is always refreshed from the
 * classifier -- confirm both are intended for the gold path.
 *
 * <p>NOTE(review): the predicted type is read as the second "-"-separated piece of the tag, so a
 * prediction without "-" would fail with an index error -- presumably callers only pass tokens
 * that start a mention; verify against callers.
 *
 * @param curToken The token of the start of a mention (either gold/predicted)
 * @param classifier The classifier handed to {@code inference} for every prediction
 * @param isGold Indicates if getting the gold mention or not
 * @return A new "BIO_Mention" constituent labelled {@code mentionLevel-entityType} spanning the
 *         entire mention head, with "EntityType" and "EntityMentionType" attributes set. The size
 *         may be larger than 1.
 */
public static Constituent getConstituent(Constituent curToken, Classifier classifier, boolean isGold) {
    View bioView = curToken.getTextAnnotation().getView("BIO");

    // Gold mentions take their type from the token's own BIO attribute; predicted mentions
    // collect one type vote per token and settle on the most common one further down.
    String goldType = "NA";
    List<String> typeVotes = new ArrayList<>();
    if (isGold) {
        String goldTag = curToken.getAttribute("BIO");
        if (!goldTag.startsWith("O")) {
            goldType = goldTag.split("-")[1];
        }
    } else {
        String firstPrediction = inference(curToken, classifier);
        typeVotes.add(firstPrediction.split("-")[1]);
    }

    int startIdx = curToken.getStartSpan();
    int endIdx = startIdx + 1;

    // if and only if the start predicted BIOLU tag is "B"
    boolean opensMention = inference(curToken, classifier).startsWith("B");
    if (opensMention && endIdx < bioView.getEndSpan()) {
        // Tags of the two tokens preceding the one about to be examined.
        String twoBack = curToken.getAttribute("preBIOLevel1");
        String oneBack = inference(curToken, classifier);
        for (; endIdx < bioView.getEndSpan(); endIdx++) {
            Constituent next = bioView.getConstituentsCoveringToken(endIdx).get(0);
            next.addAttribute("preBIOLevel1", oneBack);
            next.addAttribute("preBIOLevel2", twoBack);

            String tag = isGold ? next.getAttribute("BIO") : inference(next, classifier);
            if (!tag.startsWith("I") && !tag.startsWith("L")) {
                // The mention ended before this token; endIdx stays exclusive of it.
                break;
            }
            if (!isGold) {
                typeVotes.add(tag.split("-")[1]);
            }

            twoBack = oneBack;
            oneBack = inference(next, classifier);
        }
    }

    String entityType = goldType;
    String entityMentionType = curToken.getAttribute("EntityMentionType");
    if (!isGold) {
        entityType = mostCommon(typeVotes);
        // The className variable is in form "...bio_classifier_[TYPE]"
        // Take the last three characters which stands for the mention level.
        String className = classifier.getClass().toString();
        int levelStart = className.length() - 3;
        entityMentionType = className.substring(levelStart).toUpperCase();
    }

    String label = entityMentionType + "-" + entityType;
    Constituent wholeMention =
            new Constituent(label, 1.0f, "BIO_Mention", curToken.getTextAnnotation(), startIdx, endIdx);
    wholeMention.addAttribute("EntityType", entityType);
    wholeMention.addAttribute("EntityMentionType", entityMentionType);
    return wholeMention;
}
Also used : ArrayList(java.util.ArrayList) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 13 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class BIOTester method statistics.

/**
 * Prints per-corpus mention counts to standard output.
 *
 * <p>ACE mentions (read from "data/all") and ERE mentions (read from "data/ere/data") are counted
 * by their "EntityMentionType" attribute (NAM / NOM / PRO). TAC mentions are counted per file:
 * every constituent of the "MENTIONS" view in "data/tac/2016.nam" counts as NAM and every one in
 * "data/tac/2016.nom" counts as NOM.
 *
 * <p>If reading fails part-way, the stack trace is printed and the counts gathered up to that
 * point are still reported.
 */
public static void statistics() {
    int aceNam = 0;
    int aceNom = 0;
    int acePro = 0;
    int ereNam = 0;
    int ereNom = 0;
    int erePro = 0;
    int tacNam = 0;
    int tacNom = 0;
    try {
        ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer("data/all", false);
        for (TextAnnotation ta : aceReader) {
            for (Constituent c : ta.getView(ViewNames.MENTION_ACE)) {
                // Constant-first equals: a mention without the attribute is skipped instead of
                // raising a NullPointerException that would abort every remaining corpus.
                String mentionLevel = c.getAttribute("EntityMentionType");
                if ("NAM".equals(mentionLevel)) {
                    aceNam++;
                } else if ("NOM".equals(mentionLevel)) {
                    aceNom++;
                } else if ("PRO".equals(mentionLevel)) {
                    acePro++;
                }
            }
        }
        EREMentionRelationReader ereReader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        for (XmlTextAnnotation xta : ereReader) {
            TextAnnotation ta = xta.getTextAnnotation();
            for (Constituent c : ta.getView(ViewNames.MENTION_ERE)) {
                String mentionLevel = c.getAttribute("EntityMentionType");
                if ("NAM".equals(mentionLevel)) {
                    ereNam++;
                } else if ("NOM".equals(mentionLevel)) {
                    ereNom++;
                } else if ("PRO".equals(mentionLevel)) {
                    erePro++;
                }
            }
        }
        // The TAC files are already split by mention level, so every mention in a file counts.
        ColumnFormatReader columnFormatReader = new ColumnFormatReader("data/tac/2016.nam");
        for (TextAnnotation ta : columnFormatReader) {
            for (Constituent ignored : ta.getView("MENTIONS")) {
                tacNam++;
            }
        }
        columnFormatReader = new ColumnFormatReader("data/tac/2016.nom");
        for (TextAnnotation ta : columnFormatReader) {
            for (Constituent ignored : ta.getView("MENTIONS")) {
                tacNom++;
            }
        }
    } catch (Exception e) {
        // Best-effort report: show what went wrong, then still print the partial counts below.
        e.printStackTrace();
    }
    System.out.println("ACE_NAM: " + aceNam);
    System.out.println("ACE_NOM: " + aceNom);
    System.out.println("ACE_PRO: " + acePro);
    System.out.println("ERE_NAM: " + ereNam);
    System.out.println("ERE_NOM: " + ereNom);
    System.out.println("ERE_PRO: " + erePro);
    System.out.println("TAC_NAM: " + tacNam);
    System.out.println("TAC_NOM: " + tacNom);
}
Also used : EREMentionRelationReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader) ACEReaderWithTrueCaseFixer(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer) XmlTextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) DatastoreException(org.cogcomp.DatastoreException) JWNLException(net.didion.jwnl.JWNLException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) IOException(java.io.IOException) InvalidPortException(io.minio.errors.InvalidPortException)

Example 14 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class AnnotatorTester method test_custom_annotator.

/**
 * Scores a {@code MentionAnnotator} constructed with "models/TAC_NOM" as its second argument
 * (all other arguments empty) on the documents read from "data/partition_with_dev/dev".
 *
 * <p>For every predicted mention the gold ACE mentions are scanned in order; the first gold
 * mention whose head span equals the predicted head span counts as a correct prediction, and is
 * additionally checked for matching entity type and matching full extent. Totals are printed to
 * standard output; if an exception interrupts the run, its stack trace is printed and the totals
 * gathered so far are still reported.
 */
public static void test_custom_annotator() {
    POSAnnotator posAnnotator = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatches = 0;
    int typeMatches = 0;
    int extentMatches = 0;
    try {
        ACEReader aceReader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("", "models/TAC_NOM", "", "", "");
        for (TextAnnotation ta : aceReader) {
            ta.addView(posAnnotator);
            mentionAnnotator.addView(ta);
            labeledCount += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : ta.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : ta.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, ta, "B");
                    // Gold mentions without a head cannot be matched.
                    if (goldHead == null) {
                        continue;
                    }
                    // Head start is tested first; the end attribute is only parsed once the start agrees.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()) {
                        continue;
                    }
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatches++;
                    String predictedType = predicted.getAttribute("EntityType");
                    if (predictedType.equals(gold.getAttribute("EntityType"))) {
                        typeMatches++;
                    }
                    boolean sameStart = predicted.getStartSpan() == gold.getStartSpan();
                    if (sameStart && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatches++;
                    }
                    // Only the first gold mention with a matching head is credited.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatches);
    System.out.println("Type Correct: " + typeMatches);
    System.out.println("Extent Correct: " + extentMatches);
}
Also used : ACEReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 15 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class AnnotatorTester method test_basic_annotator.

/**
 * Scores a {@code MentionAnnotator} created with the single argument "ACE_NONTYPE" on the
 * documents read from "data/partition_with_dev/dev", and prints the totals to standard output.
 *
 * <p>NOTE(review): the previous comment described this as the ACE model "trained with Type" with
 * fairly high expected performance, yet the constructor argument is "ACE_NONTYPE" -- confirm
 * which one is intended.
 *
 * <p>Each predicted mention is compared against the gold ACE mentions in order. The first gold
 * mention whose head span equals the predicted head span makes the prediction count as correct;
 * entity type and full extent are then compared for that pair only. An exception during the run
 * is reported via its stack trace and the totals collected so far are still printed.
 */
public static void test_basic_annotator() {
    POSAnnotator posAnnotator = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatches = 0;
    int typeMatches = 0;
    int extentMatches = 0;
    try {
        ACEReader aceReader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_NONTYPE");
        for (TextAnnotation ta : aceReader) {
            ta.addView(posAnnotator);
            mentionAnnotator.addView(ta);
            labeledCount += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : ta.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : ta.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, ta, "B");
                    // A gold mention without a head has nothing to compare against.
                    if (goldHead == null) {
                        continue;
                    }
                    // Compare head start first; the end attribute is parsed only when the start agrees.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()) {
                        continue;
                    }
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatches++;
                    String predictedType = predicted.getAttribute("EntityType");
                    if (predictedType.equals(gold.getAttribute("EntityType"))) {
                        typeMatches++;
                    }
                    boolean sameStart = predicted.getStartSpan() == gold.getStartSpan();
                    if (sameStart && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatches++;
                    }
                    // Stop at the first gold mention whose head matches.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatches);
    System.out.println("Type Correct: " + typeMatches);
    System.out.println("Extent Correct: " + extentMatches);
}
Also used : ACEReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)227 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)121 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)66 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)43 ArrayList (java.util.ArrayList)37 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)28 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)25 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)22 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)21 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)18 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)18 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)14 HashSet (java.util.HashSet)13