Search in sources :

Example 1 with ERENerReader

use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.ERENerReader in project cogcomp-nlp by CogComp.

In class EREReaderTest, method runTest.

/**
 * Builds an ERE reader over the given corpus, pulls one annotated document from it, and prints
 * every constituent of the mention/NER view next to the matching span of the original XML text.
 *
 * @param ereCorpus  the ERE release being read; its name is used in the error message
 * @param corpusRoot corpus location handed to the reader's constructor
 * @return the {@code XmlTextAnnotation} obtained from the reader's first {@code next()} call
 * @throws IllegalStateException if the reader cannot be constructed; the original exception is
 *         attached as the cause
 */
private static XmlTextAnnotation runTest(EreCorpus ereCorpus, String corpusRoot) {
    // NOTE(review): this flag is never passed to the reader here, so only the MENTION_ERE branch
    // is exercised -- confirm whether the reader was meant to receive it.
    boolean addNominalMentions = true;
    boolean throwExceptionOnXmlTagMismatch = true;

    ERENerReader nerReader;
    try {
        nerReader = new EREMentionRelationReader(ereCorpus, corpusRoot, throwExceptionOnXmlTagMismatch);
    } catch (Exception e) {
        // Fail here with the real cause instead of continuing with a null reader and hitting an
        // unrelated NullPointerException on the next statement.
        String message = "ERROR: " + NAME + ": couldn't instantiate ERENerReader for ERE release " + ereCorpus.name() + ": " + e.getMessage();
        System.err.println(message);
        throw new IllegalStateException(message, e);
    }

    XmlTextAnnotation outputXmlTa = nerReader.next();
    TextAnnotation output = outputXmlTa.getTextAnnotation();

    // Choose which view to inspect based on the flag above.
    String viewName = addNominalMentions ? ViewNames.MENTION_ERE : ViewNames.NER_ERE;
    assert (output.hasView(viewName));
    View nerEre = output.getView(viewName);
    assert (nerEre.getConstituents().size() > 0);

    // The string transformation maps offsets in the cleaned text back to the original XML text.
    StringTransformation xmlSt = outputXmlTa.getXmlSt();
    String origXmlStr = xmlSt.getOrigText();
    System.out.println("ERENerReader found " + nerEre.getConstituents().size() + " NER constituents: ");
    for (Constituent c : nerEre.getConstituents()) {
        System.out.println(TextAnnotationPrintHelper.printConstituent(c));
        IntPair origOffsets = xmlSt.getOriginalOffsets(c.getStartCharOffset(), c.getEndCharOffset());
        String origStr = origXmlStr.substring(origOffsets.getFirst(), origOffsets.getSecond());
        System.out.println("Constituent (clean) text: '" + c.getSurfaceForm() + "'");
        System.out.println("Original text: '" + origStr + "'\n---------\n");
    }
    System.out.println("Report: " + nerReader.generateReport());
    return outputXmlTa;
}
Also used: EREMentionRelationReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader), ERENerReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.ERENerReader), StringTransformation (edu.illinois.cs.cogcomp.core.utilities.StringTransformation), IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair), IOException (java.io.IOException)

Aggregations

IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair): 1, StringTransformation (edu.illinois.cs.cogcomp.core.utilities.StringTransformation): 1, EREMentionRelationReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader): 1, ERENerReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.ERENerReader): 1, IOException (java.io.IOException): 1