Search in sources :

Example 1 with EREEventReader

use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader in project cogcomp-nlp by CogComp.

the class CreateTrainDevTestSplit method main.

/**
 * Reads an ERE corpus with an {@link EREEventReader}, gathers the configured views of each
 * document, asks {@link CreateTrainDevTestSplit} for a train/dev/test split, and writes the
 * label counts plus one document-id list per split into the output directory.
 *
 * <p>Output files (all directly under the split directory): {@code countInfo.txt} with one
 * count line per document, one per split and a final {@code TOTALS} line, and
 * {@code <split name>.txt} holding the document ids assigned to that split.
 *
 * <p>The process exits with status -1 on a wrong argument count, when the reader cannot be
 * constructed, or when any output file cannot be written.
 *
 * @param args exactly three values: the name of an {@code EREDocumentReader.EreCorpus}
 *        constant, the corpus root directory, and the directory to write the split files to
 */
public static void main(String[] args) {
    if (args.length != 3) {
        System.err.println("Usage: " + NAME + " EreCorpusType corpusDir splitDir");
        System.exit(-1);
    }
    EREDocumentReader.EreCorpus ereCorpus = EREDocumentReader.EreCorpus.valueOf(args[0]);
    String corpusRoot = args[1];
    String outDir = args[2];
    ResourceManager fullRm = new CorpusSplitConfigurator().getDefaultConfig();
    boolean throwExceptionOnXmlParserFail = false;
    double trainFrac = fullRm.getDouble(CorpusSplitConfigurator.TRAIN_FRACTION.key);
    double devFrac = fullRm.getDouble(CorpusSplitConfigurator.DEV_FRACTION.key);
    double testFrac = fullRm.getDouble(CorpusSplitConfigurator.TEST_FRACTION.key);
    IOUtils.mkdir(outDir);
    String outFileStem = outDir + "/";
    String[] viewNames = fullRm.getCommaSeparatedValues(CorpusSplitConfigurator.VIEWS_TO_CONSIDER.key);
    // No label filter is supplied; how an empty array is interpreted is up to
    // CreateTrainDevTestSplit (not visible here).
    String[] labelsToCount = {};
    EREMentionRelationReader reader = null;
    try {
        reader = new EREEventReader(ereCorpus, corpusRoot, throwExceptionOnXmlParserFail);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
    // Only the requested views are retained per document; the annotations themselves are not
    // kept, so the corpus does not have to stay in memory after this loop.
    Map<String, Set<View>> ereViews = new HashMap<>();
    while (reader.hasNext()) {
        TextAnnotation ta = reader.next().getTextAnnotation();
        Set<View> views = new HashSet<>();
        for (String viewName : viewNames) {
            if (ta.hasView(viewName)) {
                views.add(ta.getView(viewName));
            }
        }
        ereViews.put(ta.getId(), views);
    }
    CreateTrainDevTestSplit creator = new CreateTrainDevTestSplit(ereViews, labelsToCount);
    Map<Split, Set<String>> splits = creator.getSplits(trainFrac, devFrac, testFrac);
    Map<Split, Counter<String>> splitCounts = creator.getBestRelSplitCounts();
    Map<String, Counter<String>> counts = creator.getLabelCounts();
    // One line per document, one per split, plus the TOTALS line.
    List<String> outLines = new ArrayList<>(counts.size() + splitCounts.size() + 1);
    for (Map.Entry<String, Counter<String>> docEntry : counts.entrySet()) {
        outLines.add(docEntry.getKey() + ": " + printCounts(docEntry.getValue()));
    }
    for (Map.Entry<Split, Counter<String>> splitEntry : splitCounts.entrySet()) {
        outLines.add(splitEntry.getKey().name() + ": " + printCounts(splitEntry.getValue()));
    }
    Counter<String> totalLabelCounts = creator.getLabelTotals();
    outLines.add("TOTALS: " + printCounts(totalLabelCounts));
    try {
        LineIO.write(outFileStem + "countInfo.txt", outLines);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    for (Map.Entry<Split, Set<String>> splitEntry : splits.entrySet()) {
        List<String> ids = new ArrayList<>(splitEntry.getValue());
        try {
            LineIO.write(outFileStem + splitEntry.getKey().name() + ".txt", ids);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }
}
Also used : EREDocumentReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader) Counter(edu.illinois.cs.cogcomp.core.stats.Counter) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) EREMentionRelationReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader) EREEventReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader)

Example 2 with EREEventReader

use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader in project cogcomp-nlp by CogComp.

the class MultilingualEreReaderTest method testChinese.

/**
 * Builds an {@link EREEventReader} over {@code chinesePathB} using the tokenizer obtained for
 * the Chinese language code, then passes it to {@code testReader}. Any exception raised
 * while building the reader is printed and reported through {@code fail}.
 */
public static void testChinese() {
    // XML parse failures are requested to surface as exceptions.
    final boolean throwExceptionOnXmlParseFail = true;
    EREEventReader chineseReader = null;
    try {
        TextAnnotationBuilder tokenizer =
                MultiLingualTokenizer.getTokenizer(Language.Chinese.getCode());
        chineseReader = new EREEventReader(
                EREDocumentReader.EreCorpus.ENR3,
                tokenizer,
                chinesePathB,
                throwExceptionOnXmlParseFail);
    } catch (Exception setupFailure) {
        setupFailure.printStackTrace();
        fail(setupFailure.getMessage());
    }
    testReader(chineseReader);
}
Also used : TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) EREEventReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader)

Example 3 with EREEventReader

use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader in project cogcomp-nlp by CogComp.

the class EREReaderTest method runEventReader.

/**
 * Reads documents from {@code corpusDir} with an {@link EREEventReader} until one whose id
 * equals {@code wantedId} is found, checks that document's mention and event views, prints
 * the event view and the reader's report, and returns the document.
 *
 * <p>Failure to construct the reader, or to find the wanted document, is reported through
 * {@code fail} so that only the calling test fails instead of the whole JVM exiting.
 *
 * @param corpusDir root directory of the ERE corpus to read
 * @param wantedId id of the document to return
 * @return the {@link XmlTextAnnotation} whose text annotation id equals {@code wantedId}
 */
private static XmlTextAnnotation runEventReader(String corpusDir, String wantedId) {
    EREEventReader emr = null;
    try {
        boolean throwExceptionOnXmlTagMismatch = true;
        emr = new EREEventReader(EreCorpus.ENR3, corpusDir, throwExceptionOnXmlTagMismatch);
    } catch (Exception e) {
        e.printStackTrace();
        // Report as a test failure, matching the other reader tests, rather than
        // terminating the test runner.
        fail(e.getMessage());
    }
    assert (emr.hasNext());
    // Advance until the wanted document is produced or the reader is exhausted.
    XmlTextAnnotation outputXmlTa = null;
    do {
        outputXmlTa = emr.next();
    } while (!outputXmlTa.getTextAnnotation().getId().equals(wantedId) && emr.hasNext());
    if (!outputXmlTa.getTextAnnotation().getId().equals(wantedId))
        fail("ERROR: didn't find corpus entry with id '" + wantedId + "'.");
    TextAnnotation output = outputXmlTa.getTextAnnotation();
    assert (output.hasView(ViewNames.MENTION_ERE));
    View nerRelation = output.getView(ViewNames.MENTION_ERE);
    assert (nerRelation.getConstituents().size() > 0);
    // NOTE(review): presence is checked under ViewNames.EVENT_ERE but the view is fetched via
    // emr.getEventViewName(); presumably these name the same view -- confirm.
    assert (output.hasView(ViewNames.EVENT_ERE));
    PredicateArgumentView eventView = (PredicateArgumentView) output.getView(emr.getEventViewName());
    assert (eventView.getConstituents().size() > 0);
    List<Constituent> triggers = eventView.getPredicates();
    assert (triggers.size() > 0);
    List<Relation> args = eventView.getArguments(triggers.get(0));
    // Guard the index-0 access below so an argument-less trigger fails with a clear assertion.
    assert (!args.isEmpty());
    assert (args.get(0).getAttribute(ORIGIN) != null);
    assert (args.get(0).getAttribute(REALIS) != null);
    System.out.println(eventView.toString());
    String report = emr.generateReport();
    System.out.println("Event Reader report:\n\n" + report);
    return outputXmlTa;
}
Also used : EREEventReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader) IOException(java.io.IOException)

Example 4 with EREEventReader

use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader in project cogcomp-nlp by CogComp.

the class MultilingualEreReaderTest method testSpanish.

/**
 * Builds an {@link EREEventReader} over {@code spanishPathA} using the tokenizer obtained for
 * the Spanish language code, then passes it to {@code testReader}. Any exception raised
 * while building the reader is printed and reported through {@code fail}.
 */
public static void testSpanish() {
    // XML parse failures are requested to surface as exceptions.
    final boolean throwExceptionOnXmlParseFail = true;
    EREEventReader spanishReader = null;
    try {
        TextAnnotationBuilder tokenizer =
                MultiLingualTokenizer.getTokenizer(Language.Spanish.getCode());
        spanishReader = new EREEventReader(
                EREDocumentReader.EreCorpus.ENR2,
                tokenizer,
                spanishPathA,
                throwExceptionOnXmlParseFail);
    } catch (Exception setupFailure) {
        setupFailure.printStackTrace();
        fail(setupFailure.getMessage());
    }
    testReader(spanishReader);
}
Also used : TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) EREEventReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader)

Aggregations

EREEventReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREEventReader)4 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)2 IOException (java.io.IOException)2 Counter (edu.illinois.cs.cogcomp.core.stats.Counter)1 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)1 EREDocumentReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader)1 EREMentionRelationReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader)1