Use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
Class TextCleanerTest, method setUp.
/**
 * Test fixture: builds the {@code TextCleaner} under test from a
 * {@code ResourceManager} constructed with {@code CONFIG}.
 *
 * @throws Exception if constructing the ResourceManager or the TextCleaner fails
 */
@Before
public void setUp() throws Exception {
    textCleaner = new TextCleaner(new ResourceManager(CONFIG));
}
Use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
Class CorpusReaderConfigurator, method buildResourceManager.
/**
 * Creates a ResourceManager backed by a property set in which only the corpus name is set.
 *
 * @param corpus value to store under the {@code CORPUS_NAME} key
 * @return a new ResourceManager wrapping those properties
 */
public static ResourceManager buildResourceManager(String corpus) {
    final Properties corpusProperties = new Properties();
    corpusProperties.setProperty(CorpusReaderConfigurator.CORPUS_NAME.key, corpus);
    final ResourceManager resourceManager = new ResourceManager(corpusProperties);
    return resourceManager;
}
Use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
Class CreateTrainDevTestSplit, method main.
/**
 * Command-line entry point: reads an ERE corpus, partitions its documents into
 * train/dev/test splits, and writes the result to an output directory.
 * <p>
 * The split fractions and the names of the views to consider are taken from the default
 * {@code CorpusSplitConfigurator} configuration. Two kinds of files are written under the
 * output directory:
 * <ul>
 * <li>{@code countInfo.txt}: one line of label counts per document, one per split, and a
 * final {@code TOTALS} line;</li>
 * <li>one {@code <split name>.txt} file per split, listing the document ids assigned to it.</li>
 * </ul>
 * Any failure (bad arguments, reader construction, file output) is reported on stderr and
 * terminates the JVM with exit status -1.
 *
 * @param args exactly three values: the {@code EREDocumentReader.EreCorpus} constant name,
 *        the corpus root directory, and the output directory for the split files
 */
public static void main(String[] args) {
    final String usage = "Usage: " + NAME + " EreCorpusType corpusDir splitDir";
    if (args.length != 3) {
        System.err.println(usage);
        System.exit(-1);
    }

    // valueOf rejects unknown names with IllegalArgumentException; report that as a usage
    // error instead of letting the raw stack trace escape.
    EREDocumentReader.EreCorpus ereCorpus = null;
    try {
        ereCorpus = EREDocumentReader.EreCorpus.valueOf(args[0]);
    } catch (IllegalArgumentException e) {
        System.err.println("Unrecognized EreCorpusType '" + args[0] + "'.");
        System.err.println(usage);
        System.exit(-1);
    }
    String corpusRoot = args[1];
    String outDir = args[2];

    ResourceManager fullRm = new CorpusSplitConfigurator().getDefaultConfig();
    boolean throwExceptionOnXmlParserFail = false;
    double trainFrac = fullRm.getDouble(CorpusSplitConfigurator.TRAIN_FRACTION.key);
    double devFrac = fullRm.getDouble(CorpusSplitConfigurator.DEV_FRACTION.key);
    double testFrac = fullRm.getDouble(CorpusSplitConfigurator.TEST_FRACTION.key);
    String[] viewNames =
            fullRm.getCommaSeparatedValues(CorpusSplitConfigurator.VIEWS_TO_CONSIDER.key);
    String[] labelsToCount = {};

    IOUtils.mkdir(outDir);
    String outFileStem = outDir + "/";

    EREMentionRelationReader reader = null;
    try {
        reader = new EREEventReader(ereCorpus, corpusRoot, throwExceptionOnXmlParserFail);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }

    // Collect, per document id, the requested views that the document actually has.
    // Only the views are retained; the enclosing XmlTextAnnotation is not needed afterwards.
    Map<String, Set<View>> ereViews = new HashMap<>();
    while (reader.hasNext()) {
        TextAnnotation ta = reader.next().getTextAnnotation();
        Set<View> views = new HashSet<>();
        for (String viewName : viewNames) {
            if (ta.hasView(viewName)) {
                views.add(ta.getView(viewName));
            }
        }
        ereViews.put(ta.getId(), views);
    }

    CreateTrainDevTestSplit creator = new CreateTrainDevTestSplit(ereViews, labelsToCount);
    Map<Split, Set<String>> splits = creator.getSplits(trainFrac, devFrac, testFrac);
    Map<Split, Counter<String>> splitCounts = creator.getBestRelSplitCounts();
    Map<String, Counter<String>> counts = creator.getLabelCounts();

    // One line per document, one per split, plus the TOTALS line.
    List<String> outLines = new ArrayList<>(counts.size() + splitCounts.size() + 1);
    for (Map.Entry<String, Counter<String>> docEntry : counts.entrySet()) {
        outLines.add(docEntry.getKey() + ": " + printCounts(docEntry.getValue()));
    }
    for (Map.Entry<Split, Counter<String>> splitEntry : splitCounts.entrySet()) {
        outLines.add(splitEntry.getKey().name() + ": " + printCounts(splitEntry.getValue()));
    }
    Counter<String> totalLabelCounts = creator.getLabelTotals();
    outLines.add("TOTALS: " + printCounts(totalLabelCounts));

    try {
        LineIO.write(outFileStem + "countInfo.txt", outLines);
        for (Map.Entry<Split, Set<String>> splitEntry : splits.entrySet()) {
            List<String> ids = new ArrayList<>(splitEntry.getValue());
            LineIO.write(outFileStem + splitEntry.getKey().name() + ".txt", ids);
        }
    } catch (IOException e) {
        // The first failed write aborts the run, as before.
        e.printStackTrace();
        System.exit(-1);
    }
}
Use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
Class AnnotatorLazyInitTest, method setUpOnce.
/**
 * One-time fixture: builds the shared {@code defaultRm} configuration with lazy
 * initialization switched off, the test gazetteer path, and a phrase length of 6.
 */
@BeforeClass
public static void setUpOnce() {
    final Properties eagerConfig = new Properties();
    eagerConfig.setProperty(
            SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    eagerConfig.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "6");
    eagerConfig.setProperty(
            SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, Configurator.FALSE);
    defaultRm = new ResourceManager(eagerConfig);
}
Use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
Class AnnotatorLazyInitTest, method testLazy.
/**
 * Checks that an annotator configured with {@code IS_LAZILY_INITIALIZED = TRUE} reports
 * itself as uninitialized after construction and as initialized once {@code getView} has
 * been called on a text annotation, and that the view is then present on that annotation.
 * <p>
 * Each dictionary collection is allowed to be {@code null}; when it is non-null it must be
 * non-empty, both before and after the {@code getView} call.
 */
@Test
public void testLazy() {
    final Properties lazyConfig = new Properties();
    lazyConfig.setProperty(
            SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    lazyConfig.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "6");
    lazyConfig.setProperty(
            SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key,
            SimpleGazetteerAnnotatorConfigurator.TRUE);

    SimpleGazetteerAnnotator annotator = null;
    try {
        annotator = new SimpleGazetteerAnnotator(new ResourceManager(lazyConfig));
    } catch (IOException | URISyntaxException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // State right after construction: not initialized yet.
    assertFalse(annotator.isInitialized());
    assertTrue(null == annotator.dictionaries || annotator.dictionaries.size() > 0);
    assertTrue(null == annotator.dictionariesIgnoreCase
            || annotator.dictionariesIgnoreCase.size() > 0);

    TextAnnotation ta = tab.createTextAnnotation(
            "The CIA has no London headquarters, though General Electric does.");
    try {
        annotator.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // State after the first getView call: view added and annotator initialized.
    assertTrue(ta.hasView(annotator.getViewName()));
    assertTrue(annotator.isInitialized());
    assertTrue(null == annotator.dictionaries || annotator.dictionaries.size() > 0);
    assertTrue(null == annotator.dictionariesIgnoreCase
            || annotator.dictionariesIgnoreCase.size() > 0);
    assertTrue(ta.hasView(annotator.getViewName()));
}
Aggregations