Use of edu.illinois.cs.cogcomp.verbsense.data.Dataset in project cogcomp-nlp by CogComp:
the method preExtract of the class VerbSenseClassifierMain.
/**
 * Pre-extracts the features for the verb-sense model and saves the pruned lexicon.
 *
 * <p>Steps, in order: make sure the configured models directory exists, extract features for
 * {@code Dataset.PTBTrainDev} into a feature cache file, prune that cache, then prune the
 * model's lexicon to {@code manager.getPruneSize()} and save it to
 * {@code manager.getLexiconFileName()}.
 *
 * @throws Exception if any of the extraction, pruning or save steps fails
 */
@CommandDescription(description = "Pre-extracts the features for the verb-sense model. Run this before training.", usage = "preExtract")
public static void preExtract() throws Exception {
    SenseManager manager = getManager(true);
    ResourceManager conf = new VerbSenseConfigurator().getDefaultConfig();

    // Resolve the models directory once and create it if it does not exist. The earlier code
    // passed the resolved value back into conf.getString(...), i.e. it looked up the directory
    // path itself as if it were a configuration key.
    String modelsDirectory = conf.getString(VerbSenseConfigurator.MODELS_DIRECTORY);
    if (!IOUtils.isDirectory(modelsDirectory)) {
        IOUtils.mkdir(modelsDirectory);
    }

    // One consumer per available processor.
    int numConsumers = Runtime.getRuntime().availableProcessors();
    Dataset dataset = Dataset.PTBTrainDev;
    log.info("Pre-extracting features");

    ModelInfo modelInfo = manager.getModelInfo();
    // The cache files are keyed by the hash code of the included feature set.
    String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode();

    // NOTE(review): `rm` is not declared in this method; presumably a class-level
    // ResourceManager -- confirm whether `conf` was intended here.
    String allDataCacheFile = VerbSenseConfigurator.getFeatureCacheFile(featureSet, dataset, rm);
    FeatureVectorCacheFile featureCache = preExtract(numConsumers, manager, dataset, allDataCacheFile);
    pruneFeatures(numConsumers, manager, featureCache, VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm));

    Lexicon lexicon = modelInfo.getLexicon().getPrunedLexicon(manager.getPruneSize());
    log.info("Saving lexicon with {} features to {}", lexicon.size(), manager.getLexiconFileName());
    log.info("{} features in the lexicon", lexicon.size());
    lexicon.save(manager.getLexiconFileName());
}
Use of edu.illinois.cs.cogcomp.verbsense.data.Dataset in project cogcomp-nlp by CogComp:
the method evaluate of the class VerbSenseClassifierMain.
/**
 * Evaluates the sense model on {@code Dataset.PTBTest}.
 *
 * <p>Loads the model's weight vector, runs inference on every text annotation that carries the
 * gold view, and scores the predicted view against the gold view. Progress (including the
 * running micro-F1) is logged every 1000 evaluated sentences; the final timing and the
 * performance table are printed to standard output.
 *
 * @throws Exception if loading the model, reading the dataset or running inference fails
 */
@CommandDescription(description = "Performs evaluation.", usage = "evaluate")
public static void evaluate() throws Exception {
    SenseManager manager = getManager(false);
    Dataset testSet = Dataset.PTBTest;
    ILPSolverFactory solver = new ILPSolverFactory(ILPSolverFactory.SolverType.JLISCuttingPlaneGurobi);
    ClassificationTester senseTester = new ClassificationTester();

    long startMillis = System.currentTimeMillis();
    int evaluated = 0;
    manager.getModelInfo().loadWeightVector();

    IResetableIterator<TextAnnotation> annotations = SentenceDBHandler.instance.getDataset(testSet);
    while (annotations.hasNext()) {
        TextAnnotation ta = annotations.next();

        // Only annotations that carry a gold view can be scored.
        if (ta.hasView(SenseManager.getGoldViewName())) {
            TokenLabelView gold = (TokenLabelView) ta.getView(SenseManager.getGoldViewName());
            ILPInference inference = manager.getInference(solver, gold.getConstituents());
            assert inference != null;
            TokenLabelView prediction = inference.getOutputView();
            evaluateSense(gold, prediction, senseTester);

            evaluated += 1;
            if (evaluated % 1000 == 0) {
                long elapsedMillis = System.currentTimeMillis() - startMillis;
                log.info(evaluated + " sentences done. Took " + elapsedMillis + "ms, Micro-F1 so far = " + senseTester.getMicroF1());
            }
        }
    }

    long totalMillis = System.currentTimeMillis() - startMillis;
    System.out.println(evaluated + " sentences done. Took " + totalMillis + "ms");
    System.out.println("\n\n* Sense");
    System.out.println(senseTester.getPerformanceTable(false).toOrgTable());
}
Use of edu.illinois.cs.cogcomp.verbsense.data.Dataset in project cogcomp-nlp by CogComp:
the method initializeDatasets of the class SentenceDBHandler.
/**
 * Ensures the {@code datasets} table contains one row for every {@link Dataset} constant.
 *
 * <p>For each constant, the table is queried by name and a row is inserted only when no row
 * with that name exists yet.
 *
 * @param dbFile database file passed to {@code DBHelper.getConnection}
 * @throws RuntimeException wrapping the {@link SQLException} if any database call fails
 */
public void initializeDatasets(String dbFile) {
    // NOTE(review): the connection is not closed here because its ownership is not visible
    // from this method (DBHelper may cache or share it) -- confirm.
    Connection connection = DBHelper.getConnection(dbFile);

    // Prepare both statements once and reuse them for every dataset; try-with-resources
    // guarantees they are closed, which the earlier per-iteration statements never were.
    try (PreparedStatement select = connection.prepareStatement("select * from datasets where name = ?");
         PreparedStatement insert = connection.prepareStatement("insert into datasets(name) values (?)")) {
        for (Dataset d : Dataset.values()) {
            select.setString(1, d.name());

            boolean alreadyPresent;
            try (ResultSet rs = select.executeQuery()) {
                alreadyPresent = rs.next();
            }

            if (!alreadyPresent) {
                insert.setString(1, d.name());
                insert.executeUpdate();
            }
        }
    } catch (SQLException e) {
        log.error("Error with databse access", e);
        throw new RuntimeException(e);
    }
}
Aggregations