Use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in the cogcomp-nlp project by CogComp.
From the class VerbSenseClassifierMain, method pruneFeatures.
/**
 * Runs a {@link PruningPreExtractor} over an existing feature cache, targeting a new cache file.
 *
 * <p>If a file already exists at {@code cacheFile2} it is removed first, so the pruned cache is
 * always rebuilt from scratch.
 *
 * @param numConsumers value forwarded to the {@link PruningPreExtractor} constructor
 * @param manager the sense manager used to construct the pruned cache and the extractor
 * @param featureCache the previously extracted (unpruned) feature cache to read from
 * @param cacheFile2 path of the pruned cache file to (re)create
 * @throws Exception if deleting the old file, running the extractor, or its cleanup fails
 */
private static void pruneFeatures(int numConsumers, SenseManager manager, FeatureVectorCacheFile featureCache, String cacheFile2) throws Exception {
    if (IOUtils.exists(cacheFile2)) {
        log.warn("Old pruned cache file found. Deleting...");
        IOUtils.rm(cacheFile2);
        log.info("Done");
    }

    log.info("Pruning features. Saving pruned features to {}", cacheFile2);
    FeatureVectorCacheFile prunedFeatureCache = new FeatureVectorCacheFile(cacheFile2, manager);
    PruningPreExtractor pruner = new PruningPreExtractor(manager, featureCache, prunedFeatureCache, numConsumers);
    try {
        pruner.run();
    } finally {
        // Always invoke the extractor's cleanup hook, even when run() fails part-way through.
        // NOTE(review): presumably this releases the cache file(s) held by the extractor - confirm
        // against PruningPreExtractor.finalize().
        pruner.finalize();
    }
}
Use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in the cogcomp-nlp project by CogComp.
From the class VerbSenseClassifierMain, method preExtract.
/**
 * Command entry point: pre-extracts features for the verb-sense model, prunes them, and saves
 * the pruned lexicon.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>ensure the configured models directory exists;</li>
 *   <li>extract features for {@code Dataset.PTBTrainDev} into a cache file;</li>
 *   <li>prune that cache into a second cache file;</li>
 *   <li>prune the lexicon using {@code manager.getPruneSize()} and save it to
 *       {@code manager.getLexiconFileName()}.</li>
 * </ol>
 *
 * @throws Exception if any of the extraction, pruning, or save steps fails
 */
@CommandDescription(description = "Pre-extracts the features for the verb-sense model. Run this before training.", usage = "preExtract")
public static void preExtract() throws Exception {
    SenseManager manager = getManager(true);
    ResourceManager conf = new VerbSenseConfigurator().getDefaultConfig();

    // If the models directory doesn't exist, create it. The property is looked up exactly once.
    // NOTE(review): assumes the value of MODELS_DIRECTORY is itself the directory path rather
    // than the name of another configuration key - confirm against VerbSenseConfigurator.
    String modelsDirectory = conf.getString(VerbSenseConfigurator.MODELS_DIRECTORY);
    if (!IOUtils.isDirectory(modelsDirectory)) {
        IOUtils.mkdir(modelsDirectory);
    }

    int numConsumers = Runtime.getRuntime().availableProcessors();
    Dataset dataset = Dataset.PTBTrainDev;
    log.info("Pre-extracting features");

    // The hash of the included feature set is used to name the cache files.
    ModelInfo modelInfo = manager.getModelInfo();
    String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode();

    String allDataCacheFile = VerbSenseConfigurator.getFeatureCacheFile(featureSet, dataset, rm);
    FeatureVectorCacheFile featureCache = preExtract(numConsumers, manager, dataset, allDataCacheFile);

    String prunedCacheFile = VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm);
    pruneFeatures(numConsumers, manager, featureCache, prunedCacheFile);

    Lexicon lexicon = modelInfo.getLexicon().getPrunedLexicon(manager.getPruneSize());
    log.info("Saving lexicon with {} features to {}", lexicon.size(), manager.getLexiconFileName());
    log.info("{} features in the lexicon", lexicon.size());
    lexicon.save(manager.getLexiconFileName());
}
Use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in the cogcomp-nlp project by CogComp.
From the class VerbSenseClassifierMain, method preExtract.
/**
 * Runs a {@link PreExtractor} over a dataset, targeting a fresh feature cache file.
 *
 * <p>If a file already exists at {@code cacheFile} it is removed first, so the cache is always
 * rebuilt from scratch.
 *
 * @param numConsumers value forwarded to the {@link PreExtractor} constructor
 * @param manager the sense manager used to construct the cache and the extractor
 * @param dataset the dataset whose text annotations are fetched from {@code SentenceDBHandler}
 * @param cacheFile path of the cache file to (re)create
 * @return the {@link FeatureVectorCacheFile} object that was handed to the extractor
 * @throws Exception if deleting the old file, running the extractor, or its cleanup fails
 */
private static FeatureVectorCacheFile preExtract(int numConsumers, SenseManager manager, Dataset dataset, String cacheFile) throws Exception {
    if (IOUtils.exists(cacheFile)) {
        log.warn("Old cache file found. Deleting...");
        IOUtils.rm(cacheFile);
        log.info("Done");
    }

    FeatureVectorCacheFile featureCache = new FeatureVectorCacheFile(cacheFile, manager);
    Iterator<TextAnnotation> data = SentenceDBHandler.instance.getDataset(dataset);
    PreExtractor extractor = new PreExtractor(manager, data, numConsumers, featureCache);
    try {
        extractor.run();
    } finally {
        // Always invoke the extractor's cleanup hook, even when run() fails part-way through.
        // NOTE(review): presumably this releases the cache file held by the extractor - confirm
        // against PreExtractor.finalize().
        extractor.finalize();
    }
    return featureCache;
}
Use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in the cogcomp-nlp project by CogComp.
From the class VerbSenseClassifierMain, method train.
/**
 * Command entry point: trains the verb-sense model from the pruned feature cache and saves the
 * resulting weight vector to {@code manager.getModelFileName()}.
 *
 * <p>Training happens in two passes over the same cache file: a cross-validation pass that
 * selects the regularization value {@code c}, followed by a final training pass that uses it.
 * Each pass opens its own {@link FeatureVectorCacheFile} and closes it as soon as the structured
 * problem has been loaded, including when loading fails.
 *
 * @throws Exception if reading the cache, cross-validation, training, or saving fails
 */
@CommandDescription(description = "Trains the verb-sense model.", usage = "train")
public static void train() throws Exception {
    SenseManager manager = getManager(true);
    int numThreads = Runtime.getRuntime().availableProcessors();

    // The hash of the included feature set identifies which pruned cache file to read.
    ModelInfo modelInfo = manager.getModelInfo();
    String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode();
    String cacheFile = VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm);

    // One inference solver per available processor.
    AbstractInferenceSolver[] inference = new AbstractInferenceSolver[numThreads];
    // TODO Can I replace this with ILPInference?
    for (int i = 0; i < inference.length; i++) {
        inference[i] = new MulticlassInference(manager);
    }

    // Pass 1: cross-validation.
    // NOTE(review): 20000 is presumably a cap on the number of examples loaded, and 4 the number
    // of folds - confirm against getStructuredProblem(int) and crossvalStructSVMSense.
    StructuredProblem cvProblem;
    FeatureVectorCacheFile cvCache = new FeatureVectorCacheFile(cacheFile, manager);
    try {
        cvProblem = cvCache.getStructuredProblem(20000);
    } finally {
        cvCache.close();
    }
    LearnerParameters params = JLISLearner.crossvalStructSVMSense(cvProblem, inference, 4);
    double c = params.getcStruct();
    log.info("c = {} after cv", c);

    // Pass 2: load the problem via the no-argument overload and train with the selected c.
    StructuredProblem problem;
    FeatureVectorCacheFile trainCache = new FeatureVectorCacheFile(cacheFile, manager);
    try {
        problem = trainCache.getStructuredProblem();
    } finally {
        trainCache.close();
    }
    WeightVector w = JLISLearner.trainStructSVM(inference, problem, c);
    JLISLearner.saveWeightVector(w, manager.getModelFileName());
}
Aggregations