Search in sources :

Example 1 with FeatureVectorCacheFile

use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in project cogcomp-nlp by CogComp.

the class VerbSenseClassifierMain method pruneFeatures.

/**
 * Rebuilds the pruned feature cache at {@code cacheFile2} from an existing feature cache.
 *
 * <p>Any file already present at {@code cacheFile2} is removed first, then a
 * {@link PruningPreExtractor} is run with {@code featureCache} and a freshly created
 * {@link FeatureVectorCacheFile} for {@code cacheFile2}.
 *
 * @param numConsumers value handed to the {@link PruningPreExtractor} constructor
 *        (presumably the number of consumer threads -- confirm against that class)
 * @param manager the sense manager shared by the new cache and the extractor
 * @param featureCache the existing (unpruned) feature cache
 * @param cacheFile2 path of the pruned cache file to (re)create
 * @throws Exception if deleting the old file, creating the cache, or running the extractor fails
 */
private static void pruneFeatures(int numConsumers, SenseManager manager, FeatureVectorCacheFile featureCache, String cacheFile2) throws Exception {
    // Start from a clean slate: a leftover pruned cache is removed before a new one is created.
    boolean staleCachePresent = IOUtils.exists(cacheFile2);
    if (staleCachePresent) {
        log.warn("Old pruned cache file found. Deleting...");
        IOUtils.rm(cacheFile2);
        log.info("Done");
    }

    log.info("Pruning features. Saving pruned features to {}", cacheFile2);

    FeatureVectorCacheFile prunedCache = new FeatureVectorCacheFile(cacheFile2, manager);
    PruningPreExtractor pruner = new PruningPreExtractor(manager, featureCache, prunedCache, numConsumers);
    pruner.run();
    // NOTE(review): finalize() is invoked explicitly here; presumably it is the extractor's
    // cleanup hook rather than something left to the garbage collector -- confirm.
    pruner.finalize();
}
Also used : PruningPreExtractor(edu.illinois.cs.cogcomp.verbsense.experiment.PruningPreExtractor) FeatureVectorCacheFile(edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile)

Example 2 with FeatureVectorCacheFile

use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in project cogcomp-nlp by CogComp.

the class VerbSenseClassifierMain method preExtract.

/**
 * Command entry point: pre-extracts features for the verb-sense model, prunes them, and
 * saves the pruned lexicon.
 *
 * <p>Steps, in order: make sure the models directory exists, extract features for
 * {@link Dataset#PTBTrainDev} into a cache file, write a pruned copy of that cache, then
 * save the lexicon pruned to {@code manager.getPruneSize()} under
 * {@code manager.getLexiconFileName()}.
 *
 * @throws Exception if any of the extraction, pruning, or file operations fails
 */
@CommandDescription(description = "Pre-extracts the features for the verb-sense model. Run this before training.", usage = "preExtract")
public static void preExtract() throws Exception {
    SenseManager manager = getManager(true);
    ResourceManager conf = new VerbSenseConfigurator().getDefaultConfig();

    // If models directory doesn't exist create it.
    // NOTE(review): the value stored under MODELS_DIRECTORY is itself used as a key for a
    // second getString() lookup. That looks unintended (a single lookup would normally
    // yield the directory) -- confirm against VerbSenseConfigurator before changing it.
    String modelsDir = conf.getString(conf.getString(VerbSenseConfigurator.MODELS_DIRECTORY));
    if (!IOUtils.isDirectory(modelsDir)) {
        IOUtils.mkdir(modelsDir);
    }

    int numConsumers = Runtime.getRuntime().availableProcessors();
    Dataset dataset = Dataset.PTBTrainDev;
    log.info("Pre-extracting features");

    // The cache file names are keyed by the hash code of the included feature set.
    ModelInfo modelInfo = manager.getModelInfo();
    String featureSet = Integer.toString(modelInfo.featureManifest.getIncludedFeatures().hashCode());

    String allDataCacheFile = VerbSenseConfigurator.getFeatureCacheFile(featureSet, dataset, rm);
    FeatureVectorCacheFile featureCache = preExtract(numConsumers, manager, dataset, allDataCacheFile);

    String prunedCacheFile = VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm);
    pruneFeatures(numConsumers, manager, featureCache, prunedCacheFile);

    Lexicon lexicon = modelInfo.getLexicon().getPrunedLexicon(manager.getPruneSize());
    log.info("Saving lexicon  with {} features to {}", lexicon.size(), manager.getLexiconFileName());
    log.info(lexicon.size() + " features in the lexicon");
    lexicon.save(manager.getLexiconFileName());
}
Also used : ModelInfo(edu.illinois.cs.cogcomp.verbsense.core.ModelInfo) VerbSenseConfigurator(edu.illinois.cs.cogcomp.verbsense.utilities.VerbSenseConfigurator) Dataset(edu.illinois.cs.cogcomp.verbsense.data.Dataset) Lexicon(edu.illinois.cs.cogcomp.core.datastructures.Lexicon) SenseManager(edu.illinois.cs.cogcomp.verbsense.core.SenseManager) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) FeatureVectorCacheFile(edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile) CommandDescription(edu.illinois.cs.cogcomp.core.utilities.commands.CommandDescription)

Example 3 with FeatureVectorCacheFile

use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in project cogcomp-nlp by CogComp.

the class VerbSenseClassifierMain method preExtract.

/**
 * Extracts features for {@code dataset} into a new feature cache stored at {@code cacheFile}.
 *
 * <p>A file already present at {@code cacheFile} is deleted before the new cache is created.
 * The dataset is read through {@code SentenceDBHandler.instance} and fed to a
 * {@link PreExtractor} together with the new cache.
 *
 * @param numConsumers value handed to the {@link PreExtractor} constructor
 *        (presumably the number of consumer threads -- confirm against that class)
 * @param manager the sense manager shared by the cache and the extractor
 * @param dataset the dataset whose text annotations are processed
 * @param cacheFile path of the cache file to (re)create
 * @return the feature cache that the extractor was run against
 * @throws Exception if deleting the old file, creating the cache, or running the extractor fails
 */
private static FeatureVectorCacheFile preExtract(int numConsumers, SenseManager manager, Dataset dataset, String cacheFile) throws Exception {
    // Discard any cache left over from an earlier run before creating the new one.
    boolean staleCachePresent = IOUtils.exists(cacheFile);
    if (staleCachePresent) {
        log.warn("Old cache file found. Deleting...");
        IOUtils.rm(cacheFile);
        log.info("Done");
    }

    FeatureVectorCacheFile extractedCache = new FeatureVectorCacheFile(cacheFile, manager);
    PreExtractor extractor = new PreExtractor(
            manager,
            SentenceDBHandler.instance.getDataset(dataset),
            numConsumers,
            extractedCache);
    extractor.run();
    // NOTE(review): finalize() is invoked explicitly here; presumably it is the extractor's
    // cleanup hook rather than something left to the garbage collector -- confirm.
    extractor.finalize();

    return extractedCache;
}
Also used : PreExtractor(edu.illinois.cs.cogcomp.verbsense.experiment.PreExtractor) PruningPreExtractor(edu.illinois.cs.cogcomp.verbsense.experiment.PruningPreExtractor) FeatureVectorCacheFile(edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 4 with FeatureVectorCacheFile

use of edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile in project cogcomp-nlp by CogComp.

the class VerbSenseClassifierMain method train.

/**
 * Command entry point: trains the verb-sense model from the pruned feature cache and saves
 * the resulting weight vector to {@code manager.getModelFileName()}.
 *
 * <p>The pruned cache is read twice: once through {@code getStructuredProblem(20000)} for
 * cross-validation, which selects the regularization value {@code c}, and once through
 * {@code getStructuredProblem()} for the final training run. Each cache is closed in a
 * {@code finally} block so it is released even when loading the problem throws.
 *
 * @throws Exception if the cache cannot be read or cross-validation, training, or saving fails
 */
@CommandDescription(description = "Trains the verb-sense model.", usage = "train")
public static void train() throws Exception {
    SenseManager manager = getManager(true);
    int numThreads = Runtime.getRuntime().availableProcessors();

    // The pruned cache file name is keyed by the hash code of the included feature set.
    ModelInfo modelInfo = manager.getModelInfo();
    String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode();
    String cacheFile = VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm);

    // One inference solver per available processor.
    AbstractInferenceSolver[] inference = new AbstractInferenceSolver[numThreads];
    // TODO Can I replace this with ILPInference?
    for (int i = 0; i < inference.length; i++) {
        inference[i] = new MulticlassInference(manager);
    }

    // Cross-validation pass. NOTE(review): 20000 is presumably a cap on the number of
    // examples loaded and 4 the number of folds -- confirm against the callee signatures.
    StructuredProblem cvProblem;
    FeatureVectorCacheFile cvCache = new FeatureVectorCacheFile(cacheFile, manager);
    try {
        cvProblem = cvCache.getStructuredProblem(20000);
    } finally {
        cvCache.close();
    }
    LearnerParameters params = JLISLearner.crossvalStructSVMSense(cvProblem, inference, 4);
    double c = params.getcStruct();
    log.info("c = {} after cv", c);

    // Final training pass over the structured problem from the no-argument overload.
    StructuredProblem problem;
    FeatureVectorCacheFile trainCache = new FeatureVectorCacheFile(cacheFile, manager);
    try {
        problem = trainCache.getStructuredProblem();
    } finally {
        trainCache.close();
    }
    WeightVector w = JLISLearner.trainStructSVM(inference, problem, c);
    JLISLearner.saveWeightVector(w, manager.getModelFileName());
}
Also used : ModelInfo(edu.illinois.cs.cogcomp.verbsense.core.ModelInfo) StructuredProblem(edu.illinois.cs.cogcomp.sl.core.StructuredProblem) WeightVector(edu.illinois.cs.cogcomp.sl.util.WeightVector) SenseManager(edu.illinois.cs.cogcomp.verbsense.core.SenseManager) MulticlassInference(edu.illinois.cs.cogcomp.verbsense.inference.MulticlassInference) AbstractInferenceSolver(edu.illinois.cs.cogcomp.sl.inference.AbstractInferenceSolver) FeatureVectorCacheFile(edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile) LearnerParameters(edu.illinois.cs.cogcomp.verbsense.learn.LearnerParameters) CommandDescription(edu.illinois.cs.cogcomp.core.utilities.commands.CommandDescription)

Aggregations

FeatureVectorCacheFile (edu.illinois.cs.cogcomp.verbsense.caches.FeatureVectorCacheFile)4 CommandDescription (edu.illinois.cs.cogcomp.core.utilities.commands.CommandDescription)2 ModelInfo (edu.illinois.cs.cogcomp.verbsense.core.ModelInfo)2 SenseManager (edu.illinois.cs.cogcomp.verbsense.core.SenseManager)2 PruningPreExtractor (edu.illinois.cs.cogcomp.verbsense.experiment.PruningPreExtractor)2 Lexicon (edu.illinois.cs.cogcomp.core.datastructures.Lexicon)1 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)1 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)1 StructuredProblem (edu.illinois.cs.cogcomp.sl.core.StructuredProblem)1 AbstractInferenceSolver (edu.illinois.cs.cogcomp.sl.inference.AbstractInferenceSolver)1 WeightVector (edu.illinois.cs.cogcomp.sl.util.WeightVector)1 Dataset (edu.illinois.cs.cogcomp.verbsense.data.Dataset)1 PreExtractor (edu.illinois.cs.cogcomp.verbsense.experiment.PreExtractor)1 MulticlassInference (edu.illinois.cs.cogcomp.verbsense.inference.MulticlassInference)1 LearnerParameters (edu.illinois.cs.cogcomp.verbsense.learn.LearnerParameters)1 VerbSenseConfigurator (edu.illinois.cs.cogcomp.verbsense.utilities.VerbSenseConfigurator)1