Use of edu.illinois.cs.cogcomp.sl.util.FeatureVector in project cogcomp-nlp by CogComp.
Class PreExtractor, method consumeInstance.
/**
 * Counts the features of {@code x}, queues a record for it and flushes the queue to
 * {@code cacheDB} once it holds more than 10000 records.
 *
 * <p>The append, the size check and the flush all run inside a single
 * {@code synchronized (buffer)} section, so the size of the buffer is never read without
 * holding the lock that guards its mutation.
 *
 * @param x the instance to process; {@link #countFeatures} is called on it first and its
 *        cached feature vector is stored in the queued record
 * @param y the structure whose label is stored alongside the features
 * @throws Exception propagated from {@code countFeatures} or from {@code cacheDB.put}
 */
protected void consumeInstance(SenseInstance x, SenseStructure y) throws Exception {
    countFeatures(x);
    synchronized (buffer) {
        FeatureVector fv = x.getCachedFeatureVector();
        assert fv != null;
        buffer.add(new PreExtractRecord(x.getPredicateLemma(), y.getLabel(), fv));

        // Check the threshold while still holding the lock: reading buffer.size() outside
        // the lock would race with concurrent add()/clear() calls from other threads.
        if (buffer.size() > 10000) {
            for (PreExtractRecord r : buffer) {
                cacheDB.put(r.lemma, r.label, r.features);
            }
            buffer.clear();
        }
    }
}
Use of edu.illinois.cs.cogcomp.sl.util.FeatureVector in project cogcomp-nlp by CogComp.
Class PruningPreExtractor, method consume.
/**
 * Prunes the cached feature vector of the incoming instance, queues a record for it and
 * flushes the queue to {@code cache} once it holds more than 10000 records.
 *
 * <p>Pruning is done by {@code Lexicon.pruneFeaturesByCount} using the threshold returned by
 * {@code manager.getPruneSize()}. The append, the size check and the flush all run inside a
 * single {@code synchronized (buffer)} section, so the size of the buffer is never read
 * without holding the lock that guards its mutation.
 *
 * @param input pair of the instance (first) and its structure (second)
 * @throws RuntimeException wrapping any exception thrown by {@code cache.put}; in that case
 *         the buffer is not cleared and the counter is not incremented
 */
@Override
protected void consume(Pair<SenseInstance, SenseStructure> input) {
    SenseInstance x = input.getFirst();
    SenseStructure y = input.getSecond();

    // Pruning touches only per-instance data and the lexicon, so it stays outside the lock.
    FeatureVector features = x.getCachedFeatureVector();
    ModelInfo modelInfo = manager.getModelInfo();
    Lexicon lexicon = modelInfo.getLexicon();
    int threshold = manager.getPruneSize();
    Pair<int[], float[]> pair =
            lexicon.pruneFeaturesByCount(features.getIdx(), features.getValue(), threshold);
    features = new FeatureVector(pair.getFirst(), pair.getSecond());

    synchronized (buffer) {
        buffer.add(new PreExtractRecord(x.getPredicateLemma(), y.getLabel(), features));

        // Check the threshold while still holding the lock: reading buffer.size() outside
        // the lock would race with concurrent add()/clear() calls from other threads.
        if (buffer.size() > 10000) {
            for (PreExtractRecord r : buffer) {
                try {
                    cache.put(r.lemma, r.label, r.features);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            buffer.clear();
        }
    }
    counter.incrementAndGet();
}
Use of edu.illinois.cs.cogcomp.sl.util.FeatureVector in project cogcomp-nlp by CogComp.
Class SenseInstance, method cacheFeatureVector.
/**
 * Converts a set of features into a {@link FeatureVector} using the lexicon of the current
 * model and caches it on this instance.
 *
 * <p>Features are first collected into a name-to-value map (a later feature with the same
 * name overwrites an earlier one), which the lexicon then turns into parallel index and
 * value arrays.
 *
 * @param features the features to encode and cache
 */
public void cacheFeatureVector(Set<Feature> features) {
    Map<String, Float> valueByName = new HashMap<>();
    for (Feature feature : features) {
        valueByName.put(feature.getName(), feature.getValue());
    }

    Pair<int[], float[]> encoded =
            manager.getModelInfo().getLexicon().getFeatureVector(valueByName);
    FeatureVector vector = new FeatureVector(encoded.getFirst(), encoded.getSecond());
    this.cacheFeatureVector(vector);
}
Use of edu.illinois.cs.cogcomp.sl.util.FeatureVector in project cogcomp-nlp by CogComp.
Class PreExtractor, method countFeatures.
/**
 * Extracts the features of {@code x}, records them in the lexicon and caches the resulting
 * feature vector on {@code x}.
 *
 * <p>Extraction is delegated to {@code modelInfo.fex}; per the original documentation the
 * features are defined in the <b>features.fex</b> file and read by {@link FeatureExtractor}.
 * For every extracted feature, while holding the lock on {@code lexicon}:
 * <ul>
 * <li>if the lexicon does not contain the feature name, it is registered with
 * {@code previewFeature} when {@code addNewFeatures} is set, and skipped otherwise;</li>
 * <li>the feature id is looked up, its count is incremented, and the id/value pair is
 * appended to the vector being built.</li>
 * </ul>
 *
 * @param x The predicate to extract features from.
 * @throws EdisonException declared by this method; presumably raised by the feature
 *         extractor (not verifiable from this block)
 */
public void countFeatures(SenseInstance x) throws EdisonException {
    Set<Feature> extracted = manager.getModelInfo().fex.getFeatures(x.getConstituent());

    List<Integer> featureIds = new ArrayList<>();
    List<Float> featureValues = new ArrayList<>();

    // Original note: this is the only place where a new feature can be added to the lexicon.
    synchronized (lexicon) {
        for (Feature feature : extracted) {
            String name = feature.getName();
            if (!lexicon.contains(name)) {
                if (!addNewFeatures) {
                    // Unknown feature and the lexicon must not grow: drop it.
                    continue;
                }
                lexicon.previewFeature(name);
            }

            int id = lexicon.lookupId(name);
            lexicon.countFeature(id);
            featureIds.add(id);
            featureValues.add(feature.getValue());
        }
    }

    x.cacheFeatureVector(new FeatureVector(
            ArrayUtilities.asIntArray(featureIds),
            ArrayUtilities.asFloatArray(featureValues)));
}
Aggregations