Use of edu.stanford.nlp.util.concurrent.AtomicDouble in project CoreNLP by stanfordnlp.
In class ScorePhrasesLearnFeatWt, method chooseUnknownAsNegatives:
// This chooses the candidates that are not close to the positive phrases!
Set<CandidatePhrase> chooseUnknownAsNegatives(Set<CandidatePhrase> candidatePhrases, String label, Collection<CandidatePhrase> positivePhrases, Map<String, Collection<CandidatePhrase>> knownNegativePhrases, BufferedWriter logFile) throws IOException {
  List<List<CandidatePhrase>> threadedCandidates = GetPatternsFromDataMultiClass.getThreadBatches(CollectionUtils.toList(candidatePhrases), constVars.numThreads);
  Counter<CandidatePhrase> sims = new ClassicCounter<>();
  AtomicDouble allMaxSim = new AtomicDouble(Double.MIN_VALUE);
  ExecutorService executor = Executors.newFixedThreadPool(constVars.numThreads);
  List<Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>>> list = new ArrayList<>();
  // Multi-threaded: compute similarities of each candidate batch to the positive and known negative phrases
  for (List<CandidatePhrase> keys : threadedCandidates) {
    Callable<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> task = new ComputeSim(label, keys, allMaxSim, positivePhrases, knownNegativePhrases);
    Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> submit = executor.submit(task);
    list.add(submit);
  }
  // Now retrieve the results
  for (Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> future : list) {
    try {
      sims.addAll(future.get().first());
    } catch (Exception e) {
      executor.shutdownNow();
      throw new RuntimeException(e);
    }
  }
  executor.shutdown();
  if (allMaxSim.get() == Double.MIN_VALUE) {
    Redwood.log(Redwood.DBG, "No similarity recorded between the positives and the unknown!");
  }
  CandidatePhrase k = Counters.argmax(sims);
  System.out.println("Maximum similarity was " + sims.getCount(k) + " for word " + k);
  Counter<CandidatePhrase> removed = Counters.retainBelow(sims, constVars.positiveSimilarityThresholdLowPrecision);
  System.out.println("Removing phrases as negative phrases that were higher than the positive similarity threshold of " + constVars.positiveSimilarityThresholdLowPrecision + ": " + removed);
  if (logFile != null && wordVectors != null) {
    for (Entry<CandidatePhrase, Double> en : removed.entrySet()) {
      if (wordVectors.containsKey(en.getKey().getPhrase())) {
        logFile.write(en.getKey() + "-PN " + ArrayUtils.toString(wordVectors.get(en.getKey().getPhrase()), " ") + "\n");
      }
    }
  }
  return sims.keySet();
}
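As a standalone illustration of the pattern above, the sketch below (hypothetical class and variable names, not CoreNLP code) uses the same AtomicDouble calls seen in chooseUnknownAsNegatives and ComputeSim (the constructor, get(), and set()) to track the largest score observed across worker tasks submitted to an ExecutorService.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import edu.stanford.nlp.util.concurrent.AtomicDouble;

public class MaxScoreDemo {

  public static void main(String[] args) throws Exception {
    // Shared maximum, initialized like allMaxSim in chooseUnknownAsNegatives
    AtomicDouble maxScore = new AtomicDouble(Double.MIN_VALUE);
    ExecutorService executor = Executors.newFixedThreadPool(4);
    List<Future<Double>> futures = new ArrayList<>();

    for (int batch = 0; batch < 4; batch++) {
      final int seed = batch;
      Callable<Double> task = () -> {
        double localBest = Double.MIN_VALUE;
        for (int i = 0; i < 1000; i++) {
          // Stand-in for a similarity score computed by the worker
          double score = Math.abs(Math.sin(seed * 1000 + i));
          if (score > localBest)
            localBest = score;
          // Same check-then-set as in the method above; not atomic, but adequate
          // when only an approximate global maximum is needed for logging.
          if (maxScore.get() < score)
            maxScore.set(score);
        }
        return localBest;
      };
      futures.add(executor.submit(task));
    }

    for (Future<Double> f : futures)
      f.get(); // propagate any exception thrown by a worker
    executor.shutdown();

    System.out.println("Maximum score across all batches: " + maxScore.get());
  }
}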
Use of edu.stanford.nlp.util.concurrent.AtomicDouble in project CoreNLP by stanfordnlp.
In class ScorePhrasesLearnFeatWt, method computeSimWithWordCluster:
Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordCluster(Collection<CandidatePhrase> candidatePhrases, Collection<CandidatePhrase> positivePhrases, AtomicDouble allMaxSim) {
  Counter<CandidatePhrase> sims = new ClassicCounter<>(candidatePhrases.size());
  for (CandidatePhrase p : candidatePhrases) {
    Counter<Integer> feat = wordClassClustersForPhrase.get(p);
    if (feat == null) {
      feat = wordClass(p.getPhrase(), p.getPhraseLemma());
      wordClassClustersForPhrase.put(p, feat);
    }
    double avgSim = 0;
    if (feat.size() > 0) {
      for (CandidatePhrase pos : positivePhrases) {
        if (p.equals(pos))
          continue;
        Counter<Integer> posfeat = wordClassClustersForPhrase.get(pos);
        if (posfeat == null) {
          posfeat = wordClass(pos.getPhrase(), pos.getPhraseLemma());
          // Cache the word-class clusters computed for the positive phrase
          wordClassClustersForPhrase.put(pos, posfeat);
        }
        if (posfeat.size() > 0) {
          Double j = Counters.jaccardCoefficient(posfeat, feat);
          //System.out.println("clusters for positive phrase " + pos + " is " + wordClassClustersForPhrase.get(pos) + " and the features for unknown are " + feat + " for phrase " + p);
          if (!j.isInfinite() && !j.isNaN()) {
            avgSim += j;
          }
        }
      }
      avgSim /= positivePhrases.size();
    }
    sims.setCount(p, avgSim);
    if (allMaxSim.get() < avgSim)
      allMaxSim.set(avgSim);
  }
  //TODO: compute similarity with negative phrases
  return new Pair<>(sims, null);
}
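For reference, here is a minimal, self-contained sketch (hypothetical data and names, not CoreNLP code) of the averaged Jaccard similarity that computeSimWithWordCluster computes between a candidate phrase's word-class clusters and those of the positive phrases, using the same ClassicCounter and Counters.jaccardCoefficient calls as above.

import java.util.Arrays;
import java.util.List;

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;

public class ClusterSimilarityDemo {

  // Build a counter over (hypothetical) word-class cluster ids for a phrase.
  static Counter<Integer> clusters(int... ids) {
    Counter<Integer> c = new ClassicCounter<>();
    for (int id : ids)
      c.incrementCount(id);
    return c;
  }

  public static void main(String[] args) {
    // Clusters for one candidate phrase
    Counter<Integer> candidate = clusters(3, 7, 12);

    // Clusters of the (hypothetical) positive phrases
    List<Counter<Integer>> positives = Arrays.asList(
        clusters(3, 7, 9),
        clusters(1, 12),
        clusters(4, 5, 6));

    // Average Jaccard similarity, as in computeSimWithWordCluster
    double avgSim = 0;
    for (Counter<Integer> pos : positives) {
      double j = Counters.jaccardCoefficient(pos, candidate);
      if (!Double.isInfinite(j) && !Double.isNaN(j))
        avgSim += j;
    }
    avgSim /= positives.size();

    System.out.println("Average Jaccard similarity to the positives: " + avgSim);
  }
}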