Search in sources :

Example 1 with AtomicDouble

use of edu.stanford.nlp.util.concurrent.AtomicDouble in project CoreNLP by stanfordnlp.

the class ScorePhrasesLearnFeatWt method chooseUnknownAsNegatives.

/**
 * Chooses, from the candidate phrases, the ones that are not close to the positive phrases.
 *
 * <p>The candidates are split into batches, one {@code ComputeSim} task is submitted per batch to a
 * fixed thread pool of {@code constVars.numThreads} threads, and the first counter of each task's
 * result pair is merged into a single similarity counter. The counter is then pruned with
 * {@code Counters.retainBelow} using {@code constVars.positiveSimilarityThresholdLowPrecision}, and
 * the keys that remain are returned.
 *
 * @param candidatePhrases phrases to score
 * @param label label passed through to each {@code ComputeSim} task
 * @param positivePhrases positive phrases passed through to each {@code ComputeSim} task
 * @param knownNegativePhrases known negatives passed through to each {@code ComputeSim} task
 * @param logFile optional writer; when it and {@code wordVectors} are both non-null, every pruned
 *     phrase that has a word vector is written to it as a "-PN" line
 * @return the key set of the similarity counter after pruning
 * @throws IOException if writing to {@code logFile} fails
 * @throws RuntimeException wrapping any exception raised while collecting a task result
 */
Set<CandidatePhrase> chooseUnknownAsNegatives(Set<CandidatePhrase> candidatePhrases, String label, Collection<CandidatePhrase> positivePhrases, Map<String, Collection<CandidatePhrase>> knownNegativePhrases, BufferedWriter logFile) throws IOException {
    List<List<CandidatePhrase>> threadedCandidates = GetPatternsFromDataMultiClass.getThreadBatches(CollectionUtils.toList(candidatePhrases), constVars.numThreads);
    Counter<CandidatePhrase> sims = new ClassicCounter<>();
    // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, not the most negative one.
    // It is used here purely as a "never updated" sentinel (see the equality check below).
    AtomicDouble allMaxSim = new AtomicDouble(Double.MIN_VALUE);
    ExecutorService executor = Executors.newFixedThreadPool(constVars.numThreads);
    try {
        List<Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>>> list = new ArrayList<>();
        // Submit one similarity task per batch of candidates.
        for (List<CandidatePhrase> keys : threadedCandidates) {
            Callable<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> task = new ComputeSim(label, keys, allMaxSim, positivePhrases, knownNegativePhrases);
            Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> submit = executor.submit(task);
            list.add(submit);
        }
        // Now retrieve the results; only the first counter of each pair is used.
        for (Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> future : list) {
            try {
                sims.addAll(future.get().first());
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // Preserve the interrupt status for callers further up the stack.
                    Thread.currentThread().interrupt();
                }
                executor.shutdownNow();
                throw new RuntimeException(e);
            }
        }
    } finally {
        // Always release the pool, even if task construction or submission failed.
        // This is a no-op when shutdownNow() has already been called.
        executor.shutdown();
    }
    if (allMaxSim.get() == Double.MIN_VALUE) {
        Redwood.log(Redwood.DBG, "No similarity recorded between the positives and the unknown!");
    }
    CandidatePhrase k = Counters.argmax(sims);
    System.out.println("Maximum similarity was " + sims.getCount(k) + " for word " + k);
    // retainBelow prunes sims in place and hands back the entries it dropped.
    Counter<CandidatePhrase> removed = Counters.retainBelow(sims, constVars.positiveSimilarityThresholdLowPrecision);
    System.out.println("removing phrases as negative phrases that were higher than positive similarity threshold of " + constVars.positiveSimilarityThresholdLowPrecision + ": " + removed);
    if (logFile != null && wordVectors != null) {
        for (Entry<CandidatePhrase, Double> en : removed.entrySet()) {
            if (wordVectors.containsKey(en.getKey().getPhrase())) {
                logFile.write(en.getKey() + "-PN " + ArrayUtils.toString(wordVectors.get(en.getKey().getPhrase()), " ") + "\n");
            }
        }
    }
    return sims.keySet();
}
Also used : AtomicDouble(edu.stanford.nlp.util.concurrent.AtomicDouble) AtomicDouble(edu.stanford.nlp.util.concurrent.AtomicDouble) IOException(java.io.IOException) ConcurrentHashCounter(edu.stanford.nlp.util.concurrent.ConcurrentHashCounter)

Example 2 with AtomicDouble

use of edu.stanford.nlp.util.concurrent.AtomicDouble in project CoreNLP by stanfordnlp.

the class ScorePhrasesLearnFeatWt method computeSimWithWordCluster.

/**
 * Scores each candidate phrase by its average Jaccard similarity, over word-class cluster
 * features, to the positive phrases.
 *
 * <p>Cluster features are looked up in {@code wordClassClustersForPhrase} and, when missing,
 * computed with {@code wordClass} and cached there. A candidate with no features gets a score of
 * 0. Whenever a candidate's score exceeds the current value of {@code allMaxSim}, that value is
 * overwritten with the score.
 *
 * @param candidatePhrases phrases to score
 * @param positivePhrases phrases to compare each candidate against; a positive phrase equal to the
 *     candidate is skipped
 * @param allMaxSim running maximum of the scores seen, updated in place
 * @return a pair whose first element maps each candidate to its score and whose second element is
 *     {@code null} (similarity to negative phrases is not computed)
 */
Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordCluster(Collection<CandidatePhrase> candidatePhrases, Collection<CandidatePhrase> positivePhrases, AtomicDouble allMaxSim) {
    Counter<CandidatePhrase> sims = new ClassicCounter<>(candidatePhrases.size());
    for (CandidatePhrase p : candidatePhrases) {
        Counter<Integer> feat = wordClassClustersForPhrase.get(p);
        if (feat == null) {
            feat = wordClass(p.getPhrase(), p.getPhraseLemma());
            wordClassClustersForPhrase.put(p, feat);
        }
        double avgSim = 0;
        if (feat.size() > 0) {
            for (CandidatePhrase pos : positivePhrases) {
                if (p.equals(pos)) {
                    continue;
                }
                Counter<Integer> posfeat = wordClassClustersForPhrase.get(pos);
                if (posfeat == null) {
                    posfeat = wordClass(pos.getPhrase(), pos.getPhraseLemma());
                    // Cache the positive phrase's own features. The previous code stored the
                    // candidate's features (feat) under this key, corrupting later lookups.
                    wordClassClustersForPhrase.put(pos, posfeat);
                }
                if (posfeat.size() > 0) {
                    double j = Counters.jaccardCoefficient(posfeat, feat);
                    // Ignore degenerate coefficients so they cannot poison the average.
                    if (!Double.isInfinite(j) && !Double.isNaN(j)) {
                        avgSim += j;
                    }
                }
            }
            // The denominator counts every positive phrase, including any that were skipped above.
            // NOTE(review): an empty positivePhrases makes this 0.0 / 0 == NaN — confirm callers
            // never pass an empty collection.
            avgSim /= positivePhrases.size();
        }
        sims.setCount(p, avgSim);
        // NOTE(review): get-then-set is not atomic; if allMaxSim is shared between concurrently
        // running tasks, a larger value written in between could be overwritten — confirm.
        if (allMaxSim.get() < avgSim) {
            allMaxSim.set(avgSim);
        }
    }
    //TODO: compute similarity with neg phrases
    return new Pair<>(sims, null);
}
Also used : AtomicDouble(edu.stanford.nlp.util.concurrent.AtomicDouble)

Aggregations

AtomicDouble (edu.stanford.nlp.util.concurrent.AtomicDouble): 2, ConcurrentHashCounter (edu.stanford.nlp.util.concurrent.ConcurrentHashCounter): 1, IOException (java.io.IOException): 1