
Example 1 with ApplyDepPatterns

Use of edu.stanford.nlp.patterns.dep.ApplyDepPatterns in project CoreNLP by stanfordnlp.

The class ScorePhrases, method runParallelApplyPats:

private void runParallelApplyPats(Map<String, DataInstance> sents, String label, E pattern, TwoDimensionalCounter<CandidatePhrase, E> wordsandLemmaPatExtracted, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, Set<CandidatePhrase> alreadyLabeledWords) {
    Redwood.log(Redwood.DBG, "Applying pattern " + pattern + " to a total of " + sents.size() + " sentences ");
    List<String> notAllowedClasses = new ArrayList<>();
    List<String> sentids = CollectionUtils.toList(sents.keySet());
    if (constVars.doNotExtractPhraseAnyWordLabeledOtherClass) {
        for (String l : constVars.getAnswerClass().keySet()) {
            if (!l.equals(label)) {
                notAllowedClasses.add(l);
            }
        }
        notAllowedClasses.add("OTHERSEM");
    }
    Map<TokenSequencePattern, E> surfacePatternsLearnedThisIterConverted = null;
    Map<SemgrexPattern, E> depPatternsLearnedThisIterConverted = null;
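    // Compile the learned pattern into an executable form: a TokensRegex pattern for surface patterns, a Semgrex pattern for dependency patterns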
    if (constVars.patternType.equals(PatternFactory.PatternType.SURFACE)) {
        surfacePatternsLearnedThisIterConverted = new HashMap<>();
        String patternStr = null;
        try {
            patternStr = pattern.toString(notAllowedClasses);
            TokenSequencePattern pat = TokenSequencePattern.compile(constVars.env.get(label), patternStr);
            surfacePatternsLearnedThisIterConverted.put(pat, pattern);
        } catch (Exception e) {
            log.info("Error applying pattern " + patternStr + ". Probably an ill-formed pattern (can be because of special symbols in label names). Contact the software developer.");
            throw e;
        }
    } else if (constVars.patternType.equals(PatternFactory.PatternType.DEP)) {
        depPatternsLearnedThisIterConverted = new HashMap<>();
        SemgrexPattern pat = SemgrexPattern.compile(pattern.toString(notAllowedClasses), new edu.stanford.nlp.semgraph.semgrex.Env(constVars.env.get(label).getVariables()));
        depPatternsLearnedThisIterConverted.put(pat, pattern);
    } else {
        throw new UnsupportedOperationException();
    }
    // Apply the patterns and extract candidate phrases
    int num;
    int numThreads = constVars.numThreads;
    // If number of sentences is less, do not create so many threads
    if (sents.size() < 50)
        numThreads = 1;
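    // With multiple threads, the first numThreads-1 threads each process num sentence ids; the last thread picks up the remainder (possibly empty)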
    if (numThreads == 1)
        num = sents.size();
    else
        num = sents.size() / (numThreads - 1);
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    List<Future<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>>> list = new ArrayList<>();
    for (int i = 0; i < numThreads; i++) {
        Callable<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>> task = null;
        if (pattern.type.equals(PatternFactory.PatternType.SURFACE))
            // Redwood.log(Redwood.DBG, "Applying pats: assigning sentences " + i*num + " to " +Math.min(sentids.size(), (i + 1) * num) + " to thread " + (i+1));
            task = new ApplyPatterns(sents, num == sents.size() ? sentids : sentids.subList(i * num, Math.min(sentids.size(), (i + 1) * num)), surfacePatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
        else
            task = new ApplyDepPatterns(sents, num == sents.size() ? sentids : sentids.subList(i * num, Math.min(sentids.size(), (i + 1) * num)), depPatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
        Future<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>> submit = executor.submit(task);
        list.add(submit);
    }
    // Now retrieve the result
    for (Future<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>> future : list) {
        try {
            Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>> result = future.get();
            Redwood.log(ConstantsAndVariables.extremedebug, "Pattern " + pattern + " extracted phrases " + result.first());
            wordsandLemmaPatExtracted.addAll(result.first());
            matchedTokensByPat.addAll(result.second());
            alreadyLabeledWords.addAll(result.third());
        } catch (Exception e) {
            executor.shutdownNow();
            throw new RuntimeException(e);
        }
    }
    executor.shutdown();
}
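
For context, the Semgrex pattern compiled in the DEP branch above is what ApplyDepPatterns later matches against each sentence's dependency graph. The following is a minimal, self-contained sketch of that matching step, not code from CoreNLP's pattern-learning pipeline; the graph literal, the semgrex expression, and the class name are invented for illustration, while SemgrexPattern.compile, SemanticGraph.valueOf, and SemgrexMatcher are the standard CoreNLP APIs.

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;

public class SemgrexMatchSketch {
    public static void main(String[] args) {
        // Toy dependency graph; SemanticGraph.valueOf parses the bracketed notation.
        SemanticGraph graph = SemanticGraph.valueOf("[discovered nsubj>researchers obj>[cure compound>cancer]]");
        // A pattern in the spirit of pattern.toString(notAllowedClasses): match any
        // governor with an "obj" dependent and bind that dependent to the name "phrase".
        SemgrexPattern pat = SemgrexPattern.compile("{} >obj {}=phrase");
        SemgrexMatcher matcher = pat.matcher(graph);
        while (matcher.find()) {
            // Each bound node is a candidate phrase head, analogous to what
            // ApplyDepPatterns collects into the TwoDimensionalCounter.
            System.out.println("candidate: " + matcher.getNode("phrase").word());
        }
    }
}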
Also used :

Env (edu.stanford.nlp.ling.tokensregex.Env)
TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern)
SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)
TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter)
IOException (java.io.IOException)
InvocationTargetException (java.lang.reflect.InvocationTargetException)
ApplyDepPatterns (edu.stanford.nlp.patterns.dep.ApplyDepPatterns)
ExecutorService (java.util.concurrent.ExecutorService)
Future (java.util.concurrent.Future)

Aggregations

Env (edu.stanford.nlp.ling.tokensregex.Env) 1
TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) 1
ApplyDepPatterns (edu.stanford.nlp.patterns.dep.ApplyDepPatterns) 1
SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) 1
TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter) 1
IOException (java.io.IOException) 1
InvocationTargetException (java.lang.reflect.InvocationTargetException) 1
ExecutorService (java.util.concurrent.ExecutorService) 1
Future (java.util.concurrent.Future) 1
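
The threading in runParallelApplyPats is a standard partition-and-collect idiom: split the sentence ids into contiguous chunks, submit one Callable per chunk, and merge the partial results returned through the Futures. The following stripped-down sketch reproduces that idiom with plain JDK types only; the id strings and the per-chunk payload (just the chunk size) are invented for illustration.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PartitionAndCollectSketch {
    public static void main(String[] args) throws Exception {
        List<String> ids = new ArrayList<>();
        for (int i = 0; i < 230; i++) {
            ids.add("sent-" + i);
        }
        // Same thread-count and chunk-size rules as runParallelApplyPats.
        int numThreads = ids.size() < 50 ? 1 : 4;
        int num = numThreads == 1 ? ids.size() : ids.size() / (numThreads - 1);
        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
        List<Future<Integer>> futures = new ArrayList<>();
        for (int i = 0; i < numThreads; i++) {
            // The first numThreads-1 chunks hold num ids each; the last holds the remainder.
            List<String> chunk = num == ids.size() ? ids : ids.subList(i * num, Math.min(ids.size(), (i + 1) * num));
            Callable<Integer> task = () -> chunk.size();
            futures.add(executor.submit(task));
        }
        int total = 0;
        for (Future<Integer> future : futures) {
            // Merge the partial results, as the real code does with addAll on the counters.
            total += future.get();
        }
        executor.shutdown();
        System.out.println("processed " + total + " of " + ids.size() + " ids");
    }
}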