Use of edu.stanford.nlp.patterns.dep.ApplyDepPatterns in project CoreNLP by stanfordnlp.
From the class ScorePhrases, method runParallelApplyPats.
/**
 * Compiles a single pattern for the given label, applies it to the supplied sentences using a
 * pool of worker tasks, and merges every task's result into the caller-supplied accumulators.
 *
 * <p>The sentence ids (the keys of {@code sents}) are split into contiguous, non-overlapping
 * chunks so that each sentence is handed to exactly one task. Fewer than 50 sentences are
 * always processed by a single task.
 *
 * @param sents map whose keys are used as sentence ids; passed unchanged to every task
 * @param label label whose entry in {@code constVars.env} is used to compile the pattern
 * @param pattern the pattern to compile and apply
 * @param wordsandLemmaPatExtracted accumulator that receives the first component of every task result
 * @param matchedTokensByPat accumulator that receives the second component of every task result
 * @param alreadyLabeledWords accumulator that receives the third component of every task result
 * @throws UnsupportedOperationException if {@code constVars.patternType} is neither SURFACE nor DEP
 * @throws RuntimeException wrapping any failure raised while waiting for or merging a task result
 */
private void runParallelApplyPats(Map<String, DataInstance> sents, String label, E pattern, TwoDimensionalCounter<CandidatePhrase, E> wordsandLemmaPatExtracted, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, Set<CandidatePhrase> alreadyLabeledWords) {
  Redwood.log(Redwood.DBG, "Applying pattern " + pattern + " to a total of " + sents.size() + " sentences ");
  List<String> notAllowedClasses = new ArrayList<>();
  List<String> sentids = CollectionUtils.toList(sents.keySet());
  if (constVars.doNotExtractPhraseAnyWordLabeledOtherClass) {
    // Every answer class other than the current label is disallowed, plus the OTHERSEM class.
    for (String l : constVars.getAnswerClass().keySet()) {
      if (!l.equals(label)) {
        notAllowedClasses.add(l);
      }
    }
    notAllowedClasses.add("OTHERSEM");
  }

  // Compile the pattern into the representation matching the configured pattern type.
  Map<TokenSequencePattern, E> surfacePatternsLearnedThisIterConverted = null;
  Map<SemgrexPattern, E> depPatternsLearnedThisIterConverted = null;
  if (constVars.patternType.equals(PatternFactory.PatternType.SURFACE)) {
    surfacePatternsLearnedThisIterConverted = new HashMap<>();
    String patternStr = null;
    try {
      patternStr = pattern.toString(notAllowedClasses);
      TokenSequencePattern pat = TokenSequencePattern.compile(constVars.env.get(label), patternStr);
      surfacePatternsLearnedThisIterConverted.put(pat, pattern);
    } catch (Exception e) {
      log.info("Error applying pattern " + patternStr + ". Probably an ill formed pattern (can be because of special symbols in label names). Contact the software developer.");
      throw e;
    }
  } else if (constVars.patternType.equals(PatternFactory.PatternType.DEP)) {
    depPatternsLearnedThisIterConverted = new HashMap<>();
    SemgrexPattern pat = SemgrexPattern.compile(pattern.toString(notAllowedClasses), new edu.stanford.nlp.semgraph.semgrex.Env(constVars.env.get(label).getVariables()));
    depPatternsLearnedThisIterConverted.put(pat, pattern);
  } else {
    throw new UnsupportedOperationException();
  }

  // Apply the patterns and extract candidate phrases.
  int totalSents = sentids.size();
  // Guard against a non-positive configured thread count; the pool needs at least one thread.
  int numThreads = Math.max(1, constVars.numThreads);
  // If number of sentences is less, do not create so many threads
  if (totalSents < 50) {
    numThreads = 1;
  }
  // Ceiling division: chunks are disjoint and together cover every sentence exactly once.
  // (Dividing by numThreads - 1 handed the full list to both tasks when numThreads was 2,
  // and produced a chunk size of 0 when there were more threads than sentences.)
  int chunkSize = Math.max(1, (totalSents + numThreads - 1) / numThreads);
  // Always run at least one task, even for an empty sentence list.
  int numTasks = Math.max(1, (totalSents + chunkSize - 1) / chunkSize);

  ExecutorService executor = Executors.newFixedThreadPool(numTasks);
  boolean finished = false;
  try {
    List<Future<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>>> list = new ArrayList<>();
    for (int i = 0; i < numTasks; i++) {
      int from = Math.min(totalSents, i * chunkSize);
      int to = Math.min(totalSents, from + chunkSize);
      List<String> chunk = numTasks == 1 ? sentids : sentids.subList(from, to);
      Callable<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>> task = null;
      if (pattern.type.equals(PatternFactory.PatternType.SURFACE)) {
        task = new ApplyPatterns(sents, chunk, surfacePatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
      } else {
        task = new ApplyDepPatterns(sents, chunk, depPatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
      }
      list.add(executor.submit(task));
    }

    // Now retrieve the result
    for (Future<Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>>> future : list) {
      try {
        Triple<TwoDimensionalCounter<CandidatePhrase, E>, CollectionValuedMap<E, Triple<String, Integer, Integer>>, Set<CandidatePhrase>> result = future.get();
        Redwood.log(ConstantsAndVariables.extremedebug, "Pattern " + pattern + " extracted phrases " + result.first());
        wordsandLemmaPatExtracted.addAll(result.first());
        matchedTokensByPat.addAll(result.second());
        alreadyLabeledWords.addAll(result.third());
      } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
    finished = true;
  } finally {
    // Never leave pool threads behind: cancel outstanding work on any failure path.
    if (finished) {
      executor.shutdown();
    } else {
      executor.shutdownNow();
    }
  }
}
Aggregations