Use of edu.illinois.cs.cogcomp.llm.align.WordListFilter in the project cogcomp-nlp by CogComp.
From the class LlmComparatorTest, method testRemoveStopwords.
/**
 * Runs {@link WordListFilter} over a whitespace-tokenized sentence and checks that
 * at least one token is removed and that "is" is among the removed tokens.
 *
 * <p>The test treats a {@code null} entry in the array returned by
 * {@code filter.filter(tokens)} as "the token at this index was removed".
 *
 * <p>Checks are made with {@code fail(...)} rather than the {@code assert} statement,
 * because {@code assert} is skipped when the JVM runs without {@code -ea} and the
 * test would then pass without verifying anything.
 */
@Test
public void testRemoveStopwords() {
    final WordListFilter filter;
    try {
        filter = new WordListFilter(new SimConfigurator().getDefaultConfig());
    } catch (IOException e) {
        // Keep the IOException as the cause so the failure report shows where it came from.
        throw new AssertionError("Could not create WordListFilter: " + e.getMessage(), e);
    }

    String sent = "This sentence is filled with unnecessary filler like their pronouns , punctuation and function "
            + "words such as for , by , from , him , her , and to .";
    String[] tokens = sent.split("\\s+");
    String[] filteredTokens = filter.filter(tokens);

    // Collect the original tokens whose slot in the filter output is null.
    List<String> removedTokens = new LinkedList<>();
    for (int i = 0; i < tokens.length; ++i) {
        if (filteredTokens[i] == null) {
            removedTokens.add(tokens[i]);
        }
    }

    if (removedTokens.isEmpty()) {
        fail("Expected the filter to remove at least one token, but none were removed.");
    }
    if (!removedTokens.contains("is")) {
        fail("Expected \"is\" to be removed by the filter; removed tokens were: " + removedTokens);
    }

    System.out.println("Original text: " + sent);
    System.out.println("Filtered tokens: ");
    System.out.println(StringUtils.join(removedTokens, "; "));
}
Use of edu.illinois.cs.cogcomp.llm.align.WordListFilter in the project cogcomp-nlp by CogComp.
From the class LlmStringComparator, method initialize.
/**
 * Builds the components this comparator works with: tokenizer, word-list filter,
 * the two aligners, and the alignment scorer.
 *
 * @param rm_ resource manager handed to {@code SimConfigurator.getConfig} to obtain the
 *        configuration used below
 * @param comparator comparator stored on this instance and used by the main aligner
 * @throws IOException declared by this method; presumably raised while loading
 *         configuration or constructing the {@link WordListFilter} (confirm against
 *         those classes)
 */
private void initialize(ResourceManager rm_, Comparator<String, EntailmentResult> comparator) throws IOException {
    final ResourceManager config = new SimConfigurator().getConfig(rm_);
    // Read the threshold up front so a configuration problem surfaces before anything is built.
    final double entailmentThreshold = config.getDouble(SimConfigurator.LLM_ENTAILMENT_THRESHOLD.key);

    this.tokenizer = new IllinoisTokenizer();
    this.comparator = comparator;

    // A single filter instance is shared by both aligners.
    this.filter = new WordListFilter(config);
    this.neAligner = new Aligner<>(new NEComparator(), this.filter);
    this.aligner = new Aligner<>(comparator, this.filter);

    this.scorer = new GreedyAlignmentScorer<>(entailmentThreshold);
}
Aggregations