Use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache, in the class HTMLStripCharFilterTest, method testRandomHugeStrings.
public void testRandomHugeStrings() throws Exception {
  int numRounds = RANDOM_MULTIPLIER * 100;
  Analyzer a = newTestAnalyzer();
  checkRandomData(random(), a, numRounds, 8192);
  a.close();
}
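The helper newTestAnalyzer() is not shown in this snippet. A minimal sketch of what such an analyzer could look like, assuming it pairs a whitespace MockTokenizer with HTMLStripCharFilter applied in initReader(), so markup is stripped from the character stream before tokenization:

  // Hypothetical reconstruction of a newTestAnalyzer()-style helper (an
  // assumption, not the test's actual code).
  static Analyzer newTestAnalyzer() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, false));
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        // strip HTML/XML markup ahead of the tokenizer
        return new HTMLStripCharFilter(reader);
      }
    };
  }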
Use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache, in the class TestLimitTokenCountAnalyzer, method testLimitTokenCountIndexWriter.
public void testLimitTokenCountIndexWriter() throws IOException {
  for (boolean consumeAll : new boolean[] { true, false }) {
    Directory dir = newDirectory();
    int limit = TestUtil.nextInt(random(), 50, 101000);
    MockAnalyzer mock = new MockAnalyzer(random());
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    mock.setEnableChecks(consumeAll);
    Analyzer a = new LimitTokenCountAnalyzer(mock, limit, consumeAll);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(a));
    Document doc = new Document();
    StringBuilder b = new StringBuilder();
    // limit-1 "a" tokens, then "x" as the limit-th token and "z" just past the limit
    for (int i = 1; i < limit; i++) b.append(" a");
    b.append(" x");
    b.append(" z");
    doc.add(newTextField("field", b.toString(), Field.Store.NO));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = DirectoryReader.open(dir);
    // "x" fell inside the limit and was indexed; "z" was cut off
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    t = new Term("field", "z");
    assertEquals(0, reader.docFreq(t));
    reader.close();
    dir.close();
    a.close();
  }
}
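The limit is also easy to observe on a raw TokenStream, outside the IndexWriter round-trip. A minimal sketch, assuming a hypothetical limit of 2 and the standard reset/incrementToken/end consumption protocol:

  Analyzer limited = new LimitTokenCountAnalyzer(new MockAnalyzer(random()), 2, true);
  try (TokenStream ts = limited.tokenStream("field", "one two three four")) {
    ts.reset();
    int count = 0;
    while (ts.incrementToken()) count++; // the stream ends after the second token
    ts.end();
    assertEquals(2, count);
  }
  limited.close();

Passing consumeAll=true makes the filter drain the wrapped stream even after the limit is hit, which is what allows MockAnalyzer's end-of-stream checks to stay enabled in the test above.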
Use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache, in the class TestHyphenatedWordsFilter, method testRandomString.
/** blast some random strings through the analyzer */
public void testRandomString() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new HyphenatedWordsFilter(tokenizer));
    }
  };
  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
  a.close();
}
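checkRandomData only exercises stream consistency; it does not show what HyphenatedWordsFilter actually does. As an illustration (not part of the original test), a check like the following could be added before a.close(), using the assertAnalyzesTo helper from BaseTokenStreamTestCase: a token ending in '-' is rejoined with the token that follows it.

  // "ecologi-" + "cal" collapse into the single token "ecological"
  assertAnalyzesTo(a, "ecologi-\ncal develop-\nment",
      new String[] { "ecological", "development" });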
Use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache, in the class TestASCIIFoldingFilter, method testEmptyTerm.
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer, random().nextBoolean()));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
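The random boolean toggles preserveOriginal. With it fixed to false, the folding behavior itself can be pinned down with checkOneTerm (with true the filter also emits the unfolded token, so a one-term check would no longer hold). A small sketch under that assumption:

  Analyzer folding = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      // preserveOriginal=false: only the ASCII-folded form is emitted
      return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer, false));
    }
  };
  checkOneTerm(folding, "résumé", "resume");
  folding.close();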
Use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache, in the class TestKeepWordFilter, method testRandomStrings.
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final Set<String> words = new HashSet<>();
  words.add("a");
  words.add("b");
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream stream = new KeepWordFilter(tokenizer, new CharArraySet(words, true));
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
  a.close();
}
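As with the hyphenation test above, the random-data run verifies stream consistency only. A minimal illustration of the filter's effect (an assumption, again using assertAnalyzesTo before a.close()): tokens outside the keep set are removed, and because KeepWordFilter extends FilteringTokenFilter, position increments across the removed tokens are preserved.

  // "c" is not in the keep set and is dropped from the stream
  assertAnalyzesTo(a, "a c b", new String[] { "a", "b" });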