Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
From the class TestWordDelimiterFilter, method testEmptyTerm:
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // Depending on the flags, the analyzer may or may not preserve the empty term.
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
    a.close();
  }
}
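A side note on the constructor used above: the boolean argument to CharArraySet is an ignoreCase flag that controls how membership checks match. A minimal sketch (not part of the test) of the difference:

// Sketch: the second CharArraySet constructor argument controls case sensitivity.
CharArraySet caseSensitive = new CharArraySet(Arrays.asList("a", "b", "cd"), false);
CharArraySet caseInsensitive = new CharArraySet(Arrays.asList("a", "b", "cd"), true);
caseSensitive.contains("CD");   // false: matching is exact-case
caseInsensitive.contains("CD"); // true: matching ignores case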
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
From the class JapanesePartOfSpeechStopFilterFactory, method inform:
@Override
public void inform(ResourceLoader loader) throws IOException {
  stopTags = null;
  CharArraySet cas = getWordSet(loader, stopTagFiles, false);
  if (cas != null) {
    stopTags = new HashSet<>();
    for (Object element : cas) {
      char[] chars = (char[]) element;
      stopTags.add(new String(chars));
    }
  }
}
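The cast is needed because iterating a CharArraySet yields the raw char[] buffers rather than Strings. A standalone sketch of the same conversion, using placeholder tag values:

// Sketch: CharArraySet's iterator returns char[] elements, not Strings.
CharArraySet tags = new CharArraySet(Arrays.asList("noun", "verb"), false);
Set<String> asStrings = new HashSet<>();
for (Object element : tags) {
  asStrings.add(new String((char[]) element)); // convert each buffer to a String
}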
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
From the class TestJapaneseNumberFilter, method testName:
@Test
public void testName() throws IOException {
  // Test a name in which 京一 is normalized to a number
  // (arguments: expected tokens, start offsets, end offsets, position increments)
  assertAnalyzesTo(analyzer, "田中京一",
      new String[] { "田中", "10000000000000001" },
      new int[] { 0, 2 }, new int[] { 2, 4 }, new int[] { 1, 1 });
  // An analyzer that marks 京一 as a keyword
  Analyzer keywordMarkingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      CharArraySet set = new CharArraySet(1, false);
      set.add("京一");
      Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, false, JapaneseTokenizer.Mode.SEARCH);
      return new TokenStreamComponents(tokenizer, new JapaneseNumberFilter(new SetKeywordMarkerFilter(tokenizer, set)));
    }
  };
  // 京一 is not normalized because it is marked as a keyword
  assertAnalyzesTo(keywordMarkingAnalyzer, "田中京一",
      new String[] { "田中", "京一" },
      new int[] { 0, 2 }, new int[] { 2, 4 }, new int[] { 1, 1 });
  keywordMarkingAnalyzer.close();
}
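SetKeywordMarkerFilter sets KeywordAttribute to true for tokens found in the CharArraySet, and keyword-aware filters such as JapaneseNumberFilter then pass those tokens through unchanged, which is what the second assertion above demonstrates. A hedged sketch of that convention (SkipKeywordsFilter and its normalize step are hypothetical, for illustration only):

// Illustrative sketch, not the actual JapaneseNumberFilter source:
// keyword-aware filters consult KeywordAttribute and skip marked tokens.
public final class SkipKeywordsFilter extends TokenFilter {
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public SkipKeywordsFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // end of stream
    }
    if (!keywordAtt.isKeyword()) {
      // Only non-keyword tokens are rewritten; marked tokens pass through as-is.
      termAtt.setEmpty().append(normalize(termAtt.toString()));
    }
    return true;
  }

  private String normalize(String term) {
    return term; // placeholder for a real normalization step
  }
}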
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
From the class TestCharArrayMap, method testMethods:
public void testMethods() {
  CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
  HashMap<String, Integer> hm = new HashMap<>();
  hm.put("foo", 1);
  hm.put("bar", 2);
  cm.putAll(hm);
  assertEquals(hm.size(), cm.size());
  hm.put("baz", 3);
  cm.putAll(hm);
  assertEquals(hm.size(), cm.size());
  CharArraySet cs = cm.keySet();
  int n = 0;
  for (Object o : cs) {
    assertTrue(cm.containsKey(o));
    char[] co = (char[]) o;
    assertTrue(cm.containsKey(co, 0, co.length));
    n++;
  }
  assertEquals(hm.size(), n);
  assertEquals(hm.size(), cs.size());
  assertEquals(cm.size(), cs.size());
  cs.clear();
  assertEquals(0, cs.size());
  assertEquals(0, cm.size());
  // keySet() is a view over the map and does not allow adding new keys
  expectThrows(UnsupportedOperationException.class, () -> {
    cs.add("test");
  });
  cm.putAll(hm);
  assertEquals(hm.size(), cs.size());
  assertEquals(cm.size(), cs.size());
  Iterator<Map.Entry<Object, Integer>> iter1 = cm.entrySet().iterator();
  n = 0;
  while (iter1.hasNext()) {
    Map.Entry<Object, Integer> entry = iter1.next();
    Object key = entry.getKey();
    Integer val = entry.getValue();
    assertEquals(cm.get(key), val);
    entry.setValue(val * 100);
    assertEquals(val * 100, (int) cm.get(key));
    n++;
  }
  assertEquals(hm.size(), n);
  cm.clear();
  cm.putAll(hm);
  assertEquals(cm.size(), n);
  // EntryIterator is a non-generic inner class of CharArrayMap
  CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
  n = 0;
  while (iter2.hasNext()) {
    char[] keyc = iter2.nextKey();
    Integer val = iter2.currentValue();
    assertEquals(hm.get(new String(keyc)), val);
    iter2.setValue(val * 100);
    assertEquals(val * 100, (int) cm.get(keyc));
    n++;
  }
  assertEquals(hm.size(), n);
  cm.entrySet().clear();
  assertEquals(0, cm.size());
  assertEquals(0, cm.entrySet().size());
  assertTrue(cm.isEmpty());
}
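Beyond the Map contract, the test exercises CharArrayMap's char[]-slice lookups (containsKey(co, 0, co.length) above), which query a region of a char buffer without allocating a String per lookup. A minimal standalone sketch:

// Sketch: address a region of a larger char buffer directly.
CharArrayMap<Integer> map = new CharArrayMap<>(4, false);
map.put("foo", 42);
char[] buffer = "xxfooxx".toCharArray();
Integer value = map.get(buffer, 2, 3);           // 42, no intermediate String
boolean present = map.containsKey(buffer, 2, 3); // true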
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
From the class TestStopFilter, method testStopFilt:
public void testStopFilt() throws IOException {
  StringReader reader = new StringReader("Now is The Time");
  String[] stopWords = new String[] { "is", "the", "Time" };
  CharArraySet stopSet = StopFilter.makeStopSet(stopWords);
  final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  in.setReader(reader);
  TokenStream stream = new StopFilter(in, stopSet);
  assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
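makeStopSet builds a case-sensitive CharArraySet by default, which is why "The" survives above (it does not match the lowercase stopword "the") while "is" and the exact-case "Time" are removed. StopFilter also provides a makeStopSet overload that takes an ignoreCase flag; a sketch reusing the tokenizer in from the test:

// Sketch: with ignoreCase = true, "The" and "Time" would both be removed.
CharArraySet ignoreCaseSet = StopFilter.makeStopSet(Arrays.asList("is", "the", "time"), true);
TokenStream caseInsensitiveStream = new StopFilter(in, ignoreCaseSet);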