Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
In class HighlighterTest, method testOffsetWindowTokenFilter.
@Test
public void testOffsetWindowTokenFilter() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer();
  TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
  OffsetWindowTokenFilter tots = new OffsetWindowTokenFilter(tokenStream);
  for (String v : multivalued) {
    // each window covers one value of the multivalued field
    TokenStream ts1 = tots.advanceToNextWindowOfLength(v.length());
    ts1.reset();
    Analyzer a2 = new WhitespaceAnalyzer();
    TokenStream ts2 = a2.tokenStream("", v);
    ts2.reset();
    // the windowed stream must match a fresh analysis of the single value, token for token
    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
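The loop above leans on the standard TokenStream lifecycle: reset() before the first incrementToken(), incrementToken() until it returns false, then end() and close(). A minimal sketch of that contract with WhitespaceAnalyzer alone (the class name TokenStreamContractSketch is ours, not from the project):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamContractSketch {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new WhitespaceAnalyzer();
         TokenStream ts = analyzer.tokenStream("", "a b c d")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                   // required before the first incrementToken()
      while (ts.incrementToken()) { // returns false once the stream is exhausted
        System.out.println(term.toString()); // prints a, b, c, d
      }
      ts.end();                     // records the final offset state
    }                               // close() via try-with-resources
  }
}

The test's assertFalse(ts2.incrementToken()) is the same exhaustion check: once both streams agree token for token, the reference stream must also be finished.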
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
In class SpellCheckComponent, method inform.
@Override
public void inform(SolrCore core) {
  if (initParams != null) {
    LOG.info("Initializing spell checkers");
    boolean hasDefault = false;
    for (int i = 0; i < initParams.size(); i++) {
      if (initParams.getName(i).equals("spellchecker")) {
        Object cfg = initParams.getVal(i);
        if (cfg instanceof NamedList) {
          addSpellChecker(core, hasDefault, (NamedList) cfg);
        } else if (cfg instanceof Map) {
          addSpellChecker(core, hasDefault, new NamedList((Map) cfg));
        } else if (cfg instanceof List) {
          for (Object o : (List) cfg) {
            if (o instanceof Map) {
              addSpellChecker(core, hasDefault, new NamedList((Map) o));
            }
          }
        }
      }
    }
    Map<String, QueryConverter> queryConverters = new HashMap<>();
    core.initPlugins(queryConverters, QueryConverter.class);
    // ensure that there is at least one query converter defined
    if (queryConverters.size() == 0) {
      LOG.trace("No queryConverter defined, using default converter");
      queryConverters.put("queryConverter", new SpellingQueryConverter());
    }
    // there should only be one
    if (queryConverters.size() == 1) {
      queryConverter = queryConverters.values().iterator().next();
      IndexSchema schema = core.getLatestSchema();
      String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType");
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // fall back to whitespace tokenization when no queryAnalyzerFieldType resolves
      Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer() : fieldType.getQueryAnalyzer();
      //TODO: There's got to be a better way! Where's Spring when you need it?
      queryConverter.setAnalyzer(analyzer);
    }
  }
}
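The analyzer selection in the last block is a plain null-fallback. A minimal sketch of the same wiring in isolation (buildConverter is a hypothetical helper, not part of SpellCheckComponent):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.solr.schema.FieldType;
import org.apache.solr.spelling.QueryConverter;
import org.apache.solr.spelling.SpellingQueryConverter;

static QueryConverter buildConverter(FieldType fieldType) {
  // use the schema's query analyzer when queryAnalyzerFieldType is configured,
  // otherwise fall back to simple whitespace tokenization
  Analyzer analyzer = (fieldType == null) ? new WhitespaceAnalyzer() : fieldType.getQueryAnalyzer();
  QueryConverter converter = new SpellingQueryConverter();
  converter.setAnalyzer(analyzer);
  return converter;
}

WhitespaceAnalyzer is a conservative default here: it splits only on whitespace, so spellcheck query terms reach the checker unstemmed and unfiltered.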
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
In class TestPerFieldAnalyzerWrapper, method testPerField.
public void testPerField() throws Exception {
  String text = "Qwerty";
  Map<String, Analyzer> analyzerPerField = Collections.<String, Analyzer>singletonMap("special", new SimpleAnalyzer());
  Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField);
  try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset();
    assertTrue(tokenStream.incrementToken());
    assertEquals("WhitespaceAnalyzer does not lowercase", "Qwerty", termAtt.toString());
    assertFalse(tokenStream.incrementToken());
    tokenStream.end();
  }
  try (TokenStream tokenStream = analyzer.tokenStream("special", text)) {
    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset();
    assertTrue(tokenStream.incrementToken());
    assertEquals("SimpleAnalyzer lowercases", "qwerty", termAtt.toString());
    assertFalse(tokenStream.incrementToken());
    tokenStream.end();
  }
  // TODO: fix this about PFAW, this is crazy
  analyzer.close();
  defaultAnalyzer.close();
  IOUtils.close(analyzerPerField.values());
}
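The same wrapper works unchanged at index time. A minimal sketch (the helper method and setup are ours; RAMDirectory is used purely for illustration):

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

static void indexWithPerFieldAnalyzers() throws IOException {
  Map<String, Analyzer> perField = Collections.<String, Analyzer>singletonMap("special", new SimpleAnalyzer());
  Analyzer wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), perField);
  try (RAMDirectory dir = new RAMDirectory();
       IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(wrapper))) {
    Document doc = new Document();
    doc.add(new TextField("special", "Qwerty", Field.Store.NO)); // analyzed by SimpleAnalyzer: "qwerty"
    doc.add(new TextField("other", "Qwerty", Field.Store.NO));   // analyzed by WhitespaceAnalyzer: "Qwerty"
    writer.addDocument(doc);
  }
  wrapper.close();
}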
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
In class TestPerFieldAnalyzerWrapper, method testReuseWrapped.
public void testReuseWrapped() throws Exception {
  final String text = "Qwerty";
  final Analyzer specialAnalyzer = new SimpleAnalyzer();
  final Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
  TokenStream ts1, ts2, ts3, ts4;
  final PerFieldAnalyzerWrapper wrapper1 = new PerFieldAnalyzerWrapper(defaultAnalyzer, Collections.<String, Analyzer>singletonMap("special", specialAnalyzer));
  // test that the PerFieldWrapper returns the same instance as original Analyzer:
  ts1 = defaultAnalyzer.tokenStream("something", text);
  ts2 = wrapper1.tokenStream("something", text);
  assertSame(ts1, ts2);
  ts1 = specialAnalyzer.tokenStream("special", text);
  ts2 = wrapper1.tokenStream("special", text);
  assertSame(ts1, ts2);
  // Wrap with another wrapper, which does *not* extend DelegatingAnalyzerWrapper:
  final AnalyzerWrapper wrapper2 = new AnalyzerWrapper(wrapper1.getReuseStrategy()) {

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
      return wrapper1;
    }

    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
      assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
      TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
      return new TokenStreamComponents(components.getTokenizer(), filter);
    }
  };
  ts3 = wrapper2.tokenStream("special", text);
  assertNotSame(ts1, ts3);
  assertTrue(ts3 instanceof ASCIIFoldingFilter);
  // check that the cache did not get corrupted:
  ts2 = wrapper1.tokenStream("special", text);
  assertSame(ts1, ts2);
  // Wrap PerField with another PerField. In that case all TokenStreams returned must be the same:
  final PerFieldAnalyzerWrapper wrapper3 = new PerFieldAnalyzerWrapper(wrapper1, Collections.<String, Analyzer>singletonMap("moreSpecial", specialAnalyzer));
  ts1 = specialAnalyzer.tokenStream("special", text);
  ts2 = wrapper3.tokenStream("special", text);
  assertSame(ts1, ts2);
  ts3 = specialAnalyzer.tokenStream("moreSpecial", text);
  ts4 = wrapper3.tokenStream("moreSpecial", text);
  assertSame(ts3, ts4);
  assertSame(ts2, ts3);
  IOUtils.close(wrapper3, wrapper2, wrapper1, specialAnalyzer, defaultAnalyzer);
}
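A note on why the assertSame calls hold: an Analyzer caches its TokenStreamComponents (per thread, per its reuse strategy), so consecutive tokenStream() calls hand back the same instance, re-targeted at the new text. A minimal sketch of that behavior, not taken from the test (and valid only because neither stream is reset before the second call):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

static void demonstrateReuse() throws IOException {
  try (Analyzer a = new WhitespaceAnalyzer()) {
    TokenStream first = a.tokenStream("f", "one two");
    TokenStream second = a.tokenStream("f", "three four");
    System.out.println(first == second); // true: the cached components are reused
  }
}

wrapper2, which does not extend DelegatingAnalyzerWrapper, builds its own filter chain in wrapComponents, so its stream is a different instance from the one cached by specialAnalyzer, which is exactly what the assertNotSame and instanceof checks verify.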
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
In class RangeFacetsExample, method index.
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
  // Add documents with a fake timestamp, 1000 sec before "now", 2000 sec before "now", ...:
  for (int i = 0; i < 100; i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongPoint("timestamp", then));
    indexWriter.addDocument(doc);
  }
  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
  indexWriter.close();
}
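A minimal sketch of the drill-down the LongPoint field enables (the query and count are ours, not from the example class; nowSec and searcher are the fields used above):

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

// match documents whose timestamp falls in the last hour
Query pastHour = LongPoint.newRangeQuery("timestamp", nowSec - 3600, nowSec);
TopDocs hits = searcher.search(pastHour, 10);
System.out.println(hits.totalHits + " docs in the past hour");

The NumericDocValuesField added alongside it is what the range faceting side reads; the two fields share a name but serve two different access patterns.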