Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache.
From class FuzzySuggesterTest, method testGraphDups.
public void testGraphDups() throws Exception {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer) {
        int tokenStreamCounter = 0;
        // One canned stream per analysis pass: the two input keys during build(),
        // then the lookup query. posInc=0 stacks a token on the previous position;
        // posLength makes it span multiple positions, producing a token graph.
        final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] {
                token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1),
                token("is", 1, 1), token("slow", 1, 1) }),
            new CannedTokenStream(new Token[] {
                token("wi", 1, 1), token("hotspot", 0, 3), token("fi", 1, 1),
                token("network", 1, 1), token("is", 1, 1), token("fast", 1, 1) }),
            new CannedTokenStream(new Token[] {
                token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1) })
        };

        @Override
        public TokenStream getTokenStream() {
          TokenStream result = tokenStreams[tokenStreamCounter];
          tokenStreamCounter++;
          return result;
        }

        @Override
        protected void setReader(final Reader reader) {
        }
      };
    }
  };
  Input[] keys = new Input[] {
      new Input("wifi network is slow", 50),
      new Input("wi fi network is fast", 10)
  };
  Directory tempDir = getDirectory();
  FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
  suggester.build(new InputArrayIterator(keys));
  List<LookupResult> results = suggester.lookup("wifi network", false, 10);
  if (VERBOSE) {
    System.out.println("Results: " + results);
  }
  // Both surface forms match because "hotspot" is stacked over both token paths.
  assertEquals(2, results.size());
  assertEquals("wifi network is slow", results.get(0).key);
  assertEquals(50, results.get(0).value);
  assertEquals("wi fi network is fast", results.get(1).key);
  assertEquals(10, results.get(1).value);
  IOUtils.close(tempDir, analyzer);
}
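The token(...) helper the test calls is not shown in the snippet. A minimal sketch of such a helper, assuming the usual org.apache.lucene.analysis imports (the name and shape are inferred from how it is called above, not quoted from this page):

// Sketch of a token(term, posInc, posLength) helper: builds a Token and sets
// its position increment and position length, which is what lets "hotspot"
// stack over one or more underlying tokens in the graph.
private static Token token(String term, int posInc, int posLength) {
  final Token t = new Token(term, 0, 0);
  t.setPositionIncrement(posInc);
  t.setPositionLength(posLength);
  return t;
}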
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache.
From class CompletionTokenStreamTest, method testWithMultipleTokens.
@Test
public void testWithMultipleTokens() throws Exception {
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  String input = "mykeyword another keyword";
  tokenStream.setReader(new StringReader(input));
  BytesRef payload = new BytesRef("payload");
  CompletionTokenStream completionTokenStream = new CompletionTokenStream(tokenStream);
  completionTokenStream.setPayload(payload);
  PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
  // CompletionTokenStream concatenates the analyzed tokens into a single
  // output token, joining them with SEP_LABEL.
  CharsRefBuilder builder = new CharsRefBuilder();
  builder.append("mykeyword");
  builder.append((char) CompletionAnalyzer.SEP_LABEL);
  builder.append("another");
  builder.append((char) CompletionAnalyzer.SEP_LABEL);
  builder.append("keyword");
  assertTokenStreamContents(stream,
      new String[] { builder.toCharsRef().toString() },
      null, null,
      new String[] { payload.utf8ToString() },
      new int[] { 1 }, null, null);
}
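PayloadAttrToTypeAttrFilter is a test-local helper rather than public API. A plausible minimal version, assumed from how it is asserted above (the payload is checked through the types argument of assertTokenStreamContents, so the filter presumably copies the payload into the type attribute):

// Hypothetical sketch of the test-local filter used above: mirrors the
// PayloadAttribute into the TypeAttribute so the payload can be asserted
// as a type string.
static final class PayloadAttrToTypeAttrFilter extends TokenFilter {
  private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
  private final TypeAttribute type = addAttribute(TypeAttribute.class);

  PayloadAttrToTypeAttrFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      type.setType(payload.getPayload().utf8ToString());
      return true;
    }
    return false;
  }
}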
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache.
From class TestQueryBuilder, method testNoTermAttribute.
public void testNoTermAttribute() {
  // Can't use MockTokenizer because it adds a term attribute and we don't want that.
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      return new TokenStreamComponents(new Tokenizer() {
        boolean wasReset = false;

        @Override
        public void reset() throws IOException {
          super.reset();
          assertFalse(wasReset);
          wasReset = true;
        }

        @Override
        public boolean incrementToken() throws IOException {
          assertTrue(wasReset);
          return false;
        }
      });
    }
  };
  QueryBuilder builder = new QueryBuilder(analyzer);
  assertNull(builder.createBooleanQuery("field", "whatever"));
}
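For contrast, a hypothetical snippet (not part of the original test) showing that the null result is specific to the term-less tokenizer: with an analyzer that does emit terms, the same call produces a real query.

// Assumed contrast, inside a LuceneTestCase subclass: MockAnalyzer emits
// tokens with a CharTermAttribute, so createBooleanQuery returns a TermQuery
// instead of null.
QueryBuilder qb = new QueryBuilder(new MockAnalyzer(random()));
Query q = qb.createBooleanQuery("field", "whatever");
assertEquals(new TermQuery(new Term("field", "whatever")), q);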
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache.
From class AnalysisRequestHandlerBase, method analyzeValue.
/**
 * Analyzes the given value using the given Analyzer.
 *
 * @param value   the value to analyze
 * @param context the {@link AnalysisContext analysis context}
 *
 * @return a NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {
  Analyzer analyzer = context.getAnalyzer();
  if (!TokenizerChain.class.isInstance(analyzer)) {
    // Not a Solr TokenizerChain: analyze in one shot and report a single stage.
    try (TokenStream tokenStream = analyzer.tokenStream(context.getFieldName(), value)) {
      NamedList<List<NamedList>> namedList = new NamedList<>();
      namedList.add(tokenStream.getClass().getName(),
          convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
      return namedList;
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
  }
  // A TokenizerChain is unwrapped so each char filter, the tokenizer, and each
  // token filter can be reported as its own analysis stage.
  TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
  CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
  TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
  TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();
  NamedList<Object> namedList = new NamedList<>();
  if (0 < cfiltfacs.length) {
    String source = value;
    for (CharFilterFactory cfiltfac : cfiltfacs) {
      Reader reader = new StringReader(source);
      reader = cfiltfac.create(reader);
      source = writeCharStream(namedList, reader);
    }
  }
  TokenStream tokenStream = tfac.create();
  ((Tokenizer) tokenStream).setReader(tokenizerChain.initReader(null, new StringReader(value)));
  List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
  namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
  ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, tokens);
  for (TokenFilterFactory tokenFilterFactory : filtfacs) {
    for (final AttributeSource tok : tokens) {
      tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
    }
    // overwrite the vars "tokenStream", "tokens", and "listBasedTokenStream"
    tokenStream = tokenFilterFactory.create(listBasedTokenStream);
    tokens = analyzeTokenStream(tokenStream);
    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
    listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, tokens);
  }
  return namedList;
}
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache.
From class TestPayloadCheckQuery, method beforeClass.
@BeforeClass
public static void beforeClass() throws Exception {
  Analyzer simplePayloadAnalyzer = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
    }
  };
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(simplePayloadAnalyzer)
          .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
          .setMergePolicy(newLogMergePolicy()));
  //writer.infoStream = System.out;
  for (int i = 0; i < 2000; i++) {
    Document doc = new Document();
    doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  searcher = newSearcher(reader);
  writer.close();
}
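SimplePayloadFilter is a test helper whose source is not shown above. A plausible minimal version, assumed from its role here (a TokenFilter that attaches a position-based payload to every token, giving the payload-check queries something to match against):

// Hypothetical sketch of a payload-setting filter like SimplePayloadFilter:
// attaches "pos: N" as a payload to the N-th token of each field value.
static final class SimplePayloadFilter extends TokenFilter {
  private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
  private int pos;

  SimplePayloadFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8)));
      pos++;
      return true;
    }
    return false;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    pos = 0; // payload positions restart for each new field value
  }
}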