Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project orientdb by orientechnologies.
From the class LuceneNativeFacet, method index:
/**
* Build the example index.
*/
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
        new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}
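For context, here is a minimal sketch (not part of the original class) of how the facets written by index() could be counted at search time. It assumes the same indexDir, taxoDir and FacetsConfig config fields used above, and uses the standard classes from org.apache.lucene.facet and org.apache.lucene.facet.taxonomy:

// Illustrative only: count the "Author" facet over all documents.
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();
// Collect the matching documents (here: all of them) into the FacetsCollector
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);

Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
FacetResult authors = facets.getTopChildren(10, "Author");
System.out.println(authors);

taxoReader.close();
indexReader.close();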
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project camel by apache.
From the class LuceneIndexAndQueryProducerTest, method createRegistry:
@Override
protected JndiRegistry createRegistry() throws Exception {
    JndiRegistry registry = new JndiRegistry(createJndiContext());
    registry.bind("std", new File("target/stdindexDir"));
    registry.bind("load_dir", new File("src/test/resources/sources"));
    registry.bind("stdAnalyzer", new StandardAnalyzer());
    registry.bind("simple", new File("target/simpleindexDir"));
    registry.bind("simpleAnalyzer", new SimpleAnalyzer());
    registry.bind("whitespace", new File("target/whitespaceindexDir"));
    registry.bind("whitespaceAnalyzer", new WhitespaceAnalyzer());
    return registry;
}
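The bound names can then be referenced from camel-lucene endpoint URIs. A hypothetical route (not part of the original test, endpoint name chosen for illustration) that uses the whitespace bindings might look like this, assuming the component's analyzer, indexDir and srcDir options:

// Hypothetical route: index the files in load_dir with the WhitespaceAnalyzer.
from("direct:load")
    .to("lucene:whitespaceIndex:insert?analyzer=#whitespaceAnalyzer&indexDir=#whitespace&srcDir=#load_dir");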
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project camel by apache.
From the class LuceneQueryProcessorTest, method testWildcardSearcher:
@Test
public void testWildcardSearcher() throws Exception {
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    MockEndpoint mockSearchEndpoint = getMockEndpoint("mock:searchResult");
    context.stop();
    context.addRoutes(new RouteBuilder() {
        public void configure() {
            try {
                from("direct:start")
                    .setHeader("QUERY", constant("Carl*"))
                    .process(new LuceneQueryProcessor("target/simpleindexDir", analyzer, null, 20))
                    .to("direct:next");
            } catch (Exception e) {
                e.printStackTrace();
            }
            from("direct:next").process(new Processor() {
                public void process(Exchange exchange) throws Exception {
                    Hits hits = exchange.getIn().getBody(Hits.class);
                    printResults(hits);
                }

                private void printResults(Hits hits) {
                    LOG.debug("Number of hits: " + hits.getNumberOfHits());
                    for (int i = 0; i < hits.getNumberOfHits(); i++) {
                        LOG.debug("Hit " + i + " Index Location:" + hits.getHit().get(i).getHitLocation());
                        LOG.debug("Hit " + i + " Score:" + hits.getHit().get(i).getScore());
                        LOG.debug("Hit " + i + " Data:" + hits.getHit().get(i).getData());
                    }
                }
            }).to("mock:searchResult");
        }
    });
    context.start();
    LOG.debug("------------Beginning Wildcard + Simple Analyzer Phrase Searcher Test---------------");
    sendRequest();
    mockSearchEndpoint.assertIsSatisfied();
    LOG.debug("------------Completed Wildcard + Simple Analyzer Phrase Searcher Test---------------");
    context.stop();
}
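For comparison, the same wildcard search can be expressed with plain Lucene against the index directory used above. This is an illustrative sketch, not taken from the test; in particular, the field name "contents" is an assumption about how the documents were indexed:

// Illustrative only: run "Carl*" as a WildcardQuery directly against the index.
// Note that WildcardQuery terms are not analyzed; the WhitespaceAnalyzer only
// affected how the documents were tokenized at index time.
Directory dir = FSDirectory.open(Paths.get("target/simpleindexDir"));
try (DirectoryReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new WildcardQuery(new Term("contents", "Carl*")), 20);
    System.out.println("Total hits: " + topDocs.totalHits);
}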
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
From the class SimpleQueryConverter, method convert:
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> result = new HashSet<>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("", origQuery)) {
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            Token tok = new Token();
            tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            tok.setFlags(flagsAtt.getFlags());
            tok.setPayload(payloadAtt.getPayload());
            tok.setPositionIncrement(posIncAtt.getPositionIncrement());
            tok.setType(typeAtt.type());
            result.add(tok);
        }
        ts.end();
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
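A minimal usage sketch (illustrative, not from the Solr test suite): converting a raw query string yields one Token per whitespace-separated term, each carrying its original character offsets. Since the result is a HashSet, no particular iteration order is guaranteed.

// Illustrative only: tokenize a two-word query and inspect the offsets.
SimpleQueryConverter converter = new SimpleQueryConverter();
Collection<Token> tokens = converter.convert("hello world");
for (Token token : tokens) {
    System.out.println(token + " [" + token.startOffset() + "," + token.endOffset() + ")");
}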
Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.
From the class SpellingQueryConverterTest, method testSpecialChars:
@Test
public void testSpecialChars() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    String original = "field_with_underscore:value_with_underscore";
    Collection<Token> tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    original = "field_with_digits123:value_with_digits123";
    tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    original = "field-with-hyphens:value-with-hyphens";
    tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    // mix 'em up and add some to the value
    // original = "field_with-123s:value_,.|with-hyphens";
    // tokens = converter.convert(original);
    // assertTrue("tokens is null and it shouldn't be", tokens != null);
    // assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    // assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    original = "foo:bar^5.0";
    tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    String firstKeyword = "value1";
    String secondKeyword = "value2";
    original = "field-with-parenthesis:(" + firstKeyword + " " + secondKeyword + ")";
    tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
    assertTrue("first Token is not " + firstKeyword, new ArrayList<>(tokens).get(0).toString().equals(firstKeyword));
    assertTrue("second Token is not " + secondKeyword, new ArrayList<>(tokens).get(1).toString().equals(secondKeyword));
}