Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.
The class TermsForFieldTest, method setUp.
@Before
public void setUp() throws Exception {
  Directory d = new RAMDirectory();
  IndexWriter w = new IndexWriter(d,
      new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));

  Document doc1 = new Document();
  doc1.add(new Field("stored", "1", Field.Store.YES, Field.Index.ANALYZED));
  doc1.add(new Field("stored", "2", Field.Store.YES, Field.Index.ANALYZED));
  doc1.add(new Field("notStored", "a", Field.Store.NO, Field.Index.ANALYZED));
  w.addDocument(doc1);

  Document doc2 = new Document();
  doc2.add(new Field("stored", "3", Field.Store.YES, Field.Index.ANALYZED));
  doc2.add(new Field("notStored", "b", Field.Store.NO, Field.Index.ANALYZED));
  doc2.add(new Field("noIndex", "?", Field.Store.YES, Field.Index.NO));
  w.addDocument(doc2);

  w.close();
  this.reader = IndexReader.open(d);
}
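The snippet above only builds the index; it does not show how the test inspects it. A minimal illustrative sketch (not part of the original test) of enumerating the indexed terms of the "stored" field with the standard Lucene 3.x TermEnum API, which is the kind of listing TermsForField presumably produces:

  // Illustrative only: list the terms indexed for "stored" ("1", "2", "3")
  // using IndexReader.terms(Term) from stock Lucene 3.x.
  TermEnum termEnum = this.reader.terms(new Term("stored", ""));
  try {
    do {
      Term t = termEnum.term();
      if (t == null || !"stored".equals(t.field())) {
        break;  // past the last term of the field
      }
      System.out.println(t.text());
    } while (termEnum.next());
  } finally {
    termEnum.close();
  }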
Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.
The class PhraseFilterBenchmark, method main.
public static void main(String[] argv) {
  Directory directory = new RAMDirectory();
  try {
    IndexWriter writer = new IndexWriter(directory,
        new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));

    // Build NUMBER_OF_SEGMENTS segments, each holding a random share of TOTAL_DOCS.
    int done = 0;
    for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
      int remaining = NUMBER_OF_SEGMENTS - i;
      int numberOfDocs;
      if (remaining == 1) {
        numberOfDocs = TOTAL_DOCS - done;
      } else {
        numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
      }
      done += numberOfDocs;
      System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");

      for (int d = 0; d < numberOfDocs; d++) {
        int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2)
            + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
        Document doc = new Document();
        doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)),
            Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("second",
            RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
            Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
      writer.commit();
    }
    writer.close();

    IndexReader reader = IndexReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Generate the benchmark queries once, both as word arrays and as term arrays.
    String[][] queries = new String[TOTAL_QUERIES][];
    Term[][] terms = new Term[TOTAL_QUERIES][];
    for (int q = 0; q < TOTAL_QUERIES; q++) {
      queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
      terms[q] = new Term[queries[q].length];
      for (int qw = 0; qw < queries[q].length; qw++) {
        terms[q][qw] = new Term(FIELD, queries[q][qw]);
      }
    }

    // Warm up.
    new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);

    for (int round = 0; round < ROUNDS; round++) {
      System.out.println();
      String name1 = "filter";
      String name2 = "query";
      long ms1 = 0, ms2 = 0;
      for (int step = 0; step < 2; step++) {
        System.gc();
        System.gc();
        System.gc();
        if (step == (round & 1)) {
          // Variant 1: PhraseFilter intersected with a "second:yes" terms filter.
          long millis = System.currentTimeMillis();
          long hits = 0;
          for (String[] queryWords : queries) {
            PhraseFilter pf = new PhraseFilter(
                new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                FIELD, queryWords);
            hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
          }
          ms1 = System.currentTimeMillis() - millis;
          System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
        } else {
          // Variant 2: standard PhraseQuery combined with a TermQuery on the second field.
          long millis = System.currentTimeMillis();
          long hits = 0;
          for (Term[] queryTerms : terms) {
            PhraseQuery pq = new PhraseQuery();
            for (Term term : queryTerms) {
              pq.add(term);
            }
            Query query = BooleanQueryBuilder.builder()
                .must(new TermQuery(new Term("second", "yes")))
                .must(pq)
                .build();
            hits += searcher.search(query, 1).totalHits;
          }
          ms2 = System.currentTimeMillis() - millis;
          System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
        }
      }
      System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
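The benchmark refers to several constants (NUMBER_OF_SEGMENTS, TOTAL_DOCS, FIELD, RANDOM, and so on) and a words(int) helper that are defined elsewhere in PhraseFilterBenchmark and are not shown in this excerpt. A purely hypothetical sketch of such declarations, only to make the snippet self-contained; the actual values and word source in the project may differ:

  // Hypothetical declarations; the real class defines its own values.
  private static final Random RANDOM = new Random();
  private static final String FIELD = "f";
  private static final int NUMBER_OF_SEGMENTS = 10;
  private static final int TOTAL_DOCS = 100000;
  private static final int AVERAGE_WORDS_PER_DOC = 20;
  private static final int WORDS_PER_DOC_DEVIATION = 10;
  private static final int SECOND_FIELD_MATCH_PERCENTAGE = 50;
  private static final int TOTAL_QUERIES = 100;
  private static final int[] WORDS_PER_QUERY = {2, 3, 4};
  private static final int ROUNDS = 10;

  // Hypothetical word source: random tokens drawn from a small vocabulary.
  private static String[] words(int count) {
    String[] result = new String[count];
    for (int i = 0; i < count; i++) {
      result[i] = "w" + RANDOM.nextInt(1000);
    }
    return result;
  }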
Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.
The class PredicateBonusQueryTest, method testBasics.
@Test
public void testBasics() throws Exception {
  IndexWriter writer = new IndexWriter(this.directory,
      new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35)));
  writer.addDocument(new DocumentBuilder().add("value", "5").build());
  writer.close();

  IndexReader reader = IndexReader.open(this.directory);
  IndexSearcher searcher = new IndexSearcher(reader);

  Query query = new ConstantScoreQuery(new TermQuery(new Term("value", "5")));
  Assert.assertEquals(1.0, searcher.search(query, 1).getMaxScore(), 0.00001);

  // A bonus whose predicate matches nothing leaves the score untouched.
  Query noBonus = new PredicateBonusQuery(query, Predicates.NONE, 10.0f);
  Assert.assertEquals(1.0, searcher.search(noBonus, 1).getMaxScore(), 0.00001);

  // A bonus whose predicate matches everything adds its boost to the score.
  Query bonus = new PredicateBonusQuery(query, Predicates.ALL, 100.0f);
  Assert.assertEquals(101.0, searcher.search(bonus, 1).getMaxScore(), 0.00001);

  // If the underlying query matches nothing, the bonus never applies.
  Query noMatch = new TermQuery(new Term("value", "not5"));
  Assert.assertEquals(Double.NaN, searcher.search(noMatch, 1).getMaxScore(), 0.00001);
  Query noMatchNoBonus = new PredicateBonusQuery(noMatch, Predicates.NONE, 10.0f);
  Assert.assertEquals(Double.NaN, searcher.search(noMatchNoBonus, 1).getMaxScore(), 0.00001);
  Query noMatchIgnoresBonus = new PredicateBonusQuery(noMatch, Predicates.ALL, 100.0f);
  Assert.assertEquals(Double.NaN, searcher.search(noMatchIgnoresBonus, 1).getMaxScore(), 0.00001);
}
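The test uses a this.directory field whose initialization is not shown in this excerpt; presumably it is created in a setup method. A hedged sketch of what that setup could look like, assuming a plain in-memory directory:

  // Hypothetical setup for the directory field used above (not shown in the excerpt).
  private Directory directory;

  @Before
  public void setUp() {
    this.directory = new RAMDirectory();
  }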