Use of org.apache.lucene.search.FilteredQuery in the project greplin-lucene-utils by Cue.
From the class PhraseFilterBenchmark, method main.
/**
 * Benchmarks a {@code PhraseFilter} wrapped in a {@code FilteredQuery} against an equivalent
 * boolean query built from a {@code PhraseQuery}.
 *
 * <p>Steps:
 * <ol>
 *   <li>Index {@code TOTAL_DOCS} randomly generated documents into a {@code RAMDirectory},
 *       committing once per batch for {@code NUMBER_OF_SEGMENTS} batches.</li>
 *   <li>Generate {@code TOTAL_QUERIES} random phrases.</li>
 *   <li>For {@code ROUNDS} rounds, time both approaches over all phrases, alternating which
 *       approach runs first, and print the elapsed time, the hit count and the time ratio.</li>
 * </ol>
 *
 * <p>Any {@link IOException} is printed to standard error and ends the benchmark.
 *
 * @param argv ignored
 */
public static void main(String[] argv) {
  final long nanosPerMilli = 1000000L;
  Directory directory = new RAMDirectory();
  try {
    IndexWriter writer = new IndexWriter(directory,
        new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
    try {
      int done = 0;
      for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
        int remaining = NUMBER_OF_SEGMENTS - i;
        int numberOfDocs;
        if (remaining == 1) {
          // Last batch takes whatever is left so the total is exactly TOTAL_DOCS.
          numberOfDocs = TOTAL_DOCS - done;
        } else {
          // Random batch size of at least 1 that still leaves at least one document
          // for every later batch.
          numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
        }
        done += numberOfDocs;
        System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
        for (int d = 0; d < numberOfDocs; d++) {
          int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2)
              + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
          Document doc = new Document();
          // Index under FIELD, the same name every query below searches.
          // NOTE(review): the original used the literal "f" here; presumably FIELD == "f" - confirm.
          doc.add(new Field(FIELD, Joiner.on(' ').join(words(wordCount)),
              Field.Store.YES, Field.Index.ANALYZED));
          doc.add(new Field("second",
              RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
              Field.Store.NO, Field.Index.ANALYZED));
          writer.addDocument(doc);
        }
        // One commit per batch.
        writer.commit();
      }
    } finally {
      // Close the writer even when indexing fails part-way through.
      writer.close();
    }

    IndexReader reader = IndexReader.open(directory);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      try {
        // Pre-build the phrases both as raw words (for the filter) and as Terms (for the query).
        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
          queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
          terms[q] = new Term[queries[q].length];
          for (int qw = 0; qw < queries[q].length; qw++) {
            terms[q][qw] = new Term(FIELD, queries[q][qw]);
          }
        }

        // Warm up (skipped when there are no queries to run).
        if (queries.length > 0) {
          new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
        }

        for (int round = 0; round < ROUNDS; round++) {
          System.out.println();
          String name1 = "filter";
          String name2 = "query";
          long ms1 = 0, ms2 = 0;
          for (int step = 0; step < 2; step++) {
            System.gc();
            System.gc();
            System.gc();
            // Even rounds run the filter first, odd rounds run the query first.
            if (step == (round & 1)) {
              // Elapsed time is measured with the monotonic clock, not the wall clock.
              long start = System.nanoTime();
              long hits = 0;
              for (String[] queryWords : queries) {
                PhraseFilter pf = new PhraseFilter(
                    new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                    FIELD, queryWords);
                hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
              }
              ms1 = (System.nanoTime() - start) / nanosPerMilli;
              System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
            } else {
              long start = System.nanoTime();
              long hits = 0;
              for (Term[] queryTerms : terms) {
                PhraseQuery pq = new PhraseQuery();
                for (Term term : queryTerms) {
                  pq.add(term);
                }
                Query query = BooleanQueryBuilder.builder()
                    .must(new TermQuery(new Term("second", "yes")))
                    .must(pq)
                    .build();
                hits += searcher.search(query, 1).totalHits;
              }
              ms2 = (System.nanoTime() - start) / nanosPerMilli;
              System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
            }
          }
          if (ms2 > 0) {
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2)
                + "% as much time as " + name2);
          } else {
            // A zero denominator would otherwise print a meaningless percentage.
            System.out.println(name2 + " finished in under 1ms; cannot compute a time ratio");
          }
        }
      } finally {
        searcher.close();
      }
    } finally {
      reader.close();
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
Aggregations