use of org.apache.lucene.store.RAMDirectory in project greplin-lucene-utils by Cue.
the class PhraseFilterBenchmark method main.
/**
 * Benchmarks {@code PhraseFilter} (intersected with a terms filter on the
 * "second" field) against the equivalent boolean {@code PhraseQuery} search.
 *
 * <p>An in-memory index is filled with random documents, committing once per
 * batch; a fixed set of random phrase queries is then run through both
 * approaches for {@code ROUNDS} rounds, printing timings and hit counts to
 * standard output. Any {@link IOException} is printed rather than propagated.
 *
 * @param argv ignored
 */
public static void main(String[] argv) {
  Directory directory = new RAMDirectory();
  try {
    populateBenchmarkIndex(directory);
    IndexReader reader = IndexReader.open(directory);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      try {
        runBenchmarkRounds(reader, searcher);
      } finally {
        // The searcher was built on an external reader, so both are closed explicitly.
        searcher.close();
      }
    } finally {
      reader.close();
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}

/**
 * Writes {@code TOTAL_DOCS} random documents into {@code directory},
 * committing after each of {@code NUMBER_OF_SEGMENTS} batches.
 *
 * @param directory the directory to index into
 * @throws IOException if the index cannot be written
 */
private static void populateBenchmarkIndex(Directory directory) throws IOException {
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
  try {
    int done = 0;
    for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
      int remaining = NUMBER_OF_SEGMENTS - i;
      int numberOfDocs;
      if (remaining == 1) {
        // Last batch: take whatever is left so the total is exactly TOTAL_DOCS.
        numberOfDocs = TOTAL_DOCS - done;
      } else {
        // Random batch size that still leaves at least one document for every later batch.
        numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
      }
      done += numberOfDocs;
      System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
      for (int d = 0; d < numberOfDocs; d++) {
        // Word count falls in [AVERAGE - DEVIATION, AVERAGE + DEVIATION).
        int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
        Document doc = new Document();
        // NOTE(review): indexed under the literal "f" while the queries use FIELD -
        // presumably the same value; confirm against the constant's definition.
        doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no", Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
      writer.commit();
    }
  } finally {
    // Close on every path so a failed indexing run does not leak the writer.
    writer.close();
  }
}

/**
 * Generates {@code TOTAL_QUERIES} random phrase queries and times them through
 * the filter and query implementations for {@code ROUNDS} rounds.
 *
 * @param reader reader used for the warm-up filter evaluation
 * @param searcher searcher used for the timed searches
 * @throws IOException if a search fails
 */
private static void runBenchmarkRounds(IndexReader reader, IndexSearcher searcher) throws IOException {
  String[][] queries = new String[TOTAL_QUERIES][];
  Term[][] terms = new Term[TOTAL_QUERIES][];
  for (int q = 0; q < TOTAL_QUERIES; q++) {
    queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
    terms[q] = new Term[queries[q].length];
    for (int qw = 0; qw < queries[q].length; qw++) {
      terms[q][qw] = new Term(FIELD, queries[q][qw]);
    }
  }
  // Warm up.
  new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
  for (int round = 0; round < ROUNDS; round++) {
    System.out.println();
    String name1 = "filter";
    String name2 = "query";
    long ms1 = 0, ms2 = 0;
    for (int step = 0; step < 2; step++) {
      System.gc();
      System.gc();
      System.gc();
      // Alternate which implementation goes first: filter first on even rounds,
      // query first on odd rounds.
      if (step == (round & 1)) {
        long millis = System.currentTimeMillis();
        long hits = 0;
        for (String[] queryWords : queries) {
          PhraseFilter pf = new PhraseFilter(new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))), FIELD, queryWords);
          hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
        }
        ms1 = System.currentTimeMillis() - millis;
        System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
      } else {
        long millis = System.currentTimeMillis();
        long hits = 0;
        for (Term[] queryTerms : terms) {
          PhraseQuery pq = new PhraseQuery();
          for (Term term : queryTerms) {
            pq.add(term);
          }
          Query query = BooleanQueryBuilder.builder().must(new TermQuery(new Term("second", "yes"))).must(pq).build();
          hits += searcher.search(query, 1).totalHits;
        }
        ms2 = System.currentTimeMillis() - millis;
        System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
      }
    }
    // Floating-point division: if ms2 is 0 this does not throw, but the printed
    // percentage is meaningless (Infinity/NaN narrowed to int).
    System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
  }
}
use of org.apache.lucene.store.RAMDirectory in project greplin-lucene-utils by Cue.
the class TermsForFieldTest method setUp.
/**
 * Builds a two-document in-memory index and opens {@code this.reader} on it.
 *
 * <p>Document 1 has two values ("1", "2") in the stored field {@code stored}
 * and "a" in the unstored field {@code notStored}. Document 2 has "3" in
 * {@code stored}, "b" in {@code notStored}, and a stored but unindexed
 * {@code noIndex} field.
 *
 * @throws Exception if the index cannot be written or opened
 */
@Before
public void setUp() throws Exception {
  Directory d = new RAMDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
  try {
    Document doc1 = new Document();
    doc1.add(new Field("stored", "1", Field.Store.YES, Field.Index.ANALYZED));
    doc1.add(new Field("stored", "2", Field.Store.YES, Field.Index.ANALYZED));
    doc1.add(new Field("notStored", "a", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc1);
    Document doc2 = new Document();
    doc2.add(new Field("stored", "3", Field.Store.YES, Field.Index.ANALYZED));
    doc2.add(new Field("notStored", "b", Field.Store.NO, Field.Index.ANALYZED));
    doc2.add(new Field("noIndex", "?", Field.Store.YES, Field.Index.NO));
    w.addDocument(doc2);
  } finally {
    // Close on every path so a failure while adding documents does not leak the writer.
    w.close();
  }
  this.reader = IndexReader.open(d);
}
use of org.apache.lucene.store.RAMDirectory in project ddf by codice.
the class TestGeoNamesQueryLuceneIndex method initializeIndex.
/**
 * Replaces {@code directory} with a fresh in-memory index containing the
 * documents created from {@code GEO_ENTRY_1}, {@code GEO_ENTRY_2} and
 * {@code GEO_ENTRY_3}.
 *
 * @throws IOException if the index cannot be written
 */
private void initializeIndex() throws IOException {
  directory = new RAMDirectory();
  final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new StandardAnalyzer());
  indexWriterConfig.setOpenMode(OpenMode.CREATE);
  // try-with-resources closes the writer even if adding a document throws.
  try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
    indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_1));
    indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_2));
    indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_3));
  }
}
use of org.apache.lucene.store.RAMDirectory in project lucene-skos by behas.
the class SKOSStandardQueryParserTest method setUp.
/**
 * Prepares the fixture: a mock SKOS engine holding seven concepts (each with
 * one preferred label and one or more alternative labels), a label-expanding
 * analyzer backed by that engine, and an index writer over an in-memory
 * directory.
 *
 * @throws Exception if the index writer cannot be created
 */
@Before
public void setUp() throws Exception {
  directory = new RAMDirectory();

  // Compile-time constants, so each URI below is the same literal as before.
  final String concepts = "http://example.com/concept/";
  final String jumps = concepts + "1";
  final String quick = concepts + "2";
  final String over = concepts + "3";
  final String lazy = concepts + "4";
  final String dog = concepts + "5";
  final String unitedNations = concepts + "6";
  final String lazyDog = concepts + "7";

  // Test vocabulary: preferred label first, then its alternatives.
  skosEngine = new SKOSEngineMock();
  skosEngine.addEntry(jumps, SKOSType.PREF, "jumps");
  skosEngine.addEntry(jumps, SKOSType.ALT, "leaps", "hops");
  skosEngine.addEntry(quick, SKOSType.PREF, "quick");
  skosEngine.addEntry(quick, SKOSType.ALT, "fast", "speedy");
  skosEngine.addEntry(over, SKOSType.PREF, "over");
  skosEngine.addEntry(over, SKOSType.ALT, "above");
  skosEngine.addEntry(lazy, SKOSType.PREF, "lazy");
  skosEngine.addEntry(lazy, SKOSType.ALT, "apathic", "sluggish");
  skosEngine.addEntry(dog, SKOSType.PREF, "dog");
  skosEngine.addEntry(dog, SKOSType.ALT, "canine", "pooch");
  skosEngine.addEntry(unitedNations, SKOSType.PREF, "united nations");
  skosEngine.addEntry(unitedNations, SKOSType.ALT, "UN");
  skosEngine.addEntry(lazyDog, SKOSType.PREF, "lazy dog");
  skosEngine.addEntry(lazyDog, SKOSType.ALT, "Odie");

  skosAnalyzer = new SKOSAnalyzer(skosEngine, ExpansionType.LABEL);
  IndexWriterConfig writerConfig = new IndexWriterConfig(skosAnalyzer);
  writer = new IndexWriter(directory, writerConfig);
}
use of org.apache.lucene.store.RAMDirectory in project lucene-skos by behas.
the class LabelbasedTermExpansionTest method labelBasedTermExpansion.
/**
 * Indexes one sample metadata record (a Lucene document) with "title",
 * "description" and "subject" fields, then checks label-based expansion of
 * the subject field.
 *
 * <p>A search for "arms" finds the record because "arms" is defined as an
 * alternative label for "weapons", the term stored in the subject field. A
 * search for the broader concept "military equipment" finds it as well.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void labelBasedTermExpansion() throws IOException {
  /* the record to be indexed */
  String description = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).";
  Document record = new Document();
  record.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
  record.add(new Field("description", description, TextField.TYPE_NOT_STORED));
  record.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

  /* SKOS analyzer in LABEL expansion mode, built from the sample vocabulary */
  String vocabularyFile = "src/test/resources/skos_samples/ukat_examples.n3";
  String skosIndexPath = "build/";
  Analyzer skosAnalyzer = new SKOSAnalyzer(skosIndexPath, vocabularyFile, ExpansionType.LABEL);

  /* only "subject" goes through the SKOS analyzer; other fields use SimpleAnalyzer */
  Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
  fieldAnalyzers.put("subject", skosAnalyzer);
  PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), fieldAnalyzers);

  /* index the record into a fresh in-memory directory */
  IndexWriterConfig writerConfig = new IndexWriterConfig(indexAnalyzer);
  writer = new IndexWriter(new RAMDirectory(), writerConfig);
  writer.addDocument(record);

  /* "arms" in any of the three fields */
  BooleanQuery.Builder anyField = new BooleanQuery.Builder();
  anyField.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
  anyField.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
  anyField.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

  /* search through a reader opened directly on the writer */
  DirectoryReader indexReader = DirectoryReader.open(writer, false);
  searcher = new IndexSearcher(indexReader);

  /* the record matches because "arms" is among the expanded terms */
  TopDocs armsHits = searcher.search(anyField.build(), 10);
  assertEquals(1, armsHits.totalHits);

  /* a search for the broader concept also returns the record */
  Query broaderConcept = new TermQuery(new Term("subject", "military equipment"));
  TopDocs broaderHits = searcher.search(broaderConcept, 10);
  assertEquals(1, broaderHits.totalHits);
}
Aggregations