Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project crate by crate: class LuceneBatchIteratorBenchmark, method createLuceneBatchIterator.
@Setup
public void createLuceneBatchIterator() throws Exception {
    IndexWriter iw = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    String columnName = "x";
    for (int i = 0; i < 10_000_000; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField(columnName, i));
        iw.addDocument(doc);
    }
    iw.commit();
    iw.forceMerge(1, true);
    indexSearcher = new IndexSearcher(DirectoryReader.open(iw, true));
    IntegerColumnReference columnReference = new IntegerColumnReference(columnName);
    columnRefs = Collections.singletonList(columnReference);
    collectorContext = new CollectorContext(mock(IndexFieldDataService.class), new CollectorFieldsVisitor(0));
}
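The setup writes ten million documents with a single doc-values column and force-merges to one segment before opening the searcher. Two illustrative sanity checks that could follow the setup (an assumption for clarity, not part of the Crate benchmark):

// Illustrative checks (assumption, not in the original benchmark): forceMerge(1, true)
// leaves exactly one segment, and every document is visible to the searcher.
assert indexSearcher.getIndexReader().leaves().size() == 1;
assert indexSearcher.count(new MatchAllDocsQuery()) == 10_000_000;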
Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project crate by crate: class LuceneBatchIteratorTest, method prepareSearcher.
@Before
public void prepareSearcher() throws Exception {
    IndexWriter iw = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    String columnName = "x";
    expectedResult = new ArrayList<>(20);
    for (long i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField(columnName, i));
        iw.addDocument(doc);
        expectedResult.add(new Object[] { i });
    }
    iw.commit();
    indexSearcher = new IndexSearcher(DirectoryReader.open(iw, true));
    LongColumnReference columnReference = new LongColumnReference(columnName);
    columnRefs = Collections.singletonList(columnReference);
}
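expectedResult holds one Object[] row per document. For reference, values written via NumericDocValuesField can be read back segment by segment through the doc-values API; a minimal sketch of that (plain Lucene 5/6-era API matching the snippets on this page, not the actual LongColumnReference implementation):

// Sketch (assumption): reads the long doc values for every document, leaf by leaf,
// into the same Object[] row shape used by expectedResult. Assumes every document
// carries the column, as in the setup above.
static List<Object[]> readLongColumn(IndexReader reader, String column) throws IOException {
    List<Object[]> rows = new ArrayList<>();
    for (LeafReaderContext leaf : reader.leaves()) {
        NumericDocValues values = leaf.reader().getNumericDocValues(column);
        for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
            rows.add(new Object[] { values.get(doc) });
        }
    }
    return rows;
}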
Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project crate by crate: class OrderedLuceneBatchIteratorFactoryTest, method prepareSearchers.
@Before
public void prepareSearchers() throws Exception {
    IndexWriter iw1 = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    IndexWriter iw2 = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    expectedResult = LongStream.range(0, 20).mapToObj(i -> new Object[] { i }).collect(Collectors.toList());
    // expect descending order to differentiate between insert order
    expectedResult.sort(Comparator.comparingLong((Object[] o) -> ((long) o[0])).reversed());
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField(columnName, i));
        if (i % 2 == 0) {
            iw1.addDocument(doc);
        } else {
            iw2.addDocument(doc);
        }
    }
    iw1.commit();
    iw2.commit();
    searcher1 = new IndexSearcher(DirectoryReader.open(iw1, true));
    searcher2 = new IndexSearcher(DirectoryReader.open(iw2, true));
    fieldTypeLookup = columnName -> {
        LongFieldMapper.LongFieldType longFieldType = new LongFieldMapper.LongFieldType();
        longFieldType.setNames(new MappedFieldType.Names(columnName));
        return longFieldType;
    };
    orderBy = new OrderBy(Collections.singletonList(reference), reverseFlags, nullsFirst);
}
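The reversed comparator encodes the expectation that rows come back ordered by x descending, regardless of which writer received them. As a plain-Lucene sketch of that ordering (not the OrderedLuceneBatchIteratorFactory code path; searcher1 and columnName are taken from the setup above):

// Sketch (assumption): descending sort over the long doc-values column, equivalent to
// the reversed expectedResult ordering above.
Sort sort = new Sort(new SortField(columnName, SortField.Type.LONG, true)); // true = reverse, i.e. descending
TopFieldDocs hits = searcher1.search(new MatchAllDocsQuery(), 10, sort);
for (ScoreDoc sd : hits.scoreDocs) {
    // sort values arrive largest-first
    System.out.println((Long) ((FieldDoc) sd).fields[0]);
}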
Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project elasticsearch by elastic: class BitSetFilterCacheTests, method testListener.
public void testListener() throws IOException {
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
    Document document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();
    final DirectoryReader writerReader = DirectoryReader.open(writer);
    final IndexReader reader = ElasticsearchDirectoryReader.wrap(writerReader, new ShardId("test", "_na_", 0));
    final AtomicLong stats = new AtomicLong();
    final AtomicInteger onCacheCalls = new AtomicInteger();
    final AtomicInteger onRemoveCalls = new AtomicInteger();
    final BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, new BitsetFilterCache.Listener() {
        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
            onCacheCalls.incrementAndGet();
            stats.addAndGet(accountable.ramBytesUsed());
            if (writerReader != reader) {
                assertNotNull(shardId);
                assertEquals("test", shardId.getIndexName());
                assertEquals(0, shardId.id());
            } else {
                assertNull(shardId);
            }
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
            onRemoveCalls.incrementAndGet();
            stats.addAndGet(-accountable.ramBytesUsed());
            if (writerReader != reader) {
                assertNotNull(shardId);
                assertEquals("test", shardId.getIndexName());
                assertEquals(0, shardId.id());
            } else {
                assertNull(shardId);
            }
        }
    });
    BitSetProducer filter = cache.getBitSetProducer(new TermQuery(new Term("field", "value")));
    assertThat(matchCount(filter, reader), equalTo(1));
    assertTrue(stats.get() > 0);
    assertEquals(1, onCacheCalls.get());
    assertEquals(0, onRemoveCalls.get());
    IOUtils.close(reader, writer);
    assertEquals(1, onRemoveCalls.get());
    assertEquals(0, stats.get());
}
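matchCount is a helper from the test class and is not shown on this page. A plausible sketch of it (an assumption; the actual Elasticsearch helper may differ) counts the cached bits segment by segment through the BitSetProducer:

// Sketch (assumption): possible shape of a matchCount(BitSetProducer, IndexReader) helper;
// the real helper in BitSetFilterCacheTests may differ in detail.
static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException {
    int count = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
        BitSet bitSet = producer.getBitSet(ctx); // may be null if no document in this segment matches
        if (bitSet != null) {
            count += bitSet.cardinality();
        }
    }
    return count;
}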
Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project elasticsearch by elastic: class BitSetFilterCacheTests, method testInvalidateEntries.
public void testInvalidateEntries() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
    Document document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();
    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();
    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer);
    reader = ElasticsearchDirectoryReader.wrap(reader, new ShardId("test", "_na_", 0));
    IndexSearcher searcher = new IndexSearcher(reader);
    BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, new BitsetFilterCache.Listener() {
        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
        }
    });
    BitSetProducer filter = cache.getBitSetProducer(new TermQuery(new Term("field", "value")));
    assertThat(matchCount(filter, reader), equalTo(3));
    // now cached
    assertThat(matchCount(filter, reader), equalTo(3));
    // There are 3 segments
    assertThat(cache.getLoadedFilters().weight(), equalTo(3L));
    writer.forceMerge(1);
    reader.close();
    reader = DirectoryReader.open(writer);
    reader = ElasticsearchDirectoryReader.wrap(reader, new ShardId("test", "_na_", 0));
    searcher = new IndexSearcher(reader);
    assertThat(matchCount(filter, reader), equalTo(3));
    // now cached
    assertThat(matchCount(filter, reader), equalTo(3));
    // Only one segment now, so the size must be 1
    assertThat(cache.getLoadedFilters().weight(), equalTo(1L));
    reader.close();
    writer.close();
    // There is no reference from readers and writer to any segment in the test index, so the size in the fbs cache must be 0
    assertThat(cache.getLoadedFilters().weight(), equalTo(0L));
}
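The searcher created in the test is never consulted by the assertions; had it been used inside the test, the same match count could also be obtained with an ordinary query (an illustrative aside, not part of the Elasticsearch test):

// Illustrative aside (assumption, not in the original test): a plain term query over the
// searcher should agree with the bitset-based matchCount, i.e. 3 matching documents.
assert searcher.count(new TermQuery(new Term("field", "value"))) == 3;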