Example use of org.apache.lucene.analysis.core.KeywordAnalyzer in the elastic/elasticsearch project: class TopHitsAggregatorTests, method testTopLevel.
public void testTopLevel() throws Exception {
    // Two equivalent ways to obtain the same d > c > b > a ordering: an
    // explicit sort on the "string" field, or query-time boosting with no sort.
    Aggregation result;
    if (randomBoolean()) {
        result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
    } else {
        Query boosted = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
        result = testCase(boosted, topHits("_name"));
    }
    SearchHits hits = ((TopHits) result).getHits();
    assertEquals(3L, hits.getTotalHits());
    // Hits must come back in descending order of the indexed string value.
    String[] expectedIds = { "3", "2", "1" };
    for (int i = 0; i < expectedIds.length; i++) {
        assertEquals(expectedIds[i], hits.getAt(i).getId());
        assertEquals("type", hits.getAt(i).getType());
    }
}
Example use of org.apache.lucene.analysis.core.KeywordAnalyzer in the elastic/elasticsearch project: class FreqTermsEnumTests, method setUp.
// Builds a randomized index and three reference term-frequency maps used by
// the tests: all docs (referenceAll), live docs only (referenceNotDeleted),
// and a random subset of the live docs (referenceFilter, matched by the "id"
// filter built at the end).
// NOTE(review): the map contents depend on the exact sequence of random calls
// below, so the statement order must be preserved.
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    referenceAll = new HashMap<>();
    referenceNotDeleted = new HashMap<>();
    referenceFilter = new HashMap<>();
    Directory dir = newDirectory();
    // use keyword analyzer we rely on the stored field holding the exact term.
    IndexWriterConfig conf = newIndexWriterConfig(new KeywordAnalyzer());
    if (frequently()) {
        // we don't want to do any merges, so we won't expunge deletes
        conf.setMergePolicy(NoMergePolicy.INSTANCE);
    }
    iw = new IndexWriter(dir, conf);
    // Random vocabulary of 5-character terms; duplicates are possible.
    terms = new String[scaledRandomIntBetween(10, 300)];
    for (int i = 0; i < terms.length; i++) {
        terms[i] = randomAsciiOfLength(5);
    }
    int numberOfDocs = scaledRandomIntBetween(30, 300);
    Document[] docs = new Document[numberOfDocs];
    for (int i = 0; i < numberOfDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        docs[i] = doc;
        // Each term is put into a given doc with probability 1/2, repeated
        // 1-3 times. The field is stored so the frequencies can be recounted
        // later from the stored document (see addFreqs below).
        for (String term : terms) {
            if (randomBoolean()) {
                continue;
            }
            int freq = randomIntBetween(1, 3);
            for (int j = 0; j < freq; j++) {
                doc.add(new TextField("field", term, Field.Store.YES));
            }
        }
    }
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        iw.addDocument(doc);
        // Occasional commits spread the docs over several segments.
        if (rarely()) {
            iw.commit();
        }
    }
    // Delete roughly 1 in 6 docs by id, remembering which ids are gone so the
    // reference maps can distinguish live from deleted documents.
    Set<String> deletedIds = new HashSet<>();
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        if (randomInt(5) == 2) {
            Term idTerm = new Term("id", doc.getField("id").stringValue());
            deletedIds.add(idTerm.text());
            iw.deleteDocuments(idTerm);
        }
    }
    for (String term : terms) {
        referenceAll.put(term, new FreqHolder());
        referenceFilter.put(term, new FreqHolder());
        referenceNotDeleted.put(term, new FreqHolder());
    }
    // now go over each doc, build the relevant references and filter.
    // The loop runs over every doc id up to maxDoc; the deletedIds check is
    // what keeps deleted docs out of referenceNotDeleted/referenceFilter.
    reader = DirectoryReader.open(iw);
    List<BytesRef> filterTerms = new ArrayList<>();
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        Document doc = reader.document(docId);
        addFreqs(doc, referenceAll);
        if (!deletedIds.contains(doc.getField("id").stringValue())) {
            addFreqs(doc, referenceNotDeleted);
            // Half of the live docs (on average) also go into the filtered set.
            if (randomBoolean()) {
                filterTerms.add(new BytesRef(doc.getField("id").stringValue()));
                addFreqs(doc, referenceFilter);
            }
        }
    }
    filter = new TermInSetQuery("id", filterTerms);
}
Example use of org.apache.lucene.analysis.core.KeywordAnalyzer in the elastic/elasticsearch project: class IndexFieldDataServiceTests, method testFieldDataCacheListener.
public void testFieldDataCacheListener() throws Exception {
    final IndexService indexService = createIndex("test");
    final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
    // The listener can be set only once per service, so build a private copy
    // of the field-data service instead of touching the shared one.
    final IndexFieldDataService fieldDataService = new IndexFieldDataService(indexService.getIndexSettings(),
            indicesService.getIndicesFieldDataCache(), indicesService.getCircuitBreakerService(),
            indexService.mapperService());
    final BuilderContext builderContext =
            new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1));
    final MappedFieldType fieldType = new TextFieldMapper.Builder("s").fielddata(true).build(builderContext).fieldType();
    // Index a single doc so there is exactly one segment to load field data for.
    final IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
    Document document = new Document();
    document.add(new StringField("s", "thisisastring", Store.NO));
    writer.addDocument(document);
    DirectoryReader directoryReader = DirectoryReader.open(writer);
    // Randomly exercise both the wrapped (shard-aware) and plain reader paths;
    // the listener must see the shard id only in the wrapped case.
    final boolean wrapped = randomBoolean();
    final IndexReader reader = wrapped
            ? ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId("test", "_na_", 1))
            : directoryReader;
    final AtomicInteger cacheEvents = new AtomicInteger();
    final AtomicInteger removalEvents = new AtomicInteger();
    fieldDataService.setListener(new IndexFieldDataCache.Listener() {
        @Override
        public void onCache(ShardId shardId, String fieldName, Accountable ramUsage) {
            if (wrapped) {
                assertEquals(new ShardId("test", "_na_", 1), shardId);
            } else {
                assertNull(shardId);
            }
            cacheEvents.incrementAndGet();
        }
        @Override
        public void onRemoval(ShardId shardId, String fieldName, boolean wasEvicted, long sizeInBytes) {
            if (wrapped) {
                assertEquals(new ShardId("test", "_na_", 1), shardId);
            } else {
                assertNull(shardId);
            }
            removalEvents.incrementAndGet();
        }
    });
    IndexFieldData<?> fieldData = fieldDataService.getForField(fieldType);
    LeafReaderContext leafContext = reader.getContext().leaves().get(0);
    AtomicFieldData loaded = fieldData.load(leafContext);
    // Loading caches exactly once and removes nothing yet.
    assertEquals(1, cacheEvents.get());
    assertEquals(0, removalEvents.get());
    reader.close();
    loaded.close();
    writer.close();
    // Closing the reader evicts the cached entry: one removal, no new caching.
    assertEquals(1, cacheEvents.get());
    assertEquals(1, removalEvents.get());
    fieldDataService.clear();
}
Example use of org.apache.lucene.analysis.core.KeywordAnalyzer in the orientechnologies/orientdb project: class OLuceneTxChangesMultiRid, method isDeleted.
/**
 * Returns {@code true} when the given document matches one of the delete
 * queries recorded for {@code value}'s rid in this transaction.
 * <p>
 * The document is re-indexed once into an in-memory index and every recorded
 * delete query is replayed against it; the first matching query wins.
 * The {@code key} parameter is unused but kept for interface compatibility.
 */
public boolean isDeleted(Document document, Object key, OIdentifiable value) {
    List<String> deleteQueries = deleted.get(value.getIdentity().toString());
    if (deleteQueries == null) {
        return false;
    }
    // Build the memory index once — the document does not change between
    // queries, so there is no need to reset() and re-add the fields per query
    // (the original did, and also allocated a new analyzer per field).
    MemoryIndex memoryIndex = new MemoryIndex();
    KeywordAnalyzer analyzer = new KeywordAnalyzer();
    for (IndexableField field : document.getFields()) {
        // NOTE(review): stringValue() is assumed non-null here, as in the
        // original code — confirm all indexed fields are string-valued.
        memoryIndex.addField(field.name(), field.stringValue(), analyzer);
    }
    for (String deleteQuery : deleteQueries) {
        Query query = engine.deleteQuery(deleteQuery, value);
        if (memoryIndex.search(query) > 0.0f) {
            return true; // short-circuit: one match is enough
        }
    }
    return false;
}
Example use of org.apache.lucene.analysis.core.KeywordAnalyzer in the spinscale/elasticsearch-opennlp-plugin project: class OpenNlpMappingTest, method setupMapperParser.
// Wires up a DocumentMapperParser with a keyword analyzer and registers the
// OpenNLP type parser backed by the test NER models.
@Before
public void setupMapperParser() {
    Index testIndex = new Index("test");
    // Register only the keyword analyzer for this index.
    Map<String, AnalyzerProviderFactory> analyzerProviders = Maps.newHashMap();
    analyzerProviders.put("keyword",
            new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()));
    AnalysisService analysis = new AnalysisService(testIndex, ImmutableSettings.Builder.EMPTY_SETTINGS,
            null, analyzerProviders, null, null, null);
    mapperParser = new DocumentMapperParser(testIndex, analysis,
            new PostingsFormatService(testIndex),
            new SimilarityLookupService(testIndex, ImmutableSettings.Builder.EMPTY_SETTINGS));
    // Point the OpenNLP models at the bundled test resources before starting.
    Settings nlpSettings = settingsBuilder()
            .put("opennlp.models.name.file", "src/test/resources/models/en-ner-person.bin")
            .put("opennlp.models.date.file", "src/test/resources/models/en-ner-date.bin")
            .put("opennlp.models.location.file", "src/test/resources/models/en-ner-location.bin")
            .build();
    LogConfigurator.configure(nlpSettings);
    OpenNlpService nlpService = new OpenNlpService(nlpSettings);
    nlpService.start();
    mapperParser.putTypeParser(OpenNlpMapper.CONTENT_TYPE,
            new OpenNlpMapper.TypeParser(analysis, nlpService));
}
Aggregations