Use of org.apache.lucene.search.spell.HighFrequencyDictionary in the project elasticsearch-suggest-plugin by spinscale.
Class ShardSuggestService, method refresh.
/**
 * Handles a shard-level suggest refresh request.
 *
 * <p>If the request names no field, {@code update()} is invoked. Otherwise the index reader is
 * reset and every per-field cache entry that currently exists for that field is refreshed
 * (or, for the RAM directory, closed and invalidated).
 *
 * @param shardSuggestRefreshRequest the request; its {@code field()} selects what is refreshed
 * @return a response built from this shard's index name and shard id
 */
public ShardSuggestRefreshResponse refresh(ShardSuggestRefreshRequest shardSuggestRefreshRequest) {
    String field = shardSuggestRefreshRequest.field();
    if (Strings.hasLength(field)) {
        resetIndexReader();

        // Only entries already present in a cache are refreshed; absent ones are left alone.
        if (dictCache.getIfPresent(field) != null) {
            dictCache.refresh(field);
        }

        RAMDirectory cachedDirectory = ramDirectoryCache.getIfPresent(field);
        if (cachedDirectory != null) {
            cachedDirectory.close();
            ramDirectoryCache.invalidate(field);
        }

        SpellChecker cachedSpellChecker = spellCheckerCache.getIfPresent(field);
        if (cachedSpellChecker != null) {
            // The cache refresh is triggered first, then the previously cached instance is closed.
            spellCheckerCache.refresh(field);
            try {
                cachedSpellChecker.close();
            } catch (IOException e) {
                logger.error("Could not close spellchecker in indexshard [{}] for field [{}]", e, indexShard, field);
            }
        }

        if (lookupCache.getIfPresent(field) != null) {
            lookupCache.refresh(field);
        }

        // The suggester caches are keyed by FieldType, so match on the field name of each key.
        for (FieldType cachedKey : analyzingSuggesterCache.asMap().keySet()) {
            if (cachedKey.field().equals(shardSuggestRefreshRequest.field())) {
                analyzingSuggesterCache.refresh(cachedKey);
            }
        }
        for (FieldType cachedKey : fuzzySuggesterCache.asMap().keySet()) {
            if (cachedKey.field().equals(shardSuggestRefreshRequest.field())) {
                fuzzySuggesterCache.refresh(cachedKey);
            }
        }
    } else {
        update();
    }
    return new ShardSuggestRefreshResponse(shardId.index().name(), shardId.id());
}
Use of org.apache.lucene.search.spell.HighFrequencyDictionary in the project lucene-solr by apache.
Class TestHighFrequencyDictionary, method testEmpty.
/**
 * Checks that a {@link HighFrequencyDictionary} over an index with no documents, asked for the
 * field "bogus", produces an entry iterator whose first {@code next()} is {@code null}.
 *
 * @throws Exception if the index cannot be created, opened or closed
 */
public void testEmpty() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    // Commit without adding documents so the directory holds a valid, empty index.
    writer.commit();
    writer.close();
    // The reader is closed before the directory so no reader outlives the directory it reads from.
    try (IndexReader ir = DirectoryReader.open(dir)) {
        Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
        BytesRefIterator tf = dictionary.getEntryIterator();
        assertNull(tf.next());
    }
    dir.close();
}
Use of org.apache.lucene.search.spell.HighFrequencyDictionary in the project lucene-solr by apache.
Class IndexBasedSpellChecker, method build.
/**
 * Rebuilds the spell-check index from the terms of {@code field}.
 *
 * <p>A {@link HighFrequencyDictionary} is created over the chosen reader using {@code threshold},
 * the existing spell-check index is cleared, and the dictionary is indexed into it.
 *
 * @param core the Solr core (not used by the active code in this method)
 * @param searcher supplies the index reader when no {@code sourceLocation} is configured
 * @throws IOException if clearing or indexing the spell-check index fails
 */
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    // Without a sourceLocation the terms come from Solr's own index; otherwise they come
    // from the reader held in this.reader for the Lucene index at sourceLocation.
    final IndexReader termSource = (sourceLocation == null) ? searcher.getIndexReader() : this.reader;

    dictionary = new HighFrequencyDictionary(termSource, field, threshold);

    // TODO: maybe whether or not to clear the index should be configurable?
    // An incremental update is faster (it just adds new terms), but terms that were
    // 'expunged' might hang around.
    spellChecker.clearIndex();

    // TODO: you should be able to specify the IWC params?
    // TODO: if we enable this, codec gets angry since field won't exist in the schema
    // config.setCodec(core.getCodec());
    spellChecker.indexDictionary(dictionary, new IndexWriterConfig(null), false);
}
Use of org.apache.lucene.search.spell.HighFrequencyDictionary in the project lucene-solr by apache.
Class Suggester, method build.
/**
 * Builds the suggester lookup and, when a store directory is configured, writes it to disk.
 *
 * <p>The dictionary is a {@link HighFrequencyDictionary} over the searcher's index reader when
 * no {@code sourceLocation} is set, otherwise a {@link FileDictionary} read as UTF-8 from the
 * resource at {@code sourceLocation}.
 *
 * @param core used to open the resource at {@code sourceLocation}
 * @param searcher supplies the index reader when no {@code sourceLocation} is set
 * @throws IOException if building or storing the lookup fails
 */
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    LOG.info("build()");

    if (sourceLocation != null) {
        try {
            dictionary = new FileDictionary(
                new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8));
        } catch (UnsupportedEncodingException e) {
            // should not happen
            LOG.error("should not happen", e);
        }
    } else {
        reader = searcher.getIndexReader();
        dictionary = new HighFrequencyDictionary(reader, field, threshold);
    }

    lookup.build(dictionary);

    // Nothing to persist when no store directory is configured.
    if (storeDir == null) {
        return;
    }

    File target = new File(storeDir, factory.storeFileName());
    if (lookup.store(new FileOutputStream(target))) {
        LOG.info("Stored suggest data to: " + target.getAbsolutePath());
        return;
    }

    // Storing failed: report which source the lookup was built from.
    if (sourceLocation != null) {
        LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed");
    } else {
        assert reader != null && field != null;
        LOG.error("Store Lookup build from index on field: " + field + " failed reader has: " + reader.maxDoc() + " docs");
    }
}
Use of org.apache.lucene.search.spell.HighFrequencyDictionary in the project lucene-solr by apache.
Class FileBasedSpellChecker, method loadExternalFileDictionary.
/**
 * Loads the external word list at {@code sourceLocation} into {@code dictionary}.
 *
 * <p>When {@code fieldTypeName} resolves to a field type in the schema, each line of the file is
 * indexed into an in-memory index using that type's index analyzer, and the dictionary is a
 * {@link HighFrequencyDictionary} over that index with a threshold of {@code 0.0f}. Otherwise the
 * file is wrapped in a {@link PlainTextDictionary}, using {@code characterEncoding} if it is set.
 *
 * <p>An {@link IOException} is logged rather than propagated, so {@code dictionary} keeps its
 * previous value on failure.
 *
 * @param core supplies the resource loader and, when {@code searcher} is null, the schema
 * @param searcher supplies the schema when non-null; may be null
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
    try {
        IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
        // Get the field's analyzer
        if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
            FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
            // Do index-time analysis using the given fieldType's analyzer
            RAMDirectory ramDir = new RAMDirectory();
            LogMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setMergeFactor(300);
            IndexWriterConfig writerConfig = new IndexWriterConfig(fieldType.getIndexAnalyzer())
                .setMaxBufferedDocs(150)
                .setMergePolicy(mp)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // try-with-resources closes the writer even if reading or indexing a line fails;
            // on the success path it closes right after forceMerge, as before.
            try (IndexWriter writer = new IndexWriter(ramDir, writerConfig)) {
                List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);
                for (String s : lines) {
                    Document d = new Document();
                    d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
                    writer.addDocument(d);
                }
                writer.forceMerge(1);
            }
            dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
        } else {
            // check if character encoding is defined
            if (characterEncoding == null) {
                dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
            } else {
                dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
            }
        }
    } catch (IOException e) {
        log.error("Unable to load spellings", e);
    }
}
Aggregations