use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class AbstractTermVectorsTestCase method indexDocsWithLucene.
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
Map<String, Analyzer> mapping = new HashMap<>();
for (TestFieldSetting field : testDocs[0].fieldSettings) {
if (field.storedPayloads) {
mapping.put(field.name, new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenFilter filter = new LowerCaseFilter(tokenizer);
filter = new TypeAsPayloadTokenFilter(filter);
return new TokenStreamComponents(tokenizer, filter);
}
});
}
}
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), mapping);
Directory dir = new RAMDirectory();
IndexWriterConfig conf = new IndexWriterConfig(wrapper);
conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
for (TestDoc doc : testDocs) {
Document d = new Document();
d.add(new Field("id", doc.id, StringField.TYPE_STORED));
for (int i = 0; i < doc.fieldContent.length; i++) {
FieldType type = new FieldType(TextField.TYPE_STORED);
TestFieldSetting fieldSetting = doc.fieldSettings[i];
type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
type.setStoreTermVectors(true);
type.freeze();
d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
}
writer.updateDocument(new Term("id", doc.id), d);
writer.commit();
}
writer.close();
return DirectoryReader.open(dir);
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class TermVectorsUnitTests method writeEmptyTermVector.
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
conf.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPayloads(false);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Document d = new Document();
d.add(new Field("id", "abc", StringField.TYPE_STORED));
writer.updateDocument(new Term("id", "abc"), d);
writer.commit();
writer.close();
DirectoryReader dr = DirectoryReader.open(dir);
IndexSearcher s = new IndexSearcher(dr);
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
ScoreDoc[] scoreDocs = search.scoreDocs;
int doc = scoreDocs[0].doc;
Fields fields = dr.getTermVectors(doc);
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
outResponse.setFields(fields, null, flags, fields);
outResponse.setExists(true);
dr.close();
dir.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class TermVectorsUnitTests method writeStandardTermVector.
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
conf.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPayloads(false);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Document d = new Document();
d.add(new Field("id", "abc", StringField.TYPE_STORED));
d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
writer.updateDocument(new Term("id", "abc"), d);
writer.commit();
writer.close();
DirectoryReader dr = DirectoryReader.open(dir);
IndexSearcher s = new IndexSearcher(dr);
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
ScoreDoc[] scoreDocs = search.scoreDocs;
int doc = scoreDocs[0].doc;
Fields termVectors = dr.getTermVectors(doc);
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
outResponse.setFields(termVectors, null, flags, termVectors);
dr.close();
dir.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class StoreRecovery method addIndices.
void addIndices(RecoveryState.Index indexRecoveryStats, Directory target, Directory... sources) throws IOException {
target = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(target, indexRecoveryStats), new IndexWriterConfig(null).setCommitOnClose(false).setMergePolicy(NoMergePolicy.INSTANCE).setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
writer.addIndexes(sources);
writer.commit();
}
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class CustomUnifiedHighlighterTests method assertHighlightOneDoc.
private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newTieredMergePolicy(random()));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
ft.freeze();
Document doc = new Document();
for (String input : inputs) {
Field field = new Field(fieldName, "", ft);
field.setStringValue(input);
doc.add(field);
}
iw.addDocument(doc);
DirectoryReader reader = iw.getReader();
IndexSearcher searcher = newSearcher(reader);
iw.close();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), locale, breakIterator, rawValue, noMatchSize);
highlighter.setFieldMatcher((name) -> "text".equals(name));
final Snippet[] snippets = highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length);
assertEquals(snippets.length, expectedPassages.length);
for (int i = 0; i < snippets.length; i++) {
assertEquals(snippets[i].getText(), expectedPassages[i]);
}
reader.close();
dir.close();
}
Aggregations