Use of org.apache.lucene.index.TermsEnum in project elasticsearch by elastic.
From the class SimpleLuceneTests, method testNRTSearchOnClosedWriter:
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    // near-real-time reader opened directly on the writer
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);
    indexWriter.close();
    // the refreshed NRT reader remains usable after the writer is closed
    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
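The refreshReader helper is defined elsewhere in the test class and is not part of the snippet. A minimal sketch of such an NRT refresh helper, assuming the usual DirectoryReader.openIfChanged contract (an illustration, not the verbatim Elasticsearch source):

private DirectoryReader refreshReader(DirectoryReader reader) throws IOException {
    DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
    if (newReader != null) { // null means the index is unchanged
        reader.close();      // release the stale reader
        return newReader;
    }
    return reader;
}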
Use of org.apache.lucene.index.TermsEnum in project elasticsearch by elastic.
From the class TextFieldMapperTests, method testDefaultPositionIncrementGap:
public void testDefaultPositionIncrementGap() throws IOException {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
                .field("type", "text")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper mapper = indexService.mapperService().merge("type",
            new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);
    assertEquals(mapping, mapper.mappingSource().toString());
    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .array("field", new String[] { "a", "b" })
            .endObject().bytes());
    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);
    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());
    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        // "b" starts the second value, so its position is one past the gap
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition());
    }
}
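The final assertion holds because "a" occupies position 0 of the first value, and the second value begins only after the default position increment gap, putting "b" at gap + 1. The same postings API generalizes to walking every position of a term; a minimal sketch, assuming a TermsEnum already positioned on a term (DocIdSetIterator comes from org.apache.lucene.search):

PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    int freq = postings.freq();          // occurrences of the term in this doc
    for (int j = 0; j < freq; j++) {
        System.out.println("position " + postings.nextPosition());
    }
}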
Use of org.apache.lucene.index.TermsEnum in project elasticsearch by elastic.
From the class TermVectorsResponse, method buildField:
private void buildField(XContentBuilder builder, final CharsRefBuilder spare,
        Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    // buildTerm advances the shared enum once per call
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
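The buildTerm call is not shown here; the loop relies on it consuming exactly one term from the shared TermsEnum per iteration. A plausible outline of such a method (hypothetical, not the verbatim Elasticsearch source; the JSON field names are illustrative):

private void buildTerm(XContentBuilder builder, CharsRefBuilder spare, Terms curTerms,
        TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    BytesRef term = termIter.next();   // advance the shared enum
    spare.copyUTF8Bytes(term);         // decode the UTF-8 bytes into a reusable char buffer
    builder.startObject(spare.toString());
    builder.field("doc_freq", termIter.docFreq());
    builder.field("term_freq", termIter.totalTermFreq());
    builder.endObject();
}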
Use of org.apache.lucene.index.TermsEnum in project elasticsearch by elastic.
From the class MoreLikeThisQuery, method handleUnlike:
private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
    Set<Term> skipTerms = new HashSet<>();
    // handle unlike text
    if (unlikeText != null) {
        for (String text : unlikeText) {
            // only use the first field to be consistent
            String fieldName = moreLikeFields[0];
            try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    skipTerms.add(new Term(fieldName, termAtt.toString()));
                }
                ts.end();
            }
        }
    }
    // handle unlike fields
    if (unlikeFields != null) {
        for (Fields fields : unlikeFields) {
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                final TermsEnum termsEnum = terms.iterator();
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    skipTerms.add(new Term(fieldName, text.utf8ToString()));
                }
            }
        }
    }
    if (!skipTerms.isEmpty()) {
        mlt.setSkipTerms(skipTerms);
    }
}
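One plausible source of the unlikeFields array is per-document term vectors, which implement Fields directly. A sketch, assuming term vectors were enabled at index time (unlikeDocIds is a hypothetical array of Lucene doc ids):

Fields[] unlikeFields = new Fields[unlikeDocIds.length];
for (int i = 0; i < unlikeDocIds.length; i++) {
    // IndexReader.getTermVectors returns a Fields view over one document
    unlikeFields[i] = reader.getTermVectors(unlikeDocIds[i]);
}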
Use of org.apache.lucene.index.TermsEnum in project elasticsearch by elastic.
From the class MultiPhrasePrefixQuery, method getPrefixTerms:
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }
        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }
        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                // past the prefix range in this segment's sorted term dictionary
                break;
            }
            // deepCopyOf because the enum reuses its BytesRef across next() calls
            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
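For context, a sketch of how this expansion is typically driven from the caller's side, matching a phrase whose last word is a prefix (field name and terms are illustrative, and the no-argument constructor assumes the Elasticsearch 5.x-era version of this class):

MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
query.setMaxExpansions(50);           // caps the terms collected by getPrefixTerms
query.add(new Term("body", "quick"));
query.add(new Term("body", "brown"));
query.add(new Term("body", "f"));     // the last position is expanded as a prefix
TopDocs hits = searcher.search(query, 10);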