use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class PagedBytesIndexFieldData method loadDirect.
@Override
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
LeafReader reader = context.reader();
AtomicOrdinalsFieldData data = null;
PagedBytesEstimator estimator = new PagedBytesEstimator(context, breakerService.getBreaker(CircuitBreaker.FIELDDATA), getFieldName());
Terms terms = reader.terms(getFieldName());
if (terms == null) {
data = AbstractAtomicOrdinalsFieldData.empty();
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
final PagedBytes bytes = new PagedBytes(15);
final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
final float acceptableTransientOverheadRatio = OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO;
// Wrap the context in an estimator and use it to either estimate
// the entire set, or wrap the TermsEnum so it can be calculated
// per-term
TermsEnum termsEnum = estimator.beforeLoad(terms);
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
PostingsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
final long termOrd = builder.nextOrdinal();
assert termOrd == termOrdToBytesOffset.size();
termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
builder.addDoc(docId);
}
}
PagedBytes.Reader bytesReader = bytes.freeze(true);
final Ordinals ordinals = builder.build();
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
success = true;
return data;
} finally {
if (!success) {
// If something went wrong, unwind any current estimations we've made
estimator.afterLoad(termsEnum, 0);
} else {
// Call .afterLoad() to adjust the breaker now that we have an exact size
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}
}
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class TextFieldMapperTests method testPositionIncrementGap.
public void testPositionIncrementGap() throws IOException {
final int positionIncrementGap = randomIntBetween(1, 1000);
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties").startObject("field").field("type", "text").field("position_increment_gap", positionIncrementGap).endObject().endObject().endObject().endObject().string();
DocumentMapper mapper = indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject().array("field", new String[] { "a", "b" }).endObject().bytes());
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertEquals("a", fields[0].stringValue());
assertEquals("b", fields[1].stringValue());
IndexShard shard = indexService.getShard(0);
shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
shard.refresh("test");
try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
TermsEnum terms = leaf.terms("field").iterator();
assertTrue(terms.seekExact(new BytesRef("b")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(positionIncrementGap + 1, postings.nextPosition());
}
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class AllTermQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
if (needsScores == false) {
return new TermQuery(term).createWeight(searcher, needsScores);
}
final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
final TermStatistics termStats = searcher.termStatistics(term, termStates);
final Similarity similarity = searcher.getSimilarity(needsScores);
final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
return new Weight(this) {
@Override
public float getValueForNormalization() throws IOException {
return stats.getValueForNormalization();
}
@Override
public void normalize(float norm, float topLevelBoost) {
stats.normalize(norm, topLevelBoost);
}
@Override
public void extractTerms(Set<Term> terms) {
terms.add(term);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
AllTermScorer scorer = scorer(context);
if (scorer != null) {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float score = scorer.score();
float freq = scorer.freq();
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
return Explanation.match(score, "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:", termScoreExplanation, payloadBoostExplanation);
}
}
return Explanation.noMatch("no matching term");
}
@Override
public AllTermScorer scorer(LeafReaderContext context) throws IOException {
final Terms terms = context.reader().terms(term.field());
if (terms == null) {
return null;
}
final TermsEnum termsEnum = terms.iterator();
if (termsEnum == null) {
return null;
}
final TermState state = termStates.get(context.ord);
if (state == null) {
// Term does not exist in this segment
return null;
}
termsEnum.seekExact(term.bytes(), state);
PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assert docs != null;
return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
}
};
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class FilterableTermsEnum method seekExact.
@Override
public boolean seekExact(BytesRef text) throws IOException {
int docFreq = 0;
long totalTermFreq = 0;
for (Holder anEnum : enums) {
if (anEnum.termsEnum.seekExact(text)) {
if (anEnum.bits == null) {
docFreq += anEnum.termsEnum.docFreq();
if (docsEnumFlag == PostingsEnum.FREQS) {
long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
totalTermFreq = -1;
continue;
}
totalTermFreq += leafTotalTermFreq;
}
} else {
final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag);
// 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
if (docsEnumFlag == PostingsEnum.FREQS) {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
continue;
}
docFreq++;
// docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
// is really 1 or unrecorded when filtering like this
totalTermFreq += docsEnum.freq();
}
} else {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
continue;
}
// docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
docFreq++;
}
}
}
}
}
if (docFreq > 0) {
currentDocFreq = docFreq;
currentTotalTermFreq = totalTermFreq;
current = text;
return true;
} else {
currentDocFreq = NOT_FOUND;
currentTotalTermFreq = NOT_FOUND;
current = null;
return false;
}
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class Versions method loadSeqNo.
/**
* Returns the sequence number for the given uid term, returning
* {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
*/
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
List<LeafReaderContext> leaves = reader.leaves();
if (leaves.isEmpty()) {
return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
// which are likely to be in the last segments
for (int i = leaves.size() - 1; i >= 0; i--) {
LeafReader leaf = leaves.get(i).reader();
Bits liveDocs = leaf.getLiveDocs();
TermsEnum termsEnum = null;
SortedNumericDocValues dvField = null;
PostingsEnum docsEnum = null;
final Fields fields = leaf.fields();
if (fields != null) {
Terms terms = fields.terms(UidFieldMapper.NAME);
if (terms != null) {
termsEnum = terms.iterator();
assert termsEnum != null;
dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
assert dvField != null;
final BytesRef id = term.bytes();
if (termsEnum.seekExact(id)) {
// there may be more than one matching docID, in the
// case of nested docs, so we want the last one:
docsEnum = termsEnum.postings(docsEnum, 0);
int docID = DocIdSetIterator.NO_MORE_DOCS;
for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
if (liveDocs != null && liveDocs.get(d) == false) {
continue;
}
docID = d;
}
if (docID != DocIdSetIterator.NO_MORE_DOCS) {
dvField.setDocument(docID);
assert dvField.count() == 1 : "expected only a single value for _seq_no but got " + dvField.count();
return dvField.valueAt(0);
}
}
}
}
}
return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
Aggregations