Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From the class FieldCacheImpl, method getDocTermOrds:
// TODO: if DocTermsIndex was already created, we
// should share it...
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
  // not a general purpose filtering mechanism...
  assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

  SortedSetDocValues dv = reader.getSortedSetDocValues(field);
  if (dv != null) {
    return dv;
  }

  SortedDocValues sdv = reader.getSortedDocValues(field);
  if (sdv != null) {
    return DocValues.singleton(sdv);
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptySortedSet();
  } else if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySortedSet();
  }

  // ok we need to uninvert. check if we can optimize a bit.
  Terms terms = reader.terms(field);
  if (terms == null) {
    return DocValues.emptySortedSet();
  } else {
    // if #postings == #docsWithField we know that the field is "single valued enough".
    // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
    // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
    long numPostings = terms.getSumDocFreq();
    if (numPostings != -1 && numPostings == terms.getDocCount()) {
      return DocValues.singleton(getTermsIndex(reader, field));
    }
  }

  DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix));
  return dto.iterator(reader);
}
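For context, a minimal sketch (not part of the lucene-solr source above) of how a caller might consume the SortedSetDocValues returned by getDocTermOrds, using the doc-values iterator API of this Lucene version; the class name DocTermOrdsUsage and the field handling are illustrative assumptions:

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class DocTermOrdsUsage {
  // Iterate every document that has a value and print each of its terms.
  static void dumpOrds(LeafReader reader, String field) throws IOException {
    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
    if (dv == null) {
      return; // field has no SORTED_SET doc values in this segment
    }
    for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
      for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
        BytesRef term = dv.lookupOrd(ord); // resolve the ordinal back to its term bytes
        System.out.println("doc=" + doc + " term=" + term.utf8ToString());
      }
    }
  }
}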
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From the class Lucene70DocValuesConsumer, method addSortedSetField:
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  meta.writeInt(field.number);
  meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);

  // First pass: count documents with a value and the total number of ords,
  // to detect fields that are effectively single-valued.
  SortedSetDocValues values = valuesProducer.getSortedSet(field);
  int numDocsWithField = 0;
  long numOrds = 0;
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    numDocsWithField++;
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      numOrds++;
    }
  }

  if (numDocsWithField == numOrds) {
    // Effectively single-valued: encode as SORTED, selecting the minimum ord per document.
    meta.writeByte((byte) 0);
    doAddSortedField(field, new EmptyDocValuesProducer() {
      @Override
      public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
      }
    });
    return;
  }
  meta.writeByte((byte) 1);

  assert numDocsWithField != 0;
  if (numDocsWithField == maxDoc) {
    // Dense case: every document has a value, so no docs-with-field bitset is written.
    meta.writeLong(-1);
    meta.writeLong(0L);
  } else {
    long offset = data.getFilePointer();
    meta.writeLong(offset);
    values = valuesProducer.getSortedSet(field);
    IndexedDISI.writeBitSet(values, data);
    meta.writeLong(data.getFilePointer() - offset);
  }

  // Second pass: write all ords with a fixed number of bits per ord.
  int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
  meta.writeByte((byte) numberOfBitsPerOrd);
  long start = data.getFilePointer();
  meta.writeLong(start);
  DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
  values = valuesProducer.getSortedSet(field);
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      writer.add(ord);
    }
  }
  writer.finish();
  meta.writeLong(data.getFilePointer() - start);

  // Third pass: write per-document addresses into the ords stream.
  meta.writeInt(numDocsWithField);
  start = data.getFilePointer();
  meta.writeLong(start);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
  final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
  long addr = 0;
  addressesWriter.add(addr);
  values = valuesProducer.getSortedSet(field);
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    values.nextOrd();
    addr++;
    while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
      addr++;
    }
    addressesWriter.add(addr);
  }
  addressesWriter.finish();
  meta.writeLong(data.getFilePointer() - start);

  addTermsDict(values);
}
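For orientation, a small hedged sketch of the indexing side that feeds this consumer: documents carrying SortedSetDocValuesField values are flushed through the codec's doc-values consumer (Lucene70DocValuesConsumer when the Lucene70 codec is the default). The class name, the field name "tags", and the in-memory RAMDirectory are assumptions made for the example:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class SortedSetIndexingSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // in-memory directory, for the sketch only
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // A multi-valued SORTED_SET doc-values field; "tags" is a hypothetical field name.
      doc.add(new SortedSetDocValuesField("tags", new BytesRef("lucene")));
      doc.add(new SortedSetDocValuesField("tags", new BytesRef("search")));
      writer.addDocument(doc);
      writer.commit(); // flushing the segment invokes the codec's doc-values consumer
    }
  }
}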
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From the class SortedSetDocValuesRangeQuery, method createWeight:
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      SortedSetDocValues values = getValues(context.reader(), field);
      if (values == null) {
        return null;
      }

      final long minOrd;
      if (lowerValue == null) {
        minOrd = 0;
      } else {
        final long ord = values.lookupTerm(lowerValue);
        if (ord < 0) {
          minOrd = -1 - ord;
        } else if (lowerInclusive) {
          minOrd = ord;
        } else {
          minOrd = ord + 1;
        }
      }

      final long maxOrd;
      if (upperValue == null) {
        maxOrd = values.getValueCount() - 1;
      } else {
        final long ord = values.lookupTerm(upperValue);
        if (ord < 0) {
          maxOrd = -2 - ord;
        } else if (upperInclusive) {
          maxOrd = ord;
        } else {
          maxOrd = ord - 1;
        }
      }

      if (minOrd > maxOrd) {
        return null;
      }

      final SortedDocValues singleton = DocValues.unwrapSingleton(values);
      final TwoPhaseIterator iterator;
      if (singleton != null) {
        iterator = new TwoPhaseIterator(singleton) {
          @Override
          public boolean matches() throws IOException {
            final long ord = singleton.ordValue();
            return ord >= minOrd && ord <= maxOrd;
          }

          @Override
          public float matchCost() {
            // 2 comparisons
            return 2;
          }
        };
      } else {
        iterator = new TwoPhaseIterator(values) {
          @Override
          public boolean matches() throws IOException {
            for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
              if (ord < minOrd) {
                continue;
              }
              // Values are sorted, so the first ord that is >= minOrd is our best candidate
              return ord <= maxOrd;
            }
            // all ords were < minOrd
            return false;
          }

          @Override
          public float matchCost() {
            // 2 comparisons
            return 2;
          }
        };
      }
      return new ConstantScoreScorer(this, score(), iterator);
    }
  };
}
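The query class above is package-private; the following is a hedged usage sketch, assuming the newSlowRangeQuery factory method on SortedSetDocValuesField is the public entry point for it in this Lucene version. The field name "tags", the bounds, and the class name are hypothetical:

import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class SortedSetRangeQuerySketch {
  static TopDocs search(Directory dir) throws Exception {
    // Range over the ord space of the "tags" field: lower bound inclusive, upper bound exclusive.
    Query q = SortedSetDocValuesField.newSlowRangeQuery(
        "tags", new BytesRef("a"), new BytesRef("m"), true, false);
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      return searcher.search(q, 10);
    }
  }
}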
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From the class DocValuesTermsQuery, method createWeight:
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
      final LongBitSet bits = new LongBitSet(values.getValueCount());
      boolean matchesAtLeastOneTerm = false;
      TermIterator iterator = termData.iterator();
      for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
        final long ord = values.lookupTerm(term);
        if (ord >= 0) {
          matchesAtLeastOneTerm = true;
          bits.set(ord);
        }
      }
      if (matchesAtLeastOneTerm == false) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), new TwoPhaseIterator(values) {
        @Override
        public boolean matches() throws IOException {
          for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            if (bits.get(ord)) {
              return true;
            }
          }
          return false;
        }

        @Override
        public float matchCost() {
          // lookup in a bitset
          return 3;
        }
      });
    }
  };
}
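A hedged sketch of how DocValuesTermsQuery is typically used as a filtering clause (its two-phase iterator above only verifies candidate documents). The field name, the terms, and reliance on the String-varargs constructor are assumptions for illustration:

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

public class DocValuesTermsQuerySketch {
  static Query buildFilter() {
    // Matches documents whose "tags" SORTED_SET doc values contain any of the given terms.
    Query filter = new DocValuesTermsQuery("tags", "lucene", "solr");
    return new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)
        .add(filter, BooleanClause.Occur.FILTER) // non-scoring clause, checked via the two-phase iterator
        .build();
  }
}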
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From the class TrieIntField, method getSingleValueSource:
@Override
protected ValueSource getSingleValueSource(SortedSetSelector.Type choice, SchemaField f) {
  return new SortedSetFieldSource(f.getName(), choice) {
    @Override
    public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
      // needed for nested anon class ref
      SortedSetFieldSource thisAsSortedSetFieldSource = this;
      SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);
      SortedDocValues view = SortedSetSelector.wrap(sortedSet, selector);
      return new IntDocValues(thisAsSortedSetFieldSource) {
        private int lastDocID;

        private boolean setDoc(int docID) throws IOException {
          if (docID < lastDocID) {
            throw new IllegalArgumentException("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
          }
          if (docID > view.docID()) {
            lastDocID = docID;
            return docID == view.advance(docID);
          } else {
            return docID == view.docID();
          }
        }

        @Override
        public int intVal(int doc) throws IOException {
          if (setDoc(doc)) {
            BytesRef bytes = view.binaryValue();
            assert bytes.length > 0;
            return LegacyNumericUtils.prefixCodedToInt(bytes);
          } else {
            return 0;
          }
        }

        @Override
        public boolean exists(int doc) throws IOException {
          return setDoc(doc);
        }

        @Override
        public ValueFiller getValueFiller() {
          return new ValueFiller() {
            private final MutableValueInt mval = new MutableValueInt();

            @Override
            public MutableValue getValue() {
              return mval;
            }

            @Override
            public void fillValue(int doc) throws IOException {
              if (setDoc(doc)) {
                mval.exists = true;
                mval.value = LegacyNumericUtils.prefixCodedToInt(view.binaryValue());
              } else {
                mval.exists = false;
                mval.value = 0;
              }
            }
          };
        }
      };
    }
  };
}
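To show how such a ValueSource is typically consumed, here is an illustrative sketch (not from the Solr source above) that walks each leaf and reads per-document int values; the ValueSource instance, the class name, and the access pattern are assumptions:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;

public class ValueSourceUsageSketch {
  static void dump(ValueSource vs, IndexReader reader) throws IOException {
    Map context = new HashMap(); // the ValueSource API passes shared state in a raw Map
    for (LeafReaderContext leaf : reader.leaves()) {
      FunctionValues values = vs.getValues(context, leaf);
      for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
        if (values.exists(doc)) { // docs must be visited in increasing order (see setDoc above)
          System.out.println("doc=" + (leaf.docBase + doc) + " value=" + values.intVal(doc));
        }
      }
    }
  }
}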