Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
The class FieldCacheImpl, method getTerms.
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
  if (valuesIn == null) {
    valuesIn = reader.getSortedDocValues(field);
  }
  if (valuesIn != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader):
    return valuesIn;
  }
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptyBinary();
  } else if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }
  BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return impl.iterator();
}
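For context, here is a minimal sketch (not from the lucene-solr sources) of how a caller might consume the BinaryDocValues that getTerms returns, using the Lucene 7 iterator-style doc values API; the reader and field name are placeholders:

import java.io.IOException;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class BinaryDocValuesExample {
  // Hypothetical helper: print the binary value of every document that has one.
  static void dumpBinaryValues(LeafReader reader, String field) throws IOException {
    BinaryDocValues values = reader.getBinaryDocValues(field); // null if the field has no binary doc values
    if (values == null) {
      return;
    }
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      BytesRef value = values.binaryValue();
      System.out.println("doc=" + doc + " value=" + value.utf8ToString());
    }
  }
}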
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
The class FieldCacheImpl, method getDocTermOrds.
// TODO: this if DocTermsIndex was already created, we
// should share it...
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
  // not a general purpose filtering mechanism...
  assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;
  SortedSetDocValues dv = reader.getSortedSetDocValues(field);
  if (dv != null) {
    return dv;
  }
  SortedDocValues sdv = reader.getSortedDocValues(field);
  if (sdv != null) {
    return DocValues.singleton(sdv);
  }
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptySortedSet();
  } else if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySortedSet();
  }
  // ok we need to uninvert. check if we can optimize a bit.
  Terms terms = reader.terms(field);
  if (terms == null) {
    return DocValues.emptySortedSet();
  } else {
    // if #postings = #docswithfield we know that the field is "single valued enough".
    // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
    // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
    long numPostings = terms.getSumDocFreq();
    if (numPostings != -1 && numPostings == terms.getDocCount()) {
      return DocValues.singleton(getTermsIndex(reader, field));
    }
  }
  DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix));
  return dto.iterator(reader);
}
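As an illustration only (a sketch, not part of lucene-solr): once a SortedSetDocValues is obtained, whether natively or through uninversion as above, it is typically walked per document with nextOrd() and each ordinal resolved back to its term with lookupOrd():

import java.io.IOException;

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class SortedSetDocValuesExample {
  // Hypothetical helper: print every (doc, ord, term) triple for a sorted-set field.
  static void dumpTermOrds(SortedSetDocValues dv) throws IOException {
    for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
      for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
        BytesRef term = dv.lookupOrd(ord); // resolve the ordinal back to its term bytes
        System.out.println("doc=" + doc + " ord=" + ord + " term=" + term.utf8ToString());
      }
    }
  }
}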
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
The class FieldCacheImpl, method getTermsIndex.
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  SortedDocValues valuesIn = reader.getSortedDocValues(field);
  if (valuesIn != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader):
    return valuesIn;
  } else {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
      return DocValues.emptySorted();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
      // we don't try to build a sorted instance from numeric/binary doc
      // values because dedup can be very costly
      throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
      return DocValues.emptySorted();
    }
    SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
    return impl.iterator();
  }
}
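For comparison, a caller holding the single-valued SortedDocValues returned by getTermsIndex might iterate it as below (a minimal sketch against the public Lucene 7 API; the names are placeholders):

import java.io.IOException;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class SortedDocValuesExample {
  // Hypothetical helper: print each document's single ordinal and its term.
  static void dumpSortedValues(SortedDocValues dv) throws IOException {
    System.out.println("distinct terms in segment: " + dv.getValueCount());
    for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
      int ord = dv.ordValue();           // one ordinal per document for SORTED doc values
      BytesRef term = dv.lookupOrd(ord); // ordinal -> term bytes
      System.out.println("doc=" + doc + " ord=" + ord + " term=" + term.utf8ToString());
    }
  }
}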
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
The class Lucene70DocValuesConsumer, method addSortedSetField.
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  meta.writeInt(field.number);
  meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);

  SortedSetDocValues values = valuesProducer.getSortedSet(field);
  int numDocsWithField = 0;
  long numOrds = 0;
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    numDocsWithField++;
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      numOrds++;
    }
  }

  if (numDocsWithField == numOrds) {
    meta.writeByte((byte) 0);
    doAddSortedField(field, new EmptyDocValuesProducer() {
      @Override
      public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
      }
    });
    return;
  }
  meta.writeByte((byte) 1);

  assert numDocsWithField != 0;
  if (numDocsWithField == maxDoc) {
    meta.writeLong(-1);
    meta.writeLong(0L);
  } else {
    long offset = data.getFilePointer();
    meta.writeLong(offset);
    values = valuesProducer.getSortedSet(field);
    IndexedDISI.writeBitSet(values, data);
    meta.writeLong(data.getFilePointer() - offset);
  }

  int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
  meta.writeByte((byte) numberOfBitsPerOrd);
  long start = data.getFilePointer();
  meta.writeLong(start);
  DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
  values = valuesProducer.getSortedSet(field);
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      writer.add(ord);
    }
  }
  writer.finish();
  meta.writeLong(data.getFilePointer() - start);

  meta.writeInt(numDocsWithField);
  start = data.getFilePointer();
  meta.writeLong(start);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

  final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
  long addr = 0;
  addressesWriter.add(addr);
  values = valuesProducer.getSortedSet(field);
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    values.nextOrd();
    addr++;
    while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
      addr++;
    }
    addressesWriter.add(addr);
  }
  addressesWriter.finish();
  meta.writeLong(data.getFilePointer() - start);

  addTermsDict(values);
}
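To see how this code path is reached, the sketch below (an illustrative example under assumptions, not taken from the project) indexes a multi-valued SortedSetDocValuesField; when the segment is flushed, the active codec's DocValuesConsumer, here Lucene70DocValuesConsumer, receives the field through addSortedSetField. The field name "tags" is a placeholder:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class SortedSetIndexingExample {
  // Hypothetical indexing snippet exercising the sorted-set doc values writer at flush time.
  static void indexTags(Directory dir) throws IOException {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new SortedSetDocValuesField("tags", new BytesRef("lucene")));
      doc.add(new SortedSetDocValuesField("tags", new BytesRef("solr")));
      writer.addDocument(doc);
      writer.commit(); // flush: the codec's DocValuesConsumer writes the sorted-set field
    }
  }
}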
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
The class Lucene60PointsWriter, method merge.
@Override
public void merge(MergeState mergeState) throws IOException {
  /**
   * If indexSort is activated and some of the leaves are not sorted the next test will catch that and the non-optimized merge will run.
   * If the readers are all sorted then it's safe to perform a bulk merge of the points.
   **/
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader instanceof Lucene60PointsReader == false) {
      // We can only bulk merge when all to-be-merged segments use our format:
      super.merge(mergeState);
      return;
    }
  }
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader != null) {
      reader.checkIntegrity();
    }
  }

  for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
    if (fieldInfo.getPointDimensionCount() != 0) {
      if (fieldInfo.getPointDimensionCount() == 1) {
        boolean singleValuePerDoc = true;

        // Worst case total maximum size (if none of the points are deleted):
        long totMaxSize = 0;
        for (int i = 0; i < mergeState.pointsReaders.length; i++) {
          PointsReader reader = mergeState.pointsReaders[i];
          if (reader != null) {
            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
              PointValues values = reader.getValues(fieldInfo.name);
              if (values != null) {
                totMaxSize += values.size();
                singleValuePerDoc &= values.size() == values.getDocCount();
              }
            }
          }
        }

        // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
        // already sorted incoming segments, instead of trying to sort all points again as if
        // we were simply reindexing them:
        try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
          List<BKDReader> bkdReaders = new ArrayList<>();
          List<MergeState.DocMap> docMaps = new ArrayList<>();
          for (int i = 0; i < mergeState.pointsReaders.length; i++) {
            PointsReader reader = mergeState.pointsReaders[i];
            if (reader != null) {
              // we confirmed this up above
              assert reader instanceof Lucene60PointsReader;
              Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;

              // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
              // reader's FieldInfo as we do below) because field numbers can easily be different
              // when addIndexes(Directory...) copies over segments from another index:
              FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
              FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
              if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                if (bkdReader != null) {
                  bkdReaders.add(bkdReader);
                  docMaps.add(mergeState.docMaps[i]);
                }
              }
            }
          }

          long fp = writer.merge(dataOut, docMaps, bkdReaders);
          if (fp != -1) {
            indexFPs.put(fieldInfo.name, fp);
          }
        }
      } else {
        mergeOneField(mergeState, fieldInfo);
      }
    }
  }

  finish();
}
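As a rough illustration of when the optimized 1D branch applies (a sketch under assumptions, not code from the project): indexing a single-dimension point field across several segments and then forcing a merge drives a MergeState through this method, where getPointDimensionCount() == 1 selects the BKDWriter.merge path. The field name "price" and the segment-creation pattern are placeholders:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

public class PointsMergeExample {
  // Hypothetical setup: several segments with a one-dimensional point field, then a forced merge.
  static void indexAndMergePoints(Directory dir) throws IOException {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int i = 0; i < 1_000; i++) {
        Document doc = new Document();
        doc.add(new IntPoint("price", i)); // one dimension -> getPointDimensionCount() == 1
        writer.addDocument(doc);
        if (i % 250 == 0) {
          writer.commit(); // produce multiple segments so the merge has work to do
        }
      }
      writer.forceMerge(1); // the points writer's merge(MergeState) runs during this merge
    }
  }
}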