Use of org.apache.lucene.index.FieldInfo in project elasticsearch by elastic.
From class SimpleLuceneTests, method testOrdering.
/**
 * Here, we verify that the order in which fields are added to a document is what counts, not the
 * lexicographic order of the field names. This means that heavily accessed fields that use a field
 * selector should be added first, so the visitor can load them and break out early.
 */
public void testOrdering() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("#id", "1", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    final ArrayList<String> fieldsOrder = new ArrayList<>();
    searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {
        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            fieldsOrder.add(fieldInfo.name);
            return Status.YES;
        }
    });
    assertThat(fieldsOrder.size(), equalTo(2));
    assertThat(fieldsOrder.get(0), equalTo("_id"));
    assertThat(fieldsOrder.get(1), equalTo("#id"));
    indexWriter.close();
}
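The "load and break" pattern the comment alludes to relies on StoredFieldVisitor.Status.STOP: once the field of interest has been loaded, the visitor tells the reader to skip the remaining stored fields of the document. A minimal sketch of such an early-stopping visitor, assuming we only care about "_id" (this visitor is illustrative, not part of the original test):

    // Load "_id" (added first), then stop so later fields ("#id", ...) are never visited.
    searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {
        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if ("_id".equals(fieldInfo.name)) {
                return Status.YES;  // load this field's value
            }
            return Status.STOP;     // break out; remaining stored fields are skipped
        }
    });

Because stored fields are replayed in insertion order, adding "_id" first lets the visitor stop after a single field, which is exactly why the test asserts on insertion order rather than lexicographic order.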
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
From class RangeFieldQuery, method createWeight.
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        final RangeFieldComparator target = new RangeFieldComparator();

        private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
            DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
            values.intersect(new IntersectVisitor() {

                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                @Override
                public void visit(int docID) throws IOException {
                    adder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] leaf) throws IOException {
                    if (target.matches(leaf)) {
                        adder.add(docID);
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return compareRange(minPackedValue, maxPackedValue);
                }
            });
            return result.build();
        }

        private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
            byte[] node = getInternalRange(minPackedValue, maxPackedValue);
            // compute range relation for BKD traversal
            if (target.intersects(node) == false) {
                return Relation.CELL_OUTSIDE_QUERY;
            } else if (target.within(node)) {
                // target within cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            } else if (target.contains(node)) {
                // target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
                return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
            }
            // target intersects cell; continue traversing:
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // no docs in this segment indexed any ranges
                return null;
            }
            FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
            if (fieldInfo == null) {
                // no docs in this segment indexed this field
                return null;
            }
            checkFieldInfo(fieldInfo);
            boolean allDocsMatch = false;
            if (values.getDocCount() == reader.maxDoc() && compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
                allDocsMatch = true;
            }
            DocIdSetIterator iterator = allDocsMatch ? DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
            return new ConstantScoreScorer(this, score(), iterator);
        }

        /** get an encoded byte representation of the internal node; this is
         *  the lower half of the min array and the upper half of the max array */
        private byte[] getInternalRange(byte[] min, byte[] max) {
            byte[] range = new byte[min.length];
            final int dimSize = numDims * bytesPerDim;
            System.arraycopy(min, 0, range, 0, dimSize);
            System.arraycopy(max, dimSize, range, dimSize, dimSize);
            return range;
        }
    };
}
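The four QueryType branches evaluated by compareRange surface to users through the factory methods on the concrete range field classes. A hedged usage sketch with org.apache.lucene.document.DoubleRange; the field name "interval" and the bounds are made up for illustration, while the DoubleRange constructor and query factories are the library's real API:

    // Index a one-dimensional range [10.0, 20.0] under an illustrative field "interval".
    Document doc = new Document();
    doc.add(new DoubleRange("interval", new double[] { 10.0 }, new double[] { 20.0 }));

    // One factory method per QueryType handled in compareRange above.
    Query intersects = DoubleRange.newIntersectsQuery("interval", new double[] { 15.0 }, new double[] { 25.0 });
    Query within     = DoubleRange.newWithinQuery("interval", new double[] { 0.0 }, new double[] { 100.0 });
    Query contains   = DoubleRange.newContainsQuery("interval", new double[] { 12.0 }, new double[] { 18.0 });
    Query crosses    = DoubleRange.newCrossesQuery("interval", new double[] { 20.0 }, new double[] { 30.0 });

Each of these builds a RangeFieldQuery, whose Weight is the ConstantScoreWeight shown above.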
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
From class SimpleTextFieldInfosFormat, method write.
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
    IndexOutput out = directory.createOutput(fileName, context);
    BytesRefBuilder scratch = new BytesRefBuilder();
    boolean success = false;
    try {
        SimpleTextUtil.write(out, NUMFIELDS);
        SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
        SimpleTextUtil.writeNewline(out);
        for (FieldInfo fi : infos) {
            SimpleTextUtil.write(out, NAME);
            SimpleTextUtil.write(out, fi.name, scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, NUMBER);
            SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, INDEXOPTIONS);
            IndexOptions indexOptions = fi.getIndexOptions();
            assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
            SimpleTextUtil.write(out, indexOptions.toString(), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, STORETV);
            SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, PAYLOADS);
            SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, NORMS);
            SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DOCVALUES);
            SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DOCVALUES_GEN);
            SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
            SimpleTextUtil.writeNewline(out);
            Map<String, String> atts = fi.attributes();
            int numAtts = atts == null ? 0 : atts.size();
            SimpleTextUtil.write(out, NUM_ATTS);
            SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
            SimpleTextUtil.writeNewline(out);
            if (numAtts > 0) {
                for (Map.Entry<String, String> entry : atts.entrySet()) {
                    SimpleTextUtil.write(out, ATT_KEY);
                    SimpleTextUtil.write(out, entry.getKey(), scratch);
                    SimpleTextUtil.writeNewline(out);
                    SimpleTextUtil.write(out, ATT_VALUE);
                    SimpleTextUtil.write(out, entry.getValue(), scratch);
                    SimpleTextUtil.writeNewline(out);
                }
            }
            SimpleTextUtil.write(out, DIM_COUNT);
            SimpleTextUtil.write(out, Integer.toString(fi.getPointDimensionCount()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DIM_NUM_BYTES);
            SimpleTextUtil.write(out, Integer.toString(fi.getPointNumBytes()), scratch);
            SimpleTextUtil.writeNewline(out);
        }
        SimpleTextUtil.writeChecksum(out, scratch);
        success = true;
    } finally {
        if (success) {
            out.close();
        } else {
            IOUtils.closeWhileHandlingException(out);
        }
    }
}
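The success flag plus the split between close() and IOUtils.closeWhileHandlingException at the end is a recurring Lucene idiom: on the happy path, close normally so close-time failures propagate; on the error path, suppress close-time failures so they cannot mask the exception already in flight. A standalone sketch of the same idiom using only the APIs seen above (the method name writeFile and the placeholder body are illustrative):

    void writeFile(Directory dir, String fileName, IOContext context) throws IOException {
        IndexOutput out = dir.createOutput(fileName, context);
        boolean success = false;
        try {
            // ... write the file contents here (placeholder) ...
            success = true;
        } finally {
            if (success) {
                out.close();                               // let close-time failures propagate
            } else {
                IOUtils.closeWhileHandlingException(out);  // never mask the original exception
            }
        }
    }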
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
From class SimpleTextFieldInfosFormat, method read.
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
    ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
    BytesRefBuilder scratch = new BytesRefBuilder();
    boolean success = false;
    try {
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
        final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
        FieldInfo[] infos = new FieldInfo[size];
        for (int i = 0; i < size; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NAME);
            String name = readString(NAME.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUMBER);
            int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
            String s = readString(INDEXOPTIONS.length, scratch);
            final IndexOptions indexOptions = IndexOptions.valueOf(s);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), STORETV);
            boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), PAYLOADS);
            boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NORMS);
            boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES);
            String dvType = readString(DOCVALUES.length, scratch);
            final DocValuesType docValuesType = docValuesType(dvType);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
            final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
            int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
            Map<String, String> atts = new HashMap<>();
            for (int j = 0; j < numAtts; j++) {
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_KEY);
                String key = readString(ATT_KEY.length, scratch);
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
                String value = readString(ATT_VALUE.length, scratch);
                atts.put(key, value);
            }
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_COUNT);
            int dimensionalCount = Integer.parseInt(readString(DIM_COUNT.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_NUM_BYTES);
            int dimensionalNumBytes = Integer.parseInt(readString(DIM_NUM_BYTES.length, scratch));
            infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(atts), dimensionalCount, dimensionalNumBytes);
        }
        SimpleTextUtil.checkFooter(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    } finally {
        if (success) {
            input.close();
        } else {
            IOUtils.closeWhileHandlingException(input);
        }
    }
}
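readString is a private helper of the class that is not part of this snippet; from the call sites above it evidently decodes the remainder of the scratch line after a fixed-length prefix. A plausible sketch, assuming UTF-8 as elsewhere in the SimpleText codecs (the body is an assumption inferred from usage, not the verified source):

    // Assumed shape of the helper: decode scratch from `offset` to the end of the line as UTF-8.
    // Requires java.nio.charset.StandardCharsets.
    private String readString(int offset, BytesRefBuilder scratch) {
        return new String(scratch.bytes(), offset, scratch.length() - offset, StandardCharsets.UTF_8);
    }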
Use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
From class DocValuesConsumer, method mergeSortedSetField.
/**
 * Merges the sortedset docvalues from <code>toMerge</code>.
 * <p>
 * The default implementation calls {@link #addSortedSetField}, passing a producer
 * that merges ordinals and values and filters deleted documents.
 */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
    List<SortedSetDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedSetDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
            if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                values = docValuesProducer.getSortedSet(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySortedSet();
        }
        toMerge.add(values);
    }

    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
        SortedSetDocValues dv = toMerge.get(sub);
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }

    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);

    // step 3: add field
    addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {
        @Override
        public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
            if (fieldInfo != mergeFieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedSetDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedSetDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                        values = docValuesProducer.getSortedSet(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySortedSet();
                }
                cost += values.cost();
                subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            final long finalCost = cost;
            return new SortedSetDocValues() {

                private int docID = -1;
                private SortedSetDocValuesSub currentSub;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    currentSub = docIDMerger.next();
                    if (currentSub == null) {
                        docID = NO_MORE_DOCS;
                    } else {
                        docID = currentSub.mappedDocID;
                    }
                    return docID;
                }

                @Override
                public int advance(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long nextOrd() throws IOException {
                    long subOrd = currentSub.values.nextOrd();
                    if (subOrd == NO_MORE_ORDS) {
                        return NO_MORE_ORDS;
                    }
                    return currentSub.map.get(subOrd);
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public BytesRef lookupOrd(long ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    long segmentOrd = map.getFirstSegmentOrd(ord);
                    return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
                }

                @Override
                public long getValueCount() {
                    return map.getValueCount();
                }
            };
        }
    });
}
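The merged SortedSetDocValues returned above is a forward-only iterator: callers advance documents with nextDoc() and drain each document's ordinals with nextOrd(), resolving global ordinals to terms via lookupOrd. A minimal consumption sketch (drain is a hypothetical caller; dv can be any SortedSetDocValues of this era's API, such as the merged view built above):

    void drain(SortedSetDocValues dv) throws IOException {
        int doc;
        while ((doc = dv.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {  // next doc with a value
            long ord;
            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {  // all ords for this doc
                BytesRef term = dv.lookupOrd(ord);  // global ord -> term bytes
                // ... consume (doc, term) ...
            }
        }
    }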