
Example 1 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project elasticsearch by elastic.

the class SimpleLuceneTests method testOrdering.

/**
     * Here, we verify that what counts is the order in which fields are added to a document,
     * not the lexicographic order of the field names. This means that heavily accessed fields
     * that are read through a field selector should be added first, so the visitor can load
     * them and then stop early ("load and break").
     */
public void testOrdering() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("#id", "1", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    final ArrayList<String> fieldsOrder = new ArrayList<>();
    searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            fieldsOrder.add(fieldInfo.name);
            return Status.YES;
        }
    });
    assertThat(fieldsOrder.size(), equalTo(2));
    assertThat(fieldsOrder.get(0), equalTo("_id"));
    assertThat(fieldsOrder.get(1), equalTo("#id"));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) FieldInfo(org.apache.lucene.index.FieldInfo) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
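
A note on the "load and break" hint in the comment: StoredFieldVisitor.Status has a third value, STOP, which tells Lucene to stop reading the remaining stored fields of the document. Because stored fields come back in insertion order, putting the hot field first lets a selective visitor load it and bail out immediately. A minimal sketch of such a visitor (not part of the original test; it reuses the searcher and topDocs variables from above, and the "_id" field name is just the one this test happens to index):

StoredFieldVisitor idOnlyVisitor = new StoredFieldVisitor() {

    private boolean seenId = false;

    @Override
    public Status needsField(FieldInfo fieldInfo) throws IOException {
        if (seenId) {
            // the field we wanted was already loaded; skip everything that follows
            return Status.STOP;
        }
        if ("_id".equals(fieldInfo.name)) {
            seenId = true;
            // load this field's value (the stringField/binaryField callbacks would receive it)
            return Status.YES;
        }
        // not interesting, but keep scanning for "_id"
        return Status.NO;
    }
};
searcher.doc(topDocs.scoreDocs[0].doc, idOnlyVisitor);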

Example 2 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class RangeFieldQuery method createWeight.

@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        final RangeFieldComparator target = new RangeFieldComparator();

        private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
            DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
            values.intersect(new IntersectVisitor() {

                DocIdSetBuilder.BulkAdder adder;

                @Override
                public void grow(int count) {
                    adder = result.grow(count);
                }

                @Override
                public void visit(int docID) throws IOException {
                    adder.add(docID);
                }

                @Override
                public void visit(int docID, byte[] leaf) throws IOException {
                    if (target.matches(leaf)) {
                        adder.add(docID);
                    }
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return compareRange(minPackedValue, maxPackedValue);
                }
            });
            return result.build();
        }

        private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
            byte[] node = getInternalRange(minPackedValue, maxPackedValue);
            // compute range relation for BKD traversal
            if (target.intersects(node) == false) {
                return Relation.CELL_OUTSIDE_QUERY;
            } else if (target.within(node)) {
                // target within cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
            } else if (target.contains(node)) {
                // target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
                return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
            }
            // target intersects cell; continue traversing:
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader reader = context.reader();
            PointValues values = reader.getPointValues(field);
            if (values == null) {
                // no docs in this segment indexed any ranges
                return null;
            }
            FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
            if (fieldInfo == null) {
                // no docs in this segment indexed this field
                return null;
            }
            checkFieldInfo(fieldInfo);
            boolean allDocsMatch = false;
            if (values.getDocCount() == reader.maxDoc() && compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
                allDocsMatch = true;
            }
            DocIdSetIterator iterator = allDocsMatch == true ? DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
            return new ConstantScoreScorer(this, score(), iterator);
        }

        /** get an encoded byte representation of the internal node; this is
         *  the lower half of the min array and the upper half of the max array */
        private byte[] getInternalRange(byte[] min, byte[] max) {
            byte[] range = new byte[min.length];
            final int dimSize = numDims * bytesPerDim;
            System.arraycopy(min, 0, range, 0, dimSize);
            System.arraycopy(max, dimSize, range, dimSize, dimSize);
            return range;
        }
    };
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) LeafReader(org.apache.lucene.index.LeafReader) IOException(java.io.IOException) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) FieldInfo(org.apache.lucene.index.FieldInfo)
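
The anonymous IntersectVisitor above is the standard pattern for walking the BKD tree: compare(...) decides whether a whole cell can be pruned, bulk-accepted, or must be descended, while the two visit(...) overloads receive doc IDs from fully-inside and crossing cells respectively. A hedged, simplified sketch of the same three callbacks, counting 1-dimensional IntPoint values in [lower, upper] (the field name "price", the bounds, and the surrounding reader variable are illustrative; assume this runs in a method that may throw IOException):

final int lower = 10, upper = 100;
final int[] count = new int[1];
PointValues points = reader.getPointValues("price");
if (points != null) {
    points.intersect(new PointValues.IntersectVisitor() {

        @Override
        public void visit(int docID) {
            // the whole cell is inside the range: accept without decoding the value
            count[0]++;
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
            // the cell crosses the range: decode and check each value individually
            int value = IntPoint.decodeDimension(packedValue, 0);
            if (value >= lower && value <= upper) {
                count[0]++;
            }
        }

        @Override
        public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            int min = IntPoint.decodeDimension(minPackedValue, 0);
            int max = IntPoint.decodeDimension(maxPackedValue, 0);
            if (max < lower || min > upper) {
                return PointValues.Relation.CELL_OUTSIDE_QUERY;   // prune this subtree
            }
            if (min >= lower && max <= upper) {
                return PointValues.Relation.CELL_INSIDE_QUERY;    // visit(docID) for every doc below
            }
            return PointValues.Relation.CELL_CROSSES_QUERY;       // visit(docID, value) for every doc below
        }
    });
}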

Example 3 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SimpleTextFieldInfosFormat method write.

@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
    IndexOutput out = directory.createOutput(fileName, context);
    BytesRefBuilder scratch = new BytesRefBuilder();
    boolean success = false;
    try {
        SimpleTextUtil.write(out, NUMFIELDS);
        SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
        SimpleTextUtil.writeNewline(out);
        for (FieldInfo fi : infos) {
            SimpleTextUtil.write(out, NAME);
            SimpleTextUtil.write(out, fi.name, scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, NUMBER);
            SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, INDEXOPTIONS);
            IndexOptions indexOptions = fi.getIndexOptions();
            assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
            SimpleTextUtil.write(out, indexOptions.toString(), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, STORETV);
            SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, PAYLOADS);
            SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, NORMS);
            SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DOCVALUES);
            SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DOCVALUES_GEN);
            SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
            SimpleTextUtil.writeNewline(out);
            Map<String, String> atts = fi.attributes();
            int numAtts = atts == null ? 0 : atts.size();
            SimpleTextUtil.write(out, NUM_ATTS);
            SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
            SimpleTextUtil.writeNewline(out);
            if (numAtts > 0) {
                for (Map.Entry<String, String> entry : atts.entrySet()) {
                    SimpleTextUtil.write(out, ATT_KEY);
                    SimpleTextUtil.write(out, entry.getKey(), scratch);
                    SimpleTextUtil.writeNewline(out);
                    SimpleTextUtil.write(out, ATT_VALUE);
                    SimpleTextUtil.write(out, entry.getValue(), scratch);
                    SimpleTextUtil.writeNewline(out);
                }
            }
            SimpleTextUtil.write(out, DIM_COUNT);
            SimpleTextUtil.write(out, Integer.toString(fi.getPointDimensionCount()), scratch);
            SimpleTextUtil.writeNewline(out);
            SimpleTextUtil.write(out, DIM_NUM_BYTES);
            SimpleTextUtil.write(out, Integer.toString(fi.getPointNumBytes()), scratch);
            SimpleTextUtil.writeNewline(out);
        }
        SimpleTextUtil.writeChecksum(out, scratch);
        success = true;
    } finally {
        if (success) {
            out.close();
        } else {
            IOUtils.closeWhileHandlingException(out);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IndexOptions(org.apache.lucene.index.IndexOptions) IndexOutput(org.apache.lucene.store.IndexOutput) HashMap(java.util.HashMap) Map(java.util.Map) FieldInfo(org.apache.lucene.index.FieldInfo)
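
For comparison, every attribute that write(...) serializes here is also exposed through FieldInfo accessors at search time. A minimal sketch (not part of the codec) that dumps the same metadata from an already-open LeafReader named reader:

for (FieldInfo fi : reader.getFieldInfos()) {
    System.out.println(fi.name + " (#" + fi.number + ")");
    System.out.println("  index options: " + fi.getIndexOptions());
    System.out.println("  term vectors : " + fi.hasVectors());
    System.out.println("  payloads     : " + fi.hasPayloads());
    System.out.println("  norms        : " + !fi.omitsNorms());
    System.out.println("  doc values   : " + fi.getDocValuesType() + " (gen=" + fi.getDocValuesGen() + ")");
    System.out.println("  points       : " + fi.getPointDimensionCount() + " dims x " + fi.getPointNumBytes() + " bytes/dim");
    System.out.println("  attributes   : " + fi.attributes());
}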

Example 4 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SimpleTextFieldInfosFormat method read.

@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
    ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
    BytesRefBuilder scratch = new BytesRefBuilder();
    boolean success = false;
    try {
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
        final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
        FieldInfo[] infos = new FieldInfo[size];
        for (int i = 0; i < size; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NAME);
            String name = readString(NAME.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUMBER);
            int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
            String s = readString(INDEXOPTIONS.length, scratch);
            final IndexOptions indexOptions = IndexOptions.valueOf(s);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), STORETV);
            boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), PAYLOADS);
            boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NORMS);
            boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES);
            String dvType = readString(DOCVALUES.length, scratch);
            final DocValuesType docValuesType = docValuesType(dvType);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
            final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
            int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
            Map<String, String> atts = new HashMap<>();
            for (int j = 0; j < numAtts; j++) {
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_KEY);
                String key = readString(ATT_KEY.length, scratch);
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
                String value = readString(ATT_VALUE.length, scratch);
                atts.put(key, value);
            }
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_COUNT);
            int dimensionalCount = Integer.parseInt(readString(DIM_COUNT.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_NUM_BYTES);
            int dimensionalNumBytes = Integer.parseInt(readString(DIM_NUM_BYTES.length, scratch));
            infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(atts), dimensionalCount, dimensionalNumBytes);
        }
        SimpleTextUtil.checkFooter(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    } finally {
        if (success) {
            input.close();
        } else {
            IOUtils.closeWhileHandlingException(input);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexOptions(org.apache.lucene.index.IndexOptions) HashMap(java.util.HashMap) FieldInfos(org.apache.lucene.index.FieldInfos) DocValuesType(org.apache.lucene.index.DocValuesType) FieldInfo(org.apache.lucene.index.FieldInfo)
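
Once read(...) has rebuilt the FieldInfos, callers typically look a field up by name (as RangeFieldQuery.scorer does in Example 2) or by field number. A hedged usage sketch, assuming format, directory and segmentInfo are the same objects used at write time and that "title" is only an illustrative field name:

FieldInfos infos = format.read(directory, segmentInfo, "", IOContext.READONCE);
FieldInfo byName = infos.fieldInfo("title");              // null if no document in this segment used "title"
if (byName != null) {
    FieldInfo byNumber = infos.fieldInfo(byName.number);  // the same FieldInfo, keyed by its field number
    assert byName == byNumber;
}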

Example 5 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class DocValuesConsumer method mergeSortedSetField.

/**
   * Merges the sortedset docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedSetField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents.
   */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
    List<SortedSetDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedSetDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
            if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                values = docValuesProducer.getSortedSet(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySortedSet();
        }
        toMerge.add(values);
    }
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
        SortedSetDocValues dv = toMerge.get(sub);
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {

        @Override
        public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
            if (fieldInfo != mergeFieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedSetDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedSetDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
                        values = docValuesProducer.getSortedSet(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySortedSet();
                }
                cost += values.cost();
                subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            final long finalCost = cost;
            return new SortedSetDocValues() {

                private int docID = -1;

                private SortedSetDocValuesSub currentSub;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    currentSub = docIDMerger.next();
                    if (currentSub == null) {
                        docID = NO_MORE_DOCS;
                    } else {
                        docID = currentSub.mappedDocID;
                    }
                    return docID;
                }

                @Override
                public int advance(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long nextOrd() throws IOException {
                    long subOrd = currentSub.values.nextOrd();
                    if (subOrd == NO_MORE_ORDS) {
                        return NO_MORE_ORDS;
                    }
                    return currentSub.map.get(subOrd);
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public BytesRef lookupOrd(long ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    long segmentOrd = map.getFirstSegmentOrd(ord);
                    return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
                }

                @Override
                public long getValueCount() {
                    return map.getValueCount();
                }
            };
        }
    });
}
Also used : ArrayList(java.util.ArrayList) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) LongBitSet(org.apache.lucene.util.LongBitSet) IOException(java.io.IOException) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermsEnum(org.apache.lucene.index.TermsEnum) FilteredTermsEnum(org.apache.lucene.index.FilteredTermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DocIDMerger(org.apache.lucene.index.DocIDMerger) Bits(org.apache.lucene.util.Bits) ArrayList(java.util.ArrayList) List(java.util.List) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)
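
The SortedSetDocValues built above is a forward-only, doc-at-a-time iterator. A minimal sketch of how a codec implementation would typically drain it after calling getSortedSet (the producer variable stands in for the EmptyDocValuesProducer passed to addSortedSetField and is illustrative; assume a method that may throw IOException):

SortedSetDocValues merged = producer.getSortedSet(mergeFieldInfo);
for (int doc = merged.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = merged.nextDoc()) {
    long ord;
    // drain this document's ordinals, mapping each global ord back to its term
    while ((ord = merged.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        BytesRef term = merged.lookupOrd(ord);
        // ... encode (doc, term) into the codec's on-disk representation
    }
}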

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo) 53
BytesRef (org.apache.lucene.util.BytesRef) 13
LeafReader (org.apache.lucene.index.LeafReader) 12
ArrayList (java.util.ArrayList) 10
Terms (org.apache.lucene.index.Terms) 9
TermsEnum (org.apache.lucene.index.TermsEnum) 9
IOException (java.io.IOException) 8
FieldInfos (org.apache.lucene.index.FieldInfos) 8
HashMap (java.util.HashMap) 7
LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 7
DocValuesType (org.apache.lucene.index.DocValuesType) 6
PointValues (org.apache.lucene.index.PointValues) 6
IndexOutput (org.apache.lucene.store.IndexOutput) 6
CorruptIndexException (org.apache.lucene.index.CorruptIndexException) 5
SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues) 5
StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor) 5
Map (java.util.Map) 4
Document (org.apache.lucene.document.Document) 4
EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer) 4
IndexReader (org.apache.lucene.index.IndexReader) 4