Search in sources :

Example 6 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class FastTaxonomyFacetCounts method countAll.

/**
 * Tallies category ordinals for every live document of every leaf of {@code reader}
 * into {@code values}, then invokes {@code rollup()}.
 *
 * <p>Each document's binary value is read as a sequence of variable-length integers:
 * a byte with the high bit set contributes its low 7 bits and continues the current
 * number, while a non-negative byte terminates it. Every decoded number is a delta
 * that is added to the previously decoded ordinal of the same document.
 *
 * @param reader the reader whose leaves are scanned
 * @throws IOException if reading the doc values fails
 */
private final void countAll(IndexReader reader) throws IOException {
    for (LeafReaderContext leaf : reader.leaves()) {
        final BinaryDocValues ordinalsDV = leaf.reader().getBinaryDocValues(indexFieldName);
        if (ordinalsDV == null) {
            // No doc values for the requested category list in this leaf: nothing to count.
            continue;
        }
        final Bits live = leaf.reader().getLiveDocs();
        int docId;
        while ((docId = ordinalsDV.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // A null Bits means the leaf has no deletions; otherwise skip deleted documents.
            if (live != null && !live.get(docId)) {
                continue;
            }
            final BytesRef encoded = ordinalsDV.binaryValue();
            final byte[] buf = encoded.bytes;
            final int limit = encoded.offset + encoded.length;
            // Bits of the number currently being assembled.
            int pending = 0;
            // Last fully decoded ordinal of this document (base for the next delta).
            int lastOrd = 0;
            for (int pos = encoded.offset; pos < limit; pos++) {
                final byte b = buf[pos];
                if (b < 0) {
                    // Continuation byte: append its 7 payload bits and keep reading.
                    pending = (pending << 7) | (b & 0x7F);
                    continue;
                }
                // Terminating byte: finish the delta and turn it into an absolute ordinal.
                lastOrd = ((pending << 7) | b) + lastOrd;
                ++values[lastOrd];
                pending = 0;
            }
        }
    }
    rollup();
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Example 7 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class JoinDocFreqValueSource method getValues.

/**
 * Builds per-document values for one leaf: for each document, the binary doc value of
 * {@code field} is looked up as a term of {@code qfield} in the top-level reader and
 * that term's document frequency is returned (0 when the document has no value or the
 * term does not exist).
 *
 * <p>The returned object must be queried with non-decreasing doc ids.
 *
 * @param context       unused by this implementation
 * @param readerContext the leaf whose documents will be evaluated
 * @throws IOException if the doc values or terms cannot be read
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final BinaryDocValues fromValues = DocValues.getBinary(readerContext.reader(), field);
    final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    final Terms joinTerms = MultiFields.getTerms(topReader, qfield);
    final TermsEnum lookup;
    if (joinTerms != null) {
        lookup = joinTerms.iterator();
    } else {
        // The join field has no terms at all: every lookup will miss.
        lookup = TermsEnum.EMPTY;
    }
    return new IntDocValues(this) {

        // Highest doc id requested so far; used to enforce in-order access.
        int lastDocID = -1;

        @Override
        public int intVal(int doc) throws IOException {
            if (doc < lastDocID) {
                throw new IllegalArgumentException("docs were sent out-of-order: lastDocID=" + lastDocID + " vs docID=" + doc);
            }
            lastDocID = doc;

            // Only move the iterator forward when it is still behind the requested doc.
            int position = fromValues.docID();
            if (position < doc) {
                position = fromValues.advance(doc);
            }
            if (position != doc) {
                // The iterator is not on this document, so it has no value.
                return 0;
            }
            final BytesRef joinKey = fromValues.binaryValue();
            return lookup.seekExact(joinKey) ? lookup.docFreq() : 0;
        }
    };
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) Terms(org.apache.lucene.index.Terms) IntDocValues(org.apache.lucene.queries.function.docvalues.IntDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 8 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestOrdinalMappingLeafReader method verifyResults.

/**
 * Opens the given index and taxonomy directories and asserts that facet counts and
 * binary doc values have the expected contents.
 *
 * <p>Checks performed: every top "tag" child has a count of {@code NUM_DOCS}; the "id"
 * dimension has {@code NUM_DOCS} children and a total value of {@code NUM_DOCS * 2};
 * and, for each document, the integer stored in "cbdv" is twice the one stored in "bdv".
 *
 * @param indexDir directory holding the search index
 * @param taxoDir  directory holding the taxonomy index
 * @throws IOException if either index cannot be read
 */
private void verifyResults(Directory indexDir, Directory taxoDir) throws IOException {
    DirectoryReader reader = DirectoryReader.open(indexDir);
    DirectoryTaxonomyReader taxonomy = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher indexSearcher = newSearcher(reader);
    FacetsCollector facetsCollector = new FacetsCollector();
    FacetsCollector.search(indexSearcher, new MatchAllDocsQuery(), 10, facetsCollector);

    // Tag facets: each returned label must have been counted once per document.
    Facets tagCounts = new FastTaxonomyFacetCounts("$tags", taxonomy, facetConfig, facetsCollector);
    FacetResult tagResult = tagCounts.getTopChildren(10, "tag");
    for (LabelAndValue entry : tagResult.labelValues) {
        if (VERBOSE) {
            System.out.println(entry);
        }
        assertEquals(NUM_DOCS, entry.value.intValue());
    }

    // Id facets: one child per document, and each "id" appears twice.
    Facets idCounts = new FastTaxonomyFacetCounts(taxonomy, facetConfig, facetsCollector);
    FacetResult idResult = idCounts.getTopChildren(10, "id");
    assertEquals(NUM_DOCS, idResult.childCount);
    assertEquals(NUM_DOCS * 2, idResult.value);

    // Binary doc values: every doc id must have both values, with cbdv == 2 * bdv.
    BinaryDocValues plain = MultiDocValues.getBinaryValues(reader, "bdv");
    BinaryDocValues doubled = MultiDocValues.getBinaryValues(reader, "cbdv");
    int docId = 0;
    while (docId < reader.maxDoc()) {
        assertEquals(docId, plain.nextDoc());
        assertEquals(docId, doubled.nextDoc());
        final int doubledValue = Integer.parseInt(doubled.binaryValue().utf8ToString());
        final int plainValue = Integer.parseInt(plain.binaryValue().utf8ToString());
        assertEquals(doubledValue, plainValue * 2);
        docId++;
    }
    IOUtils.close(reader, taxonomy);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetResult(org.apache.lucene.facet.FacetResult) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LabelAndValue(org.apache.lucene.facet.LabelAndValue) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 9 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class AnalyzingInfixSuggester method createResults.

/**
   * Builds the list of lookup results from the search hits.
   * Subclasses may override this to add particular behavior (e.g. weight transformation).
   * Note that there is no prefix token (the {@code prefixToken} argument will
   * be null) whenever the final token in the incoming request was in fact finished
   * (had trailing characters, such as white-space).
   *
   * <p>For every hit, the suggestion text, the optional payload and the optional set of
   * contexts are read from doc values; the score is taken from the hit's first sort field.
   *
   * @throws IOException If there are problems reading fields from the underlying Lucene index.
   */
protected List<LookupResult> createResults(IndexSearcher searcher, TopFieldDocs hits, int num, CharSequence charSequence, boolean doHighlight, Set<String> matchedTokens, String prefixToken) throws IOException {
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final List<LookupResult> results = new ArrayList<>();
    for (int hitIndex = 0; hitIndex < hits.scoreDocs.length; hitIndex++) {
        final FieldDoc hit = (FieldDoc) hits.scoreDocs[hitIndex];
        final int docId = hit.doc;

        // A fresh doc-values view is obtained for every hit before advancing to it.
        final BinaryDocValues textValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
        textValues.advance(docId);
        final String text = textValues.binaryValue().utf8ToString();
        final long score = (Long) hit.fields[0];

        // This will just be null if app didn't pass payloads to build():
        // TODO: maybe just stored fields?  they compress...
        final BinaryDocValues payloadValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
        BytesRef payload = null;
        if (payloadValues != null) {
            // Payloads exist in the index: copy this doc's payload, or use an empty one if it has none.
            payload = payloadValues.advance(docId) == docId
                ? BytesRef.deepCopyOf(payloadValues.binaryValue())
                : new BytesRef(BytesRef.EMPTY_BYTES);
        }

        // Must look up sorted-set by segment:
        final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(docId, leaves));
        final SortedSetDocValues contextValues = leaf.reader().getSortedSetDocValues(CONTEXTS_FIELD_NAME);
        Set<BytesRef> contexts = null;
        if (contextValues != null) {
            contexts = new HashSet<BytesRef>();
            // Translate the top-level doc id into the segment-local one.
            final int localDoc = docId - leaf.docBase;
            if (contextValues.advance(localDoc) == localDoc) {
                for (long ord = contextValues.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = contextValues.nextOrd()) {
                    contexts.add(BytesRef.deepCopyOf(contextValues.lookupOrd(ord)));
                }
            }
        }

        if (doHighlight) {
            results.add(new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload, contexts));
        } else {
            results.add(new LookupResult(text, score, payload, contexts));
        }
    }
    return results;
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef)

Example 10 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class SolrDocumentFetcher method decorateDocValueFields.

/**
   * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument.
   *
   * <p>A field is skipped (with a warning) when it has no schema entry, has no docValues,
   * or is already present in {@code doc}. It is skipped silently when the searcher has no
   * {@code FieldInfo} for it or when the document has no value for it.
   *
   * @param doc
   *          A SolrDocument or SolrInputDocument instance where docValues will be added
   * @param docid
   *          The lucene docid of the document to be populated
   * @param fields
   *          The list of docValues fields to be decorated
   * @throws IOException
   *          If the doc values cannot be read from the index
   */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException {
    // Resolve the top-level doc id to its segment and segment-local id once, up front.
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            // Searcher doesn't have info about this field, hence ignore it.
            continue;
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch(dvType) {
            case NUMERIC:
                final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
                if (ndv == null) {
                    continue;
                }
                if (!ndv.advanceExact(localId)) {
                    // This document has no value for the field.
                    continue;
                }
                final Long val = ndv.longValue();
                // Default to the raw long; narrowed/decoded below according to the schema type.
                Object newVal = val;
                if (schemaField.getType().isPointField()) {
                    // TODO: Maybe merge PointField with TrieFields here
                    NumberType pointType = schemaField.getType().getNumberType();
                    switch(pointType) {
                        case INTEGER:
                            newVal = val.intValue();
                            break;
                        case LONG:
                            newVal = val.longValue();
                            break;
                        case FLOAT:
                            newVal = Float.intBitsToFloat(val.intValue());
                            break;
                        case DOUBLE:
                            newVal = Double.longBitsToDouble(val);
                            break;
                        case DATE:
                            newVal = new Date(val);
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + pointType);
                    }
                } else {
                    if (schemaField.getType() instanceof TrieIntField) {
                        newVal = val.intValue();
                    } else if (schemaField.getType() instanceof TrieFloatField) {
                        newVal = Float.intBitsToFloat(val.intValue());
                    } else if (schemaField.getType() instanceof TrieDoubleField) {
                        newVal = Double.longBitsToDouble(val);
                    } else if (schemaField.getType() instanceof TrieDateField) {
                        newVal = new Date(val);
                    } else if (schemaField.getType() instanceof EnumField) {
                        newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                    }
                }
                doc.addField(fieldName, newVal);
                break;
            case BINARY:
                BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
                if (bdv == null) {
                    continue;
                }
                if (!bdv.advanceExact(localId)) {
                    continue;
                }
                // Copy the bytes before handing them to the document.
                final BytesRef binaryValue = BytesRef.deepCopyOf(bdv.binaryValue());
                doc.addField(fieldName, binaryValue);
                break;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
                if (sdv == null) {
                    continue;
                }
                if (sdv.advanceExact(localId)) {
                    final BytesRef bRef = sdv.binaryValue();
                    // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                    if (schemaField.getType() instanceof BoolField) {
                        doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                    } else {
                        doc.addField(fieldName, bRef.utf8ToString());
                    }
                }
                break;
            case SORTED_NUMERIC:
                final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
                NumberType type = schemaField.getType().getNumberType();
                if (numericDv != null) {
                    if (numericDv.advance(localId) == localId) {
                        final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                        for (int i = 0; i < numericDv.docValueCount(); i++) {
                            long number = numericDv.nextValue();
                            switch(type) {
                                case INTEGER:
                                    outValues.add((int) number);
                                    break;
                                case LONG:
                                    outValues.add(number);
                                    break;
                                case FLOAT:
                                    outValues.add(NumericUtils.sortableIntToFloat((int) number));
                                    break;
                                case DOUBLE:
                                    outValues.add(NumericUtils.sortableLongToDouble(number));
                                    break;
                                case DATE:
                                    outValues.add(new Date(number));
                                    break;
                                default:
                                    throw new AssertionError("Unexpected PointType: " + type);
                            }
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                // Must not fall through: the SORTED_SET branch would run a sorted-set lookup on a
                // sorted-numeric field and could add the field a second time.
                break;
            case SORTED_SET:
                final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
                if (values != null && values.getValueCount() > 0) {
                    if (values.advance(localId) == localId) {
                        final List<Object> outValues = new LinkedList<>();
                        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
                            final BytesRef setValue = values.lookupOrd(ord);
                            outValues.add(schemaField.getType().toObject(schemaField, setValue));
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break;
            case NONE:
                break;
        }
    }
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) TrieIntField(org.apache.solr.schema.TrieIntField) ArrayList(java.util.ArrayList) TrieDateField(org.apache.solr.schema.TrieDateField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocValuesType(org.apache.lucene.index.DocValuesType) TrieFloatField(org.apache.solr.schema.TrieFloatField) BytesRef(org.apache.lucene.util.BytesRef) TrieDoubleField(org.apache.solr.schema.TrieDoubleField) EnumField(org.apache.solr.schema.EnumField) BoolField(org.apache.solr.schema.BoolField) LeafReader(org.apache.lucene.index.LeafReader) Date(java.util.Date) SortedDocValues(org.apache.lucene.index.SortedDocValues) LinkedList(java.util.LinkedList) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

BinaryDocValues (org.apache.lucene.index.BinaryDocValues)37 BytesRef (org.apache.lucene.util.BytesRef)29 Document (org.apache.lucene.document.Document)13 LeafReader (org.apache.lucene.index.LeafReader)12 SortedDocValues (org.apache.lucene.index.SortedDocValues)12 NumericDocValues (org.apache.lucene.index.NumericDocValues)11 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)11 Directory (org.apache.lucene.store.Directory)10 ArrayList (java.util.ArrayList)9 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)9 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)7 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)6 Bits (org.apache.lucene.util.Bits)6 IOException (java.io.IOException)5 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)5 IndexReader (org.apache.lucene.index.IndexReader)5 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)5