Search in sources:

Example 1 with StoredFieldVisitor

use of org.apache.lucene.index.StoredFieldVisitor in project elasticsearch by elastic.

the class SimpleLuceneTests method testOrdering.

/**
 * Verifies that stored fields are handed to a {@link StoredFieldVisitor} in the order in which
 * they were added to the document, and not in the lexicographic order of their names. This
 * means that heavily accessed fields that are read through a field selector should be added
 * first, so that a visitor can load them and break off early.
 *
 * @throws Exception if the index cannot be written, opened or searched
 */
public void testOrdering() throws Exception {
    // try-with-resources closes the reader, the writer and the directory (in that order) even
    // when an assertion fails; previously the reader and the directory were never closed and
    // the writer was only closed on the success path.
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        // '#' sorts before '_', so insertion order ("_id", "#id") differs from name order.
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new TextField("#id", "1", Field.Store.YES));
        indexWriter.addDocument(document);
        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            final ArrayList<String> fieldsOrder = new ArrayList<>();
            // Record the name of every stored field in the order the visitor is asked about it.
            searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {

                @Override
                public Status needsField(FieldInfo fieldInfo) throws IOException {
                    fieldsOrder.add(fieldInfo.name);
                    return Status.YES;
                }
            });
            assertThat(fieldsOrder.size(), equalTo(2));
            assertThat(fieldsOrder.get(0), equalTo("_id"));
            assertThat(fieldsOrder.get(1), equalTo("#id"));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) FieldInfo(org.apache.lucene.index.FieldInfo) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with StoredFieldVisitor

use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.

the class SolrDocumentFetcher method visitFromCached.

/**
 * Replays the fields of a document taken from the document cache through a stored field
 * visitor.
 * <p>
 * For every field the visitor is first asked via {@code needsField}: {@code STOP} ends the
 * whole traversal, {@code NO} skips the field. Accepted fields are dispatched by the kind of
 * value they carry: binary, then numeric, and finally string.
 *
 * @param document the cached document whose fields are visited
 * @param visitor  the visitor receiving the field values
 * @throws IOException declared by the visitor callbacks used here
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
    for (IndexableField field : document) {
        final FieldInfo fieldInfo = searcher.getFieldInfos().fieldInfo(field.name());
        final StoredFieldVisitor.Status status = visitor.needsField(fieldInfo);
        if (status == StoredFieldVisitor.Status.STOP) {
            // the visitor wants nothing further from this document
            return;
        }
        if (status == StoredFieldVisitor.Status.NO) {
            continue;
        }

        final BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            visitor.binaryField(fieldInfo, toByteArrayUnwrapIfPossible(bytes));
        } else {
            final Number number = field.numericValue();
            if (number == null) {
                // neither binary nor numeric: must be String
                if (field instanceof LargeLazyField) {
                    // optimization to avoid premature string conversion
                    final LargeLazyField lazy = (LargeLazyField) field;
                    visitor.stringField(fieldInfo, toByteArrayUnwrapIfPossible(lazy.readBytes()));
                } else {
                    visitor.stringField(fieldInfo, field.stringValue().getBytes(StandardCharsets.UTF_8));
                }
            } else if (number instanceof Integer) {
                visitor.intField(fieldInfo, number.intValue());
            } else if (number instanceof Long) {
                visitor.longField(fieldInfo, number.longValue());
            } else if (number instanceof Float) {
                visitor.floatField(fieldInfo, number.floatValue());
            } else if (number instanceof Double) {
                visitor.doubleField(fieldInfo, number.doubleValue());
            } else {
                // only the four numeric types above are handled
                throw new AssertionError();
            }
        }
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) DocumentStoredFieldVisitor(org.apache.lucene.document.DocumentStoredFieldVisitor) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with StoredFieldVisitor

use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.

the class DocNameExtractor method docName.

/**
 * Reads the name of a document from its stored fields.
 * <p>
 * Only the field called {@code docNameField} is loaded; once one string value has been
 * collected the visitor asks the reader to stop.
 *
 * @param searcher access to the index.
 * @param docid ID of the doc whose name is needed.
 * @return the first collected string value of the name field, or {@code null} if none was
 *         collected.
 * @throws IOException if the doc name cannot be extracted from the index.
 */
public String docName(IndexSearcher searcher, int docid) throws IOException {
    final List<String> found = new ArrayList<>();
    final StoredFieldVisitor nameCollector = new StoredFieldVisitor() {

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            if (found.isEmpty()) {
                return fieldInfo.name.equals(docNameField) ? Status.YES : Status.NO;
            }
            // a value has already been collected, nothing more to read
            return Status.STOP;
        }

        @Override
        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
            found.add(new String(bytes, StandardCharsets.UTF_8));
        }
    };
    searcher.getIndexReader().document(docid, nameCollector);
    return found.isEmpty() ? null : found.get(0);
}
Also used : StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) ArrayList(java.util.ArrayList) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 4 with StoredFieldVisitor

use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.

the class FieldOptions method process.

/**
 * Adds a term-vector section to the response of the given request.
 * <p>
 * Does nothing unless the boolean request parameter named by {@code COMPONENT_NAME} is true.
 * Otherwise a {@code NamedList} is registered on the response under {@code TERM_VECTORS} and,
 * for each document, an entry is built and passed to {@code mapOneVector} once per field.
 * The documents are taken from the {@code TermVectorParams.DOC_IDS} parameter when it yields a
 * non-empty list, and from the doc list of {@code rb.getResults()} otherwise.
 *
 * @param rb the response builder giving access to the request, its parameters and the response
 * @throws IOException declared by the signature; presumably raised by index access - TODO confirm
 * @throws SolrException with {@code BAD_REQUEST} when a requested field is not defined in the schema
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    // The component is opt-in: bail out unless it was explicitly switched on.
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }
    // Register the (still empty) result container first; everything below fills it in place.
    NamedList<Object> termVectors = new NamedList<>();
    rb.rsp.add(TERM_VECTORS, termVectors);
    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    // Stays null when the schema has no uniqueKey field.
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
    }
    // Request-wide defaults; also used as the options when all fields are extracted.
    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.payloads = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }
    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<>();
    NamedList<List<String>> warnings = new NamedList<>();
    // One list per warning category; each collects the names of the offending fields.
    List<String> noTV = new ArrayList<>();
    List<String> noPos = new ArrayList<>();
    List<String> noOff = new ArrayList<>();
    List<String> noPay = new ArrayList<>();
    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {
            // workaround SOLR-3523
            if (null == field || "score".equals(field))
                continue;
            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    // Reuse the options object if this field name was already seen.
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    // (each per-field parameter falls back to the request-wide value)
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    // (a requested feature the schema field does not store only produces a
                    // warning; the option itself stays switched on)
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                    option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
                    if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey) {
                        noPay.add(field);
                    }
                } else {
                    //field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    }
    // NOTE(review): the comment that used to sit here was truncated to its last line
    // ("... to be changed to account for that."); its full meaning cannot be recovered from
    // this method alone.
    // Attach only the warning categories that actually collected fields.
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
    }
    if (!noPay.isEmpty()) {
        warnings.add("noPayloads", noPay);
    }
    if (warnings.size() > 0) {
        termVectors.add(TV_KEY_WARNINGS, warnings);
    }
    DocListAndSet listAndSet = rb.getResults();
    // Explicitly requested doc ids take precedence over the documents of the result list.
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexReader reader = searcher.getIndexReader();
    // NOTE(review): the previous comment here described a "TVMapper", which does not appear
    // anywhere in this method. The visitor below accepts only the uniqueKey field of a
    // document and collects its values as strings.
    final String finalUniqFieldName = uniqFieldName;
    final List<String> uniqValues = new ArrayList<>();
    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {

        @Override
        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
            uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };
    while (iter.hasNext()) {
        Integer docId = iter.next();
        // Per-document container that mapOneVector fills further down.
        NamedList<Object> docNL = new NamedList<>();
        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            // NOTE(review): when no uniqueKey value was collected for this document, docNL is
            // never added to termVectors, although the vectors are still mapped into it below.
            if (uniqValues.size() != 0) {
                // Only the first collected value is used; the shared list is reset for the
                // next document.
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }
        if (null != fields) {
            // Specific fields were requested: use the per-field options collected above.
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    TermsEnum termsEnum = vector.iterator();
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            // (every field is mapped with the request-wide options)
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    TermsEnum termsEnum = terms.iterator();
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) DocListAndSet(org.apache.solr.search.DocListAndSet) ArrayList(java.util.ArrayList) TermsEnum(org.apache.lucene.index.TermsEnum) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) DocList(org.apache.solr.search.DocList) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) SolrException(org.apache.solr.common.SolrException) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SchemaField(org.apache.solr.schema.SchemaField) SolrReturnFields(org.apache.solr.search.SolrReturnFields) Fields(org.apache.lucene.index.Fields) ReturnFields(org.apache.solr.search.ReturnFields) IndexReader(org.apache.lucene.index.IndexReader) SolrParams(org.apache.solr.common.params.SolrParams) IndexSchema(org.apache.solr.schema.IndexSchema) HashMap(java.util.HashMap) Map(java.util.Map) DocList(org.apache.solr.search.DocList) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 5 with StoredFieldVisitor

use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.

the class BaseFragmentsBuilder method getFields.

/**
 * Loads the stored string values of a single field of a document and wraps each of them in
 * a new {@link Field}.
 * <p>
 * Every returned field uses a copy of {@code TextField.TYPE_STORED} whose term-vector flag
 * mirrors {@code FieldInfo.hasVectors()} of the source field.
 *
 * @param reader    the reader the document is loaded from
 * @param docId     the id of the document to read
 * @param fieldName the name of the only field to load
 * @return one {@link Field} per stored string value, possibly an empty array
 * @throws IOException declared by {@code IndexReader.document}
 */
protected Field[] getFields(IndexReader reader, int docId, final String fieldName) throws IOException {
    // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
    final List<Field> collected = new ArrayList<>();
    final StoredFieldVisitor collector = new StoredFieldVisitor() {

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            // load nothing but the requested field
            if (fieldInfo.name.equals(fieldName)) {
                return Status.YES;
            }
            return Status.NO;
        }

        @Override
        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
            final FieldType type = new FieldType(TextField.TYPE_STORED);
            type.setStoreTermVectors(fieldInfo.hasVectors());
            final String text = new String(bytes, StandardCharsets.UTF_8);
            collected.add(new Field(fieldInfo.name, text, type));
        }
    };
    reader.document(docId, collector);
    return collected.toArray(new Field[collected.size()]);
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) ArrayList(java.util.ArrayList) FieldInfo(org.apache.lucene.index.FieldInfo) FieldType(org.apache.lucene.document.FieldType)

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo)5 StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor)5 ArrayList (java.util.ArrayList)4 TextField (org.apache.lucene.document.TextField)2 IndexReader (org.apache.lucene.index.IndexReader)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Document (org.apache.lucene.document.Document)1 DocumentStoredFieldVisitor (org.apache.lucene.document.DocumentStoredFieldVisitor)1 Field (org.apache.lucene.document.Field)1 FieldType (org.apache.lucene.document.FieldType)1 Fields (org.apache.lucene.index.Fields)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 IndexableField (org.apache.lucene.index.IndexableField)1 Term (org.apache.lucene.index.Term)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1