Search in sources :

Example 46 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class TermVectorComponent method process.

/**
 * Adds term vector information for the request's documents to the response.
 *
 * <p>Does nothing unless the {@code COMPONENT_NAME} request parameter is true. Otherwise a
 * {@code TERM_VECTORS} list is added to {@code rb.rsp}; it receives an optional
 * {@code TV_KEY_WARNINGS} entry followed by one entry per document, keyed by the document's
 * uniqueKey value, or by {@code "doc-" + docId} when the schema has no uniqueKey field.
 *
 * <p>Documents are taken from the {@code TermVectorParams.DOC_IDS} parameter when it yields a
 * non-empty list, otherwise from the result doc list of {@code rb}.
 *
 * @param rb the response builder providing the request, the response and the search results
 * @throws IOException declared by this method; presumably raised by the index reader calls
 * @throws SolrException with {@code BAD_REQUEST} when a requested field is not defined in the schema
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    // The component is opt-in: bail out unless explicitly enabled for this request.
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }
    NamedList<Object> termVectors = new NamedList<>();
    rb.rsp.add(TERM_VECTORS, termVectors);
    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    // Stays null when the schema declares no uniqueKey field.
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
    }
    // Request-wide options; they double as the defaults for the per-field overrides below.
    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.payloads = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }
    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<>();
    // One list per warning category; only the non-empty ones are reported further down.
    NamedList<List<String>> warnings = new NamedList<>();
    List<String> noTV = new ArrayList<>();
    List<String> noPos = new ArrayList<>();
    List<String> noOff = new ArrayList<>();
    List<String> noPay = new ArrayList<>();
    // A null result is treated below as "extract every field that has term vectors".
    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {
            // workaround SOLR-3523
            if (null == field || "score".equals(field))
                continue;
            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    // The option stays enabled even when the schema does not store the data;
                    // the mismatch is only recorded as a warning.
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                    option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
                    if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey) {
                        noPay.add(field);
                    }
                } else {
                    //field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    }
    // NOTE(review): the comment below is the tail of a longer note whose beginning is not
    // present in this source; the code that follows publishes the non-empty warning lists.
    // to be changed to account for that.
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
    }
    if (!noPay.isEmpty()) {
        warnings.add("noPayloads", noPay);
    }
    if (warnings.size() > 0) {
        termVectors.add(TV_KEY_WARNINGS, warnings);
    }
    DocListAndSet listAndSet = rb.getResults();
    // Explicitly requested doc ids take precedence over the documents of the search result.
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    //Only load the id field to get the uniqueKey of that
    //field
    final String finalUniqFieldName = uniqFieldName;
    // Shared scratch list filled by the visitor; it is cleared after each document that
    // produced at least one value.
    final List<String> uniqValues = new ArrayList<>();
    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {

        // String, int and long stored values are captured in their string form.
        @Override
        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
            uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        // Restrict the visit to the uniqueKey field.
        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };
    while (iter.hasNext()) {
        Integer docId = iter.next();
        // Per-document container that mapOneVector is handed for each field's vector.
        NamedList<Object> docNL = new NamedList<>();
        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            // NOTE(review): when the visitor collected no value for this document, docNL is
            // never added to termVectors, so anything mapped into it below is not returned.
            if (uniqValues.size() != 0) {
                // Only the first collected value is used as the document key.
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }
        if (null != fields) {
            // Only the requested fields that passed the schema checks above are looked up.
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    TermsEnum termsEnum = vector.iterator();
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            // The request-wide options apply here because no per-field options were built.
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    TermsEnum termsEnum = terms.iterator();
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) DocListAndSet(org.apache.solr.search.DocListAndSet) ArrayList(java.util.ArrayList) TermsEnum(org.apache.lucene.index.TermsEnum) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) DocList(org.apache.solr.search.DocList) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) SolrException(org.apache.solr.common.SolrException) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SchemaField(org.apache.solr.schema.SchemaField) SolrReturnFields(org.apache.solr.search.SolrReturnFields) Fields(org.apache.lucene.index.Fields) ReturnFields(org.apache.solr.search.ReturnFields) IndexReader(org.apache.lucene.index.IndexReader) SolrParams(org.apache.solr.common.params.SolrParams) IndexSchema(org.apache.solr.schema.IndexSchema) HashMap(java.util.HashMap) Map(java.util.Map) DocList(org.apache.solr.search.DocList) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 47 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class BaseFragmentsBuilder method getFields.

/**
 * Collects the stored string values of one field of one document as {@link Field} instances.
 *
 * <p>Each value is wrapped in a field whose type is a copy of {@code TextField.TYPE_STORED}
 * with the term-vector flag taken from the visited {@link FieldInfo}.
 *
 * @param reader the reader the document is loaded from
 * @param docId the document to load
 * @param fieldName the only field whose stored values are visited
 * @return one {@link Field} per stored string value; empty when none was visited
 * @throws IOException declared by this method; presumably raised while loading the document
 */
protected Field[] getFields(IndexReader reader, int docId, final String fieldName) throws IOException {
    // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
    final List<Field> collected = new ArrayList<>();
    final StoredFieldVisitor visitor = new StoredFieldVisitor() {

        // Only the requested field is of interest; every other field is skipped.
        @Override
        public Status needsField(FieldInfo fieldInfo) {
            if (fieldInfo.name.equals(fieldName)) {
                return Status.YES;
            }
            return Status.NO;
        }

        @Override
        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
            FieldType storedType = new FieldType(TextField.TYPE_STORED);
            storedType.setStoreTermVectors(fieldInfo.hasVectors());
            String text = new String(bytes, StandardCharsets.UTF_8);
            collected.add(new Field(fieldInfo.name, text, storedType));
        }
    };
    reader.document(docId, visitor);
    Field[] result = new Field[collected.size()];
    return collected.toArray(result);
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) ArrayList(java.util.ArrayList) FieldInfo(org.apache.lucene.index.FieldInfo) FieldType(org.apache.lucene.document.FieldType)

Example 48 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class Geo3DPointOutsideDistanceComparator method getLeafComparator.

/**
 * Prepares this comparator for the given segment and returns itself as the leaf comparator.
 *
 * <p>When the segment has a {@link FieldInfo} for {@code field}, it is passed to
 * {@code Geo3DDocValuesField.checkCompatible} before the sorted numeric doc values are loaded
 * into {@code currentDocs}.
 *
 * @param context the leaf whose reader supplies the field infos and doc values
 * @return this comparator
 * @throws IOException declared by this method; presumably raised while loading the doc values
 */
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    final LeafReader leaf = context.reader();
    final FieldInfo fieldInfo = leaf.getFieldInfos().fieldInfo(field);
    // The compatibility check is only run when a FieldInfo was actually found.
    if (null != fieldInfo) {
        Geo3DDocValuesField.checkCompatible(fieldInfo);
    }
    currentDocs = DocValues.getSortedNumeric(leaf, field);
    return this;
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 49 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class LukeRequestHandler method getIndexedFieldsInfo.

/**
 * Builds a per-field summary of the index, with the fields in alphabetical order.
 *
 * <p>When the {@code CommonParams.FL} parameter is present it is split on commas and
 * whitespace; only the listed fields (or every field, if the list contains {@code "*"}) are
 * reported, and detailed info is added for each reported field that has terms.
 *
 * <p>Each reported field gets a map with its schema type name and schema flags, the dynamic
 * pattern it matches (if any), and, for fields that have terms and are indexed per the
 * schema, index flags from a live document and the number of documents containing the field.
 *
 * @param req the request supplying the searcher and the parameters
 * @return an ordered map from field name to that field's info map
 * @throws Exception declared by this method; the visible code does not narrow it further
 */
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req) throws Exception {
    SolrIndexSearcher searcher = req.getSearcher();
    SolrParams params = req.getParams();
    Set<String> fields = null;
    String fl = params.get(CommonParams.FL);
    if (fl != null) {
        fields = new TreeSet<>(Arrays.asList(fl.split("[,\\s]+")));
    }
    // Loop-invariant: whether the caller asked for every field via the "*" wildcard.
    final boolean allRequested = fields != null && fields.contains("*");
    LeafReader reader = searcher.getSlowAtomicReader();
    IndexSchema schema = searcher.getSchema();
    // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
    Set<String> fieldNames = new TreeSet<>();
    for (FieldInfo fieldInfo : reader.getFieldInfos()) {
        fieldNames.add(fieldInfo.name);
    }
    // Walk the term enum and keep a priority queue for each map in our set
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    for (String fieldName : fieldNames) {
        if (fields != null && !allRequested && !fields.contains(fieldName)) {
            // An fl list was given and it selects neither this field nor "*": skip it.
            continue;
        }
        SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();
        SchemaField sfield = schema.getFieldOrNull(fieldName);
        FieldType ftype = (sfield == null) ? null : sfield.getType();
        fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName());
        fieldMap.add("schema", getFieldFlags(sfield));
        if (sfield != null && schema.isDynamicField(sfield.getName())) {
            // Look the pattern up once instead of once for the check and once for the value.
            final String dynamicBase = schema.getDynamicPattern(sfield.getName());
            if (dynamicBase != null) {
                fieldMap.add("dynamicBase", dynamicBase);
            }
        }
        Terms terms = reader.fields().terms(fieldName);
        if (terms == null) {
            // Not indexed, so we need to report what we can (it made it through the fl param if specified)
            finfo.add(fieldName, fieldMap);
            continue;
        }
        if (sfield != null && sfield.indexed()) {
            if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS, true)) {
                Document doc = getFirstLiveDoc(terms, reader);
                if (doc != null) {
                    // Found a document with this field
                    try {
                        IndexableField fld = doc.getField(fieldName);
                        if (fld != null) {
                            fieldMap.add("index", getFieldFlags(fld));
                        } else {
                            // it is a non-stored field...
                            fieldMap.add("index", "(unstored field)");
                        }
                    } catch (Exception ex) {
                        // Best effort: keep going without the index flags, but log the cause
                        // so the failure can be diagnosed.
                        log.warn("error reading field: " + fieldName, ex);
                    }
                }
            }
            fieldMap.add("docs", terms.getDocCount());
        }
        // Any field that got past the fl filter above was selected by name or by "*",
        // so a non-null fl list is the only remaining condition for detailed info.
        if (fields != null) {
            getDetailedFieldInfo(req, fieldName, fieldMap);
        }
        // Add the field
        finfo.add(fieldName, fieldMap);
    }
    return finfo;
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Document(org.apache.lucene.document.Document) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) TreeSet(java.util.TreeSet) SolrParams(org.apache.solr.common.params.SolrParams) IndexSchema(org.apache.solr.schema.IndexSchema) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 50 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class Lucene53NormsProducer method readFields.

/**
 * Reads norms metadata entries from {@code meta} until a field number of -1 is read.
 *
 * <p>Each entry consists of a field number, a bytes-per-value byte (only 0, 1, 2, 4 and 8 are
 * accepted) and a long offset; accepted entries are stored in {@code norms} under the field's
 * number.
 *
 * @param meta the metadata input to read from
 * @param infos the field infos used to resolve the field numbers that were read
 * @throws CorruptIndexException if a field number is unknown, the field has no norms, or the
 *         bytes-per-value is not an accepted width
 * @throws IOException declared by this method; presumably raised by the reads on {@code meta}
 */
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
    for (int number = meta.readVInt(); number != -1; number = meta.readVInt()) {
        final FieldInfo fieldInfo = infos.fieldInfo(number);
        if (fieldInfo == null) {
            throw new CorruptIndexException("Invalid field number: " + number, meta);
        }
        if (!fieldInfo.hasNorms()) {
            throw new CorruptIndexException("Invalid field: " + fieldInfo.name, meta);
        }
        final NormsEntry normsEntry = new NormsEntry();
        normsEntry.bytesPerValue = meta.readByte();
        // Whitelist of accepted widths; anything else is rejected as corruption.
        final int width = normsEntry.bytesPerValue;
        final boolean supported = width == 0 || width == 1 || width == 2 || width == 4 || width == 8;
        if (!supported) {
            throw new CorruptIndexException("Invalid bytesPerValue: " + normsEntry.bytesPerValue + ", field: " + fieldInfo.name, meta);
        }
        normsEntry.offset = meta.readLong();
        norms.put(fieldInfo.number, normsEntry);
    }
}
Also used : CorruptIndexException(org.apache.lucene.index.CorruptIndexException) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo)53 BytesRef (org.apache.lucene.util.BytesRef)13 LeafReader (org.apache.lucene.index.LeafReader)12 ArrayList (java.util.ArrayList)10 Terms (org.apache.lucene.index.Terms)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 FieldInfos (org.apache.lucene.index.FieldInfos)8 HashMap (java.util.HashMap)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 DocValuesType (org.apache.lucene.index.DocValuesType)6 PointValues (org.apache.lucene.index.PointValues)6 IndexOutput (org.apache.lucene.store.IndexOutput)6 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)5 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)5 StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor)5 Map (java.util.Map)4 Document (org.apache.lucene.document.Document)4 EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer)4 IndexReader (org.apache.lucene.index.IndexReader)4